BaseVariableWidthViewVector.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.arrow.vector;
import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
import static org.apache.arrow.vector.util.DataSizeRoundingUtil.roundUpToMultipleOf16;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.OutOfMemoryException;
import org.apache.arrow.memory.ReusableBuffer;
import org.apache.arrow.memory.util.ArrowBufPointer;
import org.apache.arrow.memory.util.ByteFunctionHelpers;
import org.apache.arrow.memory.util.CommonUtil;
import org.apache.arrow.memory.util.hash.ArrowBufHasher;
import org.apache.arrow.util.Preconditions;
import org.apache.arrow.vector.compare.VectorVisitor;
import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.util.CallBack;
import org.apache.arrow.vector.util.OversizedAllocationException;
import org.apache.arrow.vector.util.TransferPair;
/**
* BaseVariableWidthViewVector is a base class providing functionality for strings/bytes types in
* view format.
*/
public abstract class BaseVariableWidthViewVector extends BaseValueVector
implements VariableWidthFieldVector {
// A single element of a view comprises 16 bytes
public static final int ELEMENT_SIZE = 16;
public static final int INITIAL_VIEW_VALUE_ALLOCATION = 4096;
private static final int INITIAL_BYTE_COUNT = INITIAL_VIEW_VALUE_ALLOCATION * ELEMENT_SIZE;
private static final int MAX_BUFFER_SIZE = (int) Math.min(MAX_ALLOCATION_SIZE, Integer.MAX_VALUE);
private int lastValueCapacity;
private long lastValueAllocationSizeInBytes;
/*
* Variable Width View Vector comprises the following format
*
* Short strings, length <= 12
* | Bytes 0-3 | Bytes 4-15 |
* |------------|---------------------------------------|
* | length | data (padded with 0) |
* |------------|---------------------------------------|
*
* Long strings, length > 12
* | Bytes 0-3 | Bytes 4-7 | Bytes 8-11 | Bytes 12-15 |
* |------------|------------|------------|-------------|
* | length | prefix | buf.index | offset |
* |------------|------------|------------|-------------|
*
* */
// 12 byte unsigned int to track inline views
public static final int INLINE_SIZE = 12;
// The first 4 bytes of view are allocated for length
public static final int LENGTH_WIDTH = 4;
// The second 4 bytes of view are allocated for prefix width
public static final int PREFIX_WIDTH = 4;
// The third 4 bytes of view are allocated for buffer index
public static final int BUF_INDEX_WIDTH = 4;
public static final byte[] EMPTY_BYTE_ARRAY = new byte[] {};
protected ArrowBuf validityBuffer;
// The view buffer is used to store the variable width view elements
protected ArrowBuf viewBuffer;
// The external buffer which stores the long strings
protected List<ArrowBuf> dataBuffers;
protected int initialDataBufferSize;
protected int valueCount;
protected int lastSet;
protected final Field field;
/**
* Constructs a new instance.
*
* @param field The field materialized by this vector
* @param allocator The allocator to use for creating/resizing buffers
*/
public BaseVariableWidthViewVector(Field field, final BufferAllocator allocator) {
super(allocator);
this.field = field;
lastValueAllocationSizeInBytes = INITIAL_BYTE_COUNT;
lastValueCapacity = INITIAL_VIEW_VALUE_ALLOCATION;
valueCount = 0;
lastSet = -1;
validityBuffer = allocator.getEmpty();
viewBuffer = allocator.getEmpty();
dataBuffers = new ArrayList<>();
}
@Override
public String getName() {
return field.getName();
}
/* TODO:
* see if getNullCount() can be made faster -- O(1)
*/
/* TODO:
* Once the entire hierarchy has been refactored, move common functions
* like getNullCount(), splitAndTransferValidityBuffer to top level
* base class BaseValueVector.
*
* Along with this, some class members (validityBuffer) can also be
* abstracted out to top level base class.
*
* Right now BaseValueVector is the top level base class for other
* vector types in ValueVector hierarchy (non-nullable) and those
* vectors have not yet been refactored/removed so moving things to
* the top class as of now is not a good idea.
*/
/* TODO:
* Implement TransferPair functionality
* https://github.com/apache/arrow/issues/40932
*
*/
/**
* Get buffer that manages the validity (NULL or NON-NULL nature) of elements in the vector.
* Consider it as a buffer for internal bit vector data structure.
*
* @return buffer
*/
@Override
public ArrowBuf getValidityBuffer() {
return validityBuffer;
}
/**
* Get the buffer that stores the data for elements in the vector.
*
* @return buffer
*/
@Override
public ArrowBuf getDataBuffer() {
return viewBuffer;
}
/**
* Get the buffers that store the data for views in the vector.
*
* @return list of ArrowBuf
*/
public List<ArrowBuf> getDataBuffers() {
return dataBuffers;
}
/**
* BaseVariableWidthViewVector doesn't support offset buffer.
*
* @return throws UnsupportedOperationException
*/
@Override
public ArrowBuf getOffsetBuffer() {
throw new UnsupportedOperationException(
"Offset buffer is not supported in BaseVariableWidthViewVector");
}
/**
* BaseVariableWidthViewVector doesn't support offset buffer.
*
* @return throws UnsupportedOperationException
*/
@Override
public long getOffsetBufferAddress() {
throw new UnsupportedOperationException(
"Offset buffer is not supported in BaseVariableWidthViewVector");
}
/**
* Get the memory address of buffer that manages the validity (NULL or NON-NULL nature) of
* elements in the vector.
*
* @return starting address of the buffer
*/
@Override
public long getValidityBufferAddress() {
return validityBuffer.memoryAddress();
}
/**
* Get the memory address of buffer that stores the data for elements in the vector.
*
* @return starting address of the buffer
*/
@Override
public long getDataBufferAddress() {
return viewBuffer.memoryAddress();
}
/**
* Sets the desired value capacity for the vector. This function doesn't allocate any memory for
* the vector.
*
* @param valueCount desired number of elements in the vector
*/
@Override
public void setInitialCapacity(int valueCount) {
final long size = (long) valueCount * ELEMENT_SIZE;
checkDataBufferSize(size);
lastValueAllocationSizeInBytes = (int) size;
lastValueCapacity = valueCount;
}
/**
* Sets the desired value capacity for the vector. This function doesn't allocate any memory for
* the vector.
*
* @param valueCount desired number of elements in the vector
* @param density average number of bytes per variable width view element
*/
@Override
public void setInitialCapacity(int valueCount, double density) {
final long size = (long) valueCount * ELEMENT_SIZE;
initialDataBufferSize = (int) (valueCount * density);
checkDataBufferSize(size);
lastValueAllocationSizeInBytes = (int) size;
lastValueCapacity = valueCount;
}
/**
* Get the density of this ListVector.
*
* @return density
*/
public double getDensity() {
if (valueCount == 0) {
return 0.0D;
}
final double totalListSize = getTotalValueLengthUpToIndex(valueCount);
return totalListSize / valueCount;
}
/**
* Get the current capacity which does not exceed either validity buffer or value buffer. Note:
* Here the `getValueCapacity` has a relationship with the value buffer.
*
* @return number of elements that vector can hold.
*/
@Override
public int getValueCapacity() {
final int validityCapacity = getValidityBufferValueCapacity();
final int valueBufferCapacity = Math.max(capAtMaxInt(viewBuffer.capacity() / ELEMENT_SIZE), 0);
return Math.min(valueBufferCapacity, validityCapacity);
}
private int getValidityBufferValueCapacity() {
return capAtMaxInt(validityBuffer.capacity() * 8);
}
/** zero out the vector and the data in associated buffers. */
public void zeroVector() {
initValidityBuffer();
viewBuffer.setZero(0, viewBuffer.capacity());
clearDataBuffers();
}
/* zero out the validity buffer */
private void initValidityBuffer() {
validityBuffer.setZero(0, validityBuffer.capacity());
}
/** Reset the vector to initial state. Note that this method doesn't release any memory. */
@Override
public void reset() {
zeroVector();
lastSet = -1;
valueCount = 0;
}
/** Close the vector and release the associated buffers. */
@Override
public void close() {
clear();
}
/** Same as {@link #close()}. */
@Override
public void clear() {
validityBuffer = releaseBuffer(validityBuffer);
viewBuffer = releaseBuffer(viewBuffer);
clearDataBuffers();
lastSet = -1;
valueCount = 0;
}
/** Release the data buffers and clear the list. */
public void clearDataBuffers() {
for (ArrowBuf buffer : dataBuffers) {
releaseBuffer(buffer);
}
dataBuffers.clear();
}
/**
* Get the inner vectors.
*
* @return the inner vectors for this field as defined by the TypeLayout
* @deprecated This API will be removed as the current implementations no longer support inner
* vectors.
*/
@Deprecated
@Override
public List<BufferBacked> getFieldInnerVectors() {
throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers");
}
/**
* Initialize the children in schema for this Field. This operation is a NO-OP for scalar types
* since they don't have any children.
*
* @param children the schema
* @throws IllegalArgumentException if children is a non-empty list for scalar types.
*/
@Override
public void initializeChildrenFromFields(List<Field> children) {
if (!children.isEmpty()) {
throw new IllegalArgumentException("primitive type vector cannot have children");
}
}
/**
* Get the inner child vectors.
*
* @return list of child vectors for complex types, empty list for scalar vector types
*/
@Override
public List<FieldVector> getChildrenFromFields() {
return Collections.emptyList();
}
/**
* Load the buffers of this vector with provided source buffers. The caller manages the source
* buffers and populates them before invoking this method.
*
* @param fieldNode the fieldNode indicating the value count
* @param ownBuffers the buffers for this Field (own buffers only, children not included)
*/
@Override
public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
ArrowBuf bitBuf = ownBuffers.get(0);
ArrowBuf viewBuf = ownBuffers.get(1);
List<ArrowBuf> dataBufs = ownBuffers.subList(2, ownBuffers.size());
this.clear();
this.viewBuffer = viewBuf.getReferenceManager().retain(viewBuf, allocator);
this.validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuf, allocator);
for (ArrowBuf dataBuf : dataBufs) {
this.dataBuffers.add(dataBuf.getReferenceManager().retain(dataBuf, allocator));
}
lastSet = fieldNode.getLength() - 1;
valueCount = fieldNode.getLength();
}
/**
* Get the buffers belonging to this vector.
*
* @return the inner buffers.
*/
@Override
public List<ArrowBuf> getFieldBuffers() {
List<ArrowBuf> result = new ArrayList<>(2 + dataBuffers.size());
setReaderAndWriterIndex();
result.add(validityBuffer);
result.add(viewBuffer);
// append data buffers
result.addAll(dataBuffers);
return result;
}
/** Set the reader and writer indexes for the inner buffers. */
private void setReaderAndWriterIndex() {
validityBuffer.readerIndex(0);
viewBuffer.readerIndex(0);
if (valueCount == 0) {
validityBuffer.writerIndex(0);
viewBuffer.writerIndex(0);
} else {
validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount));
viewBuffer.writerIndex(valueCount * ELEMENT_SIZE);
}
}
/** Same as {@link #allocateNewSafe()}. */
@Override
public void allocateNew() {
allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity);
}
/**
* Allocate memory for the vector. We internally use a default value count of 4096 to allocate
* memory for at least these many elements in the vector. See {@link #allocateNew(long, int)} for
* allocating memory for specific number of elements in the vector.
*
* @return false if memory allocation fails, true otherwise.
*/
@Override
public boolean allocateNewSafe() {
try {
allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity);
return true;
} catch (Exception e) {
return false;
}
}
/**
* Allocate memory for the vector to support storing at least the provided number of elements in
* the vector. This method must be called prior to using the ValueVector.
*
* @param totalBytes desired total memory capacity
* @param valueCount the desired number of elements in the vector
* @throws OutOfMemoryException if memory allocation fails
*/
@Override
public void allocateNew(long totalBytes, int valueCount) {
assert totalBytes >= 0;
checkDataBufferSize(totalBytes);
/* we are doing a new allocation -- release the current buffers */
clear();
try {
allocateBytes(totalBytes, valueCount);
} catch (Exception e) {
clear();
throw e;
}
}
@Override
public void allocateNew(int valueCount) {
allocateNew(lastValueAllocationSizeInBytes, valueCount);
}
/* Check if the data buffer size is within bounds. */
private void checkDataBufferSize(long size) {
if (size > MAX_BUFFER_SIZE || size < 0) {
throw new OversizedAllocationException(
"Memory required for vector "
+ "is ("
+ size
+ "), which is overflow or more than max allowed ("
+ MAX_BUFFER_SIZE
+ "). "
+ "You could consider using LargeVarCharVector/LargeVarBinaryVector for large strings/large bytes types");
}
}
/* allocate the inner buffers */
private void allocateBytes(final long valueBufferSize, final int valueCount) {
/* allocate data buffer */
viewBuffer = allocator.buffer(valueBufferSize);
viewBuffer.readerIndex(0);
validityBuffer = allocator.buffer((valueCount + 7) / 8);
initValidityBuffer();
lastValueCapacity = getValueCapacity();
lastValueAllocationSizeInBytes = capAtMaxInt(viewBuffer.capacity());
}
/**
* Resize the vector to increase the capacity. The internal behavior is to double the current
* value capacity.
*/
@Override
public void reAlloc() {
reallocViewBuffer();
reallocViewDataBuffer();
reallocValidityBuffer();
}
/**
* Reallocate the view buffer. View Buffer stores the views for VIEWVARCHAR or VIEWVARBINARY
* elements in the vector. The behavior is to double the size of buffer.
*
* @throws OversizedAllocationException if the desired new size is more than max allowed
* @throws OutOfMemoryException if the internal memory allocation fails
*/
public void reallocViewBuffer() {
long currentViewBufferCapacity = viewBuffer.capacity();
long newAllocationSize = currentViewBufferCapacity * 2;
if (newAllocationSize == 0) {
if (lastValueAllocationSizeInBytes > 0) {
newAllocationSize = lastValueAllocationSizeInBytes;
} else {
newAllocationSize = INITIAL_BYTE_COUNT * 2L;
}
}
reallocViewBuffer(newAllocationSize);
}
/** Reallocate the data buffer associated with view buffer. */
public void reallocViewDataBuffer() {
long currentDataBufferCapacity = 0;
if (!dataBuffers.isEmpty()) {
currentDataBufferCapacity = dataBuffers.get(dataBuffers.size() - 1).capacity();
}
long newAllocationSize = currentDataBufferCapacity * 2;
if (newAllocationSize == 0) {
if (lastValueAllocationSizeInBytes > 0) {
newAllocationSize = lastValueAllocationSizeInBytes;
} else {
newAllocationSize = INITIAL_BYTE_COUNT * 2L;
}
}
reallocViewDataBuffer(newAllocationSize);
}
/**
* Reallocate the view buffer to given size. View Buffer stores the views for VIEWVARCHAR or
* VIEWVARBINARY elements in the vector. The actual allocated size may be larger than the request
* one because it will round up the provided value to the nearest power of two.
*
* @param desiredAllocSize the desired new allocation size
* @throws OversizedAllocationException if the desired new size is more than max allowed
* @throws OutOfMemoryException if the internal memory allocation fails
*/
public void reallocViewBuffer(long desiredAllocSize) {
if (desiredAllocSize == 0) {
return;
}
long newAllocationSize = CommonUtil.nextPowerOfTwo(desiredAllocSize);
assert newAllocationSize >= 1;
checkDataBufferSize(newAllocationSize);
// for each set operation, we have to allocate 16 bytes
// here we are adjusting the desired allocation-based allocation size
// to align with the 16bytes requirement.
newAllocationSize = roundUpToMultipleOf16(newAllocationSize);
final ArrowBuf newBuf = allocator.buffer(newAllocationSize);
newBuf.setBytes(0, viewBuffer, 0, viewBuffer.capacity());
viewBuffer.getReferenceManager().release();
viewBuffer = newBuf;
lastValueAllocationSizeInBytes = viewBuffer.capacity();
}
/**
* Reallocate the data buffer for views.
*
* @param desiredAllocSize allocation size in bytes
*/
public void reallocViewDataBuffer(long desiredAllocSize) {
if (desiredAllocSize == 0) {
return;
}
if (dataBuffers.isEmpty()) {
return;
}
ArrowBuf currentBuf = dataBuffers.get(dataBuffers.size() - 1);
if (currentBuf.capacity() - currentBuf.writerIndex() >= desiredAllocSize) {
return;
}
final long newAllocationSize = CommonUtil.nextPowerOfTwo(desiredAllocSize);
assert newAllocationSize >= 1;
checkDataBufferSize(newAllocationSize);
final ArrowBuf newBuf = allocator.buffer(newAllocationSize);
dataBuffers.add(newBuf);
}
/** Reallocate Validity buffer. */
public void reallocValidityBuffer() {
int targetValidityCount = capAtMaxInt((validityBuffer.capacity() * 8) * 2);
if (targetValidityCount == 0) {
if (lastValueCapacity > 0) {
targetValidityCount = lastValueCapacity;
} else {
targetValidityCount = 2 * INITIAL_VALUE_ALLOCATION;
}
}
long validityBufferSize = computeValidityBufferSize(targetValidityCount);
final ArrowBuf newValidityBuffer = allocator.buffer(validityBufferSize);
newValidityBuffer.setBytes(0, validityBuffer, 0, validityBuffer.capacity());
newValidityBuffer.setZero(
validityBuffer.capacity(), newValidityBuffer.capacity() - validityBuffer.capacity());
validityBuffer.getReferenceManager().release();
validityBuffer = newValidityBuffer;
lastValueCapacity = getValueCapacity();
}
private long computeValidityBufferSize(int valueCount) {
return (valueCount + 7) / 8;
}
/**
* Get the size (number of bytes) of underlying view buffer.
*
* @return number of bytes in the view buffer
*/
@Override
public int getByteCapacity() {
return capAtMaxInt(viewBuffer.capacity());
}
@Override
public int sizeOfValueBuffer() {
throw new UnsupportedOperationException(
"sizeOfValueBuffer is not supported for BaseVariableWidthViewVector");
}
/**
* Get the size (number of bytes) of underlying elements in the view buffer.
*
* @return number of bytes used by data in the view buffer
*/
public int sizeOfViewBufferElements() {
if (valueCount == 0) {
return 0;
}
int totalSize = 0;
for (int i = 0; i < valueCount; i++) {
totalSize += getValueLength(i);
}
return totalSize;
}
/**
* Get the size (number of bytes) of underlying buffers used by this vector.
*
* @return size of underlying buffers.
*/
@Override
public int getBufferSize() {
return getBufferSizeFor(this.valueCount);
}
/**
* Get the potential buffer size for a particular number of records.
*
* @param valueCount desired number of elements in the vector
* @return estimated size of underlying buffers if the vector holds a given number of elements
*/
@Override
public int getBufferSizeFor(final int valueCount) {
if (valueCount == 0) {
return 0;
}
final int validityBufferSize = getValidityBufferSizeFromCount(valueCount);
final int viewBufferSize = valueCount * ELEMENT_SIZE;
final int dataBufferSize = getDataBufferSize();
return validityBufferSize + viewBufferSize + dataBufferSize;
}
private int getDataBufferSize() {
int dataBufferSize = 0;
for (ArrowBuf buf : dataBuffers) {
dataBufferSize += (int) buf.writerIndex();
}
return dataBufferSize;
}
/**
* Get information about how this field is materialized.
*
* @return the field corresponding to this vector
*/
@Override
public Field getField() {
return field;
}
/**
* Return the underlying buffers associated with this vector. Note that this doesn't impact the
* reference counts for this buffer, so it only should be used for in-context access. Also note
* that this buffer changes regularly, thus external classes shouldn't hold a reference to it
* (unless they change it).
*
* @param clear Whether to clear vector before returning, the buffers will still be refcounted but
* the returned array will be the only reference to them
* @return The underlying {@link ArrowBuf buffers} that is used by this vector instance.
*/
@Override
public ArrowBuf[] getBuffers(boolean clear) {
final ArrowBuf[] buffers;
setReaderAndWriterIndex();
if (getBufferSize() == 0) {
buffers = new ArrowBuf[0];
} else {
final int dataBufferSize = dataBuffers.size();
// validity and view buffers
final int fixedBufferSize = 2;
buffers = new ArrowBuf[fixedBufferSize + dataBufferSize];
buffers[0] = validityBuffer;
buffers[1] = viewBuffer;
for (int i = fixedBufferSize; i < fixedBufferSize + dataBufferSize; i++) {
buffers[i] = dataBuffers.get(i - fixedBufferSize);
}
}
if (clear) {
for (final ArrowBuf buffer : buffers) {
buffer.getReferenceManager().retain();
}
clear();
}
return buffers;
}
/** Validate the scalar values held by this vector. */
public void validateScalars() {
// No validation by default.
}
/**
* Construct a transfer pair of this vector and another vector of the same type.
*
* @param field The field materialized by this vector.
* @param allocator allocator for the target vector
* @param callBack not used
* @return TransferPair
*/
@Override
public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) {
return getTransferPair(field, allocator);
}
/**
* Construct a transfer pair of this vector and another vector of the same type.
*
* @param ref name of the target vector
* @param allocator allocator for the target vector
* @param callBack not used
* @return TransferPair
*/
@Override
public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
return getTransferPair(ref, allocator);
}
/**
* Construct a transfer pair of this vector and another vector of the same type.
*
* @param allocator allocator for the target vector
* @return TransferPair
*/
@Override
public TransferPair getTransferPair(BufferAllocator allocator) {
return getTransferPair(getName(), allocator);
}
/**
* Construct a transfer pair of this vector and another vector of the same type.
*
* @param ref name of the target vector
* @param allocator allocator for the target vector
* @return TransferPair
*/
@Override
public abstract TransferPair getTransferPair(String ref, BufferAllocator allocator);
/**
* Construct a transfer pair of this vector and another vector of the same type.
*
* @param field The field materialized by this vector.
* @param allocator allocator for the target vector
* @return TransferPair
*/
@Override
public abstract TransferPair getTransferPair(Field field, BufferAllocator allocator);
/**
* Transfer this vector's data to another vector. The memory associated with this vector is
* transferred to the allocator of target vector for accounting and management purposes.
*
* @param target destination vector for transfer
*/
public void transferTo(BaseVariableWidthViewVector target) {
compareTypes(target, "transferTo");
target.clear();
target.validityBuffer = transferBuffer(validityBuffer, target.allocator);
target.viewBuffer = transferBuffer(viewBuffer, target.allocator);
target.dataBuffers = new ArrayList<>(dataBuffers.size());
for (int i = 0; i < dataBuffers.size(); i++) {
target.dataBuffers.add(transferBuffer(dataBuffers.get(i), target.allocator));
}
target.setLastSet(this.lastSet);
if (this.valueCount > 0) {
target.setValueCount(this.valueCount);
}
clear();
}
/**
* Slice this vector at desired index and length and transfer the corresponding data to the target
* vector.
*
* @param startIndex start position of the split in source vector.
* @param length length of the split.
* @param target destination vector
*/
public void splitAndTransferTo(int startIndex, int length, BaseVariableWidthViewVector target) {
Preconditions.checkArgument(
startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
"Invalid parameters startIndex: %s, length: %s for valueCount: %s",
startIndex,
length,
valueCount);
compareTypes(target, "splitAndTransferTo");
target.clear();
if (length > 0) {
splitAndTransferValidityBuffer(startIndex, length, target);
splitAndTransferViewBufferAndDataBuffer(startIndex, length, target);
target.setLastSet(length - 1);
target.setValueCount(length);
}
}
/* allocate validity buffer */
private void allocateValidityBuffer(final long size) {
final int curSize = (int) size;
validityBuffer = allocator.buffer(curSize);
validityBuffer.readerIndex(0);
initValidityBuffer();
}
/*
* Transfer the validity.
*/
private void splitAndTransferValidityBuffer(
int startIndex, int length, BaseVariableWidthViewVector target) {
if (length <= 0) {
return;
}
final int firstByteSource = BitVectorHelper.byteIndex(startIndex);
final int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
final int byteSizeTarget = getValidityBufferSizeFromCount(length);
final int offset = startIndex % 8;
if (offset == 0) {
// slice
if (target.validityBuffer != null) {
target.validityBuffer.getReferenceManager().release();
}
final ArrowBuf slicedValidityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
target.validityBuffer = transferBuffer(slicedValidityBuffer, target.allocator);
return;
}
/* Copy data
* When the first bit starts from the middle of a byte (offset != 0),
* copy data from src BitVector.
* Each byte in the target is composed by a part in i-th byte,
* another part in (i+1)-th byte.
*/
target.allocateValidityBuffer(byteSizeTarget);
for (int i = 0; i < byteSizeTarget - 1; i++) {
byte b1 =
BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer, firstByteSource + i, offset);
byte b2 =
BitVectorHelper.getBitsFromNextByte(this.validityBuffer, firstByteSource + i + 1, offset);
target.validityBuffer.setByte(i, (b1 + b2));
}
/* Copying the last piece is done in the following manner:
* if the source vector has 1 or more bytes remaining, we copy
* the last piece as a byte formed by shifting data
* from the current byte and the next byte.
*
* if the source vector has no more bytes remaining
* (we are at the last byte), we copy the last piece as a byte
* by shifting data from the current byte.
*/
if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
byte b1 =
BitVectorHelper.getBitsFromCurrentByte(
this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset);
byte b2 =
BitVectorHelper.getBitsFromNextByte(
this.validityBuffer, firstByteSource + byteSizeTarget, offset);
target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
} else {
byte b1 =
BitVectorHelper.getBitsFromCurrentByte(
this.validityBuffer, firstByteSource + byteSizeTarget - 1, offset);
target.validityBuffer.setByte(byteSizeTarget - 1, b1);
}
}
/**
* In split and transfer, the view buffer and the data buffer will be allocated. Then the values
* will be copied from the source vector to the target vector. Allocation and setting are
* preferred over transfer since the buf index and buf offset needs to be overwritten when large
* strings are added.
*
* @param startIndex starting index
* @param length number of elements to be copied
* @param target target vector
*/
private void splitAndTransferViewBufferAndDataBuffer(
int startIndex, int length, BaseVariableWidthViewVector target) {
if (length == 0) {
return;
}
if (target.viewBuffer != null) {
target.viewBuffer.getReferenceManager().release();
}
// allocate target view buffer
target.viewBuffer = target.allocator.buffer(length * ELEMENT_SIZE);
for (int i = startIndex; i < startIndex + length; i++) {
final int stringLength = getValueLength(i);
// keeping track of writing index in the target view buffer
int writePosition = (i - startIndex) * ELEMENT_SIZE;
// keeping track of reading index in the source view buffer
int readPosition = i * ELEMENT_SIZE;
// set length
target.viewBuffer.setInt(writePosition, stringLength);
if (stringLength <= INLINE_SIZE) {
// handle inline buffer
writePosition += LENGTH_WIDTH;
readPosition += LENGTH_WIDTH;
// set data by copying the required portion from the source buffer
target.viewBuffer.setBytes(writePosition, viewBuffer, readPosition, stringLength);
} else {
// handle non-inline buffer
final int readBufIndex =
viewBuffer.getInt(((long) i * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH);
final int readBufOffset =
viewBuffer.getInt(
((long) i * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH);
final ArrowBuf dataBuf = dataBuffers.get(readBufIndex);
// allocate data buffer
ArrowBuf currentDataBuf = target.allocateOrGetLastDataBuffer(stringLength);
final long currentOffSet = currentDataBuf.writerIndex();
writePosition += LENGTH_WIDTH;
readPosition += LENGTH_WIDTH;
// set prefix
target.viewBuffer.setBytes(writePosition, viewBuffer, readPosition, PREFIX_WIDTH);
writePosition += PREFIX_WIDTH;
// set buf id
target.viewBuffer.setInt(writePosition, target.dataBuffers.size() - 1);
writePosition += BUF_INDEX_WIDTH;
// set offset
target.viewBuffer.setInt(writePosition, (int) currentOffSet);
currentDataBuf.setBytes(currentOffSet, dataBuf, readBufOffset, stringLength);
currentDataBuf.writerIndex(currentOffSet + stringLength);
}
}
}
/*----------------------------------------------------------------*
| |
| common getters and setters |
| |
*----------------------------------------------------------------*/
/**
* Get the number of elements that are null in the vector.
*
* @return the number of null elements.
*/
@Override
public int getNullCount() {
return BitVectorHelper.getNullCount(validityBuffer, valueCount);
}
/**
* Check if the given index is within the current value capacity of the vector.
*
* @param index position to check
* @return true if the index is within the current value capacity
*/
public boolean isSafe(int index) {
return index < getValueCapacity();
}
/**
* Check if an element at given index is null.
*
* @param index position of an element
* @return true if an element at given index is null
*/
@Override
public boolean isNull(int index) {
return (isSet(index) == 0);
}
/**
* Same as {@link #isNull(int)}.
*
* @param index position of an element
* @return 1 if element at given index is not null, 0 otherwise
*/
public int isSet(int index) {
final int byteIndex = index >> 3;
final byte b = validityBuffer.getByte(byteIndex);
final int bitIndex = index & 7;
return (b >> bitIndex) & 0x01;
}
/**
* Get the value count of vector. This will always be zero unless setValueCount(int) has been
* called prior to calling this.
*
* @return valueCount for the vector
*/
@Override
public int getValueCount() {
return valueCount;
}
/**
* Sets the value count for the vector.
*
* @param valueCount value count
*/
@Override
public void setValueCount(int valueCount) {
assert valueCount >= 0;
this.valueCount = valueCount;
while (valueCount > getValueCapacity()) {
reallocViewBuffer();
reallocValidityBuffer();
}
lastSet = valueCount - 1;
setReaderAndWriterIndex();
}
/**
* Create holes in the vector upto the given index (exclusive). Holes will be created from the
* current last-set position in the vector.
*
* @param index target index
*/
@Override
public void fillEmpties(int index) {
handleSafe(index, EMPTY_BYTE_ARRAY.length);
lastSet = index - 1;
}
/**
* Set the index of the last non-null element in the vector. It is important to call this method
* with appropriate value before calling {@link #setValueCount(int)}.
*
* @param value desired index of last non-null element.
*/
@Override
public void setLastSet(int value) {
lastSet = value;
}
/**
* Get the index of the last non-null element in the vector.
*
* @return index of the last non-null element
*/
@Override
public int getLastSet() {
return lastSet;
}
/**
* Mark the particular position in the vector as non-null.
*
* @param index position of the element.
*/
@Override
public void setIndexDefined(int index) {
// We need to check and reallocate the validity buffer
while (index >= getValueCapacity()) {
reallocValidityBuffer();
}
BitVectorHelper.setBit(validityBuffer, index);
}
/**
* Sets the value length for an element.
*
* @param index position of the element to set
* @param length length of the element
*/
@Override
public void setValueLengthSafe(int index, int length) {
assert index >= 0;
handleSafe(index, length);
lastSet = index;
}
/**
* Get the length of the element at specified index.
*
* @param index position of an element to get
* @return greater than length 0 for a non-null element, 0 otherwise
*/
@Override
public int getValueLength(int index) {
assert index >= 0;
if (index < 0 || index >= viewBuffer.capacity() / ELEMENT_SIZE) {
throw new IndexOutOfBoundsException("Index out of bounds: " + index);
}
if (isSet(index) == 0) {
return 0;
}
return viewBuffer.getInt(((long) index * ELEMENT_SIZE));
}
/**
* Set the variable length element at the specified index to the supplied byte array. This is same
* as using {@link #set(int, byte[], int, int)} with start as Zero and length as #value.length
*
* @param index position of the element to set
* @param value array of bytes to write
*/
public void set(int index, byte[] value) {
assert index >= 0;
BitVectorHelper.setBit(validityBuffer, index);
setBytes(index, value, 0, value.length);
lastSet = index;
}
/**
* Same as {@link #set(int, byte[])} except that it handles the case where index and length of a
* new element are beyond the existing capacity of the vector.
*
* @param index position of the element to set
* @param value array of bytes to write
*/
@Override
public void setSafe(int index, byte[] value) {
assert index >= 0;
// check if the current index can be populated
handleSafe(index, value.length);
BitVectorHelper.setBit(validityBuffer, index);
setBytes(index, value, 0, value.length);
lastSet = index;
}
/**
* Set the variable length element at the specified index to the supplied byte array.
*
* @param index position of the element to set
* @param value array of bytes to write
* @param start start index in an array of bytes
* @param length length of data in an array of bytes
*/
public void set(int index, byte[] value, int start, int length) {
assert index >= 0;
BitVectorHelper.setBit(validityBuffer, index);
setBytes(index, value, start, length);
lastSet = index;
}
/**
* Same as {@link #set(int, byte[], int, int)} except that it handles the case where index and
* length of a new element are beyond the existing capacity of the vector.
*
* @param index position of the element to set
* @param value array of bytes to write
* @param start start index in an array of bytes
* @param length length of data in an array of bytes
*/
public void setSafe(int index, byte[] value, int start, int length) {
assert index >= 0;
handleSafe(index, length);
BitVectorHelper.setBit(validityBuffer, index);
setBytes(index, value, start, length);
lastSet = index;
}
/**
* Set the variable length element at the specified index to the content in supplied ByteBuffer.
*
* @param index position of the element to set
* @param value ByteBuffer with data
* @param start start index in ByteBuffer
* @param length length of data in ByteBuffer
*/
public void set(int index, ByteBuffer value, int start, int length) {
assert index >= 0;
BitVectorHelper.setBit(validityBuffer, index);
setBytes(index, value.array(), start, length);
lastSet = index;
}
/**
* Same as {@link #set(int, ByteBuffer, int, int)} except that it handles the case where index and
* length of a new element are beyond the existing capacity of the vector.
*
* @param index position of the element to set
* @param value ByteBuffer with data
* @param start start index in ByteBuffer
* @param length length of data in ByteBuffer
*/
public void setSafe(int index, ByteBuffer value, int start, int length) {
assert index >= 0;
handleSafe(index, length);
BitVectorHelper.setBit(validityBuffer, index);
setBytes(index, value.array(), start, length);
lastSet = index;
}
/**
* Set the element at the given index to null.
*
* @param index position of an element
*/
@Override
public void setNull(int index) {
// We need to check and reallocate the validity buffer
while (index >= getValueCapacity()) {
reallocValidityBuffer();
}
BitVectorHelper.unsetBit(validityBuffer, index);
}
/**
* Store the given value at a particular position in the vector. isSet indicates whether the value
* is NULL or not.
*
* @param index position of the new value
* @param isSet Zero for NULL value, 1 otherwise
* @param start start position of data in buffer
* @param end end position of data in buffer
* @param buffer data buffer containing the variable width element to be stored in the vector
*/
public void set(int index, int isSet, int start, int end, ArrowBuf buffer) {
assert index >= 0;
final int dataLength = end - start;
BitVectorHelper.setValidityBit(validityBuffer, index, isSet);
setBytes(index, buffer, start, dataLength);
lastSet = index;
}
/**
* Same as {@link #set(int, int, int, int, ArrowBuf)} except that it handles the case when index
* is greater than or equal to current value capacity of the vector.
*
* @param index position of the new value
* @param isSet Zero for NULL value, 1 otherwise
* @param start start position of data in buffer
* @param end end position of data in buffer
* @param buffer data buffer containing the variable width element to be stored in the vector
*/
public void setSafe(int index, int isSet, int start, int end, ArrowBuf buffer) {
assert index >= 0;
final int dataLength = end - start;
handleSafe(index, dataLength);
BitVectorHelper.setValidityBit(validityBuffer, index, isSet);
setBytes(index, buffer, start, dataLength);
lastSet = index;
}
/**
* Store the given value at a particular position in the vector. isSet indicates whether the value
* is NULL or not.
*
* @param index position of the new value
* @param start start position of data in buffer
* @param length length of data in buffer
* @param buffer data buffer containing the variable width element to be stored in the vector
*/
public void set(int index, int start, int length, ArrowBuf buffer) {
assert index >= 0;
BitVectorHelper.setBit(validityBuffer, index);
setBytes(index, buffer, start, length);
lastSet = index;
}
/**
* Same as {@link #set(int, int, int, int, ArrowBuf)} except that it handles the case when index
* is greater than or equal to current value capacity of the vector.
*
* @param index position of the new value
* @param start start position of data in buffer
* @param length length of data in buffer
* @param buffer data buffer containing the variable width element to be stored in the vector
*/
public void setSafe(int index, int start, int length, ArrowBuf buffer) {
assert index >= 0;
handleSafe(index, length);
BitVectorHelper.setBit(validityBuffer, index);
setBytes(index, buffer, start, length);
lastSet = index;
}
/*----------------------------------------------------------------*
| |
| helper methods for setters |
| |
*----------------------------------------------------------------*/
protected ArrowBuf allocateOrGetLastDataBuffer(int length) {
long dataBufferSize;
if (initialDataBufferSize > 0) {
dataBufferSize = Math.max(initialDataBufferSize, length);
} else {
dataBufferSize = Math.max(lastValueAllocationSizeInBytes, length);
}
if (dataBuffers.isEmpty()
|| dataBuffers.get(dataBuffers.size() - 1).capacity()
- dataBuffers.get(dataBuffers.size() - 1).writerIndex()
< length) {
ArrowBuf newBuf = allocator.buffer(dataBufferSize);
dataBuffers.add(newBuf);
}
return dataBuffers.get(dataBuffers.size() - 1);
}
/**
* This method is used to create a view buffer for a variable width vector. It handles both inline
* and data buffers.
*
* <p>If the length of the value is less than or equal to {@link #INLINE_SIZE}, the value is
* stored in the valueBuffer directly as an inline buffer. The valueBuffer stores the length of
* the value followed by the value itself. If the length of the value is greater than {@link
* #INLINE_SIZE}, a new buffer is allocated and added to dataBuffers to hold the value. The
* viewBuffer in this case stores the length of the value, a prefix of the value, the index of the
* new buffer in dataBuffers, and the offset of the value in the new buffer.
*
* @param index The index at which the new value will be inserted.
* @param value The byte array that contains the data to be inserted.
* @param start The start index in the byte array from where the data for the new value begins.
* @param length The length of the data in the byte array that belongs to the new value.
*/
protected final void setBytes(int index, byte[] value, int start, int length) {
int writePosition = index * ELEMENT_SIZE;
// to clear the memory segment of view being written to
// this is helpful in case of overwriting the value
viewBuffer.setZero(writePosition, ELEMENT_SIZE);
if (length <= INLINE_SIZE) {
// allocate inline buffer
// set length
viewBuffer.setInt(writePosition, length);
writePosition += LENGTH_WIDTH;
// set data
viewBuffer.setBytes(writePosition, value, start, length);
} else {
// allocate data buffer
ArrowBuf currentBuf = allocateOrGetLastDataBuffer(length);
// set length
viewBuffer.setInt(writePosition, length);
writePosition += LENGTH_WIDTH;
// set prefix
viewBuffer.setBytes(writePosition, value, start, PREFIX_WIDTH);
writePosition += PREFIX_WIDTH;
// set buf id
viewBuffer.setInt(writePosition, dataBuffers.size() - 1);
writePosition += BUF_INDEX_WIDTH;
// set offset
viewBuffer.setInt(writePosition, (int) currentBuf.writerIndex());
currentBuf.setBytes(currentBuf.writerIndex(), value, start, length);
currentBuf.writerIndex(currentBuf.writerIndex() + length);
}
}
/**
* This method is used to create a view buffer for a variable width vector. Similar to {@link
* #setBytes(int index, byte[] value, int start, int length)}
*
* @param index The index at which the new value will be inserted.
* @param valueBuf The byte array that contains the data to be inserted.
* @param start The start index in the byte array from where the data for the new value begins.
* @param length The length of the data in the byte array that belongs to the new value.
*/
protected final void setBytes(int index, ArrowBuf valueBuf, int start, int length) {
int writePosition = index * ELEMENT_SIZE;
// to clear the memory segment of view being written to
// this is helpful in case of overwriting the value
viewBuffer.setZero(writePosition, ELEMENT_SIZE);
if (length <= INLINE_SIZE) {
// allocate inline buffer
// set length
viewBuffer.setInt(writePosition, length);
writePosition += LENGTH_WIDTH;
// set data
viewBuffer.setBytes(writePosition, valueBuf, start, length);
} else {
// allocate data buffer
ArrowBuf currentBuf = allocateOrGetLastDataBuffer(length);
// set length
viewBuffer.setInt(writePosition, length);
writePosition += LENGTH_WIDTH;
// set prefix
viewBuffer.setBytes(writePosition, valueBuf, start, PREFIX_WIDTH);
writePosition += PREFIX_WIDTH;
// set buf id
viewBuffer.setInt(writePosition, dataBuffers.size() - 1);
writePosition += BUF_INDEX_WIDTH;
// set offset
viewBuffer.setInt(writePosition, (int) currentBuf.writerIndex());
currentBuf.setBytes(currentBuf.writerIndex(), valueBuf, start, length);
currentBuf.writerIndex(currentBuf.writerIndex() + length);
}
}
/**
* Get the total length of the elements up to the given index.
*
* @param index The index of the element in the vector.
* @return The total length up to the element at the given index.
*/
public final int getTotalValueLengthUpToIndex(int index) {
int totalLength = 0;
for (int i = 0; i < index - 1; i++) {
totalLength += getValueLength(i);
}
return totalLength;
}
protected final void handleSafe(int index, int dataLength) {
final long lastSetCapacity = lastSet < 0 ? 0 : (long) index * ELEMENT_SIZE;
final long targetCapacity = roundUpToMultipleOf16(lastSetCapacity + dataLength);
// for views, we need each buffer with 16 byte alignment, so we need to check the last written
// index
// in the viewBuffer and allocate a new buffer which has 16 byte alignment for adding new
// values.
long writePosition = (long) index * ELEMENT_SIZE;
if (viewBuffer.capacity() <= writePosition || viewBuffer.capacity() < targetCapacity) {
/*
* Everytime we want to increase the capacity of the viewBuffer, we need to make sure that the new capacity
* meets 16 byte alignment.
* If the targetCapacity is larger than the writePosition, we may not necessarily
* want to allocate the targetCapacity to viewBuffer since when it is >={@link #INLINE_SIZE} either way
* we are writing to the dataBuffer.
*/
reallocViewBuffer(Math.max(writePosition, targetCapacity));
}
while (index >= getValueCapacity()) {
reallocValidityBuffer();
}
}
/**
* Copy a cell value from a particular index in source vector to a particular position in this
* vector.
*
* @param fromIndex position to copy from in source vector
* @param thisIndex position to copy to in this vector
* @param from source vector
*/
@Override
public void copyFrom(int fromIndex, int thisIndex, ValueVector from) {
Preconditions.checkArgument(getMinorType() == from.getMinorType());
if (from.isNull(fromIndex)) {
BitVectorHelper.unsetBit(validityBuffer, thisIndex);
} else {
final int viewLength = from.getDataBuffer().getInt((long) fromIndex * ELEMENT_SIZE);
BitVectorHelper.setBit(validityBuffer, thisIndex);
final int start = thisIndex * ELEMENT_SIZE;
final int copyStart = fromIndex * ELEMENT_SIZE;
from.getDataBuffer().getBytes(start, viewBuffer, copyStart, ELEMENT_SIZE);
if (viewLength > INLINE_SIZE) {
final int bufIndex =
from.getDataBuffer()
.getInt(((long) fromIndex * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH);
final int dataOffset =
from.getDataBuffer()
.getInt(
((long) fromIndex * ELEMENT_SIZE)
+ LENGTH_WIDTH
+ PREFIX_WIDTH
+ BUF_INDEX_WIDTH);
final ArrowBuf dataBuf = ((BaseVariableWidthViewVector) from).dataBuffers.get(bufIndex);
final ArrowBuf thisDataBuf = allocateOrGetLastDataBuffer(viewLength);
thisDataBuf.setBytes(thisDataBuf.writerIndex(), dataBuf, dataOffset, viewLength);
thisDataBuf.writerIndex(thisDataBuf.writerIndex() + viewLength);
}
}
lastSet = thisIndex;
}
/**
* Same as {@link #copyFrom(int, int, ValueVector)} except that it handles the case when the
* capacity of the vector needs to be expanded before copy.
*
* @param fromIndex position to copy from in source vector
* @param thisIndex position to copy to in this vector
* @param from source vector
*/
@Override
public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) {
Preconditions.checkArgument(getMinorType() == from.getMinorType());
if (from.isNull(fromIndex)) {
handleSafe(thisIndex, 0);
BitVectorHelper.unsetBit(validityBuffer, thisIndex);
} else {
final int viewLength = from.getDataBuffer().getInt((long) fromIndex * ELEMENT_SIZE);
handleSafe(thisIndex, viewLength);
BitVectorHelper.setBit(validityBuffer, thisIndex);
final int start = thisIndex * ELEMENT_SIZE;
final int copyStart = fromIndex * ELEMENT_SIZE;
from.getDataBuffer().getBytes(start, viewBuffer, copyStart, ELEMENT_SIZE);
if (viewLength > INLINE_SIZE) {
final int bufIndex =
from.getDataBuffer()
.getInt(((long) fromIndex * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH);
final int dataOffset =
from.getDataBuffer()
.getInt(
((long) fromIndex * ELEMENT_SIZE)
+ LENGTH_WIDTH
+ PREFIX_WIDTH
+ BUF_INDEX_WIDTH);
final ArrowBuf dataBuf = ((BaseVariableWidthViewVector) from).dataBuffers.get(bufIndex);
final ArrowBuf thisDataBuf = allocateOrGetLastDataBuffer(viewLength);
thisDataBuf.setBytes(thisDataBuf.writerIndex(), dataBuf, dataOffset, viewLength);
thisDataBuf.writerIndex(thisDataBuf.writerIndex() + viewLength);
}
}
lastSet = thisIndex;
}
@Override
public ArrowBufPointer getDataPointer(int index) {
return getDataPointer(index, new ArrowBufPointer());
}
@Override
public ArrowBufPointer getDataPointer(int index, ArrowBufPointer reuse) {
if (isNull(index)) {
reuse.set(null, 0, 0);
} else {
int length = getValueLength(index);
if (length < INLINE_SIZE) {
int start = index * ELEMENT_SIZE + LENGTH_WIDTH;
reuse.set(viewBuffer, start, length);
} else {
final int bufIndex =
viewBuffer.getInt(((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH);
ArrowBuf dataBuf = dataBuffers.get(bufIndex);
reuse.set(dataBuf, 0, length);
}
}
return reuse;
}
@Override
public int hashCode(int index) {
return hashCode(index, null);
}
@Override
public int hashCode(int index, ArrowBufHasher hasher) {
if (isNull(index)) {
return ArrowBufPointer.NULL_HASH_CODE;
}
final int length = getValueLength(index);
if (length < INLINE_SIZE) {
int start = index * ELEMENT_SIZE + LENGTH_WIDTH;
return ByteFunctionHelpers.hash(hasher, this.getDataBuffer(), start, start + length);
} else {
final int bufIndex =
viewBuffer.getInt(((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH);
final int dataOffset =
viewBuffer.getInt(
((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH);
ArrowBuf dataBuf = dataBuffers.get(bufIndex);
return ByteFunctionHelpers.hash(hasher, dataBuf, dataOffset, dataOffset + length);
}
}
/**
* Retrieves the data of a variable-width element at a given index in the vector.
*
* <p>If the length of the data is greater than {@link #INLINE_SIZE}, the data is stored in an
* inline buffer. The method retrieves the buffer index and data offset from the viewBuffer, and
* then retrieves the data from the corresponding buffer in the dataBuffers list.
*
* <p>If the length of the data is less than or equal to {@link #INLINE_SIZE}, the data is stored
* directly in the viewBuffer. The method retrieves the data directly from the viewBuffer.
*
* @param index position of the element in the vector
* @return byte array containing the data of the element
*/
protected byte[] getData(int index) {
final int dataLength = getValueLength(index);
byte[] result = new byte[dataLength];
if (dataLength > INLINE_SIZE) {
// data is in the data buffer
// get buffer index
final int bufferIndex =
viewBuffer.getInt(((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH);
// get data offset
final int dataOffset =
viewBuffer.getInt(
((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH);
dataBuffers.get(bufferIndex).getBytes(dataOffset, result, 0, dataLength);
} else {
// data is in the view buffer
viewBuffer.getBytes((long) index * ELEMENT_SIZE + BUF_INDEX_WIDTH, result, 0, dataLength);
}
return result;
}
protected void getData(int index, ReusableBuffer<?> buffer) {
final int dataLength = getValueLength(index);
if (dataLength > INLINE_SIZE) {
// data is in the data buffer
// get buffer index
final int bufferIndex =
viewBuffer.getInt(((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH);
// get data offset
final int dataOffset =
viewBuffer.getInt(
((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH);
ArrowBuf dataBuf = dataBuffers.get(bufferIndex);
buffer.set(dataBuf, dataOffset, dataLength);
} else {
// data is in the value buffer
buffer.set(viewBuffer, ((long) index * ELEMENT_SIZE) + BUF_INDEX_WIDTH, dataLength);
}
}
@Override
public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
return visitor.visit(this, value);
}
/**
* Retrieves the export buffer count for the C Data Interface.
*
* <p>For Variadic types, an additional buffer is kept to store the size of each variadic buffer
* since that information cannot be retrieved in the C Data import.
*
* <p>In the C Data Interface, the binary view import expects at least three buffers. The variadic
* size buffer is merely allocated to determine the number of elements per each variadic buffer,
* and it is not part of the imported data.
*
* <p>The count is set to 3 + dataBuffers.size(). Three is formed by validity, view, and variadic
* size buffer.
*
* @return the number of buffers to be exported
*/
@Override
public int getExportedCDataBufferCount() {
return 3 + dataBuffers.size();
}
/**
* Get the data buffer of the vector. Note that an additional buffer is appended to store the size
* of each variadic buffer's size.
*
* @param buffers list of buffers to be exported
* @param buffersPtr buffer to store the pointers to the exported buffers
* @param nullValue null value
*/
@Override
public void exportCDataBuffers(List<ArrowBuf> buffers, ArrowBuf buffersPtr, long nullValue) {
exportBuffer(validityBuffer, buffers, buffersPtr, nullValue, true);
exportBuffer(viewBuffer, buffers, buffersPtr, nullValue, true);
// allocating additional space to keep the number of variadic buffers
ArrowBuf variadicSizeBuffer = allocator.buffer((long) Long.BYTES * dataBuffers.size());
// variadicSizeBuffer.setZero(0, variadicSizeBuffer.capacity());
// export data buffers
for (int i = 0; i < dataBuffers.size(); i++) {
ArrowBuf dataBuf = dataBuffers.get(i);
// calculate sizes for variadic size buffer
variadicSizeBuffer.setLong((long) i * Long.BYTES, dataBuf.capacity());
exportBuffer(dataBuf, buffers, buffersPtr, nullValue, true);
}
// export variadic size buffer
exportBuffer(variadicSizeBuffer, buffers, buffersPtr, nullValue, false);
}
}