// ValidateVectorBufferVisitor.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.arrow.vector.validate;

import static org.apache.arrow.vector.validate.ValidateUtil.validateOrThrow;

import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.vector.BaseFixedWidthVector;
import org.apache.arrow.vector.BaseLargeVariableWidthVector;
import org.apache.arrow.vector.BaseVariableWidthVector;
import org.apache.arrow.vector.BaseVariableWidthViewVector;
import org.apache.arrow.vector.BitVector;
import org.apache.arrow.vector.ExtensionTypeVector;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.NullVector;
import org.apache.arrow.vector.TypeLayout;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.compare.VectorVisitor;
import org.apache.arrow.vector.complex.DenseUnionVector;
import org.apache.arrow.vector.complex.FixedSizeListVector;
import org.apache.arrow.vector.complex.LargeListVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.NonNullableStructVector;
import org.apache.arrow.vector.complex.UnionVector;
import org.apache.arrow.vector.types.pojo.ArrowType;

/**
 * Visitor to validate vector buffers.
 *
 * <p>Each {@code visit} overload checks that the buffers a vector exposes (validity, offset, data
 * or type buffer, depending on the vector kind) are non-null and have at least the capacity
 * implied by the vector's value count. Nested vectors are validated recursively with this same
 * visitor. Every failed check is reported through {@code ValidateUtil.validateOrThrow}.
 *
 * <p>All size computations are carried out in {@code long} so that vectors with a large value
 * count cannot silently pass validation because of {@code int} overflow.
 */
public class ValidateVectorBufferVisitor implements VectorVisitor<Void, Void> {

  /**
   * Runs the checks shared by every vector kind: the value count must be non-negative and, for a
   * {@link FieldVector}, the number of field buffers must match the count that {@link TypeLayout}
   * reports for the vector's Arrow type.
   *
   * @param vector the vector to check
   */
  private void validateVectorCommon(ValueVector vector) {
    ArrowType arrowType = vector.getField().getType();
    int valueCount = vector.getValueCount();
    // Report the offending value count itself (not the value capacity) in the message.
    validateOrThrow(valueCount >= 0, "Vector valueCount %s is negative.", valueCount);

    if (vector instanceof FieldVector) {
      FieldVector fieldVector = (FieldVector) vector;
      // TODO: https://github.com/apache/arrow/issues/41734
      int typeBufferCount = TypeLayout.getTypeBufferCount(arrowType);
      int actualBufferCount = fieldVector.getFieldBuffers().size();
      validateOrThrow(
          actualBufferCount == typeBufferCount,
          "Expected %s buffers in vector of type %s, got %s.",
          typeBufferCount,
          arrowType.toString(),
          actualBufferCount);
    }
  }

  /**
   * Checks that the validity buffer exists and holds at least one bit per value.
   *
   * @param vector the vector whose validity buffer is checked
   * @param valueCount the number of values the buffer must be able to describe
   */
  private void validateValidityBuffer(ValueVector vector, int valueCount) {
    ArrowBuf validityBuffer = vector.getValidityBuffer();
    validateOrThrow(validityBuffer != null, "The validity buffer is null.");
    validateOrThrow(
        validityBuffer.capacity() * 8L >= valueCount,
        "Not enough capacity for the validity buffer. Minimum capacity %s, actual capacity %s.",
        // Round up to whole bytes; long arithmetic avoids overflow near Integer.MAX_VALUE.
        (valueCount + 7L) / 8L,
        validityBuffer.capacity());
  }

  /**
   * Checks that the offset buffer exists and has at least {@code minCapacity} bytes.
   *
   * @param vector the vector whose offset buffer is checked
   * @param minCapacity the minimum required capacity, in bytes
   */
  private void validateOffsetBuffer(ValueVector vector, long minCapacity) {
    ArrowBuf offsetBuffer = vector.getOffsetBuffer();
    validateOrThrow(offsetBuffer != null, "The offset buffer is null.");
    validateOrThrow(
        offsetBuffer.capacity() >= minCapacity,
        "Not enough capacity for the offset buffer. Minimum capacity %s, actual capacity %s.",
        minCapacity,
        offsetBuffer.capacity());
  }

  /**
   * Checks that the data buffer of a fixed width vector exists and can hold {@code valueCount}
   * values of {@code bitWidth} bits each.
   *
   * @param vector the vector whose data buffer is checked
   * @param valueCount the number of values stored in the vector
   * @param bitWidth the width of a single value, in bits
   */
  private void validateFixedWidthDataBuffer(ValueVector vector, int valueCount, int bitWidth) {
    ArrowBuf dataBuffer = vector.getDataBuffer();
    validateOrThrow(dataBuffer != null, "The fixed width data buffer is null.");
    long requiredBits = (long) bitWidth * valueCount;
    validateOrThrow(
        requiredBits <= dataBuffer.capacity() * 8L,
        "Not enough capacity for fixed width data buffer. Minimum capacity %s, actual capacity %s.",
        // Round the required bit count up to whole bytes for the message.
        (requiredBits + 7L) / 8L,
        dataBuffer.capacity());
  }

  /**
   * Checks that the data buffer exists and has at least {@code minCapacity} bytes.
   *
   * @param vector the vector whose data buffer is checked
   * @param minCapacity the minimum required capacity, in bytes
   */
  private void validateDataBuffer(ValueVector vector, long minCapacity) {
    ArrowBuf dataBuffer = vector.getDataBuffer();
    validateOrThrow(dataBuffer != null, "The data buffer is null.");
    validateOrThrow(
        dataBuffer.capacity() >= minCapacity,
        "Not enough capacity for data buffer. Minimum capacity %s, actual capacity %s.",
        minCapacity,
        dataBuffer.capacity());
  }

  /**
   * Checks that a union type buffer exists and has at least {@code minCapacity} bytes.
   *
   * @param typeBuf the type buffer to check
   * @param minCapacity the minimum required capacity, in bytes
   */
  private void validateTypeBuffer(ArrowBuf typeBuf, long minCapacity) {
    validateOrThrow(typeBuf != null, "The type buffer is null.");
    validateOrThrow(
        typeBuf.capacity() >= minCapacity,
        "Not enough capacity for type buffer. Minimum capacity %s, actual capacity %s.",
        minCapacity,
        typeBuf.capacity());
  }

  /**
   * Validates the validity and data buffers of a fixed width vector. A {@link BitVector} is
   * treated as one bit per value; every other vector uses its type width in bytes times eight.
   *
   * @param vector the vector to validate
   * @param value unused
   * @return always {@code null}
   */
  @Override
  public Void visit(BaseFixedWidthVector vector, Void value) {
    int bitWidth = (vector instanceof BitVector) ? 1 : vector.getTypeWidth() * 8;
    int valueCount = vector.getValueCount();
    validateVectorCommon(vector);
    validateValidityBuffer(vector, valueCount);
    validateFixedWidthDataBuffer(vector, valueCount, bitWidth);
    return null;
  }

  /**
   * Validates the validity, offset and data buffers of a variable width vector. A non-empty
   * vector needs {@code valueCount + 1} offsets; the last offset is used as the minimum required
   * data buffer capacity. An empty vector requires no offset or data capacity.
   *
   * @param vector the vector to validate
   * @param value unused
   * @return always {@code null}
   */
  @Override
  public Void visit(BaseVariableWidthVector vector, Void value) {
    int valueCount = vector.getValueCount();
    validateVectorCommon(vector);
    validateValidityBuffer(vector, valueCount);
    long minOffsetCapacity =
        valueCount == 0 ? 0L : (valueCount + 1L) * BaseVariableWidthVector.OFFSET_WIDTH;
    validateOffsetBuffer(vector, minOffsetCapacity);
    // Compute the byte index in long so it cannot wrap around for large value counts.
    int lastOffset =
        valueCount == 0
            ? 0
            : vector
                .getOffsetBuffer()
                .getInt((long) valueCount * BaseVariableWidthVector.OFFSET_WIDTH);
    validateDataBuffer(vector, lastOffset);
    return null;
  }

  /**
   * Validates the validity, offset and data buffers of a large variable width vector. Same checks
   * as for {@link BaseVariableWidthVector}, but offsets are read as {@code long} values.
   *
   * @param vector the vector to validate
   * @param value unused
   * @return always {@code null}
   */
  @Override
  public Void visit(BaseLargeVariableWidthVector vector, Void value) {
    int valueCount = vector.getValueCount();
    validateVectorCommon(vector);
    validateValidityBuffer(vector, valueCount);
    long minOffsetCapacity =
        valueCount == 0 ? 0L : (valueCount + 1L) * BaseLargeVariableWidthVector.OFFSET_WIDTH;
    validateOffsetBuffer(vector, minOffsetCapacity);
    long lastOffset =
        valueCount == 0
            ? 0L
            : vector
                .getOffsetBuffer()
                .getLong((long) valueCount * BaseLargeVariableWidthVector.OFFSET_WIDTH);
    validateDataBuffer(vector, lastOffset);
    return null;
  }

  /**
   * View vectors are not handled by this visitor.
   *
   * @param vector the vector to validate
   * @param value unused
   * @return never returns normally
   * @throws UnsupportedOperationException always
   */
  @Override
  public Void visit(BaseVariableWidthViewVector vector, Void value) {
    throw new UnsupportedOperationException("View vectors are not supported.");
  }

  /**
   * Validates the validity and offset buffers of a list vector, checks that the inner vector
   * contains at least as many elements as the last offset, and then validates the inner vector
   * with this visitor. A missing inner vector is treated as having zero elements.
   *
   * @param vector the vector to validate
   * @param value unused
   * @return always {@code null}
   */
  @Override
  public Void visit(ListVector vector, Void value) {
    int valueCount = vector.getValueCount();
    validateVectorCommon(vector);
    validateValidityBuffer(vector, valueCount);
    long minOffsetCapacity = valueCount == 0 ? 0L : (valueCount + 1L) * ListVector.OFFSET_WIDTH;
    validateOffsetBuffer(vector, minOffsetCapacity);

    FieldVector dataVector = vector.getDataVector();
    // Compute the byte index in long so it cannot wrap around for large value counts.
    int lastOffset =
        valueCount == 0
            ? 0
            : vector.getOffsetBuffer().getInt((long) valueCount * ListVector.OFFSET_WIDTH);
    int dataVectorLength = dataVector == null ? 0 : dataVector.getValueCount();
    // The reported minimum matches the enforced condition: lastOffset elements are required.
    validateOrThrow(
        dataVectorLength >= lastOffset,
        "Inner vector does not contain enough elements. Minimum element count %s, actual element count %s",
        lastOffset,
        dataVectorLength);

    if (dataVector != null) {
      dataVector.accept(this, null);
    }
    return null;
  }

  /**
   * Validates the validity buffer of a fixed size list vector, checks that the inner vector
   * contains at least {@code valueCount * listSize} elements, and then validates the inner vector
   * with this visitor. A missing inner vector is treated as having zero elements.
   *
   * @param vector the vector to validate
   * @param value unused
   * @return always {@code null}
   */
  @Override
  public Void visit(FixedSizeListVector vector, Void value) {
    int valueCount = vector.getValueCount();
    validateVectorCommon(vector);
    validateValidityBuffer(vector, valueCount);
    FieldVector dataVector = vector.getDataVector();
    int dataVectorLength = dataVector == null ? 0 : dataVector.getValueCount();
    // Multiply in long: valueCount * listSize can exceed Integer.MAX_VALUE.
    long minElementCount = (long) valueCount * vector.getListSize();
    validateOrThrow(
        dataVectorLength >= minElementCount,
        "Inner vector does not contain enough elements. Minimum element count %s, actual element count %s.",
        minElementCount,
        dataVectorLength);
    if (dataVector != null) {
      dataVector.accept(this, null);
    }
    return null;
  }

  /**
   * Validates the validity and offset buffers of a large list vector, checks that the inner
   * vector contains at least as many elements as the last offset, and then validates the inner
   * vector with this visitor. A missing inner vector is treated as having zero elements.
   *
   * @param vector the vector to validate
   * @param value unused
   * @return always {@code null}
   */
  @Override
  public Void visit(LargeListVector vector, Void value) {
    int valueCount = vector.getValueCount();
    validateVectorCommon(vector);
    validateValidityBuffer(vector, valueCount);
    long minOffsetCapacity =
        valueCount == 0 ? 0L : (valueCount + 1L) * LargeListVector.OFFSET_WIDTH;
    validateOffsetBuffer(vector, minOffsetCapacity);

    FieldVector dataVector = vector.getDataVector();
    // Compute the byte index in long so it cannot wrap around for large value counts.
    long lastOffset =
        valueCount == 0
            ? 0L
            : vector.getOffsetBuffer().getLong((long) valueCount * LargeListVector.OFFSET_WIDTH);
    int dataVectorLength = dataVector == null ? 0 : dataVector.getValueCount();
    // The reported minimum matches the enforced condition: lastOffset elements are required.
    validateOrThrow(
        dataVectorLength >= lastOffset,
        "Inner vector does not contain enough elements. Minimum element count %s, actual element count %s",
        lastOffset,
        dataVectorLength);

    if (dataVector != null) {
      dataVector.accept(this, null);
    }
    return null;
  }

  /**
   * Validates the validity buffer of a struct vector, requires every child vector to have the
   * same value count as the struct, and validates each child with this visitor.
   *
   * @param vector the vector to validate
   * @param value unused
   * @return always {@code null}
   */
  @Override
  public Void visit(NonNullableStructVector vector, Void value) {
    int valueCount = vector.getValueCount();
    validateVectorCommon(vector);
    validateValidityBuffer(vector, valueCount);
    for (ValueVector subVector : vector.getChildrenFromFields()) {
      validateOrThrow(
          valueCount == subVector.getValueCount(),
          "Struct vector length not equal to child vector length. Struct vector length %s, child vector length %s",
          valueCount,
          subVector.getValueCount());
      subVector.accept(this, null);
    }
    return null;
  }

  /**
   * Validates the type buffer of a union vector, requires every child vector to have the same
   * value count as the union, and validates each child with this visitor.
   *
   * @param vector the vector to validate
   * @param value unused
   * @return always {@code null}
   */
  @Override
  public Void visit(UnionVector vector, Void value) {
    int valueCount = vector.getValueCount();
    validateVectorCommon(vector);
    validateTypeBuffer(vector.getTypeBuffer(), (long) valueCount * UnionVector.TYPE_WIDTH);
    for (ValueVector subVector : vector.getChildrenFromFields()) {
      validateOrThrow(
          valueCount == subVector.getValueCount(),
          "Union vector length not equal to child vector length. Union vector length %s, child vector length %s",
          valueCount,
          subVector.getValueCount());
      subVector.accept(this, null);
    }
    return null;
  }

  /**
   * Validates the offset and type buffers of a dense union vector and validates each child with
   * this visitor. Unlike the other nested visitors, child value counts are not compared against
   * the union's value count here.
   *
   * @param vector the vector to validate
   * @param value unused
   * @return always {@code null}
   */
  @Override
  public Void visit(DenseUnionVector vector, Void value) {
    int valueCount = vector.getValueCount();
    validateVectorCommon(vector);
    validateOffsetBuffer(vector, (long) valueCount * DenseUnionVector.OFFSET_WIDTH);
    validateTypeBuffer(vector.getTypeBuffer(), (long) valueCount * DenseUnionVector.TYPE_WIDTH);
    for (ValueVector subVector : vector.getChildrenFromFields()) {
      subVector.accept(this, null);
    }
    return null;
  }

  /**
   * Performs no checks for a null vector.
   *
   * @param vector the vector to validate
   * @param value unused
   * @return always {@code null}
   */
  @Override
  public Void visit(NullVector vector, Void value) {
    return null;
  }

  /**
   * Validates an extension type vector by delegating to its underlying vector.
   *
   * @param vector the vector to validate
   * @param value passed through to the underlying vector's visit
   * @return always {@code null}
   */
  @Override
  public Void visit(ExtensionTypeVector<?> vector, Void value) {
    vector.getUnderlyingVector().accept(this, value);
    return null;
  }
}