ValidateVectorDataVisitor.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.arrow.vector.validate;

import static org.apache.arrow.vector.validate.ValidateUtil.validateOrThrow;

import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.vector.BaseFixedWidthVector;
import org.apache.arrow.vector.BaseLargeVariableWidthVector;
import org.apache.arrow.vector.BaseVariableWidthVector;
import org.apache.arrow.vector.BaseVariableWidthViewVector;
import org.apache.arrow.vector.ExtensionTypeVector;
import org.apache.arrow.vector.NullVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.compare.VectorVisitor;
import org.apache.arrow.vector.complex.DenseUnionVector;
import org.apache.arrow.vector.complex.FixedSizeListVector;
import org.apache.arrow.vector.complex.LargeListVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.NonNullableStructVector;
import org.apache.arrow.vector.complex.UnionVector;

/**
 * Visitor that validates the data stored in a vector.
 *
 * <p>Depending on the vector kind it checks that offset buffers are non-negative and
 * non-decreasing, that union type ids are non-negative, that dense union offsets fall inside the
 * referenced child vector, and it delegates scalar checks to the vector's own {@code
 * validateScalars()}. Nested vectors are validated recursively by re-dispatching this visitor to
 * each child. Every failed check is reported through {@code ValidateUtil.validateOrThrow}.
 */
public class ValidateVectorDataVisitor implements VectorVisitor<Void, Void> {

  /**
   * Checks that the 32-bit offsets of {@code vector} are non-negative and non-decreasing.
   *
   * <p>Positions {@code 0..valueCount} (inclusive) of the offset buffer are inspected; nothing is
   * read when {@code valueCount} is zero.
   *
   * @param vector the vector whose offset buffer is validated
   * @param valueCount the number of values in the vector
   */
  private void validateOffsetBuffer(ValueVector vector, int valueCount) {
    if (valueCount == 0) {
      return;
    }
    ArrowBuf offsetBuffer = vector.getOffsetBuffer();

    // The first offset is the baseline for the monotonicity check below, so it must be validated
    // on its own; otherwise a negative start followed by valid values would go unnoticed.
    int prevValue = offsetBuffer.getInt(0);
    validateOrThrow(
        prevValue >= 0,
        "The value at position %s of the offset buffer is negative: %s.",
        0,
        prevValue);

    // verify that the values in the offset buffer are non-decreasing
    for (int i = 1; i <= valueCount; i++) {
      // Widen before multiplying so the byte index cannot overflow int for large value counts.
      int curValue = offsetBuffer.getInt((long) i * Integer.BYTES);
      validateOrThrow(
          curValue >= 0,
          "The value at position %s of the offset buffer is negative: %s.",
          i,
          curValue);
      validateOrThrow(
          curValue >= prevValue,
          "The values in positions %s and %s of the offset buffer are decreasing: %s, %s.",
          i - 1,
          i,
          prevValue,
          curValue);
      prevValue = curValue;
    }
  }

  /**
   * Checks that the 64-bit offsets of {@code vector} are non-negative and non-decreasing.
   *
   * <p>Positions {@code 0..valueCount} (inclusive) of the offset buffer are inspected; nothing is
   * read when {@code valueCount} is zero.
   *
   * @param vector the vector whose large offset buffer is validated
   * @param valueCount the number of values in the vector
   */
  private void validateLargeOffsetBuffer(ValueVector vector, int valueCount) {
    if (valueCount == 0) {
      return;
    }
    ArrowBuf offsetBuffer = vector.getOffsetBuffer();

    // The first offset is the baseline for the monotonicity check below, so it must be validated
    // on its own; otherwise a negative start followed by valid values would go unnoticed.
    long prevValue = offsetBuffer.getLong(0);
    validateOrThrow(
        prevValue >= 0L,
        "The value at position %s of the large offset buffer is negative: %s.",
        0,
        prevValue);

    // verify that the values in the large offset buffer are non-decreasing
    for (int i = 1; i <= valueCount; i++) {
      long curValue = offsetBuffer.getLong((long) i * Long.BYTES);
      validateOrThrow(
          curValue >= 0L,
          "The value at position %s of the large offset buffer is negative: %s.",
          i,
          curValue);
      validateOrThrow(
          curValue >= prevValue,
          "The values in positions %s and %s of the large offset buffer are decreasing: %s, %s.",
          i - 1,
          i,
          prevValue,
          curValue);
      prevValue = curValue;
    }
  }

  /**
   * Checks that the first {@code valueCount} type ids in {@code typeBuf} are non-negative.
   *
   * @param typeBuf the buffer holding one type id byte per value
   * @param valueCount the number of type ids to inspect
   */
  private void validateTypeBuffer(ArrowBuf typeBuf, int valueCount) {
    for (int i = 0; i < valueCount; i++) {
      byte typeId = typeBuf.getByte(i);
      validateOrThrow(typeId >= 0, "The type id at position %s is negative: %s.", i, typeId);
    }
  }

  /** Validates a fixed-width vector by delegating to its {@code validateScalars()}. */
  @Override
  public Void visit(BaseFixedWidthVector vector, Void value) {
    vector.validateScalars();
    return null;
  }

  /** Validates the 32-bit offsets of a variable-width vector, then its scalar values. */
  @Override
  public Void visit(BaseVariableWidthVector vector, Void value) {
    validateOffsetBuffer(vector, vector.getValueCount());
    vector.validateScalars();
    return null;
  }

  /** Validates the 64-bit offsets of a large variable-width vector, then its scalar values. */
  @Override
  public Void visit(BaseLargeVariableWidthVector vector, Void value) {
    validateLargeOffsetBuffer(vector, vector.getValueCount());
    vector.validateScalars();
    return null;
  }

  /**
   * View vectors cannot be validated by this visitor.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public Void visit(BaseVariableWidthViewVector vector, Void value) {
    throw new UnsupportedOperationException("View vectors are not supported.");
  }

  /** Validates the 32-bit offsets of a list vector, then recurses into its data vector. */
  @Override
  public Void visit(ListVector vector, Void value) {
    validateOffsetBuffer(vector, vector.getValueCount());
    ValueVector innerVector = vector.getDataVector();
    if (innerVector != null) {
      innerVector.accept(this, null);
    }
    return null;
  }

  /**
   * Validates a fixed-size list vector by recursing into its data vector.
   *
   * <p>No offset validation is performed: a fixed-size list derives element positions from the
   * list size and has no offset buffer to inspect.
   */
  @Override
  public Void visit(FixedSizeListVector vector, Void value) {
    ValueVector innerVector = vector.getDataVector();
    if (innerVector != null) {
      innerVector.accept(this, null);
    }
    return null;
  }

  /** Validates the 64-bit offsets of a large list vector, then recurses into its data vector. */
  @Override
  public Void visit(LargeListVector vector, Void value) {
    validateLargeOffsetBuffer(vector, vector.getValueCount());
    ValueVector innerVector = vector.getDataVector();
    if (innerVector != null) {
      innerVector.accept(this, null);
    }
    return null;
  }

  /** Validates a struct vector by recursing into each of its child vectors. */
  @Override
  public Void visit(NonNullableStructVector vector, Void value) {
    for (ValueVector subVector : vector.getChildrenFromFields()) {
      subVector.accept(this, null);
    }
    return null;
  }

  /** Validates the type ids of a union vector, then recurses into each child vector. */
  @Override
  public Void visit(UnionVector vector, Void value) {
    validateTypeBuffer(vector.getTypeBuffer(), vector.getValueCount());
    for (ValueVector subVector : vector.getChildrenFromFields()) {
      subVector.accept(this, null);
    }
    return null;
  }

  /**
   * Validates a dense union vector.
   *
   * <p>Type ids must be non-negative, every type id in use must resolve to a child vector, and
   * every offset must lie in {@code [0, childValueCount)} of that child. Each child vector is then
   * validated recursively.
   */
  @Override
  public Void visit(DenseUnionVector vector, Void value) {
    int valueCount = vector.getValueCount();
    validateTypeBuffer(vector.getTypeBuffer(), valueCount);

    // validate offset buffer
    for (int i = 0; i < valueCount; i++) {
      int offset = vector.getOffset(i);
      byte typeId = vector.getTypeId(i);
      ValueVector subVector = vector.getVectorByType(typeId);
      // Report an unresolved type id as a validation failure instead of dereferencing null below.
      validateOrThrow(
          subVector != null,
          "Dense union vector has no sub-vector for type id %s at position %s.",
          typeId,
          i);
      validateOrThrow(
          offset >= 0, "Dense union vector offset at position %s is negative: %s.", i, offset);
      validateOrThrow(
          offset < subVector.getValueCount(),
          "Dense union vector offset exceeds sub-vector boundary. Vector offset %s, sub vector size %s",
          offset,
          subVector.getValueCount());
    }

    for (ValueVector subVector : vector.getChildrenFromFields()) {
      subVector.accept(this, null);
    }
    return null;
  }

  /** Validates that the null count of a null vector equals its value count. */
  @Override
  public Void visit(NullVector vector, Void value) {
    validateOrThrow(
        vector.getNullCount() == vector.getValueCount(),
        "NullVector should have only null entries.");
    return null;
  }

  /** Validates an extension vector by validating its underlying storage vector. */
  @Override
  public Void visit(ExtensionTypeVector<?> vector, Void value) {
    vector.getUnderlyingVector().accept(this, null);
    return null;
  }
}