ValueVectorUtility.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.arrow.vector.util;
import static org.apache.arrow.vector.validate.ValidateUtil.validateOrThrow;
import java.util.function.BiFunction;
import org.apache.arrow.util.Preconditions;
import org.apache.arrow.vector.BaseFixedWidthVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.validate.ValidateVectorBufferVisitor;
import org.apache.arrow.vector.validate.ValidateVectorDataVisitor;
import org.apache.arrow.vector.validate.ValidateVectorTypeVisitor;
/** Utility methods for {@link ValueVector}. */
public class ValueVectorUtility {

  /**
   * Number of values rendered at each end of the range by {@code getToString}; a range holding
   * more than twice this many values has its middle replaced by an ellipsis.
   */
  private static final int TO_STRING_WINDOW = 10;

  private ValueVectorUtility() {}

  /**
   * Get the toString() representation of vector suitable for debugging. Note since vectors may
   * have millions of values, this method only shows max 20 values. Each value is rendered via
   * {@link ValueVector#getObject(int)}. Examples as below (v represents value):
   *
   * <ul>
   *   <li>vector with 0 value: []
   *   <li>vector with 5 values (no more than 20 values): [v0, v1, v2, v3, v4]
   *   <li>vector with 100 values (more than 20 values): [v0, v1, v2, v3, v4, v5, v6, v7, v8, v9,
   *       ... v90, v91, v92, v93, v94, v95, v96, v97, v98, v99] (note that no comma follows the
   *       ellipsis)
   * </ul>
   *
   * @param vector the vector for which to get toString representation.
   * @param start the starting index, inclusive.
   * @param end the end index, exclusive.
   * @return the bracketed, comma-separated rendering of the requested range.
   * @throws NullPointerException if {@code vector} is null.
   * @throws IllegalArgumentException if the range is not within {@code [0, valueCount]} or {@code
   *     end < start}.
   */
  public static <V extends ValueVector> String getToString(V vector, int start, int end) {
    return getToString(vector, start, end, (v, i) -> v.getObject(i));
  }

  /**
   * Get the toString() representation of vector suitable for debugging. Note since vectors may
   * have millions of values, this method only shows at most 20 values: the first 10 and the last
   * 10 of the range, with the elided middle rendered as {@code ...} (followed by a space but no
   * comma).
   *
   * @param vector the vector for which to get toString representation.
   * @param start the starting index, inclusive.
   * @param end the end index, exclusive.
   * @param valueToString the function to transform individual elements to strings.
   * @return the bracketed, comma-separated rendering of the requested range; {@code "[]"} when the
   *     range is empty.
   * @throws NullPointerException if {@code vector} is null.
   * @throws IllegalArgumentException if {@code start} is negative, {@code end} is less than {@code
   *     start}, or {@code end} exceeds the vector's value count.
   */
  public static <V extends ValueVector> String getToString(
      V vector, int start, int end, BiFunction<V, Integer, Object> valueToString) {
    Preconditions.checkNotNull(vector);
    Preconditions.checkArgument(start >= 0, "start must be non-negative, got %s", start);
    // Compare the bounds directly: computing end - start can overflow for a negative end and
    // would let an invalid range slip through the check.
    Preconditions.checkArgument(
        end >= start, "end (%s) must not be less than start (%s)", end, start);
    Preconditions.checkArgument(
        end <= vector.getValueCount(),
        "end (%s) must not exceed the value count (%s)",
        end,
        vector.getValueCount());

    if (start == end) {
      return "[]";
    }

    StringBuilder sb = new StringBuilder();
    sb.append('[');
    for (int i = start; i < end; i++) {
      // True only for the first index past the head window when a tail window still lies ahead.
      final boolean elided = i - start >= TO_STRING_WINDOW && i < end - TO_STRING_WINDOW;
      if (elided) {
        sb.append("...");
        // Jump so that the next iteration lands on the first index of the tail window.
        i = end - TO_STRING_WINDOW - 1;
      } else {
        sb.append(valueToString.apply(vector, i));
      }

      if (i == end - 1) {
        sb.append(']');
      } else {
        // The ellipsis is separated from the tail by a space only, never by a comma.
        if (!elided) {
          sb.append(',');
        }
        sb.append(' ');
      }
    }
    return sb.toString();
  }

  /**
   * Utility to validate vector in O(1) time. Runs a {@link ValidateVectorTypeVisitor} followed by
   * a {@link ValidateVectorBufferVisitor} over the vector.
   *
   * @param vector the vector to validate.
   * @throws NullPointerException if {@code vector} is null.
   */
  public static void validate(ValueVector vector) {
    Preconditions.checkNotNull(vector);

    ValidateVectorTypeVisitor typeVisitor = new ValidateVectorTypeVisitor();
    vector.accept(typeVisitor, null);

    ValidateVectorBufferVisitor bufferVisitor = new ValidateVectorBufferVisitor();
    vector.accept(bufferVisitor, null);
  }

  /**
   * Utility to validate vector in O(n) time, where n is the value count. Performs the checks of
   * {@link #validate(ValueVector)} and then runs a {@link ValidateVectorDataVisitor} over the
   * vector.
   *
   * @param vector the vector to validate.
   * @throws NullPointerException if {@code vector} is null.
   */
  public static void validateFull(ValueVector vector) {
    validate(vector);

    ValidateVectorDataVisitor dataVisitor = new ValidateVectorDataVisitor();
    vector.accept(dataVisitor, null);
  }

  /**
   * Utility to validate vector schema root in O(1) time. Checks that the row count is
   * non-negative, that every field vector has the same value count as the root, and validates
   * each field vector with {@link #validate(ValueVector)}.
   *
   * @param root the vector schema root to validate.
   * @throws NullPointerException if {@code root} is null.
   */
  public static void validate(VectorSchemaRoot root) {
    validateRoot(root, false);
  }

  /**
   * Utility to validate vector schema root in O(n) time, where n is the value count. Performs the
   * same row-count checks as {@link #validate(VectorSchemaRoot)} but validates each field vector
   * with {@link #validateFull(ValueVector)}.
   *
   * @param root the vector schema root to validate.
   * @throws NullPointerException if {@code root} is null.
   */
  public static void validateFull(VectorSchemaRoot root) {
    validateRoot(root, true);
  }

  /**
   * Shared implementation of the schema-root validators: checks the row count, then checks and
   * validates each field vector in order.
   *
   * @param root the vector schema root to validate.
   * @param full whether each field vector gets the full (data-level) validation.
   */
  private static void validateRoot(VectorSchemaRoot root, boolean full) {
    Preconditions.checkNotNull(root);
    int valueCount = root.getRowCount();
    validateOrThrow(
        valueCount >= 0, "The row count of vector schema root %s is negative.", valueCount);
    for (ValueVector childVec : root.getFieldVectors()) {
      validateOrThrow(
          valueCount == childVec.getValueCount(),
          "Child vector and vector schema root have different value counts. "
              + "Child vector value count %s, vector schema root value count %s",
          childVec.getValueCount(),
          valueCount);
      if (full) {
        validateFull(childVec);
      } else {
        validate(childVec);
      }
    }
  }

  /**
   * Pre allocate memory for BaseFixedWidthVector. Calls {@code allocateNew(targetSize)} on every
   * field vector of the root that is a {@link BaseFixedWidthVector}; all other field vectors are
   * left untouched.
   *
   * @param root the vector schema root whose fixed-width field vectors are allocated.
   * @param targetSize the size passed to {@code allocateNew} for each fixed-width vector.
   * @throws NullPointerException if {@code root} is null.
   */
  public static void preAllocate(VectorSchemaRoot root, int targetSize) {
    Preconditions.checkNotNull(root);
    for (ValueVector vector : root.getFieldVectors()) {
      if (vector instanceof BaseFixedWidthVector) {
        ((BaseFixedWidthVector) vector).allocateNew(targetSize);
      }
    }
  }

  /**
   * Ensure capacity for BaseFixedWidthVector. For every field vector of the root that is a {@link
   * BaseFixedWidthVector}, calls {@code reAlloc()} repeatedly until its value capacity is at least
   * {@code targetCapacity}; all other field vectors are left untouched.
   *
   * @param root the vector schema root whose fixed-width field vectors are grown.
   * @param targetCapacity the minimum value capacity each fixed-width vector must reach.
   * @throws NullPointerException if {@code root} is null.
   */
  public static void ensureCapacity(VectorSchemaRoot root, int targetCapacity) {
    Preconditions.checkNotNull(root);
    for (ValueVector vector : root.getFieldVectors()) {
      if (vector instanceof BaseFixedWidthVector) {
        // NOTE(review): relies on reAlloc() growing the capacity (or throwing) on each call.
        while (vector.getValueCapacity() < targetCapacity) {
          vector.reAlloc();
        }
      }
    }
  }
}