TestVectorReAlloc.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.arrow.vector;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import java.nio.charset.StandardCharsets;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.complex.DenseUnionVector;
import org.apache.arrow.vector.complex.FixedSizeListVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.StructVector;
import org.apache.arrow.vector.complex.UnionVector;
import org.apache.arrow.vector.complex.impl.NullableStructWriter;
import org.apache.arrow.vector.complex.impl.UnionFixedSizeListWriter;
import org.apache.arrow.vector.complex.impl.UnionListWriter;
import org.apache.arrow.vector.holders.NullableIntHolder;
import org.apache.arrow.vector.types.Types.MinorType;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.util.DataSizeRoundingUtil;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
public class TestVectorReAlloc {
private BufferAllocator allocator;
@BeforeEach
public void init() {
allocator = new RootAllocator(Long.MAX_VALUE);
}
@AfterEach
public void terminate() throws Exception {
allocator.close();
}
@Test
public void testFixedType() {
try (final UInt4Vector vector = new UInt4Vector("", allocator)) {
vector.setInitialCapacity(512);
vector.allocateNew();
assertTrue(vector.getValueCapacity() >= 512);
int initialCapacity = vector.getValueCapacity();
try {
vector.set(initialCapacity, 0);
fail("Expected out of bounds exception");
} catch (Exception e) {
// ok
}
vector.reAlloc();
assertTrue(vector.getValueCapacity() >= 2 * initialCapacity);
vector.set(initialCapacity, 100);
assertEquals(100, vector.get(initialCapacity));
}
}
@Test
public void testNullableType() {
try (final VarCharVector vector = new VarCharVector("", allocator)) {
vector.setInitialCapacity(512);
vector.allocateNew();
assertTrue(vector.getValueCapacity() >= 512);
int initialCapacity = vector.getValueCapacity();
try {
vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8));
fail("Expected out of bounds exception");
} catch (Exception e) {
// ok
}
vector.reAlloc();
assertTrue(vector.getValueCapacity() >= 2 * initialCapacity);
vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8));
assertEquals(new String(vector.get(initialCapacity), StandardCharsets.UTF_8), "foo");
}
}
@Test
public void testListType() {
try (final ListVector vector = ListVector.empty("", allocator)) {
vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));
vector.setInitialCapacity(512);
vector.allocateNew();
assertEquals(512, vector.getValueCapacity());
try {
vector.getInnerValueCountAt(2014);
fail("Expected out of bounds exception");
} catch (Exception e) {
// ok
}
vector.reAlloc();
assertEquals(1024, vector.getValueCapacity());
assertEquals(0, vector.getOffsetBuffer().getInt(2014 * ListVector.OFFSET_WIDTH));
}
}
@Test
public void testStructType() {
try (final StructVector vector = StructVector.empty("", allocator)) {
vector.addOrGet("", FieldType.nullable(MinorType.INT.getType()), IntVector.class);
vector.setInitialCapacity(512);
vector.allocateNew();
assertEquals(512, vector.getValueCapacity());
try {
vector.getObject(513);
fail("Expected out of bounds exception");
} catch (Exception e) {
// ok
}
vector.reAlloc();
assertEquals(1024, vector.getValueCapacity());
assertNull(vector.getObject(513));
}
}
@Test
public void testVariableWidthTypeSetNullValues() {
// Test ARROW-11223 bug is fixed
try (final BaseVariableWidthVector v1 = new VarCharVector("var1", allocator)) {
v1.setInitialCapacity(512);
v1.allocateNew();
int numNullValues1 = v1.getValueCapacity() + 1;
for (int i = 0; i < numNullValues1; i++) {
v1.setNull(i);
}
assertTrue(v1.getBufferSizeFor(numNullValues1) > 0);
}
try (final BaseLargeVariableWidthVector v2 = new LargeVarCharVector("var2", allocator)) {
v2.setInitialCapacity(512);
v2.allocateNew();
int numNullValues2 = v2.getValueCapacity() + 1;
for (int i = 0; i < numNullValues2; i++) {
v2.setNull(i);
}
assertTrue(v2.getBufferSizeFor(numNullValues2) > 0);
}
}
@Test
public void testFixedAllocateAfterReAlloc() throws Exception {
try (final IntVector vector = new IntVector("", allocator)) {
/*
* Allocate the default size, and then, reAlloc. This should double the allocation.
*/
vector.allocateNewSafe(); // Initial allocation
vector.reAlloc(); // Double the allocation size.
int savedValueCapacity = vector.getValueCapacity();
/*
* Clear and allocate again.
*/
vector.clear();
vector.allocateNewSafe();
/*
* Verify that the buffer sizes haven't changed.
*/
assertEquals(vector.getValueCapacity(), savedValueCapacity);
}
}
@Test
public void testVariableAllocateAfterReAlloc() throws Exception {
try (final VarCharVector vector = new VarCharVector("", allocator)) {
/*
* Allocate the default size, and then, reAlloc. This should double the allocation.
*/
vector.allocateNewSafe(); // Initial allocation
vector.reAlloc(); // Double the allocation size.
int savedValueCapacity = vector.getValueCapacity();
long savedValueBufferSize = vector.valueBuffer.capacity();
/*
* Clear and allocate again.
*/
vector.clear();
vector.allocateNewSafe();
/*
* Verify that the buffer sizes haven't changed.
*/
assertEquals(vector.getValueCapacity(), savedValueCapacity);
assertEquals(vector.valueBuffer.capacity(), savedValueBufferSize);
}
}
@Test
public void testLargeVariableAllocateAfterReAlloc() throws Exception {
try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) {
/*
* Allocate the default size, and then, reAlloc. This should double the allocation.
*/
vector.allocateNewSafe(); // Initial allocation
vector.reAlloc(); // Double the allocation size.
int savedValueCapacity = vector.getValueCapacity();
long savedValueBufferSize = vector.valueBuffer.capacity();
/*
* Clear and allocate again.
*/
vector.clear();
vector.allocateNewSafe();
/*
* Verify that the buffer sizes haven't changed.
*/
assertEquals(vector.getValueCapacity(), savedValueCapacity);
assertEquals(vector.valueBuffer.capacity(), savedValueBufferSize);
}
}
@Test
public void testVarCharAllocateNew() throws Exception {
final int count = 6000;
try (final VarCharVector vector = new VarCharVector("", allocator)) {
vector.allocateNew(count);
// verify that the validity buffer and value buffer have capacity for at least 'count'
// elements.
assertTrue(
vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count));
assertTrue(
vector.getOffsetBuffer().capacity()
>= (count + 1) * BaseVariableWidthVector.OFFSET_WIDTH);
}
}
@Test
public void testLargeVarCharAllocateNew() throws Exception {
final int count = 6000;
try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) {
vector.allocateNew(count);
// verify that the validity buffer and value buffer have capacity for at least 'count'
// elements.
assertTrue(
vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count));
assertTrue(
vector.getOffsetBuffer().capacity()
>= (count + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH);
}
}
@Test
public void testVarCharAllocateNewUsingHelper() throws Exception {
final int count = 6000;
try (final VarCharVector vector = new VarCharVector("", allocator)) {
AllocationHelper.allocateNew(vector, count);
// verify that the validity buffer and value buffer have capacity for at least 'count'
// elements.
assertTrue(
vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count));
assertTrue(
vector.getOffsetBuffer().capacity()
>= (count + 1) * BaseVariableWidthVector.OFFSET_WIDTH);
}
}
@Test
public void testLargeVarCharAllocateNewUsingHelper() throws Exception {
final int count = 6000;
try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) {
AllocationHelper.allocateNew(vector, count);
// verify that the validity buffer and value buffer have capacity for at least 'count'
// elements.
assertTrue(
vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count));
assertTrue(
vector.getOffsetBuffer().capacity()
>= (count + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH);
}
}
@Test
public void testFixedRepeatedClearAndSet() throws Exception {
try (final IntVector vector = new IntVector("", allocator)) {
vector.allocateNewSafe(); // Initial allocation
vector.clear(); // clear vector.
vector.setSafe(0, 10);
int savedValueCapacity = vector.getValueCapacity();
for (int i = 0; i < 1024; ++i) {
vector.clear(); // clear vector.
vector.setSafe(0, 10);
}
// should be deterministic, and not cause a run-away increase in capacity.
assertEquals(vector.getValueCapacity(), savedValueCapacity);
}
}
@Test
public void testVariableRepeatedClearAndSet() throws Exception {
try (final VarCharVector vector = new VarCharVector("", allocator)) {
vector.allocateNewSafe(); // Initial allocation
vector.clear(); // clear vector.
vector.setSafe(0, "hello world".getBytes(StandardCharsets.UTF_8));
int savedValueCapacity = vector.getValueCapacity();
for (int i = 0; i < 1024; ++i) {
vector.clear(); // clear vector.
vector.setSafe(0, "hello world".getBytes(StandardCharsets.UTF_8));
}
// should be deterministic, and not cause a run-away increase in capacity.
assertEquals(vector.getValueCapacity(), savedValueCapacity);
}
}
@Test
public void testRepeatedValueVectorClearAndSet() throws Exception {
try (final ListVector vector =
new ListVector("", allocator, FieldType.nullable(MinorType.INT.getType()), null)) {
vector.allocateNewSafe(); // Initial allocation
UnionListWriter writer = vector.getWriter();
vector.clear(); // clear vector.
writer.setPosition(0); // optional
writer.startList();
writer.writeInt(0);
writer.endList();
int savedValueCapacity = vector.getValueCapacity();
for (int i = 0; i < 1024; ++i) {
vector.clear(); // clear vector.
writer.setPosition(0); // optional
writer.startList();
writer.writeInt(i);
writer.endList();
}
// should be deterministic, and not cause a run-away increase in capacity.
assertEquals(vector.getValueCapacity(), savedValueCapacity);
}
}
@Test
public void testStructVectorClearAndSet() throws Exception {
try (final StructVector vector = StructVector.empty("v", allocator)) {
vector.allocateNewSafe(); // Initial allocation
NullableStructWriter writer = vector.getWriter();
vector.clear(); // clear vector.
writer.setPosition(0); // optional
writer.start();
writer.integer("int").writeInt(0);
writer.end();
int savedValueCapacity = vector.getValueCapacity();
for (int i = 0; i < 1024; ++i) {
vector.clear(); // clear vector.
writer.setPosition(0); // optional
writer.start();
writer.integer("int").writeInt(i);
writer.end();
}
// should be deterministic, and not cause a run-away increase in capacity.
assertEquals(vector.getValueCapacity(), savedValueCapacity);
}
}
@Test
public void testFixedSizeListVectorClearAndSet() {
try (final FixedSizeListVector vector =
new FixedSizeListVector(
"", allocator, FieldType.nullable(new ArrowType.FixedSizeList(2)), null)) {
vector.allocateNewSafe(); // Initial allocation
UnionFixedSizeListWriter writer = vector.getWriter();
vector.clear(); // clear vector.
writer.setPosition(0); // optional
writer.startList();
writer.writeInt(0);
writer.writeInt(1);
writer.endList();
int savedValueCapacity = vector.getValueCapacity();
for (int i = 0; i < 1024; ++i) {
vector.clear(); // clear vector.
writer.setPosition(0); // optional
writer.startList();
writer.writeInt(i);
writer.writeInt(i + 1);
writer.endList();
}
// should be deterministic, and not cause a run-away increase in capacity.
assertEquals(vector.getValueCapacity(), savedValueCapacity);
}
}
@Test
public void testUnionVectorClearAndSet() {
try (final UnionVector vector =
new UnionVector("", allocator, /* field type */ null, /* call-back */ null)) {
vector.allocateNewSafe(); // Initial allocation
NullableIntHolder holder = new NullableIntHolder();
holder.isSet = 1;
holder.value = 1;
vector.clear(); // clear vector.
vector.setType(0, MinorType.INT);
vector.setSafe(0, holder);
int savedValueCapacity = vector.getValueCapacity();
for (int i = 0; i < 1024; ++i) {
vector.clear(); // clear vector.
vector.setType(0, MinorType.INT);
vector.setSafe(0, holder);
}
// should be deterministic, and not cause a run-away increase in capacity.
assertEquals(vector.getValueCapacity(), savedValueCapacity);
}
}
@Test
public void testDenseUnionVectorClearAndSet() {
try (final DenseUnionVector vector = new DenseUnionVector("", allocator, null, null)) {
vector.allocateNewSafe(); // Initial allocation
NullableIntHolder holder = new NullableIntHolder();
holder.isSet = 1;
holder.value = 1;
byte intTypeId = vector.registerNewTypeId(Field.nullable("", MinorType.INT.getType()));
vector.clear();
vector.setTypeId(0, intTypeId);
vector.setSafe(0, holder);
int savedValueCapacity = vector.getValueCapacity();
for (int i = 0; i < 1024; ++i) {
vector.clear();
vector.setTypeId(0, intTypeId);
vector.setSafe(0, holder);
}
// should be deterministic, and not cause a run-away increase in capacity.
assertEquals(vector.getValueCapacity(), savedValueCapacity);
}
}
}