TestLargeVarCharVector.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.arrow.vector;

import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.OutOfMemoryException;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.holders.NullableLargeVarCharHolder;
import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.Schema;
import org.apache.arrow.vector.util.OversizedAllocationException;
import org.apache.arrow.vector.util.Text;
import org.apache.arrow.vector.util.TransferPair;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

public class TestLargeVarCharVector {

  private static final byte[] STR1 = "AAAAA1".getBytes(StandardCharsets.UTF_8);
  private static final byte[] STR2 = "BBBBBBBBB2".getBytes(StandardCharsets.UTF_8);
  private static final byte[] STR3 = "CCCC3".getBytes(StandardCharsets.UTF_8);
  private static final byte[] STR4 = "DDDDDDDD4".getBytes(StandardCharsets.UTF_8);
  private static final byte[] STR5 = "EEE5".getBytes(StandardCharsets.UTF_8);
  private static final byte[] STR6 = "FFFFF6".getBytes(StandardCharsets.UTF_8);

  private BufferAllocator allocator;

  @BeforeEach
  public void prepare() {
    allocator = new RootAllocator(Integer.MAX_VALUE);
  }

  @AfterEach
  public void shutdown() {
    allocator.close();
  }

  @Test
  public void testTransfer() {
    try (BufferAllocator childAllocator1 = allocator.newChildAllocator("child1", 1000000, 1000000);
        BufferAllocator childAllocator2 = allocator.newChildAllocator("child2", 1000000, 1000000);
        LargeVarCharVector v1 = new LargeVarCharVector("v1", childAllocator1);
        LargeVarCharVector v2 = new LargeVarCharVector("v2", childAllocator2); ) {
      v1.allocateNew();
      v1.setSafe(4094, "hello world".getBytes(StandardCharsets.UTF_8), 0, 11);
      v1.setValueCount(4001);

      long memoryBeforeTransfer = childAllocator1.getAllocatedMemory();

      v1.makeTransferPair(v2).transfer();

      assertEquals(0, childAllocator1.getAllocatedMemory());
      assertEquals(memoryBeforeTransfer, childAllocator2.getAllocatedMemory());
    }
  }

  @Test
  public void testCopyValueSafe() {
    try (final LargeVarCharVector largeVarCharVector =
            new LargeVarCharVector("myvector", allocator);
        final LargeVarCharVector newLargeVarCharVector =
            new LargeVarCharVector("newvector", allocator)) {
      largeVarCharVector.allocateNew(10000, 1000);

      final int valueCount = 500;
      populateLargeVarcharVector(largeVarCharVector, valueCount, null);

      final TransferPair tp = largeVarCharVector.makeTransferPair(newLargeVarCharVector);

      // new vector memory is not pre-allocated, we expect copyValueSafe work fine.
      for (int i = 0; i < valueCount; i++) {
        tp.copyValueSafe(i, i);
      }
      newLargeVarCharVector.setValueCount(valueCount);

      for (int i = 0; i < valueCount; i++) {
        final boolean expectedSet = (i % 3) == 0;
        if (expectedSet) {
          assertFalse(largeVarCharVector.isNull(i));
          assertFalse(newLargeVarCharVector.isNull(i));
          assertArrayEquals(largeVarCharVector.get(i), newLargeVarCharVector.get(i));
        } else {
          assertTrue(newLargeVarCharVector.isNull(i));
        }
      }
    }
  }

  @Test
  public void testSplitAndTransferNon() {
    try (final LargeVarCharVector largeVarCharVector =
        new LargeVarCharVector("myvector", allocator)) {

      largeVarCharVector.allocateNew(10000, 1000);
      final int valueCount = 500;
      populateLargeVarcharVector(largeVarCharVector, valueCount, null);

      final TransferPair tp = largeVarCharVector.getTransferPair(allocator);
      try (LargeVarCharVector newLargeVarCharVector = (LargeVarCharVector) tp.getTo()) {

        tp.splitAndTransfer(0, 0);
        assertEquals(0, newLargeVarCharVector.getValueCount());
      }
    }
  }

  @Test
  public void testSplitAndTransferAll() {
    try (final LargeVarCharVector largeVarCharVector =
        new LargeVarCharVector("myvector", allocator)) {

      largeVarCharVector.allocateNew(10000, 1000);
      final int valueCount = 500;
      populateLargeVarcharVector(largeVarCharVector, valueCount, null);

      final TransferPair tp = largeVarCharVector.getTransferPair(allocator);
      try (LargeVarCharVector newLargeVarCharVector = (LargeVarCharVector) tp.getTo()) {

        tp.splitAndTransfer(0, valueCount);
        assertEquals(valueCount, newLargeVarCharVector.getValueCount());
      }
    }
  }

  @Test
  public void testInvalidStartIndex() {
    try (final LargeVarCharVector largeVarCharVector =
            new LargeVarCharVector("myvector", allocator);
        final LargeVarCharVector newLargeVarCharVector =
            new LargeVarCharVector("newvector", allocator)) {

      largeVarCharVector.allocateNew(10000, 1000);
      final int valueCount = 500;
      populateLargeVarcharVector(largeVarCharVector, valueCount, null);

      final TransferPair tp = largeVarCharVector.makeTransferPair(newLargeVarCharVector);

      IllegalArgumentException e =
          assertThrows(IllegalArgumentException.class, () -> tp.splitAndTransfer(valueCount, 10));

      assertEquals(
          "Invalid parameters startIndex: 500, length: 10 for valueCount: 500", e.getMessage());
    }
  }

  @Test
  public void testInvalidLength() {
    try (final LargeVarCharVector largeVarCharVector =
            new LargeVarCharVector("myvector", allocator);
        final LargeVarCharVector newLargeVarCharVector =
            new LargeVarCharVector("newvector", allocator)) {

      largeVarCharVector.allocateNew(10000, 1000);
      final int valueCount = 500;
      populateLargeVarcharVector(largeVarCharVector, valueCount, null);

      final TransferPair tp = largeVarCharVector.makeTransferPair(newLargeVarCharVector);

      IllegalArgumentException e =
          assertThrows(
              IllegalArgumentException.class, () -> tp.splitAndTransfer(0, valueCount * 2));

      assertEquals(
          "Invalid parameters startIndex: 0, length: 1000 for valueCount: 500", e.getMessage());
    }
  }

  @Test /* LargeVarCharVector */
  public void testSizeOfValueBuffer() {
    try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) {
      int valueCount = 100;
      int currentSize = 0;
      vector.setInitialCapacity(valueCount);
      vector.allocateNew();
      vector.setValueCount(valueCount);
      for (int i = 0; i < valueCount; i++) {
        currentSize += i;
        vector.setSafe(i, new byte[i]);
      }

      assertEquals(currentSize, vector.sizeOfValueBuffer());
    }
  }

  @Test
  public void testSetLastSetUsage() {
    final byte[] STR1 = "AAAAA1".getBytes(StandardCharsets.UTF_8);
    final byte[] STR2 = "BBBBBBBBB2".getBytes(StandardCharsets.UTF_8);
    final byte[] STR3 = "CCCC3".getBytes(StandardCharsets.UTF_8);
    final byte[] STR4 = "DDDDDDDD4".getBytes(StandardCharsets.UTF_8);
    final byte[] STR5 = "EEE5".getBytes(StandardCharsets.UTF_8);
    final byte[] STR6 = "FFFFF6".getBytes(StandardCharsets.UTF_8);

    try (final LargeVarCharVector vector = new LargeVarCharVector("myvector", allocator)) {
      vector.allocateNew(1024 * 10, 1024);

      setBytes(0, STR1, vector);
      setBytes(1, STR2, vector);
      setBytes(2, STR3, vector);
      setBytes(3, STR4, vector);
      setBytes(4, STR5, vector);
      setBytes(5, STR6, vector);

      /* Check current lastSet */
      assertEquals(-1, vector.getLastSet());

      /* Check the vector output */
      assertArrayEquals(STR1, vector.get(0));
      assertArrayEquals(STR2, vector.get(1));
      assertArrayEquals(STR3, vector.get(2));
      assertArrayEquals(STR4, vector.get(3));
      assertArrayEquals(STR5, vector.get(4));
      assertArrayEquals(STR6, vector.get(5));

      /*
       * If we don't do setLastSe(5) before setValueCount(), then the latter will corrupt
       * the value vector by filling in all positions [0,valuecount-1] will empty byte arrays.
       * Run the test by commenting out next line and we should see incorrect vector output.
       */
      vector.setLastSet(5);
      vector.setValueCount(20);

      /* Check current lastSet */
      assertEquals(19, vector.getLastSet());

      /* Check the vector output again */
      assertArrayEquals(STR1, vector.get(0));
      assertArrayEquals(STR2, vector.get(1));
      assertArrayEquals(STR3, vector.get(2));
      assertArrayEquals(STR4, vector.get(3));
      assertArrayEquals(STR5, vector.get(4));
      assertArrayEquals(STR6, vector.get(5));
      assertEquals(0, vector.getValueLength(6));
      assertEquals(0, vector.getValueLength(7));
      assertEquals(0, vector.getValueLength(8));
      assertEquals(0, vector.getValueLength(9));
      assertEquals(0, vector.getValueLength(10));
      assertEquals(0, vector.getValueLength(11));
      assertEquals(0, vector.getValueLength(12));
      assertEquals(0, vector.getValueLength(13));
      assertEquals(0, vector.getValueLength(14));
      assertEquals(0, vector.getValueLength(15));
      assertEquals(0, vector.getValueLength(16));
      assertEquals(0, vector.getValueLength(17));
      assertEquals(0, vector.getValueLength(18));
      assertEquals(0, vector.getValueLength(19));

      /* Check offsets */
      assertEquals(0, vector.offsetBuffer.getLong(0 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(6, vector.offsetBuffer.getLong(1 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(16, vector.offsetBuffer.getLong(2 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(21, vector.offsetBuffer.getLong(3 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(30, vector.offsetBuffer.getLong(4 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(34, vector.offsetBuffer.getLong(5 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(40, vector.offsetBuffer.getLong(6 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(40, vector.offsetBuffer.getLong(7 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(40, vector.offsetBuffer.getLong(8 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(40, vector.offsetBuffer.getLong(9 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(40, vector.offsetBuffer.getLong(10 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(40, vector.offsetBuffer.getLong(11 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(40, vector.offsetBuffer.getLong(12 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(40, vector.offsetBuffer.getLong(13 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(40, vector.offsetBuffer.getLong(14 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(40, vector.offsetBuffer.getLong(15 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(40, vector.offsetBuffer.getLong(16 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(40, vector.offsetBuffer.getLong(17 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(40, vector.offsetBuffer.getLong(18 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(40, vector.offsetBuffer.getLong(19 * BaseLargeVariableWidthVector.OFFSET_WIDTH));

      vector.set(19, STR6);
      assertArrayEquals(STR6, vector.get(19));
      assertEquals(40, vector.offsetBuffer.getLong(19 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(46, vector.offsetBuffer.getLong(20 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
    }
  }

  @Test
  public void testVectorAllocateNew() {
    assertThrows(
        OutOfMemoryException.class,
        () -> {
          try (RootAllocator smallAllocator = new RootAllocator(200);
              LargeVarCharVector vector = new LargeVarCharVector("vec", smallAllocator)) {
            vector.allocateNew();
          }
        });
  }

  @Test
  public void testLargeVariableVectorReallocation() {
    assertThrows(
        OversizedAllocationException.class,
        () -> {
          final LargeVarCharVector vector = new LargeVarCharVector("vector", allocator);
          // edge case 1: value count = MAX_VALUE_ALLOCATION
          final long expectedAllocationInBytes = BaseValueVector.MAX_ALLOCATION_SIZE;
          final int expectedOffsetSize = 10;
          try {
            vector.allocateNew(expectedAllocationInBytes, 10);
            assertTrue(expectedOffsetSize <= vector.getValueCapacity());
            assertTrue(expectedAllocationInBytes <= vector.getDataBuffer().capacity());
            vector.reAlloc();
            assertTrue(expectedOffsetSize * 2 <= vector.getValueCapacity());
            assertTrue(expectedAllocationInBytes * 2 <= vector.getDataBuffer().capacity());
          } finally {
            vector.close();
          }

          // common: value count < MAX_VALUE_ALLOCATION
          try {
            vector.allocateNew(BaseValueVector.MAX_ALLOCATION_SIZE / 2, 0);
            vector.reAlloc(); // value allocation reaches to MAX_VALUE_ALLOCATION
            vector.reAlloc(); // this tests if it overflows
          } finally {
            vector.close();
          }
        });
  }

  @Test
  public void testSplitAndTransfer() {
    try (final LargeVarCharVector largeVarCharVector =
        new LargeVarCharVector("myvector", allocator)) {
      largeVarCharVector.allocateNew(10000, 1000);

      final int valueCount = 500;
      final String[] compareArray = new String[valueCount];

      populateLargeVarcharVector(largeVarCharVector, valueCount, compareArray);

      final TransferPair tp = largeVarCharVector.getTransferPair(allocator);
      try (final LargeVarCharVector newLargeVarCharVector = (LargeVarCharVector) tp.getTo()) {
        final int[][] startLengths = {{0, 201}, {201, 0}, {201, 200}, {401, 99}};

        for (final int[] startLength : startLengths) {
          final int start = startLength[0];
          final int length = startLength[1];
          tp.splitAndTransfer(start, length);
          for (int i = 0; i < length; i++) {
            final boolean expectedSet = ((start + i) % 3) == 0;
            if (expectedSet) {
              final byte[] expectedValue = compareArray[start + i].getBytes(StandardCharsets.UTF_8);
              assertFalse(newLargeVarCharVector.isNull(i));
              assertArrayEquals(expectedValue, newLargeVarCharVector.get(i));
            } else {
              assertTrue(newLargeVarCharVector.isNull(i));
            }
          }
        }
      }
    }
  }

  @Test
  public void testReallocAfterVectorTransfer() {
    final byte[] STR1 = "AAAAA1".getBytes(StandardCharsets.UTF_8);
    final byte[] STR2 = "BBBBBBBBB2".getBytes(StandardCharsets.UTF_8);

    try (final LargeVarCharVector vector = new LargeVarCharVector("vector", allocator)) {
      /* 4096 values with 10 byte per record */
      vector.allocateNew(4096 * 10, 4096);
      int valueCapacity = vector.getValueCapacity();
      assertTrue(valueCapacity >= 4096);

      /* populate the vector */
      for (int i = 0; i < valueCapacity; i++) {
        if ((i & 1) == 1) {
          vector.set(i, STR1);
        } else {
          vector.set(i, STR2);
        }
      }

      /* Check the vector output */
      for (int i = 0; i < valueCapacity; i++) {
        if ((i & 1) == 1) {
          assertArrayEquals(STR1, vector.get(i));
        } else {
          assertArrayEquals(STR2, vector.get(i));
        }
      }

      /* trigger first realloc */
      vector.setSafe(valueCapacity, STR2, 0, STR2.length);
      assertTrue(vector.getValueCapacity() >= 2 * valueCapacity);
      while (vector.getByteCapacity() < 10 * vector.getValueCapacity()) {
        vector.reallocDataBuffer();
      }

      /* populate the remaining vector */
      for (int i = valueCapacity; i < vector.getValueCapacity(); i++) {
        if ((i & 1) == 1) {
          vector.set(i, STR1);
        } else {
          vector.set(i, STR2);
        }
      }

      /* Check the vector output */
      valueCapacity = vector.getValueCapacity();
      for (int i = 0; i < valueCapacity; i++) {
        if ((i & 1) == 1) {
          assertArrayEquals(STR1, vector.get(i));
        } else {
          assertArrayEquals(STR2, vector.get(i));
        }
      }

      /* trigger second realloc */
      vector.setSafe(valueCapacity + 10, STR2, 0, STR2.length);
      assertTrue(vector.getValueCapacity() >= 2 * valueCapacity);
      while (vector.getByteCapacity() < 10 * vector.getValueCapacity()) {
        vector.reallocDataBuffer();
      }

      /* populate the remaining vector */
      for (int i = valueCapacity; i < vector.getValueCapacity(); i++) {
        if ((i & 1) == 1) {
          vector.set(i, STR1);
        } else {
          vector.set(i, STR2);
        }
      }

      /* Check the vector output */
      valueCapacity = vector.getValueCapacity();
      for (int i = 0; i < valueCapacity; i++) {
        if ((i & 1) == 1) {
          assertArrayEquals(STR1, vector.get(i));
        } else {
          assertArrayEquals(STR2, vector.get(i));
        }
      }

      /* we are potentially working with 4x the size of vector buffer
       * that we initially started with. Now let's transfer the vector.
       */

      TransferPair transferPair = vector.getTransferPair(allocator);
      transferPair.transfer();
      try (LargeVarCharVector toVector = (LargeVarCharVector) transferPair.getTo()) {
        valueCapacity = toVector.getValueCapacity();

        for (int i = 0; i < valueCapacity; i++) {
          if ((i & 1) == 1) {
            assertArrayEquals(STR1, toVector.get(i));
          } else {
            assertArrayEquals(STR2, toVector.get(i));
          }
        }
      }
    }
  }

  @Test
  public void testVectorLoadUnload() {
    try (final LargeVarCharVector vector1 = new LargeVarCharVector("myvector", allocator)) {

      ValueVectorDataPopulator.setVector(vector1, STR1, STR2, STR3, STR4, STR5, STR6);

      assertEquals(5, vector1.getLastSet());
      vector1.setValueCount(15);
      assertEquals(14, vector1.getLastSet());

      /* Check the vector output */
      assertArrayEquals(STR1, vector1.get(0));
      assertArrayEquals(STR2, vector1.get(1));
      assertArrayEquals(STR3, vector1.get(2));
      assertArrayEquals(STR4, vector1.get(3));
      assertArrayEquals(STR5, vector1.get(4));
      assertArrayEquals(STR6, vector1.get(5));

      Field field = vector1.getField();
      String fieldName = field.getName();

      List<Field> fields = new ArrayList<>();
      List<FieldVector> fieldVectors = new ArrayList<>();

      fields.add(field);
      fieldVectors.add(vector1);

      Schema schema = new Schema(fields);

      VectorSchemaRoot schemaRoot1 =
          new VectorSchemaRoot(schema, fieldVectors, vector1.getValueCount());
      VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1);

      try (ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch();
          VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, allocator); ) {

        VectorLoader vectorLoader = new VectorLoader(schemaRoot2);
        vectorLoader.load(recordBatch);

        LargeVarCharVector vector2 = (LargeVarCharVector) schemaRoot2.getVector(fieldName);
        /*
         * lastSet would have internally been set by VectorLoader.load() when it invokes
         * loadFieldBuffers.
         */
        assertEquals(14, vector2.getLastSet());
        vector2.setValueCount(25);
        assertEquals(24, vector2.getLastSet());

        /* Check the vector output */
        assertArrayEquals(STR1, vector2.get(0));
        assertArrayEquals(STR2, vector2.get(1));
        assertArrayEquals(STR3, vector2.get(2));
        assertArrayEquals(STR4, vector2.get(3));
        assertArrayEquals(STR5, vector2.get(4));
        assertArrayEquals(STR6, vector2.get(5));
      }
    }
  }

  @Test
  public void testFillEmptiesUsage() {
    try (final LargeVarCharVector vector = new LargeVarCharVector("myvector", allocator)) {

      vector.allocateNew(1024 * 10, 1024);

      setBytes(0, STR1, vector);
      setBytes(1, STR2, vector);
      setBytes(2, STR3, vector);
      setBytes(3, STR4, vector);
      setBytes(4, STR5, vector);
      setBytes(5, STR6, vector);

      /* Check current lastSet */
      assertEquals(-1, vector.getLastSet());

      /* Check the vector output */
      assertArrayEquals(STR1, vector.get(0));
      assertArrayEquals(STR2, vector.get(1));
      assertArrayEquals(STR3, vector.get(2));
      assertArrayEquals(STR4, vector.get(3));
      assertArrayEquals(STR5, vector.get(4));
      assertArrayEquals(STR6, vector.get(5));

      vector.setLastSet(5);
      /* fill empty byte arrays from index [6, 9] */
      vector.fillEmpties(10);

      /* Check current lastSet */
      assertEquals(9, vector.getLastSet());

      /* Check the vector output */
      assertArrayEquals(STR1, vector.get(0));
      assertArrayEquals(STR2, vector.get(1));
      assertArrayEquals(STR3, vector.get(2));
      assertArrayEquals(STR4, vector.get(3));
      assertArrayEquals(STR5, vector.get(4));
      assertArrayEquals(STR6, vector.get(5));
      assertEquals(0, vector.getValueLength(6));
      assertEquals(0, vector.getValueLength(7));
      assertEquals(0, vector.getValueLength(8));
      assertEquals(0, vector.getValueLength(9));

      setBytes(10, STR1, vector);
      setBytes(11, STR2, vector);

      vector.setLastSet(11);
      /* fill empty byte arrays from index [12, 14] */
      vector.setValueCount(15);

      /* Check current lastSet */
      assertEquals(14, vector.getLastSet());

      /* Check the vector output */
      assertArrayEquals(STR1, vector.get(0));
      assertArrayEquals(STR2, vector.get(1));
      assertArrayEquals(STR3, vector.get(2));
      assertArrayEquals(STR4, vector.get(3));
      assertArrayEquals(STR5, vector.get(4));
      assertArrayEquals(STR6, vector.get(5));
      assertEquals(0, vector.getValueLength(6));
      assertEquals(0, vector.getValueLength(7));
      assertEquals(0, vector.getValueLength(8));
      assertEquals(0, vector.getValueLength(9));
      assertArrayEquals(STR1, vector.get(10));
      assertArrayEquals(STR2, vector.get(11));
      assertEquals(0, vector.getValueLength(12));
      assertEquals(0, vector.getValueLength(13));
      assertEquals(0, vector.getValueLength(14));

      /* Check offsets */
      assertEquals(0, vector.offsetBuffer.getLong(0 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(6, vector.offsetBuffer.getLong(1 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(16, vector.offsetBuffer.getLong(2 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(21, vector.offsetBuffer.getLong(3 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(30, vector.offsetBuffer.getLong(4 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(34, vector.offsetBuffer.getLong(5 * BaseLargeVariableWidthVector.OFFSET_WIDTH));

      assertEquals(40, vector.offsetBuffer.getLong(6 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(40, vector.offsetBuffer.getLong(7 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(40, vector.offsetBuffer.getLong(8 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(40, vector.offsetBuffer.getLong(9 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(40, vector.offsetBuffer.getLong(10 * BaseLargeVariableWidthVector.OFFSET_WIDTH));

      assertEquals(46, vector.offsetBuffer.getLong(11 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(56, vector.offsetBuffer.getLong(12 * BaseLargeVariableWidthVector.OFFSET_WIDTH));

      assertEquals(56, vector.offsetBuffer.getLong(13 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(56, vector.offsetBuffer.getLong(14 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
      assertEquals(56, vector.offsetBuffer.getLong(15 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
    }
  }

  @Test
  public void testGetBufferAddress1() {
    try (final LargeVarCharVector vector = new LargeVarCharVector("myvector", allocator)) {

      ValueVectorDataPopulator.setVector(vector, STR1, STR2, STR3, STR4, STR5, STR6);
      vector.setValueCount(15);

      /* check the vector output */
      assertArrayEquals(STR1, vector.get(0));
      assertArrayEquals(STR2, vector.get(1));
      assertArrayEquals(STR3, vector.get(2));
      assertArrayEquals(STR4, vector.get(3));
      assertArrayEquals(STR5, vector.get(4));
      assertArrayEquals(STR6, vector.get(5));

      List<ArrowBuf> buffers = vector.getFieldBuffers();
      long bitAddress = vector.getValidityBufferAddress();
      long offsetAddress = vector.getOffsetBufferAddress();
      long dataAddress = vector.getDataBufferAddress();

      assertEquals(3, buffers.size());
      assertEquals(bitAddress, buffers.get(0).memoryAddress());
      assertEquals(offsetAddress, buffers.get(1).memoryAddress());
      assertEquals(dataAddress, buffers.get(2).memoryAddress());
    }
  }

  @Test
  public void testSetNullableLargeVarCharHolder() {
    try (LargeVarCharVector vector = new LargeVarCharVector("", allocator)) {
      vector.allocateNew(100, 10);

      NullableLargeVarCharHolder nullHolder = new NullableLargeVarCharHolder();
      nullHolder.isSet = 0;

      NullableLargeVarCharHolder stringHolder = new NullableLargeVarCharHolder();
      stringHolder.isSet = 1;

      String str = "hello";
      ArrowBuf buf = allocator.buffer(16);
      buf.setBytes(0, str.getBytes(StandardCharsets.UTF_8));

      stringHolder.start = 0;
      stringHolder.end = str.length();
      stringHolder.buffer = buf;

      vector.set(0, nullHolder);
      vector.set(1, stringHolder);

      // verify results
      assertTrue(vector.isNull(0));
      assertEquals(str, new String(Objects.requireNonNull(vector.get(1)), StandardCharsets.UTF_8));

      buf.close();
    }
  }

  @Test
  public void testSetNullableLargeVarCharHolderSafe() {
    try (LargeVarCharVector vector = new LargeVarCharVector("", allocator)) {
      vector.allocateNew(5, 1);

      NullableLargeVarCharHolder nullHolder = new NullableLargeVarCharHolder();
      nullHolder.isSet = 0;

      NullableLargeVarCharHolder stringHolder = new NullableLargeVarCharHolder();
      stringHolder.isSet = 1;

      String str = "hello world";
      ArrowBuf buf = allocator.buffer(16);
      buf.setBytes(0, str.getBytes(StandardCharsets.UTF_8));

      stringHolder.start = 0;
      stringHolder.end = str.length();
      stringHolder.buffer = buf;

      vector.setSafe(0, stringHolder);
      vector.setSafe(1, nullHolder);

      // verify results
      assertEquals(str, new String(Objects.requireNonNull(vector.get(0)), StandardCharsets.UTF_8));
      assertTrue(vector.isNull(1));

      buf.close();
    }
  }

  @Test
  public void testGetNullFromLargeVariableWidthVector() {
    try (final LargeVarCharVector largeVarCharVector =
            new LargeVarCharVector("largevarcharvec", allocator);
        final LargeVarBinaryVector largeVarBinaryVector =
            new LargeVarBinaryVector("largevarbinary", allocator)) {
      largeVarCharVector.allocateNew(10, 1);
      largeVarBinaryVector.allocateNew(10, 1);

      largeVarCharVector.setNull(0);
      largeVarBinaryVector.setNull(0);

      assertNull(largeVarCharVector.get(0));
      assertNull(largeVarBinaryVector.get(0));
    }
  }

  @Test
  public void testLargeVariableWidthVectorNullHashCode() {
    try (LargeVarCharVector largeVarChVec =
        new LargeVarCharVector("large var char vector", allocator)) {
      largeVarChVec.allocateNew(100, 1);
      largeVarChVec.setValueCount(1);

      largeVarChVec.set(0, "abc".getBytes(StandardCharsets.UTF_8));
      largeVarChVec.setNull(0);

      assertEquals(0, largeVarChVec.hashCode(0));
    }
  }

  @Test
  public void testUnloadLargeVariableWidthVector() {
    try (final LargeVarCharVector largeVarCharVector =
        new LargeVarCharVector("var char", allocator)) {
      largeVarCharVector.allocateNew(5, 2);
      largeVarCharVector.setValueCount(2);

      largeVarCharVector.set(0, "abcd".getBytes(StandardCharsets.UTF_8));

      List<ArrowBuf> bufs = largeVarCharVector.getFieldBuffers();
      assertEquals(3, bufs.size());

      ArrowBuf offsetBuf = bufs.get(1);
      ArrowBuf dataBuf = bufs.get(2);

      assertEquals(24, offsetBuf.writerIndex());
      assertEquals(4, offsetBuf.getLong(8));
      assertEquals(4, offsetBuf.getLong(16));

      assertEquals(4, dataBuf.writerIndex());
    }
  }

  @Test
  public void testNullableType() {
    try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) {
      vector.setInitialCapacity(512);
      vector.allocateNew();

      assertTrue(vector.getValueCapacity() >= 512);
      int initialCapacity = vector.getValueCapacity();

      try {
        vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8));
        fail("Expected out of bounds exception");
      } catch (Exception e) {
        // ok
      }

      vector.reAlloc();
      assertTrue(vector.getValueCapacity() >= 2 * initialCapacity);

      vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8));
      assertEquals("foo", new String(vector.get(initialCapacity), StandardCharsets.UTF_8));
    }
  }

  @Test
  public void testGetTextRepeatedly() {
    try (final LargeVarCharVector vector = new LargeVarCharVector("myvector", allocator)) {

      ValueVectorDataPopulator.setVector(vector, STR1, STR2);
      vector.setValueCount(2);

      /* check the vector output */
      Text text = new Text();
      vector.read(0, text);
      byte[] result = new byte[(int) text.getLength()];
      System.arraycopy(text.getBytes(), 0, result, 0, (int) text.getLength());
      assertArrayEquals(STR1, result);
      vector.read(1, text);
      result = new byte[(int) text.getLength()];
      System.arraycopy(text.getBytes(), 0, result, 0, (int) text.getLength());
      assertArrayEquals(STR2, text.getBytes());
    }
  }

  @Test
  public void testGetTransferPairWithField() {
    try (BufferAllocator childAllocator1 = allocator.newChildAllocator("child1", 1000000, 1000000);
        LargeVarCharVector v1 = new LargeVarCharVector("v1", childAllocator1)) {
      v1.allocateNew();
      v1.setSafe(4094, "hello world".getBytes(StandardCharsets.UTF_8), 0, 11);
      v1.setValueCount(4001);

      TransferPair tp = v1.getTransferPair(v1.getField(), allocator);
      tp.transfer();
      LargeVarCharVector v2 = (LargeVarCharVector) tp.getTo();
      assertSame(v1.getField(), v2.getField());
      v2.clear();
    }
  }

  private void populateLargeVarcharVector(
      final LargeVarCharVector vector, int valueCount, String[] values) {
    for (int i = 0; i < valueCount; i += 3) {
      final String s = String.format("%010d", i);
      vector.set(i, s.getBytes(StandardCharsets.UTF_8));
      if (values != null) {
        values[i] = s;
      }
    }
    vector.setValueCount(valueCount);
  }

  public static void setBytes(int index, byte[] bytes, LargeVarCharVector vector) {
    final long currentOffset =
        vector.offsetBuffer.getLong((long) index * BaseLargeVariableWidthVector.OFFSET_WIDTH);

    BitVectorHelper.setBit(vector.validityBuffer, index);
    vector.offsetBuffer.setLong(
        (long) (index + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH,
        currentOffset + bytes.length);
    vector.valueBuffer.setBytes(currentOffset, bytes, 0, bytes.length);
  }
}