TestStructVector.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.arrow.vector;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertSame;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.complex.AbstractStructVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.StructVector;
import org.apache.arrow.vector.complex.UnionVector;
import org.apache.arrow.vector.complex.impl.NullableStructWriter;
import org.apache.arrow.vector.complex.writer.Float8Writer;
import org.apache.arrow.vector.complex.writer.IntWriter;
import org.apache.arrow.vector.holders.ComplexHolder;
import org.apache.arrow.vector.types.Types;
import org.apache.arrow.vector.types.Types.MinorType;
import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.util.TransferPair;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

public class TestStructVector {

  private BufferAllocator allocator;

  @BeforeEach
  public void init() {
    allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
  }

  @AfterEach
  public void terminate() throws Exception {
    allocator.close();
  }

  @Test
  public void testFieldMetadata() throws Exception {
    Map<String, String> metadata = new HashMap<>();
    metadata.put("k1", "v1");
    FieldType type = new FieldType(true, Struct.INSTANCE, null, metadata);
    try (StructVector vector = new StructVector("struct", allocator, type, null)) {
      assertEquals(vector.getField().getMetadata(), type.getMetadata());
    }
  }

  @Test
  public void testMakeTransferPair() {
    try (final StructVector s1 = StructVector.empty("s1", allocator);
        final StructVector s2 = StructVector.empty("s2", allocator)) {
      s1.addOrGet("struct_child", FieldType.nullable(MinorType.INT.getType()), IntVector.class);
      s1.makeTransferPair(s2);
      final FieldVector child = s1.getChild("struct_child");
      final FieldVector toChild =
          s2.addOrGet("struct_child", child.getField().getFieldType(), child.getClass());
      assertEquals(0, toChild.getValueCapacity());
      assertEquals(0, toChild.getDataBuffer().capacity());
      assertEquals(0, toChild.getValidityBuffer().capacity());
    }
  }

  @Test
  public void testAllocateAfterReAlloc() throws Exception {
    Map<String, String> metadata = new HashMap<>();
    metadata.put("k1", "v1");
    FieldType type = new FieldType(true, Struct.INSTANCE, null, metadata);
    try (StructVector vector = new StructVector("struct", allocator, type, null)) {
      MinorType childtype = MinorType.INT;
      vector.addOrGet("intchild", FieldType.nullable(childtype.getType()), IntVector.class);

      /*
       * Allocate the default size, and then, reAlloc. This should double the allocation.
       */
      vector.allocateNewSafe(); // Initial allocation
      vector.reAlloc(); // Double the allocation size of self, and all children.
      long savedValidityBufferCapacity = vector.getValidityBuffer().capacity();
      int savedValueCapacity = vector.getValueCapacity();

      /*
       * Clear and allocate again.
       */
      vector.clear();
      vector.allocateNewSafe();

      /*
       * Verify that the buffer sizes haven't changed.
       */
      assertEquals(vector.getValidityBuffer().capacity(), savedValidityBufferCapacity);
      assertEquals(vector.getValueCapacity(), savedValueCapacity);
    }
  }

  @Test
  public void testReadNullValue() {
    Map<String, String> metadata = new HashMap<>();
    metadata.put("k1", "v1");
    FieldType type = new FieldType(true, Struct.INSTANCE, null, metadata);
    try (StructVector vector = new StructVector("struct", allocator, type, null)) {
      MinorType childtype = MinorType.INT;
      vector.addOrGet("intchild", FieldType.nullable(childtype.getType()), IntVector.class);
      vector.setValueCount(2);

      IntVector intVector = (IntVector) vector.getChild("intchild");
      intVector.setSafe(0, 100);
      vector.setIndexDefined(0);
      intVector.setNull(1);
      vector.setNull(1);

      ComplexHolder holder = new ComplexHolder();
      vector.get(0, holder);
      assertNotEquals(0, holder.isSet);
      assertNotNull(holder.reader);

      vector.get(1, holder);
      assertEquals(0, holder.isSet);
      assertNull(holder.reader);
    }
  }

  @Test
  public void testGetPrimitiveVectors() {
    FieldType type = new FieldType(true, Struct.INSTANCE, null, null);
    try (StructVector vector = new StructVector("struct", allocator, type, null)) {

      // add list vector
      vector.addOrGet("list", FieldType.nullable(MinorType.LIST.getType()), ListVector.class);
      ListVector listVector = vector.addOrGetList("list");
      listVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));

      // add union vector
      vector.addOrGet("union", FieldType.nullable(MinorType.UNION.getType()), UnionVector.class);
      UnionVector unionVector = vector.addOrGetUnion("union");
      unionVector.addVector(new BigIntVector("bigInt", allocator));
      unionVector.addVector(new SmallIntVector("smallInt", allocator));

      // add varchar vector
      vector.addOrGet(
          "varchar", FieldType.nullable(MinorType.VARCHAR.getType()), VarCharVector.class);

      List<ValueVector> primitiveVectors = vector.getPrimitiveVectors();
      assertEquals(4, primitiveVectors.size());
      assertEquals(MinorType.INT, primitiveVectors.get(0).getMinorType());
      assertEquals(MinorType.BIGINT, primitiveVectors.get(1).getMinorType());
      assertEquals(MinorType.SMALLINT, primitiveVectors.get(2).getMinorType());
      assertEquals(MinorType.VARCHAR, primitiveVectors.get(3).getMinorType());
    }
  }

  @Test
  public void testAddOrGetComplexChildVectors() {
    FieldType type = new FieldType(true, Struct.INSTANCE, null, null);
    try (StructVector vector = new StructVector("struct", allocator, type, null)) {

      vector.addOrGetList("list");
      vector.addOrGetFixedSizeList("fixedList", 2);
      vector.addOrGetUnion("union");
      vector.addOrGetStruct("struct");
      vector.addOrGetMap("map", true);

      List<FieldVector> children = vector.getChildrenFromFields();
      assertEquals(5, children.size());
      assertEquals(MinorType.LIST, children.get(0).getMinorType());
      assertEquals(MinorType.FIXED_SIZE_LIST, children.get(1).getMinorType());
      assertEquals(MinorType.UNION, children.get(2).getMinorType());
      assertEquals(MinorType.STRUCT, children.get(3).getMinorType());
      assertEquals(MinorType.MAP, children.get(4).getMinorType());
    }
  }

  @Test
  public void testAddChildVectorsWithDuplicatedFieldNamesForConflictPolicyAppend() {
    final FieldType type = new FieldType(true, Struct.INSTANCE, null, null);
    try (StructVector vector =
        new StructVector(
            "struct",
            allocator,
            type,
            null,
            AbstractStructVector.ConflictPolicy.CONFLICT_APPEND,
            true)) {
      final List<Field> initFields = new ArrayList<>();

      // Add a bit more fields to test against stability of the internal field
      // ordering mechanism of StructVector
      initFields.add(Field.nullable("varchar1", MinorType.VARCHAR.getType()));
      initFields.add(Field.nullable("int1", MinorType.INT.getType()));
      initFields.add(Field.nullable("varchar2", MinorType.VARCHAR.getType()));
      initFields.add(Field.nullable("int2", MinorType.INT.getType()));
      initFields.add(Field.nullable("varchar3", MinorType.VARCHAR.getType()));
      initFields.add(Field.nullable("int3", MinorType.INT.getType()));
      initFields.add(Field.nullable("uncertain-type", MinorType.INT.getType()));

      // To ensure duplicated field names don't mess up the original field order
      // in the struct vector
      initFields.add(Field.nullable("varchar1", MinorType.VARCHAR.getType()));
      initFields.add(Field.nullable("varchar2", MinorType.VARCHAR.getType()));
      initFields.add(Field.nullable("varchar3", MinorType.VARCHAR.getType()));
      initFields.add(Field.nullable("uncertain-type", MinorType.VARCHAR.getType()));

      vector.initializeChildrenFromFields(initFields);

      List<FieldVector> children = vector.getChildrenFromFields();
      assertEquals(11, children.size());
      assertEquals("varchar1", children.get(0).getName());
      assertEquals("int1", children.get(1).getName());
      assertEquals("varchar2", children.get(2).getName());
      assertEquals("int2", children.get(3).getName());
      assertEquals("varchar3", children.get(4).getName());
      assertEquals("int3", children.get(5).getName());
      assertEquals("uncertain-type", children.get(6).getName());
      assertEquals("varchar1", children.get(7).getName());
      assertEquals("varchar2", children.get(8).getName());
      assertEquals("varchar3", children.get(9).getName());
      assertEquals("uncertain-type", children.get(10).getName());
      assertEquals(MinorType.VARCHAR, children.get(0).getMinorType());
      assertEquals(MinorType.INT, children.get(1).getMinorType());
      assertEquals(MinorType.VARCHAR, children.get(2).getMinorType());
      assertEquals(MinorType.INT, children.get(3).getMinorType());
      assertEquals(MinorType.VARCHAR, children.get(4).getMinorType());
      assertEquals(MinorType.INT, children.get(5).getMinorType());
      assertEquals(MinorType.INT, children.get(6).getMinorType());
      assertEquals(MinorType.VARCHAR, children.get(7).getMinorType());
      assertEquals(MinorType.VARCHAR, children.get(8).getMinorType());
      assertEquals(MinorType.VARCHAR, children.get(9).getMinorType());
      assertEquals(MinorType.VARCHAR, children.get(10).getMinorType());
    }
  }

  @Test
  public void testAddChildVectorsWithDuplicatedFieldNamesForConflictPolicyReplace() {
    final FieldType type = new FieldType(true, Struct.INSTANCE, null, null);
    try (StructVector vector =
        new StructVector(
            "struct",
            allocator,
            type,
            null,
            AbstractStructVector.ConflictPolicy.CONFLICT_REPLACE,
            true)) {
      final List<Field> initFields = new ArrayList<>();

      // Add a bit more fields to test against stability of the internal field
      // ordering mechanism of StructVector
      initFields.add(Field.nullable("varchar1", MinorType.VARCHAR.getType()));
      initFields.add(Field.nullable("int1", MinorType.INT.getType()));
      initFields.add(Field.nullable("varchar2", MinorType.VARCHAR.getType()));
      initFields.add(Field.nullable("int2", MinorType.INT.getType()));
      initFields.add(Field.nullable("varchar3", MinorType.VARCHAR.getType()));
      initFields.add(Field.nullable("int3", MinorType.INT.getType()));
      initFields.add(Field.nullable("uncertain-type", MinorType.INT.getType()));

      // To ensure duplicated field names don't mess up the original field order
      // in the struct vector
      initFields.add(Field.nullable("varchar1", MinorType.VARCHAR.getType()));
      initFields.add(Field.nullable("varchar2", MinorType.VARCHAR.getType()));
      initFields.add(Field.nullable("varchar3", MinorType.VARCHAR.getType()));
      initFields.add(Field.nullable("uncertain-type", MinorType.VARCHAR.getType()));

      vector.initializeChildrenFromFields(initFields);

      List<FieldVector> children = vector.getChildrenFromFields();
      assertEquals(7, children.size());
      assertEquals("varchar1", children.get(0).getName());
      assertEquals("int1", children.get(1).getName());
      assertEquals("varchar2", children.get(2).getName());
      assertEquals("int2", children.get(3).getName());
      assertEquals("varchar3", children.get(4).getName());
      assertEquals("int3", children.get(5).getName());
      assertEquals("uncertain-type", children.get(6).getName());
      assertEquals(MinorType.VARCHAR, children.get(0).getMinorType());
      assertEquals(MinorType.INT, children.get(1).getMinorType());
      assertEquals(MinorType.VARCHAR, children.get(2).getMinorType());
      assertEquals(MinorType.INT, children.get(3).getMinorType());
      assertEquals(MinorType.VARCHAR, children.get(4).getMinorType());
      assertEquals(MinorType.INT, children.get(5).getMinorType());
      assertEquals(MinorType.VARCHAR, children.get(6).getMinorType());
    }
  }

  @Test
  public void testTypedGetters() {
    try (final StructVector s1 = StructVector.empty("s1", allocator)) {
      s1.addOrGet("struct_child", FieldType.nullable(MinorType.INT.getType()), IntVector.class);
      assertEquals(IntVector.class, s1.getChild("struct_child", IntVector.class).getClass());
      assertEquals(IntVector.class, s1.getVectorById(0, IntVector.class).getClass());
    }
  }

  @Test
  public void testGetTransferPair() {
    try (final StructVector fromVector = simpleStructVector("s1", allocator)) {
      TransferPair tp = fromVector.getTransferPair(fromVector.getField(), allocator);
      final StructVector toVector = (StructVector) tp.getTo();
      // Field inside a new vector created by reusing a field should be the same in memory as the
      // original field.
      assertSame(toVector.getField(), fromVector.getField());
      toVector.clear();
    }
  }

  @Test
  public void testGetTransferPairWithFieldAndCallBack() {
    SchemaChangeCallBack callBack = new SchemaChangeCallBack();
    try (final StructVector fromVector = simpleStructVector("s1", allocator)) {
      TransferPair tp = fromVector.getTransferPair(fromVector.getField(), allocator, callBack);
      final StructVector toVector = (StructVector) tp.getTo();
      // Field inside a new vector created by reusing a field should be the same in memory as the
      // original field.
      assertSame(toVector.getField(), fromVector.getField());
      toVector.clear();
    }
  }

  private StructVector simpleStructVector(String name, BufferAllocator allocator) {
    final String INT_COL = "struct_int_child";
    final String FLT_COL = "struct_flt_child";
    StructVector structVector = StructVector.empty(name, allocator);
    final int size = 6; // number of structs

    NullableStructWriter structWriter = structVector.getWriter();
    structVector.addOrGet(
        INT_COL, FieldType.nullable(Types.MinorType.INT.getType()), IntVector.class);
    structVector.addOrGet(
        FLT_COL, FieldType.nullable(Types.MinorType.INT.getType()), IntVector.class);
    structVector.allocateNew();
    IntWriter intWriter = structWriter.integer(INT_COL);
    Float8Writer float8Writer = structWriter.float8(FLT_COL);

    for (int i = 0; i < size; i++) {
      structWriter.setPosition(i);
      structWriter.start();
      intWriter.writeInt(i);
      float8Writer.writeFloat8(i * .1);
      structWriter.end();
    }

    structWriter.setValueCount(size);

    return structVector;
  }
}