MapVector.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.arrow.vector.complex;

import static org.apache.arrow.util.Preconditions.checkArgument;

import java.util.List;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.util.Preconditions;
import org.apache.arrow.vector.AddOrGetResult;
import org.apache.arrow.vector.BitVectorHelper;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.ZeroVector;
import org.apache.arrow.vector.complex.impl.UnionMapReader;
import org.apache.arrow.vector.complex.impl.UnionMapWriter;
import org.apache.arrow.vector.types.Types;
import org.apache.arrow.vector.types.Types.MinorType;
import org.apache.arrow.vector.types.pojo.ArrowType.Map;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.util.CallBack;
import org.apache.arrow.vector.util.TransferPair;

/**
 * A MapVector is used to store entries of key/value pairs. It is a container vector that is
 * composed of a list of struct values with "key" and "value" fields. The MapVector is nullable, but
 * if a map is set at a given index, there must be an entry. In other words, the StructVector data
 * is non-nullable. Also for a given entry, the "key" is non-nullable, however the "value" can be
 * null.
 */
public class MapVector extends ListVector {

  public static final String KEY_NAME = "key";
  public static final String VALUE_NAME = "value";
  public static final String DATA_VECTOR_NAME = "entries";

  /**
   * Construct an empty MapVector with no data. Child vectors must be added subsequently.
   *
   * @param name The name of the vector.
   * @param allocator The allocator used for allocating/reallocating buffers.
   * @param keysSorted True if the map keys have been pre-sorted.
   * @return a new instance of MapVector.
   */
  public static MapVector empty(String name, BufferAllocator allocator, boolean keysSorted) {
    return new MapVector(name, allocator, FieldType.nullable(new Map(keysSorted)), null);
  }

  /**
   * Construct a MapVector instance.
   *
   * @param name The name of the vector.
   * @param allocator The allocator used for allocating/reallocating buffers.
   * @param fieldType The type definition of the MapVector.
   * @param callBack A schema change callback.
   */
  public MapVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) {
    super(name, allocator, fieldType, callBack);
    defaultDataVectorName = DATA_VECTOR_NAME;
  }

  public MapVector(Field field, BufferAllocator allocator, CallBack callBack) {
    super(field, allocator, callBack);
    defaultDataVectorName = DATA_VECTOR_NAME;
  }

  /**
   * Initialize child vectors of the map from the given list of fields.
   *
   * @param children List of fields that will be children of this MapVector.
   */
  @Override
  public void initializeChildrenFromFields(List<Field> children) {
    checkArgument(
        children.size() == 1,
        "Maps have one List child. Found: %s",
        children.isEmpty() ? "none" : children);

    Field structField = children.get(0);
    MinorType minorType = Types.getMinorTypeForArrowType(structField.getType());
    checkArgument(
        minorType == MinorType.STRUCT && !structField.isNullable(),
        "Map data should be a non-nullable struct type");
    checkArgument(
        structField.getChildren().size() == 2,
        "Map data should be a struct with 2 children. Found: %s",
        children);

    Field keyField = structField.getChildren().get(0);
    checkArgument(!keyField.isNullable(), "Map data key type should be a non-nullable");

    AddOrGetResult<FieldVector> addOrGetVector = addOrGetVector(structField.getFieldType());
    checkArgument(
        addOrGetVector.isCreated(), "Child vector already existed: %s", addOrGetVector.getVector());

    addOrGetVector.getVector().initializeChildrenFromFields(structField.getChildren());
    this.field = new Field(this.field.getName(), this.field.getFieldType(), children);
  }

  /** Get the writer for this MapVector instance. */
  @Override
  public UnionMapWriter getWriter() {
    return new UnionMapWriter(this);
  }

  /** Get the reader for this MapVector instance. */
  @Override
  public UnionMapReader getReader() {
    if (reader == null) {
      reader = new UnionMapReader(this);
    }
    return (UnionMapReader) reader;
  }

  @Override
  public MinorType getMinorType() {
    return MinorType.MAP;
  }

  @Override
  public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
    return getTransferPair(ref, allocator, null);
  }

  @Override
  public TransferPair getTransferPair(Field field, BufferAllocator allocator) {
    return new TransferImpl(field, allocator, null);
  }

  @Override
  public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
    return new TransferImpl(ref, allocator, callBack);
  }

  @Override
  public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) {
    return new TransferImpl(field, allocator, callBack);
  }

  @Override
  public TransferPair makeTransferPair(ValueVector target) {
    return new MapVector.TransferImpl((MapVector) target);
  }

  private class TransferImpl implements TransferPair {

    MapVector to;
    TransferPair dataTransferPair;

    public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) {
      this(new MapVector(name, allocator, field.getFieldType(), callBack));
    }

    public TransferImpl(Field field, BufferAllocator allocator, CallBack callBack) {
      this(new MapVector(field, allocator, callBack));
    }

    public TransferImpl(MapVector to) {
      this.to = to;
      to.addOrGetVector(vector.getField().getFieldType());
      if (to.getDataVector() instanceof ZeroVector) {
        to.addOrGetVector(vector.getField().getFieldType());
      }
      dataTransferPair = getDataVector().makeTransferPair(to.getDataVector());
    }

    /**
     * Transfer this vector'data to another vector. The memory associated with this vector is
     * transferred to the allocator of target vector for accounting and management purposes.
     */
    @Override
    public void transfer() {
      to.clear();
      dataTransferPair.transfer();
      to.validityBuffer = transferBuffer(validityBuffer, to.allocator);
      to.offsetBuffer = transferBuffer(offsetBuffer, to.allocator);
      to.lastSet = lastSet;
      if (valueCount > 0) {
        to.setValueCount(valueCount);
      }
      clear();
    }

    /**
     * Slice this vector at desired index and length and transfer the corresponding data to the
     * target vector.
     *
     * @param startIndex start position of the split in source vector.
     * @param length length of the split.
     */
    @Override
    public void splitAndTransfer(int startIndex, int length) {
      Preconditions.checkArgument(
          startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
          "Invalid parameters startIndex: %s, length: %s for valueCount: %s",
          startIndex,
          length,
          valueCount);
      final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH);
      final int sliceLength =
          offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint;
      to.clear();
      to.offsetBuffer = to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH);
      /* splitAndTransfer offset buffer */
      for (int i = 0; i < length + 1; i++) {
        final int relativeOffset =
            offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint;
        to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset);
      }
      /* splitAndTransfer validity buffer */
      splitAndTransferValidityBuffer(startIndex, length, to);
      /* splitAndTransfer data buffer */
      dataTransferPair.splitAndTransfer(startPoint, sliceLength);
      to.lastSet = length - 1;
      to.setValueCount(length);
    }

    /*
     * transfer the validity.
     */
    private void splitAndTransferValidityBuffer(int startIndex, int length, MapVector target) {
      int firstByteSource = BitVectorHelper.byteIndex(startIndex);
      int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
      int byteSizeTarget = getValidityBufferSizeFromCount(length);
      int offset = startIndex % 8;

      if (length > 0) {
        if (offset == 0) {
          // slice
          if (target.validityBuffer != null) {
            target.validityBuffer.getReferenceManager().release();
          }
          target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
          target.validityBuffer.getReferenceManager().retain(1);
        } else {
          /* Copy data
           * When the first bit starts from the middle of a byte (offset != 0),
           * copy data from src BitVector.
           * Each byte in the target is composed by a part in i-th byte,
           * another part in (i+1)-th byte.
           */
          target.allocateValidityBuffer(byteSizeTarget);

          for (int i = 0; i < byteSizeTarget - 1; i++) {
            byte b1 =
                BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset);
            byte b2 =
                BitVectorHelper.getBitsFromNextByte(
                    validityBuffer, firstByteSource + i + 1, offset);

            target.validityBuffer.setByte(i, (b1 + b2));
          }

          /* Copying the last piece is done in the following manner:
           * if the source vector has 1 or more bytes remaining, we copy
           * the last piece as a byte formed by shifting data
           * from the current byte and the next byte.
           *
           * if the source vector has no more bytes remaining
           * (we are at the last byte), we copy the last piece as a byte
           * by shifting data from the current byte.
           */
          if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
            byte b1 =
                BitVectorHelper.getBitsFromCurrentByte(
                    validityBuffer, firstByteSource + byteSizeTarget - 1, offset);
            byte b2 =
                BitVectorHelper.getBitsFromNextByte(
                    validityBuffer, firstByteSource + byteSizeTarget, offset);

            target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
          } else {
            byte b1 =
                BitVectorHelper.getBitsFromCurrentByte(
                    validityBuffer, firstByteSource + byteSizeTarget - 1, offset);
            target.validityBuffer.setByte(byteSizeTarget - 1, b1);
          }
        }
      }
    }

    @Override
    public ValueVector getTo() {
      return to;
    }

    @Override
    public void copyValueSafe(int from, int to) {
      this.to.copyFrom(from, to, MapVector.this);
    }
  }
}