TestBitVector.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.arrow.vector;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.stream.IntStream;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.memory.util.hash.MurmurHasher;
import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
import org.apache.arrow.vector.util.TransferPair;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
public class TestBitVector {
private static final String EMPTY_SCHEMA_PATH = "";
private BufferAllocator allocator;
@BeforeEach
public void init() {
allocator = new RootAllocator(Long.MAX_VALUE);
}
@AfterEach
public void terminate() throws Exception {
allocator.close();
}
@Test
public void testBitVectorCopyFromSafe() {
final int size = 20;
try (final BitVector src = new BitVector(EMPTY_SCHEMA_PATH, allocator);
final BitVector dst = new BitVector(EMPTY_SCHEMA_PATH, allocator)) {
src.allocateNew(size);
dst.allocateNew(10);
for (int i = 0; i < size; i++) {
src.set(i, i % 2);
}
src.setValueCount(size);
for (int i = 0; i < size; i++) {
dst.copyFromSafe(i, i, src);
}
dst.setValueCount(size);
for (int i = 0; i < size; i++) {
assertEquals(src.getObject(i), dst.getObject(i));
}
}
}
@Test
public void testSplitAndTransfer() throws Exception {
try (final BitVector sourceVector = new BitVector("bitvector", allocator)) {
sourceVector.allocateNew(40);
/* populate the bitvector -- 010101010101010101010101..... */
for (int i = 0; i < 40; i++) {
if ((i & 1) == 1) {
sourceVector.set(i, 1);
} else {
sourceVector.set(i, 0);
}
}
sourceVector.setValueCount(40);
/* check the vector output */
for (int i = 0; i < 40; i++) {
int result = sourceVector.get(i);
if ((i & 1) == 1) {
assertEquals(Integer.toString(1), Integer.toString(result));
} else {
assertEquals(Integer.toString(0), Integer.toString(result));
}
}
try (final BitVector toVector = new BitVector("toVector", allocator)) {
final TransferPair transferPair = sourceVector.makeTransferPair(toVector);
/*
* form test cases such that we cover:
*
* (1) the start index is exactly where a particular byte starts in the source bit vector
* (2) the start index is randomly positioned within a byte in the source bit vector
* (2.1) the length is a multiple of 8
* (2.2) the length is not a multiple of 8
*/
final int[][] transferLengths = {{0, 8}, {8, 10}, {18, 0}, {18, 8}, {26, 0}, {26, 14}};
for (final int[] transferLength : transferLengths) {
final int start = transferLength[0];
final int length = transferLength[1];
transferPair.splitAndTransfer(start, length);
/* check the toVector output after doing splitAndTransfer */
for (int i = 0; i < length; i++) {
int actual = toVector.get(i);
int expected = sourceVector.get(start + i);
assertEquals(
expected,
actual,
"different data values not expected --> sourceVector index: "
+ (start + i)
+ " toVector index: "
+ i);
}
}
}
}
}
@Test
public void testSplitAndTransfer1() throws Exception {
try (final BitVector sourceVector = new BitVector("bitvector", allocator)) {
sourceVector.allocateNew(8190);
/* populate the bitvector */
for (int i = 0; i < 8190; i++) {
sourceVector.set(i, 1);
}
sourceVector.setValueCount(8190);
/* check the vector output */
for (int i = 0; i < 8190; i++) {
int result = sourceVector.get(i);
assertEquals(Integer.toString(1), Integer.toString(result));
}
try (final BitVector toVector = new BitVector("toVector", allocator)) {
final TransferPair transferPair = sourceVector.makeTransferPair(toVector);
final int[][] transferLengths = {{0, 4095}, {4095, 4095}};
for (final int[] transferLength : transferLengths) {
final int start = transferLength[0];
final int length = transferLength[1];
transferPair.splitAndTransfer(start, length);
/* check the toVector output after doing splitAndTransfer */
for (int i = 0; i < length; i++) {
int actual = toVector.get(i);
int expected = sourceVector.get(start + i);
assertEquals(
expected,
actual,
"different data values not expected --> sourceVector index: "
+ (start + i)
+ " toVector index: "
+ i);
}
}
}
}
}
@Test
public void testSplitAndTransfer2() throws Exception {
try (final BitVector sourceVector = new BitVector("bitvector", allocator)) {
sourceVector.allocateNew(32);
/* populate the bitvector */
for (int i = 0; i < 32; i++) {
if ((i & 1) == 1) {
sourceVector.set(i, 1);
} else {
sourceVector.set(i, 0);
}
}
sourceVector.setValueCount(32);
/* check the vector output */
for (int i = 0; i < 32; i++) {
int result = sourceVector.get(i);
if ((i & 1) == 1) {
assertEquals(Integer.toString(1), Integer.toString(result));
} else {
assertEquals(Integer.toString(0), Integer.toString(result));
}
}
try (final BitVector toVector = new BitVector("toVector", allocator)) {
final TransferPair transferPair = sourceVector.makeTransferPair(toVector);
final int[][] transferLengths = {
{5, 22}, {5, 24}, {5, 25}, {5, 27}, {0, 31}, {5, 7}, {2, 3}
};
for (final int[] transferLength : transferLengths) {
final int start = transferLength[0];
final int length = transferLength[1];
transferPair.splitAndTransfer(start, length);
/* check the toVector output after doing splitAndTransfer */
for (int i = 0; i < length; i++) {
int actual = toVector.get(i);
int expected = sourceVector.get(start + i);
assertEquals(
expected,
actual,
"different data values not expected --> sourceVector index: "
+ (start + i)
+ " toVector index: "
+ i);
}
}
}
}
}
@Test
public void testReallocAfterVectorTransfer1() {
try (final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator)) {
vector.allocateNew(4096);
int valueCapacity = vector.getValueCapacity();
assertEquals(4096, valueCapacity);
for (int i = 0; i < valueCapacity; i++) {
if ((i & 1) == 1) {
vector.setToOne(i);
}
}
for (int i = 0; i < valueCapacity; i++) {
if ((i & 1) == 1) {
assertEquals(1, vector.get(i), "unexpected cleared bit at index: " + i);
} else {
assertTrue(vector.isNull(i), "unexpected set bit at index: " + i);
}
}
/* trigger first realloc */
vector.setSafeToOne(valueCapacity);
assertEquals(valueCapacity * 2, vector.getValueCapacity());
for (int i = valueCapacity; i < valueCapacity * 2; i++) {
if ((i & 1) == 1) {
vector.setToOne(i);
}
}
for (int i = 0; i < valueCapacity * 2; i++) {
if (((i & 1) == 1) || (i == valueCapacity)) {
assertEquals(1, vector.get(i), "unexpected cleared bit at index: " + i);
} else {
assertTrue(vector.isNull(i), "unexpected set bit at index: " + i);
}
}
/* trigger second realloc */
vector.setSafeToOne(valueCapacity * 2);
assertEquals(valueCapacity * 4, vector.getValueCapacity());
for (int i = valueCapacity * 2; i < valueCapacity * 4; i++) {
if ((i & 1) == 1) {
vector.setToOne(i);
}
}
for (int i = 0; i < valueCapacity * 4; i++) {
if (((i & 1) == 1) || (i == valueCapacity) || (i == valueCapacity * 2)) {
assertEquals(1, vector.get(i), "unexpected cleared bit at index: " + i);
} else {
assertTrue(vector.isNull(i), "unexpected set bit at index: " + i);
}
}
/* now transfer the vector */
TransferPair transferPair = vector.getTransferPair(allocator);
transferPair.transfer();
final BitVector toVector = (BitVector) transferPair.getTo();
assertEquals(valueCapacity * 4, toVector.getValueCapacity());
/* realloc the toVector */
toVector.setSafeToOne(valueCapacity * 4);
for (int i = 0; i < toVector.getValueCapacity(); i++) {
if (i <= valueCapacity * 4) {
if (((i & 1) == 1)
|| (i == valueCapacity)
|| (i == valueCapacity * 2)
|| (i == valueCapacity * 4)) {
assertEquals(1, toVector.get(i), "unexpected cleared bit at index: " + i);
} else {
assertTrue(toVector.isNull(i), "unexpected set bit at index: " + i);
}
} else {
assertTrue(toVector.isNull(i), "unexpected set bit at index: " + i);
}
}
toVector.close();
}
}
@Test
public void testReallocAfterVectorTransfer2() {
try (final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator)) {
vector.allocateNew(4096);
int valueCapacity = vector.getValueCapacity();
assertEquals(4096, valueCapacity);
for (int i = 0; i < valueCapacity; i++) {
if ((i & 1) == 1) {
vector.set(i, 1);
}
}
for (int i = 0; i < valueCapacity; i++) {
if ((i & 1) == 1) {
assertFalse(vector.isNull(i), "unexpected cleared bit at index: " + i);
} else {
assertTrue(vector.isNull(i), "unexpected set bit at index: " + i);
}
}
/* trigger first realloc */
vector.setSafe(valueCapacity, 1, 1);
assertEquals(valueCapacity * 2, vector.getValueCapacity());
for (int i = valueCapacity; i < valueCapacity * 2; i++) {
if ((i & 1) == 1) {
vector.set(i, 1);
}
}
for (int i = 0; i < valueCapacity * 2; i++) {
if (((i & 1) == 1) || (i == valueCapacity)) {
assertFalse(vector.isNull(i), "unexpected cleared bit at index: " + i);
} else {
assertTrue(vector.isNull(i), "unexpected set bit at index: " + i);
}
}
/* trigger second realloc */
vector.setSafe(valueCapacity * 2, 1, 1);
assertEquals(valueCapacity * 4, vector.getValueCapacity());
for (int i = valueCapacity * 2; i < valueCapacity * 4; i++) {
if ((i & 1) == 1) {
vector.set(i, 1);
}
}
for (int i = 0; i < valueCapacity * 4; i++) {
if (((i & 1) == 1) || (i == valueCapacity) || (i == valueCapacity * 2)) {
assertFalse(vector.isNull(i), "unexpected cleared bit at index: " + i);
} else {
assertTrue(vector.isNull(i), "unexpected set bit at index: " + i);
}
}
/* now transfer the vector */
TransferPair transferPair = vector.getTransferPair(allocator);
transferPair.transfer();
final BitVector toVector = (BitVector) transferPair.getTo();
assertEquals(valueCapacity * 4, toVector.getValueCapacity());
/* realloc the toVector */
toVector.setSafe(valueCapacity * 4, 1, 1);
for (int i = 0; i < toVector.getValueCapacity(); i++) {
if (i <= valueCapacity * 4) {
if (((i & 1) == 1)
|| (i == valueCapacity)
|| (i == valueCapacity * 2)
|| (i == valueCapacity * 4)) {
assertFalse(toVector.isNull(i), "unexpected cleared bit at index: " + i);
} else {
assertTrue(toVector.isNull(i), "unexpected set bit at index: " + i);
}
} else {
assertTrue(toVector.isNull(i), "unexpected set bit at index: " + i);
}
}
toVector.close();
}
}
@Test
public void testBitVector() {
// Create a new value vector for 1024 integers
try (final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator)) {
vector.allocateNew(1024);
vector.setValueCount(1024);
// Put and set a few values
vector.set(0, 1);
vector.set(1, 0);
vector.set(100, 0);
vector.set(1022, 1);
vector.setValueCount(1024);
assertEquals(1, vector.get(0));
assertEquals(0, vector.get(1));
assertEquals(0, vector.get(100));
assertEquals(1, vector.get(1022));
assertEquals(1020, vector.getNullCount());
// test setting the same value twice
vector.set(0, 1);
vector.set(0, 1);
vector.set(1, 0);
vector.set(1, 0);
assertEquals(1, vector.get(0));
assertEquals(0, vector.get(1));
// test toggling the values
vector.set(0, 0);
vector.set(1, 1);
assertEquals(0, vector.get(0));
assertEquals(1, vector.get(1));
// should not change
assertEquals(1020, vector.getNullCount());
// Ensure null value
assertTrue(vector.isNull(3));
// unset the previously set bits
vector.setNull(0);
vector.setNull(1);
vector.setNull(100);
vector.setNull(1022);
// this should set all the array to 0
assertEquals(1024, vector.getNullCount());
// set all the array to 1
for (int i = 0; i < 1024; ++i) {
assertEquals(1024 - i, vector.getNullCount());
vector.set(i, 1);
}
assertEquals(0, vector.getNullCount());
vector.allocateNew(1015);
vector.setValueCount(1015);
// ensure it has been zeroed
assertEquals(1015, vector.getNullCount());
vector.set(0, 1);
vector.set(1014, 1); // ensure that the last item of the last byte is allocated
assertEquals(1013, vector.getNullCount());
vector.zeroVector();
assertEquals(1015, vector.getNullCount());
// set all the array to 1
for (int i = 0; i < 1015; ++i) {
assertEquals(1015 - i, vector.getNullCount());
vector.set(i, 1);
}
assertEquals(0, vector.getNullCount());
}
}
@Test
public void testBitVectorRangeSetAllOnes() {
validateRange(1000, 0, 1000);
validateRange(1000, 0, 1);
validateRange(1000, 1, 2);
validateRange(1000, 5, 6);
validateRange(1000, 5, 10);
validateRange(1000, 5, 150);
validateRange(1000, 5, 27);
for (int i = 0; i < 8; i++) {
for (int j = 0; j < 8; j++) {
validateRange(1000, 10 + i, 27 + j);
validateRange(1000, i, j);
}
}
}
private void validateRange(int length, int start, int count) {
String desc = "[" + start + ", " + (start + count) + ") ";
try (BitVector bitVector = new BitVector("bits", allocator)) {
bitVector.reset();
bitVector.allocateNew(length);
bitVector.setRangeToOne(start, count);
for (int i = 0; i < start; i++) {
assertTrue(bitVector.isNull(i), desc + i);
}
for (int i = start; i < start + count; i++) {
assertEquals(1, bitVector.get(i), desc + i);
}
for (int i = start + count; i < length; i++) {
assertTrue(bitVector.isNull(i), desc + i);
}
}
}
@Test
public void testBitVectorHashCode() {
final int size = 6;
try (final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator)) {
ValueVectorDataPopulator.setVector(vector, 0, 1, null, 0, 1, null);
int[] hashCodes = new int[size];
IntStream.range(0, size).forEach(i -> hashCodes[i] = vector.hashCode(i));
assertTrue(hashCodes[0] == hashCodes[3]);
assertTrue(hashCodes[1] == hashCodes[4]);
assertTrue(hashCodes[2] == hashCodes[5]);
assertFalse(hashCodes[0] == hashCodes[1]);
assertFalse(hashCodes[0] == hashCodes[2]);
assertFalse(hashCodes[1] == hashCodes[2]);
MurmurHasher hasher = new MurmurHasher();
IntStream.range(0, size).forEach(i -> hashCodes[i] = vector.hashCode(i, hasher));
assertTrue(hashCodes[0] == hashCodes[3]);
assertTrue(hashCodes[1] == hashCodes[4]);
assertTrue(hashCodes[2] == hashCodes[5]);
assertFalse(hashCodes[0] == hashCodes[1]);
assertFalse(hashCodes[0] == hashCodes[2]);
assertFalse(hashCodes[1] == hashCodes[2]);
}
}
@Test
public void testGetTransferPairWithField() {
final BitVector fromVector = new BitVector(EMPTY_SCHEMA_PATH, allocator);
final TransferPair transferPair = fromVector.getTransferPair(fromVector.getField(), allocator);
final BitVector toVector = (BitVector) transferPair.getTo();
// Field inside a new vector created by reusing a field should be the same in memory as the
// original field.
assertSame(fromVector.getField(), toVector.getField());
}
}