SimpleHasher.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.arrow.memory.util.hash;
import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.util.MemoryUtil;
import org.checkerframework.checker.nullness.qual.Nullable;
/**
* A simple hasher that calculates the hash code of integers as is, and does not perform any
* finalization. So the computation is extremely efficient.
*
* <p>This algorithm only provides the most basic semantics for the hash code. That is, if two
* objects are equal, they must have equal hash code. However, the quality of the produced hash code
* may not be good. In other words, the generated hash codes are far from being uniformly
* distributed in the universe.
*
* <p>Therefore, this algorithm is suitable only for scenarios where the most basic semantics of the
* hash code is required (e.g. in scenarios that require fast and proactive data pruning)
*
* <p>An object of this class is stateless, so it can be shared between threads.
*/
public class SimpleHasher implements ArrowBufHasher {
public static SimpleHasher INSTANCE = new SimpleHasher();
protected SimpleHasher() {}
/**
* Calculates the hash code for a memory region.
*
* @param address start address of the memory region.
* @param length length of the memory region.
* @return the hash code.
*/
@Override
public int hashCode(long address, long length) {
int hashValue = 0;
int index = 0;
while (index + 8 <= length) {
long longValue = MemoryUtil.getLong(address + index);
int longHash = getLongHashCode(longValue);
hashValue = combineHashCode(hashValue, longHash);
index += 8;
}
if (index + 4 <= length) {
int intValue = MemoryUtil.getInt(address + index);
int intHash = intValue;
hashValue = combineHashCode(hashValue, intHash);
index += 4;
}
while (index < length) {
byte byteValue = MemoryUtil.getByte(address + index);
int byteHash = byteValue;
hashValue = combineHashCode(hashValue, byteHash);
index += 1;
}
return finalizeHashCode(hashValue);
}
/**
* Calculates the hash code for a memory region.
*
* @param buf the buffer for the memory region.
* @param offset offset within the buffer for the memory region.
* @param length length of the memory region.
* @return the hash code.
*/
@Override
public int hashCode(ArrowBuf buf, long offset, long length) {
buf.checkBytes(offset, offset + length);
return hashCode(buf.memoryAddress() + offset, length);
}
protected int combineHashCode(int currentHashCode, int newHashCode) {
return currentHashCode * 37 + newHashCode;
}
protected int getLongHashCode(long longValue) {
return Long.hashCode(longValue);
}
protected int finalizeHashCode(int hashCode) {
return hashCode;
}
@Override
public int hashCode() {
return 123;
}
@Override
public boolean equals(@Nullable Object obj) {
return obj != null && (obj instanceof SimpleHasher);
}
}