MurmurHasher.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.arrow.memory.util.hash;
import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.util.MemoryUtil;
import org.checkerframework.checker.nullness.qual.Nullable;
/**
* Implementation of the Murmur hashing algorithm. Details of the algorithm can be found in
* https://en.wikipedia.org/wiki/MurmurHash
*
* <p>Murmur hashing is computationally expensive, as it involves several integer multiplications.
* However, the produced hash codes have good quality in the sense that they are uniformly
* distributed in the universe.
*
* <p>Therefore, this algorithm is suitable for scenarios where uniform hashing is desired (e.g. in
* an open addressing hash table/hash set).
*/
public class MurmurHasher implements ArrowBufHasher {
private final int seed;
/** Creates a default Murmur hasher, with seed 0. */
public MurmurHasher() {
this(0);
}
/**
* Creates a Murmur hasher.
*
* @param seed the seed for the hasher.
*/
public MurmurHasher(int seed) {
this.seed = seed;
}
@Override
public int hashCode(long address, long length) {
return hashCode(address, length, seed);
}
@Override
public int hashCode(ArrowBuf buf, long offset, long length) {
buf.checkBytes(offset, offset + length);
return hashCode(buf.memoryAddress() + offset, length);
}
/**
* Calculates the hash code for a memory region.
*
* @param buf the buffer for the memory region.
* @param offset offset within the buffer for the memory region.
* @param length length of the memory region.
* @param seed the seed.
* @return the hash code.
*/
public static int hashCode(ArrowBuf buf, long offset, long length, int seed) {
buf.checkBytes(offset, offset + length);
return hashCode(buf.memoryAddress() + offset, length, seed);
}
/**
* Calculates the hash code for a memory region.
*
* @param address start address of the memory region.
* @param length length of the memory region.
* @param seed the seed.
* @return the hash code.
*/
public static int hashCode(long address, long length, int seed) {
int index = 0;
int hash = seed;
while (index + 4 <= length) {
int intValue = MemoryUtil.getInt(address + index);
hash = combineHashCode(hash, intValue);
index += 4;
}
if (index < length) {
// process remaining data as a integer in little endian
int intValue = 0;
for (long i = length - 1; i >= index; i--) {
intValue <<= 8;
intValue |= (MemoryUtil.getByte(address + i) & 0x000000ff);
index += 1;
}
hash = combineHashCode(hash, intValue);
}
return finalizeHashCode(hash, length);
}
/**
* Combine the current hash code and a new int value to calculate a new hash code.
*
* @param currentHashCode the current hash code.
* @param intValue the new int value.
* @return the new hah code.
*/
public static int combineHashCode(int currentHashCode, int intValue) {
int c1 = 0xcc9e2d51;
int c2 = 0x1b873593;
int r1 = 15;
int r2 = 13;
int m = 5;
int n = 0xe6546b64;
int k = intValue;
k = k * c1;
k = rotateLeft(k, r1);
k = k * c2;
int hash = currentHashCode;
hash = hash ^ k;
hash = rotateLeft(hash, r2);
hash = hash * m + n;
return hash;
}
/**
* Finalizing the hash code.
*
* @param hashCode the current hash code.
* @param length the length of the memory region.
* @return the finalized hash code.
*/
public static int finalizeHashCode(int hashCode, long length) {
hashCode = hashCode ^ (int) length;
hashCode = hashCode ^ (hashCode >>> 16);
hashCode = hashCode * 0x85ebca6b;
hashCode = hashCode ^ (hashCode >>> 13);
hashCode = hashCode * 0xc2b2ae35;
hashCode = hashCode ^ (hashCode >>> 16);
return hashCode;
}
private static int rotateLeft(int value, int count) {
return (value << count) | (value >>> (32 - count));
}
@Override
public boolean equals(@Nullable Object o) {
if (this == o) {
return true;
}
if (!(o instanceof MurmurHasher)) {
return false;
}
MurmurHasher that = (MurmurHasher) o;
return seed == that.seed;
}
@Override
public int hashCode() {
return seed;
}
}