summary | refs | log | tree | commit | diff | stats
path: root/vespajlib
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2024-01-21 10:39:55 +0100
committer Henning Baldersheim <balder@yahoo-inc.com> 2024-01-21 10:48:31 +0100
commit e10b5df5e6f640a3d82be58647847c5b26340ad5 (patch)
tree 77c018767757068fc024ded6c73da24be7bb1cdd /vespajlib
parent f481110c07a4759b98452deb35fe719ce6c49be4 (diff)
- Use xxh3 hash for better hashing, and cache the hash value.
Diffstat (limited to 'vespajlib')
-rw-r--r-- vespajlib/src/main/java/com/yahoo/compress/Hasher.java 17
-rw-r--r-- vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java 21
2 files changed, 32 insertions, 6 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/compress/Hasher.java b/vespajlib/src/main/java/com/yahoo/compress/Hasher.java
index 92a9ed26085..d991a98a5d0 100644
--- a/vespajlib/src/main/java/com/yahoo/compress/Hasher.java
+++ b/vespajlib/src/main/java/com/yahoo/compress/Hasher.java
@@ -8,8 +8,25 @@ import net.openhft.hashing.LongHashFunction;
* @author baldersheim
*/
public class Hasher {
+ private final LongHashFunction hasher;
/** Uses net.openhft.hashing.LongHashFunction.xx3() */
public static long xxh3(byte [] data) {
return LongHashFunction.xx3().hashBytes(data);
}
+ public static long xxh3(byte [] data, long seed) {
+ return LongHashFunction.xx3(seed).hashBytes(data);
+ }
+
+ private Hasher(LongHashFunction hasher) {
+ this.hasher = hasher;
+ }
+ public static Hasher of(long seed) {
+ return new Hasher(LongHashFunction.xx3(seed));
+ }
+ public long hash(long v) {
+ return hasher.hashLong(v);
+ }
+ public long hash(String s) {
+ return hasher.hashChars(s);
+ }
}
diff --git a/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java b/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java
index f841b7757fb..e3dfc297da6 100644
--- a/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java
+++ b/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java
@@ -1,6 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.tensor;
+import com.yahoo.compress.Hasher;
import com.yahoo.tensor.impl.NumericTensorAddress;
import com.yahoo.tensor.impl.StringTensorAddress;
@@ -28,6 +29,9 @@ public abstract class TensorAddress implements Comparable<TensorAddress> {
return NumericTensorAddress.of(labels);
}
+ // If the hash ends up as 0 it will be recalculated every time, but that is not a practical issue.
+ private int cached_hash = 0;
+
/** Returns the number of labels in this */
public abstract int size();
@@ -62,12 +66,15 @@ public abstract class TensorAddress implements Comparable<TensorAddress> {
@Override
public int hashCode() {
- int result = 1;
+ if (cached_hash != 0) return cached_hash;
+ Hasher hasher = Hasher.of(0);
+ long hash = 0;
for (int i = 0; i < size(); i++) {
- if (label(i) != null)
- result = 31 * result + label(i).hashCode();
+ hash = hash ^ hasher.hash(label(i));
}
- return result;
+ int low = (int) hash;
+ int high = (int) (hash >> 32);
+ return cached_hash = low ^ high;
}
@Override
@@ -105,7 +112,7 @@ public abstract class TensorAddress implements Comparable<TensorAddress> {
public static class Builder {
final TensorType type;
- final String[] labels;
+ String[] labels;
public Builder(TensorType type) {
this(type, new String[type.dimensions().size()]);
@@ -162,7 +169,9 @@ public abstract class TensorAddress implements Comparable<TensorAddress> {
public TensorAddress build() {
validate();
- return TensorAddress.of(labels);
+ var address = StringTensorAddress.unsafeOf(labels);
+ labels = null;
+ return address;
}
}