diff options
Diffstat (limited to 'vespajlib')
4 files changed, 81 insertions, 7 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/compress/Hasher.java b/vespajlib/src/main/java/com/yahoo/compress/Hasher.java index 92a9ed26085..7a3d34eca7b 100644 --- a/vespajlib/src/main/java/com/yahoo/compress/Hasher.java +++ b/vespajlib/src/main/java/com/yahoo/compress/Hasher.java @@ -8,8 +8,25 @@ import net.openhft.hashing.LongHashFunction; * @author baldersheim */ public class Hasher { + private final LongHashFunction hasher; /** Uses net.openhft.hashing.LongHashFunction.xx3() */ public static long xxh3(byte [] data) { return LongHashFunction.xx3().hashBytes(data); } + public static long xxh3(byte [] data, long seed) { + return LongHashFunction.xx3(seed).hashBytes(data); + } + + private Hasher(LongHashFunction hasher) { + this.hasher = hasher; + } + public static Hasher withSeed(long seed) { + return new Hasher(LongHashFunction.xx3(seed)); + } + public long hash(long v) { + return hasher.hashLong(v); + } + public long hash(String s) { + return hasher.hashChars(s); + } } diff --git a/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java b/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java index d755d6d6337..30dd1d6dc29 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/MixedTensor.java @@ -9,6 +9,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; @@ -307,7 +308,7 @@ public class MixedTensor implements Tensor { public static class BoundBuilder extends Builder { /** For each sparse partial address, hold a dense subspace */ - private final Map<TensorAddress, double[]> denseSubspaceMap = new HashMap<>(); + private final Map<TensorAddress, double[]> denseSubspaceMap = new LinkedHashMap<>(); private final Index.Builder indexBuilder; private final Index index; private final TensorType denseSubtype; diff --git a/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java b/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java index f841b7757fb..e011717cdd8 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.tensor; +import com.yahoo.compress.Hasher; import com.yahoo.tensor.impl.NumericTensorAddress; import com.yahoo.tensor.impl.StringTensorAddress; @@ -28,6 +29,9 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { return NumericTensorAddress.of(labels); } + // If the hash ends up as 0 it will be recalculated everytime, but that is not a practical issue. + private int cached_hash = 0; + /** Returns the number of labels in this */ public abstract int size(); @@ -62,12 +66,18 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { @Override public int hashCode() { - int result = 1; + if (cached_hash != 0) return cached_hash; + Hasher hasher = Hasher.withSeed(0); + long hash = 0; for (int i = 0; i < size(); i++) { - if (label(i) != null) - result = 31 * result + label(i).hashCode(); + String label = label(i); + if (label != null) { + hash = hash ^ hasher.hash(label); + } } - return result; + int low = (int) hash; + int high = (int) (hash >> 32); + return cached_hash = low ^ high; } @Override @@ -105,7 +115,7 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { public static class Builder { final TensorType type; - final String[] labels; + String[] labels; public Builder(TensorType type) { this(type, new String[type.dimensions().size()]); @@ -162,7 +172,9 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { public TensorAddress build() { validate(); - return TensorAddress.of(labels); + var address = StringTensorAddress.unsafeOf(labels); + labels = null; + return address; } } diff --git a/vespajlib/src/test/java/com/yahoo/tensor/TensorAddressTestCase.java b/vespajlib/src/test/java/com/yahoo/tensor/TensorAddressTestCase.java new file mode 100644 index 00000000000..79202e3f07e --- /dev/null +++ b/vespajlib/src/test/java/com/yahoo/tensor/TensorAddressTestCase.java @@ -0,0 +1,44 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.tensor; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; + +/** + * Test for tensor address. + * + * @author baldersheim + */ +public class TensorAddressTestCase { + private void equal(Object a, Object b) { + assertEquals(a.hashCode(), b.hashCode()); + assertEquals(a, b); + } + private void notEqual(Object a, Object b) { + assertNotEquals(a.hashCode(), b.hashCode()); // This might not hold, but is bad if not very rare + assertNotEquals(a, b); + } + @Test + void testStringVersusNumericAddressEquality() { + equal(TensorAddress.ofLabels("1"), TensorAddress.of(1)); + } + @Test + void testInEquality() { + notEqual(TensorAddress.ofLabels("1"), TensorAddress.ofLabels("2")); + notEqual(TensorAddress.of(1), TensorAddress.of(2)); + } + @Test + void testDimensionsEffectsEqualityAndHash() { + notEqual(TensorAddress.ofLabels("1"), TensorAddress.ofLabels("1", "1")); + notEqual(TensorAddress.of(1), TensorAddress.of(1, 1)); + } + @Test + void testAllowNullDimension() { + TensorAddress s1 = TensorAddress.ofLabels("1", null, "2"); + TensorAddress s2 = TensorAddress.ofLabels("1", "2"); + assertNotEquals(s1, s2); + assertEquals(s1.hashCode(), s2.hashCode()); + } +} |