diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2024-01-21 22:18:30 +0100 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2024-01-21 22:18:30 +0100 |
commit | d7525c7ef9f63e275b466840dd30dad24e5d4779 (patch) | |
tree | 821a2b44ec26ab12c61e6e2c2a8da21521a7b303 /vespajlib | |
parent | 44bea161aac2bf0706dd4b30ac6bab0a63470dfd (diff) |
Use lz4-java and xxhash32
Diffstat (limited to 'vespajlib')
-rw-r--r-- | vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java | 20 | ||||
-rw-r--r-- | vespajlib/src/test/java/com/yahoo/tensor/TensorFunctionBenchmark.java | 20 |
2 files changed, 27 insertions, 13 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java b/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java index f841b7757fb..1346ee7cf46 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java @@ -3,7 +3,10 @@ package com.yahoo.tensor; import com.yahoo.tensor.impl.NumericTensorAddress; import com.yahoo.tensor.impl.StringTensorAddress; +import net.jpountz.xxhash.XXHash32; +import net.jpountz.xxhash.XXHashFactory; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Objects; import java.util.Optional; @@ -16,6 +19,8 @@ import java.util.Optional; */ public abstract class TensorAddress implements Comparable<TensorAddress> { + private static final XXHash32 hasher = XXHashFactory.fastestJavaInstance().hash32(); + public static TensorAddress of(String[] labels) { return StringTensorAddress.of(labels); } @@ -28,6 +33,8 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { return NumericTensorAddress.of(labels); } + private int cached_hash = 0; + /** Returns the number of labels in this */ public abstract int size(); @@ -62,12 +69,17 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { @Override public int hashCode() { - int result = 1; + if (cached_hash != 0) return cached_hash; + + int hash = 0; for (int i = 0; i < size(); i++) { - if (label(i) != null) - result = 31 * result + label(i).hashCode(); + String label = label(i); + if (label != null) { + byte [] buf = label.getBytes(StandardCharsets.UTF_8); + hash = hasher.hash(buf, 0, buf.length, hash); + } } - return result; + return cached_hash = hash; } @Override diff --git a/vespajlib/src/test/java/com/yahoo/tensor/TensorFunctionBenchmark.java b/vespajlib/src/test/java/com/yahoo/tensor/TensorFunctionBenchmark.java index 37c0fb87be0..74237a218fb 100644 --- a/vespajlib/src/test/java/com/yahoo/tensor/TensorFunctionBenchmark.java +++ b/vespajlib/src/test/java/com/yahoo/tensor/TensorFunctionBenchmark.java @@ -9,8 +9,10 @@ import com.yahoo.tensor.functions.Join; import com.yahoo.tensor.functions.Reduce; import com.yahoo.tensor.functions.TensorFunction; -import java.util.*; -import java.util.stream.Collectors; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + /** * Microbenchmark of tensor operations. @@ -91,7 +93,7 @@ public class TensorFunctionBenchmark { .value(random.nextDouble()); } } - return Collections.singletonList(builder.build()); + return List.of(builder.build()); } private static TensorType vectorType(TensorType.Builder builder, String name, TensorType.Dimension.Type type, int size) { @@ -136,22 +138,22 @@ public class TensorFunctionBenchmark { // ---------------- Mapped with extra space (sidesteps current special-case optimizations): time = new TensorFunctionBenchmark().benchmark(1000, vectors(100, 300, TensorType.Dimension.Type.mapped), TensorType.Dimension.Type.mapped, true); System.out.printf("Mapped vectors, x space time per join: %1$8.3f ms\n", time); + time = new TensorFunctionBenchmark().benchmark(1000, matrix(100, 300, TensorType.Dimension.Type.mapped), TensorType.Dimension.Type.mapped, true); System.out.printf("Mapped matrix, x space time per join: %1$8.3f ms\n", time); - /** 2.4Ghz Intel Core i9, Macbook Pro 2019 + /* 2.4Ghz Intel Core i9, Macbook Pro 2019 * Indexed unbound vectors, time per join: 0,067 ms * Indexed unbound matrix, time per join: 0,107 ms * Indexed bound vectors, time per join: 0,068 ms * Indexed bound matrix, time per join: 0,105 ms - * Mapped vectors, time per join: 1,780 ms - * Mapped matrix, time per join: 5,339 ms + * Mapped vectors, time per join: 1,342 ms + * Mapped matrix, time per join: 3,448 ms * Indexed vectors, x space time per join: 6,398 ms * Indexed matrix, x space time per join: 3,220 ms - * Mapped vectors, x space time per join: 13,026 ms - * Mapped matrix, x space time per join: 28,259 ms + * Mapped vectors, x space time per join: 14,984 ms + * Mapped matrix, x space time per join: 19,873 ms */ - } } |