diff options
author | Arne Juul <arnej@yahooinc.com> | 2023-06-26 06:31:44 +0000 |
---|---|---|
committer | Arne Juul <arnej@yahooinc.com> | 2023-06-26 08:37:46 +0000 |
commit | 643a09268b71ea0ebf128552874f1a3ee15aca2e (patch) | |
tree | 0d224c2130b9f9c213152c4fe0a2a00ecf625b1d | |
parent | f374e7d08c7e492130956c757ebdbd6cccdda74f (diff) |
add euclidean_distance
-rw-r--r-- | searchlib/abi-spec.json | 2 | ||||
-rwxr-xr-x | searchlib/src/main/javacc/RankingExpressionParser.jj | 17 | ||||
-rw-r--r-- | vespajlib/abi-spec.json | 16 | ||||
-rw-r--r-- | vespajlib/src/main/java/com/yahoo/tensor/functions/EuclideanDistance.java | 57 | ||||
-rw-r--r-- | vespajlib/src/test/java/com/yahoo/tensor/functions/EuclideanDistanceTestCase.java | 43 |
5 files changed, 134 insertions, 1 deletions
diff --git a/searchlib/abi-spec.json b/searchlib/abi-spec.json index f3fe86e261f..30f2cb5c6ea 100644 --- a/searchlib/abi-spec.json +++ b/searchlib/abi-spec.json @@ -946,6 +946,7 @@ "public final com.yahoo.searchlib.rankingexpression.rule.TensorFunctionNode tensorRandom()", "public final com.yahoo.searchlib.rankingexpression.rule.TensorFunctionNode tensorL1Normalize()", "public final com.yahoo.searchlib.rankingexpression.rule.TensorFunctionNode tensorL2Normalize()", + "public final com.yahoo.searchlib.rankingexpression.rule.TensorFunctionNode tensorEuclideanDistance()", "public final com.yahoo.searchlib.rankingexpression.rule.TensorFunctionNode tensorMatmul()", "public final com.yahoo.searchlib.rankingexpression.rule.TensorFunctionNode tensorSoftmax()", "public final com.yahoo.searchlib.rankingexpression.rule.TensorFunctionNode tensorXwPlusB()", @@ -1098,6 +1099,7 @@ "public static final int RANDOM", "public static final int L1_NORMALIZE", "public static final int L2_NORMALIZE", + "public static final int EUCLIDEAN_DISTANCE", "public static final int MATMUL", "public static final int SOFTMAX", "public static final int XW_PLUS_B", diff --git a/searchlib/src/main/javacc/RankingExpressionParser.jj b/searchlib/src/main/javacc/RankingExpressionParser.jj index 42b5f2c191a..744e629893e 100755 --- a/searchlib/src/main/javacc/RankingExpressionParser.jj +++ b/searchlib/src/main/javacc/RankingExpressionParser.jj @@ -138,6 +138,7 @@ TOKEN : <RANDOM: "random"> | <L1_NORMALIZE: "l1_normalize"> | <L2_NORMALIZE: "l2_normalize"> | + <EUCLIDEAN_DISTANCE: "euclidean_distance"> | <MATMUL: "matmul"> | <SOFTMAX: "softmax"> | <XW_PLUS_B: "xw_plus_b"> | @@ -379,6 +380,7 @@ TensorFunctionNode tensorFunction() : tensorExpression = tensorRandom() | tensorExpression = tensorL1Normalize() | tensorExpression = tensorL2Normalize() | + tensorExpression = tensorEuclideanDistance() | tensorExpression = tensorMatmul() | tensorExpression = tensorSoftmax() | tensorExpression = tensorXwPlusB() | @@ -544,6 +546,18 @@ TensorFunctionNode tensorL2Normalize() : { return new TensorFunctionNode(new L2Normalize(TensorFunctionNode.wrap(tensor), dimension)); } } +TensorFunctionNode tensorEuclideanDistance() : +{ + ExpressionNode tensor1, tensor2; + String dimension; +} +{ + <EUCLIDEAN_DISTANCE> <LBRACE> tensor1 = expression() <COMMA> tensor2 = expression() <COMMA> dimension = identifier() <RBRACE> + { return new TensorFunctionNode(new EuclideanDistance(TensorFunctionNode.wrap(tensor1), + TensorFunctionNode.wrap(tensor2), + dimension)); } +} + TensorFunctionNode tensorMatmul() : { ExpressionNode tensor1, tensor2; @@ -701,6 +715,7 @@ String tensorFunctionName() : ( <RANDOM> { return token.image; } ) | ( <L1_NORMALIZE> { return token.image; } ) | ( <L2_NORMALIZE> { return token.image; } ) | + ( <EUCLIDEAN_DISTANCE> { return token.image; } ) | ( <MATMUL> { return token.image; } ) | ( <SOFTMAX> { return token.image; } ) | ( <XW_PLUS_B> { return token.image; } ) | @@ -1041,4 +1056,4 @@ String label() : String string() : {} { <STRING> { return token.image.substring(1, token.image.length() - 1); } -}
\ No newline at end of file +} diff --git a/vespajlib/abi-spec.json b/vespajlib/abi-spec.json index 676e212f5c6..3b9f494dc50 100644 --- a/vespajlib/abi-spec.json +++ b/vespajlib/abi-spec.json @@ -1740,6 +1740,22 @@ ], "fields" : [ ] }, + "com.yahoo.tensor.functions.EuclideanDistance" : { + "superClass" : "com.yahoo.tensor.functions.CompositeTensorFunction", + "interfaces" : [ ], + "attributes" : [ + "public" + ], + "methods" : [ + "public void <init>(com.yahoo.tensor.functions.TensorFunction, com.yahoo.tensor.functions.TensorFunction, java.lang.String)", + "public java.util.List arguments()", + "public com.yahoo.tensor.functions.TensorFunction withArguments(java.util.List)", + "public com.yahoo.tensor.functions.PrimitiveTensorFunction toPrimitive()", + "public java.lang.String toString(com.yahoo.tensor.functions.ToStringContext)", + "public int hashCode()" + ], + "fields" : [ ] + }, "com.yahoo.tensor.functions.Expand" : { "superClass" : "com.yahoo.tensor.functions.CompositeTensorFunction", "interfaces" : [ ], diff --git a/vespajlib/src/main/java/com/yahoo/tensor/functions/EuclideanDistance.java b/vespajlib/src/main/java/com/yahoo/tensor/functions/EuclideanDistance.java new file mode 100644 index 00000000000..4feddf9f808 --- /dev/null +++ b/vespajlib/src/main/java/com/yahoo/tensor/functions/EuclideanDistance.java @@ -0,0 +1,57 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.tensor.functions; + +import com.yahoo.tensor.evaluation.Name; + +import java.util.Collections; +import java.util.List; +import java.util.Objects; + +/** + * @author arnej + */ +public class EuclideanDistance<NAMETYPE extends Name> extends CompositeTensorFunction<NAMETYPE> { + + private final TensorFunction<NAMETYPE> arg1; + private final TensorFunction<NAMETYPE> arg2; + private final String dimension; + + public EuclideanDistance(TensorFunction<NAMETYPE> argument1, + TensorFunction<NAMETYPE> argument2, + String dimension) + { + this.arg1 = argument1; + this.arg2 = argument2; + this.dimension = dimension; + } + + @Override + public List<TensorFunction<NAMETYPE>> arguments() { return List.of(arg1, arg2); } + + @Override + public TensorFunction<NAMETYPE> withArguments(List<TensorFunction<NAMETYPE>> arguments) { + if ( arguments.size() != 2) + throw new IllegalArgumentException("EuclideanDistance must have 2 arguments, got " + arguments.size()); + return new EuclideanDistance<>(arguments.get(0), arguments.get(1), dimension); + } + + @Override + public PrimitiveTensorFunction<NAMETYPE> toPrimitive() { + TensorFunction<NAMETYPE> primitive1 = arg1.toPrimitive(); + TensorFunction<NAMETYPE> primitive2 = arg2.toPrimitive(); + // this should match the C++ optimized "l2_distance" + var diffs = new Join<>(primitive1, primitive2, ScalarFunctions.subtract()); + var squaredDiffs = new Map<>(diffs, ScalarFunctions.square()); + var sumOfSquares = new Reduce<>(squaredDiffs, Reduce.Aggregator.sum, dimension); + return new Map<>(sumOfSquares, ScalarFunctions.sqrt()); + } + + @Override + public String toString(ToStringContext<NAMETYPE> context) { + return "euclidean_distance(" + arg1.toString(context) + ", " + arg2.toString(context) + ", " + dimension + ")"; + } + + @Override + public int hashCode() { return Objects.hash("euclidean_distance", arg1, arg2, dimension); } + +} diff --git a/vespajlib/src/test/java/com/yahoo/tensor/functions/EuclideanDistanceTestCase.java b/vespajlib/src/test/java/com/yahoo/tensor/functions/EuclideanDistanceTestCase.java new file mode 100644 index 00000000000..9d06c313ecc --- /dev/null +++ b/vespajlib/src/test/java/com/yahoo/tensor/functions/EuclideanDistanceTestCase.java @@ -0,0 +1,43 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.tensor.functions; + +import com.yahoo.tensor.Tensor; +import com.yahoo.tensor.TensorType; +import org.junit.Test; + +import java.util.List; + +import static org.junit.Assert.assertEquals; + +/** + * @author arnej + */ +public class EuclideanDistanceTestCase { + + @Test + public void testVectorDistances() { + var a = Tensor.from("tensor(x[3]):[1.0, 2.0, 3.0]"); + var b = Tensor.from("tensor(x[3]):[4.0, 2.0, 7.0]"); + var c = Tensor.from("tensor(x[3]):[1.0, 6.0, 6.0]"); + var op = new EuclideanDistance<>(new ConstantTensor<>(a), new ConstantTensor<>(b), "x"); + Tensor result = op.evaluate(); + assertEquals(5.0, result.asDouble(), 0.000001); + op = new EuclideanDistance<>(new ConstantTensor<>(b), new ConstantTensor<>(a), "x"); + result = op.evaluate(); + assertEquals(5.0, result.asDouble(), 0.000001); + op = new EuclideanDistance<>(new ConstantTensor<>(c), new ConstantTensor<>(a), "x"); + result = op.evaluate(); + assertEquals(5.0, result.asDouble(), 0.000001); + } + + @Test + public void testDistancesInMixed() { + var a = Tensor.from("tensor(c{},x[3]):{foo:[1.0, 2.0, 3.0],bar:[0.0, 0.0, 0.0]}"); + var b = Tensor.from("tensor(c{},x[3]):{foo:[4.0, 2.0, 7.0],bar:[12.0, 0.0, 5.0]}"); + var op = new EuclideanDistance<>(new ConstantTensor<>(a), new ConstantTensor<>(b), "x"); + Tensor result = op.evaluate(); + var expect = Tensor.from("tensor(c{}):{foo:5.0,bar:13.0}"); + assertEquals(expect, result); + } + +} |