aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src
diff options
context:
space:
mode:
authorArne Juul <arnej@verizonmedia.com>2020-06-26 09:03:12 +0000
committerArne Juul <arnej@verizonmedia.com>2020-06-26 09:03:12 +0000
commit27b8596bb6508f43d03d0784378ea537347e9d3a (patch)
tree29da378571c001ca9cf532529e61d3086690fcb2 /searchlib/src
parent3e273894fc49c215764c9beb4270ea206038af5f (diff)
adjust angular distance slightly
* now gives actual angle as final distance, in range [0,pi]
* extend unit tests and test to_rawscore for all metrics
* move explicit template instantiations to cpp file
Diffstat (limited to 'searchlib/src')
-rw-r--r--searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp39
-rw-r--r--searchlib/src/vespa/searchlib/tensor/CMakeLists.txt1
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_functions.cpp19
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_functions.h28
4 files changed, 61 insertions, 26 deletions
diff --git a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
index 283a38ec95d..7d0f741e362 100644
--- a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
+++ b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
@@ -45,32 +45,53 @@ TEST(DistanceFunctionsTest, gives_expected_score)
std::vector<double> p3{0.0, 0.0, 1.0};
std::vector<double> p4{0.5, 0.5, 0.707107};
std::vector<double> p5{0.0,-1.0, 0.0};
+ std::vector<double> p6{1.0, 2.0, 2.0};
double n4 = euclid->calc(t(p0), t(p4));
- EXPECT_GT(n4, 0.99999);
- EXPECT_LT(n4, 1.00001);
+ EXPECT_FLOAT_EQ(n4, 1.0);
double d12 = euclid->calc(t(p1), t(p2));
EXPECT_EQ(d12, 2.0);
+ EXPECT_DOUBLE_EQ(euclid->to_rawscore(d12), 1.0/(1.0 + sqrt(2.0)));
+ constexpr double pi = 3.14159265358979323846;
double a12 = angular->calc(t(p1), t(p2));
double a13 = angular->calc(t(p1), t(p3));
double a23 = angular->calc(t(p2), t(p3));
- EXPECT_DOUBLE_EQ(a12, 0.5);
- EXPECT_DOUBLE_EQ(a13, 0.5);
- EXPECT_DOUBLE_EQ(a23, 0.5);
+ EXPECT_DOUBLE_EQ(a12, 1.0);
+ EXPECT_DOUBLE_EQ(a13, 1.0);
+ EXPECT_DOUBLE_EQ(a23, 1.0);
+ EXPECT_FLOAT_EQ(angular->to_rawscore(a12), 1.0/(1.0 + pi/2));
+
double a14 = angular->calc(t(p1), t(p4));
double a24 = angular->calc(t(p2), t(p4));
- EXPECT_FLOAT_EQ(a14, 0.25);
- EXPECT_FLOAT_EQ(a24, 0.25);
+ EXPECT_FLOAT_EQ(a14, 0.5);
+ EXPECT_FLOAT_EQ(a24, 0.5);
+ EXPECT_FLOAT_EQ(angular->to_rawscore(a14), 1.0/(1.0 + pi/3));
+
double a34 = angular->calc(t(p3), t(p4));
- EXPECT_FLOAT_EQ(a34, (1.0 - 0.707107)*0.5);
+ EXPECT_FLOAT_EQ(a34, (1.0 - 0.707107));
+ EXPECT_FLOAT_EQ(angular->to_rawscore(a34), 1.0/(1.0 + pi/4));
double a25 = angular->calc(t(p2), t(p5));
- EXPECT_DOUBLE_EQ(a25, 1.0);
+ EXPECT_DOUBLE_EQ(a25, 2.0);
+ EXPECT_FLOAT_EQ(angular->to_rawscore(a25), 1.0/(1.0 + pi));
double a44 = angular->calc(t(p4), t(p4));
EXPECT_GE(a44, 0.0);
EXPECT_LT(a44, 0.000001);
+ EXPECT_FLOAT_EQ(angular->to_rawscore(a44), 1.0);
+
+ double a66 = angular->calc(t(p6), t(p6));
+ EXPECT_GE(a66, 0.0);
+ EXPECT_LT(a66, 0.000001);
+ EXPECT_FLOAT_EQ(angular->to_rawscore(a66), 1.0);
+
+ double a16 = angular->calc(t(p1), t(p6));
+ double a26 = angular->calc(t(p2), t(p6));
+ double a36 = angular->calc(t(p3), t(p6));
+ EXPECT_FLOAT_EQ(a16, 1.0 - (1.0/3.0));
+ EXPECT_FLOAT_EQ(a26, 1.0 - (2.0/3.0));
+ EXPECT_FLOAT_EQ(a36, 1.0 - (2.0/3.0));
double i12 = innerproduct->calc(t(p1), t(p2));
double i13 = innerproduct->calc(t(p1), t(p3));
diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
index 0f106f693f8..35615b255c0 100644
--- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
@@ -6,6 +6,7 @@ vespa_add_library(searchlib_tensor OBJECT
dense_tensor_attribute_saver.cpp
dense_tensor_store.cpp
distance_function_factory.cpp
+ distance_functions.cpp
generic_tensor_attribute.cpp
generic_tensor_attribute_saver.cpp
generic_tensor_store.cpp
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_functions.cpp b/searchlib/src/vespa/searchlib/tensor/distance_functions.cpp
new file mode 100644
index 00000000000..9017628d42c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/distance_functions.cpp
@@ -0,0 +1,19 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "distance_functions.h"
+
+namespace search::tensor {
+
+template class SquaredEuclideanDistance<float>;
+template class SquaredEuclideanDistance<double>;
+
+template class AngularDistance<float>;
+template class AngularDistance<double>;
+
+template class InnerProductDistance<float>;
+template class InnerProductDistance<double>;
+
+template class GeoDegreesDistance<float>;
+template class GeoDegreesDistance<double>;
+
+}
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_functions.h b/searchlib/src/vespa/searchlib/tensor/distance_functions.h
index 6cb3f120ae9..2fb191881dc 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_functions.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_functions.h
@@ -50,9 +50,6 @@ public:
const vespalib::hwaccelrated::IAccelrated & _computer;
};
-template class SquaredEuclideanDistance<float>;
-template class SquaredEuclideanDistance<double>;
-
/**
* Calculates angular distance between vectors
*/
@@ -71,14 +68,20 @@ public:
auto b = &rhs_vector[0];
double a_norm_sq = _computer.dotProduct(a, a, sz);
double b_norm_sq = _computer.dotProduct(b, b, sz);
+ double squared_norms = a_norm_sq * b_norm_sq;
double dot_product = _computer.dotProduct(a, b, sz);
- double div = sqrt(a_norm_sq * b_norm_sq);
- double cosine_similarity = (div > 0) ? (dot_product / div) : 0.0; // [-1, 1]
- double score = (1.0 - cosine_similarity) * 0.5; // [1, 0]
- return score;
+ double div = (squared_norms > 0) ? sqrt(squared_norms) : 1.0;
+ double cosine_similarity = dot_product / div;
+ double distance = 1.0 - cosine_similarity; // in range [0,2]
+ return distance;
}
double to_rawscore(double distance) const override {
- double score = 1.0 - distance;
+ double cosine_similarity = 1.0 - distance;
+ // should be in range [-1,1] but roundoff may cause problems:
+ cosine_similarity = std::min(1.0, cosine_similarity);
+ cosine_similarity = std::max(-1.0, cosine_similarity);
+ double angle_distance = acos(cosine_similarity); // in range [0,pi]
+ double score = 1.0 / (1.0 + angle_distance);
return score;
}
double calc_with_limit(const vespalib::tensor::TypedCells& lhs,
@@ -91,9 +94,6 @@ public:
const vespalib::hwaccelrated::IAccelrated & _computer;
};
-template class AngularDistance<float>;
-template class AngularDistance<double>;
-
/**
* Calculates angular distance between vectors with assumed norm 1.
*/
@@ -125,9 +125,6 @@ public:
const vespalib::hwaccelrated::IAccelrated & _computer;
};
-template class InnerProductDistance<float>;
-template class InnerProductDistance<double>;
-
/**
* Calculates great-circle distance between Latitude/Longitude pairs,
* measured in degrees. Output distance is measured in meters.
@@ -180,7 +177,4 @@ public:
};
-template class GeoDegreesDistance<float>;
-template class GeoDegreesDistance<double>;
-
}