diff options
author | Arne Juul <arnej@verizonmedia.com> | 2020-06-26 09:03:12 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2020-06-26 09:03:12 +0000 |
commit | 27b8596bb6508f43d03d0784378ea537347e9d3a (patch) | |
tree | 29da378571c001ca9cf532529e61d3086690fcb2 /searchlib/src | |
parent | 3e273894fc49c215764c9beb4270ea206038af5f (diff) |
adjust angular distance slightly
* now gives actual angle as final distance, in range [0,pi]
* extend unit tests and test to_rawscore for all metrics
* move explicit template instantiations to cpp file
Diffstat (limited to 'searchlib/src')
4 files changed, 61 insertions, 26 deletions
diff --git a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp index 283a38ec95d..7d0f741e362 100644 --- a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp +++ b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp @@ -45,32 +45,53 @@ TEST(DistanceFunctionsTest, gives_expected_score) std::vector<double> p3{0.0, 0.0, 1.0}; std::vector<double> p4{0.5, 0.5, 0.707107}; std::vector<double> p5{0.0,-1.0, 0.0}; + std::vector<double> p6{1.0, 2.0, 2.0}; double n4 = euclid->calc(t(p0), t(p4)); - EXPECT_GT(n4, 0.99999); - EXPECT_LT(n4, 1.00001); + EXPECT_FLOAT_EQ(n4, 1.0); double d12 = euclid->calc(t(p1), t(p2)); EXPECT_EQ(d12, 2.0); + EXPECT_DOUBLE_EQ(euclid->to_rawscore(d12), 1.0/(1.0 + sqrt(2.0))); + constexpr double pi = 3.14159265358979323846; double a12 = angular->calc(t(p1), t(p2)); double a13 = angular->calc(t(p1), t(p3)); double a23 = angular->calc(t(p2), t(p3)); - EXPECT_DOUBLE_EQ(a12, 0.5); - EXPECT_DOUBLE_EQ(a13, 0.5); - EXPECT_DOUBLE_EQ(a23, 0.5); + EXPECT_DOUBLE_EQ(a12, 1.0); + EXPECT_DOUBLE_EQ(a13, 1.0); + EXPECT_DOUBLE_EQ(a23, 1.0); + EXPECT_FLOAT_EQ(angular->to_rawscore(a12), 1.0/(1.0 + pi/2)); + double a14 = angular->calc(t(p1), t(p4)); double a24 = angular->calc(t(p2), t(p4)); - EXPECT_FLOAT_EQ(a14, 0.25); - EXPECT_FLOAT_EQ(a24, 0.25); + EXPECT_FLOAT_EQ(a14, 0.5); + EXPECT_FLOAT_EQ(a24, 0.5); + EXPECT_FLOAT_EQ(angular->to_rawscore(a14), 1.0/(1.0 + pi/3)); + double a34 = angular->calc(t(p3), t(p4)); - EXPECT_FLOAT_EQ(a34, (1.0 - 0.707107)*0.5); + EXPECT_FLOAT_EQ(a34, (1.0 - 0.707107)); + EXPECT_FLOAT_EQ(angular->to_rawscore(a34), 1.0/(1.0 + pi/4)); double a25 = angular->calc(t(p2), t(p5)); - EXPECT_DOUBLE_EQ(a25, 1.0); + EXPECT_DOUBLE_EQ(a25, 2.0); + EXPECT_FLOAT_EQ(angular->to_rawscore(a25), 1.0/(1.0 + pi)); double a44 = angular->calc(t(p4), t(p4)); EXPECT_GE(a44, 0.0); EXPECT_LT(a44, 0.000001); + 
EXPECT_FLOAT_EQ(angular->to_rawscore(a44), 1.0); + + double a66 = angular->calc(t(p6), t(p6)); + EXPECT_GE(a66, 0.0); + EXPECT_LT(a66, 0.000001); + EXPECT_FLOAT_EQ(angular->to_rawscore(a66), 1.0); + + double a16 = angular->calc(t(p1), t(p6)); + double a26 = angular->calc(t(p2), t(p6)); + double a36 = angular->calc(t(p3), t(p6)); + EXPECT_FLOAT_EQ(a16, 1.0 - (1.0/3.0)); + EXPECT_FLOAT_EQ(a26, 1.0 - (2.0/3.0)); + EXPECT_FLOAT_EQ(a36, 1.0 - (2.0/3.0)); double i12 = innerproduct->calc(t(p1), t(p2)); double i13 = innerproduct->calc(t(p1), t(p3)); diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt index 0f106f693f8..35615b255c0 100644 --- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt @@ -6,6 +6,7 @@ vespa_add_library(searchlib_tensor OBJECT dense_tensor_attribute_saver.cpp dense_tensor_store.cpp distance_function_factory.cpp + distance_functions.cpp generic_tensor_attribute.cpp generic_tensor_attribute_saver.cpp generic_tensor_store.cpp diff --git a/searchlib/src/vespa/searchlib/tensor/distance_functions.cpp b/searchlib/src/vespa/searchlib/tensor/distance_functions.cpp new file mode 100644 index 00000000000..9017628d42c --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/distance_functions.cpp @@ -0,0 +1,19 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "distance_functions.h" + +namespace search::tensor { + +template class SquaredEuclideanDistance<float>; +template class SquaredEuclideanDistance<double>; + +template class AngularDistance<float>; +template class AngularDistance<double>; + +template class InnerProductDistance<float>; +template class InnerProductDistance<double>; + +template class GeoDegreesDistance<float>; +template class GeoDegreesDistance<double>; + +} diff --git a/searchlib/src/vespa/searchlib/tensor/distance_functions.h b/searchlib/src/vespa/searchlib/tensor/distance_functions.h index 6cb3f120ae9..2fb191881dc 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_functions.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_functions.h @@ -50,9 +50,6 @@ public: const vespalib::hwaccelrated::IAccelrated & _computer; }; -template class SquaredEuclideanDistance<float>; -template class SquaredEuclideanDistance<double>; - /** * Calculates angular distance between vectors */ @@ -71,14 +68,20 @@ public: auto b = &rhs_vector[0]; double a_norm_sq = _computer.dotProduct(a, a, sz); double b_norm_sq = _computer.dotProduct(b, b, sz); + double squared_norms = a_norm_sq * b_norm_sq; double dot_product = _computer.dotProduct(a, b, sz); - double div = sqrt(a_norm_sq * b_norm_sq); - double cosine_similarity = (div > 0) ? (dot_product / div) : 0.0; // [-1, 1] - double score = (1.0 - cosine_similarity) * 0.5; // [1, 0] - return score; + double div = (squared_norms > 0) ? 
sqrt(squared_norms) : 1.0; + double cosine_similarity = dot_product / div; + double distance = 1.0 - cosine_similarity; // in range [0,2] + return distance; } double to_rawscore(double distance) const override { - double score = 1.0 - distance; + double cosine_similarity = 1.0 - distance; + // should be in in range [-1,1] but roundoff may cause problems: + cosine_similarity = std::min(1.0, cosine_similarity); + cosine_similarity = std::max(-1.0, cosine_similarity); + double angle_distance = acos(cosine_similarity); // in range [0,pi] + double score = 1.0 / (1.0 + angle_distance); return score; } double calc_with_limit(const vespalib::tensor::TypedCells& lhs, @@ -91,9 +94,6 @@ public: const vespalib::hwaccelrated::IAccelrated & _computer; }; -template class AngularDistance<float>; -template class AngularDistance<double>; - /** * Calculates angular distance between vectors with assumed norm 1. */ @@ -125,9 +125,6 @@ public: const vespalib::hwaccelrated::IAccelrated & _computer; }; -template class InnerProductDistance<float>; -template class InnerProductDistance<double>; - /** * Calculates great-circle distance between Latitude/Longitude pairs, * measured in degrees. Output distance is measured in meters. @@ -180,7 +177,4 @@ public: }; -template class GeoDegreesDistance<float>; -template class GeoDegreesDistance<double>; - } |