diff options
Diffstat (limited to 'searchlib/src')
8 files changed, 166 insertions, 22 deletions
diff --git a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp index b94186626c2..1191a7aa2e2 100644 --- a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp +++ b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp @@ -289,6 +289,12 @@ AttributeManagerTest::testConfigConvert() auto out = ConfigConverter::convert(a); EXPECT_TRUE(out.distance_metric() == DistanceMetric::GeoDegrees); } + { // distance metric (explicit) + CACA a; + a.distancemetric = AttributesConfig::Attribute::Distancemetric::INNERPRODUCT; + auto out = ConfigConverter::convert(a); + EXPECT_TRUE(out.distance_metric() == DistanceMetric::InnerProduct); + } { // hnsw index params (enabled) auto dm_in = AttributesConfig::Attribute::Distancemetric::ANGULAR; auto dm_out = DistanceMetric::Angular; @@ -306,6 +312,7 @@ AttributeManagerTest::testConfigConvert() EXPECT_TRUE(params.distance_metric() == dm_out); EXPECT_TRUE(params.multi_threaded_indexing()); } + { // hnsw index params (disabled) CACA a; a.index.hnsw.enabled = false; diff --git a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp index 59532919347..082d04f104b 100644 --- a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp +++ b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp @@ -31,12 +31,11 @@ void verify_geo_miles(const DistanceFunction *dist_fun, } -TEST(DistanceFunctionsTest, gives_expected_score) +TEST(DistanceFunctionsTest, euclidean_gives_expected_score) { auto ct = vespalib::eval::ValueType::CellType::DOUBLE; auto euclid = make_distance_function(DistanceMetric::Euclidean, ct); - auto angular = make_distance_function(DistanceMetric::Angular, ct); std::vector<double> p0{0.0, 0.0, 0.0}; std::vector<double> p1{1.0, 0.0, 0.0}; @@ -44,33 +43,103 @@ TEST(DistanceFunctionsTest, gives_expected_score) std::vector<double> p3{0.0, 0.0, 1.0}; std::vector<double> p4{0.5, 0.5, 0.707107}; std::vector<double> p5{0.0,-1.0, 0.0}; + std::vector<double> p6{1.0, 2.0, 2.0}; double n4 = euclid->calc(t(p0), t(p4)); - EXPECT_GT(n4, 0.99999); - EXPECT_LT(n4, 1.00001); + EXPECT_FLOAT_EQ(n4, 1.0); double d12 = euclid->calc(t(p1), t(p2)); EXPECT_EQ(d12, 2.0); + EXPECT_DOUBLE_EQ(euclid->to_rawscore(d12), 1.0/(1.0 + sqrt(2.0))); +} + +TEST(DistanceFunctionsTest, angular_gives_expected_score) +{ + auto ct = vespalib::eval::ValueType::CellType::DOUBLE; + auto angular = make_distance_function(DistanceMetric::Angular, ct); + + std::vector<double> p0{0.0, 0.0, 0.0}; + std::vector<double> p1{1.0, 0.0, 0.0}; + std::vector<double> p2{0.0, 1.0, 0.0}; + std::vector<double> p3{0.0, 0.0, 1.0}; + std::vector<double> p4{0.5, 0.5, 0.707107}; + std::vector<double> p5{0.0,-1.0, 0.0}; + std::vector<double> p6{1.0, 2.0, 2.0}; + + constexpr double pi = 3.14159265358979323846; double a12 = angular->calc(t(p1), t(p2)); double a13 = angular->calc(t(p1), t(p3)); double a23 = angular->calc(t(p2), t(p3)); - EXPECT_EQ(a12, 1.0); - EXPECT_EQ(a13, 1.0); - EXPECT_EQ(a23, 1.0); + EXPECT_DOUBLE_EQ(a12, 1.0); + EXPECT_DOUBLE_EQ(a13, 1.0); + EXPECT_DOUBLE_EQ(a23, 1.0); + EXPECT_FLOAT_EQ(angular->to_rawscore(a12), 1.0/(1.0 + pi/2)); + double a14 = angular->calc(t(p1), t(p4)); double a24 = angular->calc(t(p2), t(p4)); - EXPECT_EQ(a14, 0.5); - EXPECT_EQ(a24, 0.5); + EXPECT_FLOAT_EQ(a14, 0.5); + EXPECT_FLOAT_EQ(a24, 0.5); + EXPECT_FLOAT_EQ(angular->to_rawscore(a14), 1.0/(1.0 + pi/3)); + double a34 = angular->calc(t(p3), t(p4)); - EXPECT_GT(a34, 0.999999 - 0.707107); - EXPECT_LT(a34, 1.000001 - 0.707107); + EXPECT_FLOAT_EQ(a34, (1.0 - 0.707107)); + EXPECT_FLOAT_EQ(angular->to_rawscore(a34), 1.0/(1.0 + pi/4)); double a25 = angular->calc(t(p2), t(p5)); - EXPECT_EQ(a25, 2.0); + EXPECT_DOUBLE_EQ(a25, 2.0); + EXPECT_FLOAT_EQ(angular->to_rawscore(a25), 1.0/(1.0 + pi)); double a44 = angular->calc(t(p4), t(p4)); EXPECT_GE(a44, 0.0); EXPECT_LT(a44, 0.000001); + EXPECT_FLOAT_EQ(angular->to_rawscore(a44), 1.0); + + double a66 = angular->calc(t(p6), t(p6)); + EXPECT_GE(a66, 0.0); + EXPECT_LT(a66, 0.000001); + EXPECT_FLOAT_EQ(angular->to_rawscore(a66), 1.0); + + double a16 = angular->calc(t(p1), t(p6)); + double a26 = angular->calc(t(p2), t(p6)); + double a36 = angular->calc(t(p3), t(p6)); + EXPECT_FLOAT_EQ(a16, 1.0 - (1.0/3.0)); + EXPECT_FLOAT_EQ(a26, 1.0 - (2.0/3.0)); + EXPECT_FLOAT_EQ(a36, 1.0 - (2.0/3.0)); +} + +TEST(DistanceFunctionsTest, innerproduct_gives_expected_score) +{ + auto ct = vespalib::eval::ValueType::CellType::DOUBLE; + + auto innerproduct = make_distance_function(DistanceMetric::InnerProduct, ct); + + std::vector<double> p0{0.0, 0.0, 0.0}; + std::vector<double> p1{1.0, 0.0, 0.0}; + std::vector<double> p2{0.0, 1.0, 0.0}; + std::vector<double> p3{0.0, 0.0, 1.0}; + std::vector<double> p4{0.5, 0.5, 0.707107}; + std::vector<double> p5{0.0,-1.0, 0.0}; + std::vector<double> p6{1.0, 2.0, 2.0}; + + double i12 = innerproduct->calc(t(p1), t(p2)); + double i13 = innerproduct->calc(t(p1), t(p3)); + double i23 = innerproduct->calc(t(p2), t(p3)); + EXPECT_DOUBLE_EQ(i12, 1.0); + EXPECT_DOUBLE_EQ(i13, 1.0); + EXPECT_DOUBLE_EQ(i23, 1.0); + double i14 = innerproduct->calc(t(p1), t(p4)); + double i24 = innerproduct->calc(t(p2), t(p4)); + EXPECT_DOUBLE_EQ(i14, 0.5); + EXPECT_DOUBLE_EQ(i24, 0.5); + double i34 = innerproduct->calc(t(p3), t(p4)); + EXPECT_FLOAT_EQ(i34, 1.0 - 0.707107); + + double i25 = innerproduct->calc(t(p2), t(p5)); + EXPECT_DOUBLE_EQ(i25, 2.0); + + double i44 = innerproduct->calc(t(p4), t(p4)); + EXPECT_GE(i44, 0.0); + EXPECT_LT(i44, 0.000001); } TEST(GeoDegreesTest, gives_expected_score) diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp index e97b0364af8..acf0d3d2fd6 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp @@ -27,6 +27,7 @@ const vespalib::string hnsw_distance_metric = "hnsw.distance_metric"; const vespalib::string euclidean = "euclidean"; const vespalib::string angular = "angular"; const vespalib::string geodegrees = "geodegrees"; +const vespalib::string innerproduct = "innerproduct"; const vespalib::string doc_id_limit_tag = "docIdLimit"; const vespalib::string enumerated_tag = "enumerated"; const vespalib::string unique_value_count_tag = "uniqueValueCount"; @@ -97,6 +98,7 @@ to_string(DistanceMetric metric) case DistanceMetric::Euclidean: return euclidean; case DistanceMetric::Angular: return angular; case DistanceMetric::GeoDegrees: return geodegrees; + case DistanceMetric::InnerProduct: return innerproduct; } throw vespalib::IllegalArgumentException("Unknown distance metric " + std::to_string(static_cast<int>(metric))); } diff --git a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp index f435f79bf65..5a8b32ec01b 100644 --- a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp +++ b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp @@ -85,6 +85,9 @@ ConfigConverter::convert(const AttributesConfig::Attribute & cfg) case CfgDm::GEODEGREES: dm = DistanceMetric::GeoDegrees; break; + case CfgDm::INNERPRODUCT: + dm = DistanceMetric::InnerProduct; + break; } retval.set_distance_metric(dm); if (cfg.index.hnsw.enabled) { diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt index 0f106f693f8..35615b255c0 100644 --- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt @@ -6,6 +6,7 @@ vespa_add_library(searchlib_tensor OBJECT dense_tensor_attribute_saver.cpp dense_tensor_store.cpp distance_function_factory.cpp + distance_functions.cpp generic_tensor_attribute.cpp generic_tensor_attribute_saver.cpp generic_tensor_store.cpp diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp index 6b24a062727..b76994d6092 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp +++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp @@ -33,6 +33,13 @@ make_distance_function(DistanceMetric variant, ValueType::CellType cell_type) return std::make_unique<GeoDegreesDistance<double>>(); } break; + case DistanceMetric::InnerProduct: + if (cell_type == ValueType::CellType::FLOAT) { + return std::make_unique<InnerProductDistance<float>>(); + } else { + return std::make_unique<InnerProductDistance<double>>(); + } + break; } // not reached: return DistanceFunction::UP(); diff --git a/searchlib/src/vespa/searchlib/tensor/distance_functions.cpp b/searchlib/src/vespa/searchlib/tensor/distance_functions.cpp new file mode 100644 index 00000000000..9017628d42c --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/distance_functions.cpp @@ -0,0 +1,19 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "distance_functions.h" + +namespace search::tensor { + +template class SquaredEuclideanDistance<float>; +template class SquaredEuclideanDistance<double>; + +template class AngularDistance<float>; +template class AngularDistance<double>; + +template class InnerProductDistance<float>; +template class InnerProductDistance<double>; + +template class GeoDegreesDistance<float>; +template class GeoDegreesDistance<double>; + +} diff --git a/searchlib/src/vespa/searchlib/tensor/distance_functions.h b/searchlib/src/vespa/searchlib/tensor/distance_functions.h index d37495e85da..7e75920619f 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_functions.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_functions.h @@ -50,11 +50,8 @@ public: const vespalib::hwaccelrated::IAccelrated & _computer; }; -template class SquaredEuclideanDistance<float>; -template class SquaredEuclideanDistance<double>; - /** - * Calculates angular distance between vectors with assumed norm 1. + * Calculates angular distance between vectors */ template <typename FloatType> class AngularDistance : public DistanceFunction { @@ -67,6 +64,51 @@ public: auto rhs_vector = rhs.typify<FloatType>(); size_t sz = lhs_vector.size(); assert(sz == rhs_vector.size()); + auto a = &lhs_vector[0]; + auto b = &rhs_vector[0]; + double a_norm_sq = _computer.dotProduct(a, a, sz); + double b_norm_sq = _computer.dotProduct(b, b, sz); + double squared_norms = a_norm_sq * b_norm_sq; + double dot_product = _computer.dotProduct(a, b, sz); + double div = (squared_norms > 0) ? sqrt(squared_norms) : 1.0; + double cosine_similarity = dot_product / div; + double distance = 1.0 - cosine_similarity; // in range [0,2] + return distance; + } + double to_rawscore(double distance) const override { + double cosine_similarity = 1.0 - distance; + // should be in in range [-1,1] but roundoff may cause problems: + cosine_similarity = std::min(1.0, cosine_similarity); + cosine_similarity = std::max(-1.0, cosine_similarity); + double angle_distance = acos(cosine_similarity); // in range [0,pi] + double score = 1.0 / (1.0 + angle_distance); + return score; + } + double calc_with_limit(const vespalib::tensor::TypedCells& lhs, + const vespalib::tensor::TypedCells& rhs, + double /*limit*/) const override + { + return calc(lhs, rhs); + } + + const vespalib::hwaccelrated::IAccelrated & _computer; +}; + +/** + * Calculates inner-product "distance" between vectors with assumed norm 1. + * Should give same ordering as Angular distance, but is less expensive. + */ +template <typename FloatType> +class InnerProductDistance : public DistanceFunction { +public: + InnerProductDistance() + : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()) + {} + double calc(const vespalib::tensor::TypedCells& lhs, const vespalib::tensor::TypedCells& rhs) const override { + auto lhs_vector = lhs.typify<FloatType>(); + auto rhs_vector = rhs.typify<FloatType>(); + size_t sz = lhs_vector.size(); + assert(sz == rhs_vector.size()); double score = 1.0 - _computer.dotProduct(&lhs_vector[0], &rhs_vector[0], sz); return std::max(0.0, score); } @@ -84,9 +126,6 @@ public: const vespalib::hwaccelrated::IAccelrated & _computer; }; -template class AngularDistance<float>; -template class AngularDistance<double>; - /** * Calculates great-circle distance between Latitude/Longitude pairs, * measured in degrees. Output distance is measured in meters. @@ -139,7 +178,4 @@ public: }; -template class GeoDegreesDistance<float>; -template class GeoDegreesDistance<double>; - } |