diff options
author | Arne Juul <arnej@verizonmedia.com> | 2020-06-25 21:03:07 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2020-06-25 21:03:07 +0000 |
commit | 3e273894fc49c215764c9beb4270ea206038af5f (patch) | |
tree | 764ffe7c7eb13cb3bc73cb05238b7bede32ce558 /searchlib | |
parent | 06edd9d62c6abd9d6de96bca493095474d663060 (diff) |
add "InnerProduct" distance metric
Diffstat (limited to 'searchlib')
6 files changed, 92 insertions, 11 deletions
diff --git a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp index b94186626c2..1191a7aa2e2 100644 --- a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp +++ b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp @@ -289,6 +289,12 @@ AttributeManagerTest::testConfigConvert() auto out = ConfigConverter::convert(a); EXPECT_TRUE(out.distance_metric() == DistanceMetric::GeoDegrees); } + { // distance metric (explicit) + CACA a; + a.distancemetric = AttributesConfig::Attribute::Distancemetric::INNERPRODUCT; + auto out = ConfigConverter::convert(a); + EXPECT_TRUE(out.distance_metric() == DistanceMetric::InnerProduct); + } { // hnsw index params (enabled) auto dm_in = AttributesConfig::Attribute::Distancemetric::ANGULAR; auto dm_out = DistanceMetric::Angular; @@ -306,6 +312,7 @@ AttributeManagerTest::testConfigConvert() EXPECT_TRUE(params.distance_metric() == dm_out); EXPECT_TRUE(params.multi_threaded_indexing()); } + { // hnsw index params (disabled) CACA a; a.index.hnsw.enabled = false; diff --git a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp index 59532919347..283a38ec95d 100644 --- a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp +++ b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp @@ -37,6 +37,7 @@ TEST(DistanceFunctionsTest, gives_expected_score) auto euclid = make_distance_function(DistanceMetric::Euclidean, ct); auto angular = make_distance_function(DistanceMetric::Angular, ct); + auto innerproduct = make_distance_function(DistanceMetric::InnerProduct, ct); std::vector<double> p0{0.0, 0.0, 0.0}; std::vector<double> p1{1.0, 0.0, 0.0}; @@ -54,23 +55,43 @@ TEST(DistanceFunctionsTest, gives_expected_score) double a12 = angular->calc(t(p1), t(p2)); double a13 = angular->calc(t(p1), t(p3)); double a23 = angular->calc(t(p2), t(p3)); - EXPECT_EQ(a12, 1.0); - EXPECT_EQ(a13, 1.0); - EXPECT_EQ(a23, 1.0); + EXPECT_DOUBLE_EQ(a12, 0.5); + EXPECT_DOUBLE_EQ(a13, 0.5); + EXPECT_DOUBLE_EQ(a23, 0.5); double a14 = angular->calc(t(p1), t(p4)); double a24 = angular->calc(t(p2), t(p4)); - EXPECT_EQ(a14, 0.5); - EXPECT_EQ(a24, 0.5); + EXPECT_FLOAT_EQ(a14, 0.25); + EXPECT_FLOAT_EQ(a24, 0.25); double a34 = angular->calc(t(p3), t(p4)); - EXPECT_GT(a34, 0.999999 - 0.707107); - EXPECT_LT(a34, 1.000001 - 0.707107); + EXPECT_FLOAT_EQ(a34, (1.0 - 0.707107)*0.5); double a25 = angular->calc(t(p2), t(p5)); - EXPECT_EQ(a25, 2.0); + EXPECT_DOUBLE_EQ(a25, 1.0); double a44 = angular->calc(t(p4), t(p4)); EXPECT_GE(a44, 0.0); EXPECT_LT(a44, 0.000001); + + double i12 = innerproduct->calc(t(p1), t(p2)); + double i13 = innerproduct->calc(t(p1), t(p3)); + double i23 = innerproduct->calc(t(p2), t(p3)); + EXPECT_DOUBLE_EQ(i12, 1.0); + EXPECT_DOUBLE_EQ(i13, 1.0); + EXPECT_DOUBLE_EQ(i23, 1.0); + double i14 = innerproduct->calc(t(p1), t(p4)); + double i24 = innerproduct->calc(t(p2), t(p4)); + EXPECT_DOUBLE_EQ(i14, 0.5); + EXPECT_DOUBLE_EQ(i24, 0.5); + double i34 = innerproduct->calc(t(p3), t(p4)); + EXPECT_FLOAT_EQ(i34, 1.0 - 0.707107); + + double i25 = innerproduct->calc(t(p2), t(p5)); + EXPECT_DOUBLE_EQ(i25, 2.0); + + double i44 = innerproduct->calc(t(p4), t(p4)); + EXPECT_GE(i44, 0.0); + EXPECT_LT(i44, 0.000001); + } TEST(GeoDegreesTest, gives_expected_score) diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp index e97b0364af8..acf0d3d2fd6 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp @@ -27,6 +27,7 @@ const vespalib::string hnsw_distance_metric = "hnsw.distance_metric"; const vespalib::string euclidean = "euclidean"; const vespalib::string angular = "angular"; const vespalib::string geodegrees = "geodegrees"; +const vespalib::string innerproduct = "innerproduct"; const vespalib::string doc_id_limit_tag = "docIdLimit"; const vespalib::string enumerated_tag = "enumerated"; const vespalib::string unique_value_count_tag = "uniqueValueCount"; @@ -97,6 +98,7 @@ to_string(DistanceMetric metric) case DistanceMetric::Euclidean: return euclidean; case DistanceMetric::Angular: return angular; case DistanceMetric::GeoDegrees: return geodegrees; + case DistanceMetric::InnerProduct: return innerproduct; } throw vespalib::IllegalArgumentException("Unknown distance metric " + std::to_string(static_cast<int>(metric))); } diff --git a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp index f435f79bf65..5a8b32ec01b 100644 --- a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp +++ b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp @@ -85,6 +85,9 @@ ConfigConverter::convert(const AttributesConfig::Attribute & cfg) case CfgDm::GEODEGREES: dm = DistanceMetric::GeoDegrees; break; + case CfgDm::INNERPRODUCT: + dm = DistanceMetric::InnerProduct; + break; } retval.set_distance_metric(dm); if (cfg.index.hnsw.enabled) { diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp index 6b24a062727..b76994d6092 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp +++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp @@ -33,6 +33,13 @@ make_distance_function(DistanceMetric variant, ValueType::CellType cell_type) return std::make_unique<GeoDegreesDistance<double>>(); } break; + case DistanceMetric::InnerProduct: + if (cell_type == ValueType::CellType::FLOAT) { + return std::make_unique<InnerProductDistance<float>>(); + } else { + return std::make_unique<InnerProductDistance<double>>(); + } + break; } // not reached: return DistanceFunction::UP(); diff --git a/searchlib/src/vespa/searchlib/tensor/distance_functions.h b/searchlib/src/vespa/searchlib/tensor/distance_functions.h index d37495e85da..6cb3f120ae9 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_functions.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_functions.h @@ -54,7 +54,7 @@ template class SquaredEuclideanDistance<float>; template class SquaredEuclideanDistance<double>; /** - * Calculates angular distance between vectors with assumed norm 1. + * Calculates angular distance between vectors */ template <typename FloatType> class AngularDistance : public DistanceFunction { @@ -67,6 +67,47 @@ public: auto rhs_vector = rhs.typify<FloatType>(); size_t sz = lhs_vector.size(); assert(sz == rhs_vector.size()); + auto a = &lhs_vector[0]; + auto b = &rhs_vector[0]; + double a_norm_sq = _computer.dotProduct(a, a, sz); + double b_norm_sq = _computer.dotProduct(b, b, sz); + double dot_product = _computer.dotProduct(a, b, sz); + double div = sqrt(a_norm_sq * b_norm_sq); + double cosine_similarity = (div > 0) ? (dot_product / div) : 0.0; // [-1, 1] + double score = (1.0 - cosine_similarity) * 0.5; // [1, 0] + return score; + } + double to_rawscore(double distance) const override { + double score = 1.0 - distance; + return score; + } + double calc_with_limit(const vespalib::tensor::TypedCells& lhs, + const vespalib::tensor::TypedCells& rhs, + double /*limit*/) const override + { + return calc(lhs, rhs); + } + + const vespalib::hwaccelrated::IAccelrated & _computer; +}; + +template class AngularDistance<float>; +template class AngularDistance<double>; + +/** + * Calculates angular distance between vectors with assumed norm 1. + */ +template <typename FloatType> +class InnerProductDistance : public DistanceFunction { +public: + InnerProductDistance() + : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()) + {} + double calc(const vespalib::tensor::TypedCells& lhs, const vespalib::tensor::TypedCells& rhs) const override { + auto lhs_vector = lhs.typify<FloatType>(); + auto rhs_vector = rhs.typify<FloatType>(); + size_t sz = lhs_vector.size(); + assert(sz == rhs_vector.size()); double score = 1.0 - _computer.dotProduct(&lhs_vector[0], &rhs_vector[0], sz); return std::max(0.0, score); } @@ -84,8 +125,8 @@ public: const vespalib::hwaccelrated::IAccelrated & _computer; }; -template class AngularDistance<float>; -template class AngularDistance<double>; +template class InnerProductDistance<float>; +template class InnerProductDistance<double>; /** * Calculates great-circle distance between Latitude/Longitude pairs, |