aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
author Arne Juul <arnej@verizonmedia.com> 2020-06-25 21:03:07 +0000
committer Arne Juul <arnej@verizonmedia.com> 2020-06-25 21:03:07 +0000
commit 3e273894fc49c215764c9beb4270ea206038af5f (patch)
tree 764ffe7c7eb13cb3bc73cb05238b7bede32ce558 /searchlib
parent 06edd9d62c6abd9d6de96bca493095474d663060 (diff)
add "InnerProduct" distance metric
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp | 7
-rw-r--r-- searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp | 37
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/attribute_header.cpp | 2
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/configconverter.cpp | 3
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp | 7
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/distance_functions.h | 47
6 files changed, 92 insertions, 11 deletions
diff --git a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp
index b94186626c2..1191a7aa2e2 100644
--- a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp
+++ b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp
@@ -289,6 +289,12 @@ AttributeManagerTest::testConfigConvert()
auto out = ConfigConverter::convert(a);
EXPECT_TRUE(out.distance_metric() == DistanceMetric::GeoDegrees);
}
+ { // distance metric (explicit)
+ CACA a;
+ a.distancemetric = AttributesConfig::Attribute::Distancemetric::INNERPRODUCT;
+ auto out = ConfigConverter::convert(a);
+ EXPECT_TRUE(out.distance_metric() == DistanceMetric::InnerProduct);
+ }
{ // hnsw index params (enabled)
auto dm_in = AttributesConfig::Attribute::Distancemetric::ANGULAR;
auto dm_out = DistanceMetric::Angular;
@@ -306,6 +312,7 @@ AttributeManagerTest::testConfigConvert()
EXPECT_TRUE(params.distance_metric() == dm_out);
EXPECT_TRUE(params.multi_threaded_indexing());
}
+
{ // hnsw index params (disabled)
CACA a;
a.index.hnsw.enabled = false;
diff --git a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
index 59532919347..283a38ec95d 100644
--- a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
+++ b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
@@ -37,6 +37,7 @@ TEST(DistanceFunctionsTest, gives_expected_score)
auto euclid = make_distance_function(DistanceMetric::Euclidean, ct);
auto angular = make_distance_function(DistanceMetric::Angular, ct);
+ auto innerproduct = make_distance_function(DistanceMetric::InnerProduct, ct);
std::vector<double> p0{0.0, 0.0, 0.0};
std::vector<double> p1{1.0, 0.0, 0.0};
@@ -54,23 +55,43 @@ TEST(DistanceFunctionsTest, gives_expected_score)
double a12 = angular->calc(t(p1), t(p2));
double a13 = angular->calc(t(p1), t(p3));
double a23 = angular->calc(t(p2), t(p3));
- EXPECT_EQ(a12, 1.0);
- EXPECT_EQ(a13, 1.0);
- EXPECT_EQ(a23, 1.0);
+ EXPECT_DOUBLE_EQ(a12, 0.5);
+ EXPECT_DOUBLE_EQ(a13, 0.5);
+ EXPECT_DOUBLE_EQ(a23, 0.5);
double a14 = angular->calc(t(p1), t(p4));
double a24 = angular->calc(t(p2), t(p4));
- EXPECT_EQ(a14, 0.5);
- EXPECT_EQ(a24, 0.5);
+ EXPECT_FLOAT_EQ(a14, 0.25);
+ EXPECT_FLOAT_EQ(a24, 0.25);
double a34 = angular->calc(t(p3), t(p4));
- EXPECT_GT(a34, 0.999999 - 0.707107);
- EXPECT_LT(a34, 1.000001 - 0.707107);
+ EXPECT_FLOAT_EQ(a34, (1.0 - 0.707107)*0.5);
double a25 = angular->calc(t(p2), t(p5));
- EXPECT_EQ(a25, 2.0);
+ EXPECT_DOUBLE_EQ(a25, 1.0);
double a44 = angular->calc(t(p4), t(p4));
EXPECT_GE(a44, 0.0);
EXPECT_LT(a44, 0.000001);
+
+ double i12 = innerproduct->calc(t(p1), t(p2));
+ double i13 = innerproduct->calc(t(p1), t(p3));
+ double i23 = innerproduct->calc(t(p2), t(p3));
+ EXPECT_DOUBLE_EQ(i12, 1.0);
+ EXPECT_DOUBLE_EQ(i13, 1.0);
+ EXPECT_DOUBLE_EQ(i23, 1.0);
+ double i14 = innerproduct->calc(t(p1), t(p4));
+ double i24 = innerproduct->calc(t(p2), t(p4));
+ EXPECT_DOUBLE_EQ(i14, 0.5);
+ EXPECT_DOUBLE_EQ(i24, 0.5);
+ double i34 = innerproduct->calc(t(p3), t(p4));
+ EXPECT_FLOAT_EQ(i34, 1.0 - 0.707107);
+
+ double i25 = innerproduct->calc(t(p2), t(p5));
+ EXPECT_DOUBLE_EQ(i25, 2.0);
+
+ double i44 = innerproduct->calc(t(p4), t(p4));
+ EXPECT_GE(i44, 0.0);
+ EXPECT_LT(i44, 0.000001);
+
}
TEST(GeoDegreesTest, gives_expected_score)
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp
index e97b0364af8..acf0d3d2fd6 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_header.cpp
@@ -27,6 +27,7 @@ const vespalib::string hnsw_distance_metric = "hnsw.distance_metric";
const vespalib::string euclidean = "euclidean";
const vespalib::string angular = "angular";
const vespalib::string geodegrees = "geodegrees";
+const vespalib::string innerproduct = "innerproduct";
const vespalib::string doc_id_limit_tag = "docIdLimit";
const vespalib::string enumerated_tag = "enumerated";
const vespalib::string unique_value_count_tag = "uniqueValueCount";
@@ -97,6 +98,7 @@ to_string(DistanceMetric metric)
case DistanceMetric::Euclidean: return euclidean;
case DistanceMetric::Angular: return angular;
case DistanceMetric::GeoDegrees: return geodegrees;
+ case DistanceMetric::InnerProduct: return innerproduct;
}
throw vespalib::IllegalArgumentException("Unknown distance metric " + std::to_string(static_cast<int>(metric)));
}
diff --git a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp
index f435f79bf65..5a8b32ec01b 100644
--- a/searchlib/src/vespa/searchlib/attribute/configconverter.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/configconverter.cpp
@@ -85,6 +85,9 @@ ConfigConverter::convert(const AttributesConfig::Attribute & cfg)
case CfgDm::GEODEGREES:
dm = DistanceMetric::GeoDegrees;
break;
+ case CfgDm::INNERPRODUCT:
+ dm = DistanceMetric::InnerProduct;
+ break;
}
retval.set_distance_metric(dm);
if (cfg.index.hnsw.enabled) {
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
index 6b24a062727..b76994d6092 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
@@ -33,6 +33,13 @@ make_distance_function(DistanceMetric variant, ValueType::CellType cell_type)
return std::make_unique<GeoDegreesDistance<double>>();
}
break;
+ case DistanceMetric::InnerProduct:
+ if (cell_type == ValueType::CellType::FLOAT) {
+ return std::make_unique<InnerProductDistance<float>>();
+ } else {
+ return std::make_unique<InnerProductDistance<double>>();
+ }
+ break;
}
// not reached:
return DistanceFunction::UP();
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_functions.h b/searchlib/src/vespa/searchlib/tensor/distance_functions.h
index d37495e85da..6cb3f120ae9 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_functions.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_functions.h
@@ -54,7 +54,7 @@ template class SquaredEuclideanDistance<float>;
template class SquaredEuclideanDistance<double>;
/**
- * Calculates angular distance between vectors with assumed norm 1.
+ * Calculates angular distance between vectors as (1 - cosine similarity) / 2.
*/
template <typename FloatType>
class AngularDistance : public DistanceFunction {
@@ -67,6 +67,47 @@ public:
auto rhs_vector = rhs.typify<FloatType>();
size_t sz = lhs_vector.size();
assert(sz == rhs_vector.size());
+ auto a = &lhs_vector[0];
+ auto b = &rhs_vector[0];
+ double a_norm_sq = _computer.dotProduct(a, a, sz);
+ double b_norm_sq = _computer.dotProduct(b, b, sz);
+ double dot_product = _computer.dotProduct(a, b, sz);
+ double div = sqrt(a_norm_sq * b_norm_sq);
+ double cosine_similarity = (div > 0) ? (dot_product / div) : 0.0; // [-1, 1]
+ double score = (1.0 - cosine_similarity) * 0.5; // [1, 0]
+ return score;
+ }
+ double to_rawscore(double distance) const override {
+ double score = 1.0 - distance;
+ return score;
+ }
+ double calc_with_limit(const vespalib::tensor::TypedCells& lhs,
+ const vespalib::tensor::TypedCells& rhs,
+ double /*limit*/) const override
+ {
+ return calc(lhs, rhs);
+ }
+
+ const vespalib::hwaccelrated::IAccelrated & _computer;
+};
+
+template class AngularDistance<float>;
+template class AngularDistance<double>;
+
+/**
+ * Calculates max(0, 1 - dot product); this is an angular distance only for vectors with norm 1.
+ */
+template <typename FloatType>
+class InnerProductDistance : public DistanceFunction {
+public:
+ InnerProductDistance()
+ : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator())
+ {}
+ double calc(const vespalib::tensor::TypedCells& lhs, const vespalib::tensor::TypedCells& rhs) const override {
+ auto lhs_vector = lhs.typify<FloatType>();
+ auto rhs_vector = rhs.typify<FloatType>();
+ size_t sz = lhs_vector.size();
+ assert(sz == rhs_vector.size());
double score = 1.0 - _computer.dotProduct(&lhs_vector[0], &rhs_vector[0], sz);
return std::max(0.0, score);
}
@@ -84,8 +125,8 @@ public:
const vespalib::hwaccelrated::IAccelrated & _computer;
};
-template class AngularDistance<float>;
-template class AngularDistance<double>;
+template class InnerProductDistance<float>;
+template class InnerProductDistance<double>;
/**
* Calculates great-circle distance between Latitude/Longitude pairs,