summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
author: Arne Juul <arnej@yahooinc.com> 2023-04-20 09:27:12 +0000
committer: Arne Juul <arnej@yahooinc.com> 2023-04-20 09:30:24 +0000
commit: 808175e4afc92fe5e6a7eddeaf23110df7d46f8f (patch)
tree: bbda83e2d55cbbcf5f954f7057ac5ad2951ef0e1 /searchlib
parent: 4e3fd9eeebeb403d4ad23bf70470d895cbdfbd1c (diff)
add BoundAngularDistance
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp | 72
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/angular_distance.cpp | 71
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/angular_distance.h | 13
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp | 6
4 files changed, 144 insertions, 18 deletions
diff --git a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
index f4faabde559..86b83b2c651 100644
--- a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
+++ b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
@@ -91,12 +91,26 @@ TEST(DistanceFunctionsTest, euclidean_int8_smoketest)
}
+double computeAngularChecked(TypedCells a, TypedCells b) { // test helper: returns the distance between a and b, cross-checking several ways of computing it
+ static AngularDistanceFunctionFactory<float> flt_dff; // float factory, function-local static shared by all calls
+ static AngularDistanceFunctionFactory<double> dbl_dff; // double factory, function-local static shared by all calls
+ auto d_n = dbl_dff.for_query_vector(a); // double, a bound as query vector
+ auto d_f = flt_dff.for_query_vector(a); // float, a bound as query vector
+ auto d_r = dbl_dff.for_query_vector(b); // double, roles swapped: b is bound
+ auto d_i = dbl_dff.for_insertion_vector(a); // double, a bound as insertion vector
+ // normal: the reference value returned to the caller (a bound, b passed to calc):
+ double result = d_n->calc(b);
+ // the insertion-vector function must give exactly the same value:
+ EXPECT_EQ(d_i->calc(b), result);
+ // reverse: swapping which vector is bound must give (almost) the same distance:
+ EXPECT_DOUBLE_EQ(d_r->calc(a), result);
+ // float factory: must agree with the double result to float precision:
+ EXPECT_FLOAT_EQ(d_f->calc(b), result);
+ return result;
+}
+
TEST(DistanceFunctionsTest, angular_gives_expected_score)
{
- auto ct = vespalib::eval::CellType::DOUBLE;
-
- auto angular = make_distance_function(DistanceMetric::Angular, ct);
-
std::vector<double> p0{0.0, 0.0, 0.0};
std::vector<double> p1{1.0, 0.0, 0.0};
std::vector<double> p2{0.0, 1.0, 0.0};
@@ -105,10 +119,13 @@ TEST(DistanceFunctionsTest, angular_gives_expected_score)
std::vector<double> p5{0.0,-1.0, 0.0};
std::vector<double> p6{1.0, 2.0, 2.0};
+ AngularDistanceFunctionFactory<double> dff;
+ auto angular = dff.for_query_vector(t(p0));
+
constexpr double pi = 3.14159265358979323846;
- double a12 = angular->calc(t(p1), t(p2));
- double a13 = angular->calc(t(p1), t(p3));
- double a23 = angular->calc(t(p2), t(p3));
+ double a12 = computeAngularChecked(t(p1), t(p2));
+ double a13 = computeAngularChecked(t(p1), t(p3));
+ double a23 = computeAngularChecked(t(p2), t(p3));
EXPECT_DOUBLE_EQ(a12, 1.0);
EXPECT_DOUBLE_EQ(a13, 1.0);
EXPECT_DOUBLE_EQ(a23, 1.0);
@@ -117,44 +134,63 @@ TEST(DistanceFunctionsTest, angular_gives_expected_score)
double threshold = angular->convert_threshold(pi/2);
EXPECT_DOUBLE_EQ(threshold, 1.0);
- double a14 = angular->calc(t(p1), t(p4));
- double a24 = angular->calc(t(p2), t(p4));
+ double a14 = computeAngularChecked(t(p1), t(p4));
+ double a24 = computeAngularChecked(t(p2), t(p4));
EXPECT_FLOAT_EQ(a14, 0.5);
EXPECT_FLOAT_EQ(a24, 0.5);
EXPECT_FLOAT_EQ(angular->to_rawscore(a14), 1.0/(1.0 + pi/3));
threshold = angular->convert_threshold(pi/3);
EXPECT_DOUBLE_EQ(threshold, 0.5);
- double a34 = angular->calc(t(p3), t(p4));
+ double a34 = computeAngularChecked(t(p3), t(p4));
EXPECT_FLOAT_EQ(a34, (1.0 - 0.707107));
EXPECT_FLOAT_EQ(angular->to_rawscore(a34), 1.0/(1.0 + pi/4));
threshold = angular->convert_threshold(pi/4);
EXPECT_FLOAT_EQ(threshold, a34);
- double a25 = angular->calc(t(p2), t(p5));
+ double a25 = computeAngularChecked(t(p2), t(p5));
EXPECT_DOUBLE_EQ(a25, 2.0);
EXPECT_FLOAT_EQ(angular->to_rawscore(a25), 1.0/(1.0 + pi));
threshold = angular->convert_threshold(pi);
EXPECT_FLOAT_EQ(threshold, 2.0);
- double a44 = angular->calc(t(p4), t(p4));
+ double a44 = computeAngularChecked(t(p4), t(p4));
EXPECT_GE(a44, 0.0);
EXPECT_LT(a44, 0.000001);
EXPECT_FLOAT_EQ(angular->to_rawscore(a44), 1.0);
- double a66 = angular->calc(t(p6), t(p6));
+ double a66 = computeAngularChecked(t(p6), t(p6));
EXPECT_GE(a66, 0.0);
EXPECT_LT(a66, 0.000001);
EXPECT_FLOAT_EQ(angular->to_rawscore(a66), 1.0);
threshold = angular->convert_threshold(0.0);
EXPECT_FLOAT_EQ(threshold, 0.0);
- double a16 = angular->calc(t(p1), t(p6));
- double a26 = angular->calc(t(p2), t(p6));
- double a36 = angular->calc(t(p3), t(p6));
+ double a16 = computeAngularChecked(t(p1), t(p6));
+ double a26 = computeAngularChecked(t(p2), t(p6));
+ double a36 = computeAngularChecked(t(p3), t(p6));
EXPECT_FLOAT_EQ(a16, 1.0 - (1.0/3.0));
EXPECT_FLOAT_EQ(a26, 1.0 - (2.0/3.0));
EXPECT_FLOAT_EQ(a36, 1.0 - (2.0/3.0));
+
+ // check also that cell type conversion works:
+ std::vector<Int8Float> iv0{0.0, 0.0, 0.0};
+ std::vector<Int8Float> iv1{1.0, 0.0, 0.0};
+ std::vector<Int8Float> iv2{0.0, 1.0, 0.0};
+ std::vector<Int8Float> iv3{0.0, 0.0, 1.0};
+ std::vector<Int8Float> iv5{0.0,-1.0, 0.0};
+ std::vector<Int8Float> iv6{1.0, 2.0, 2.0};
+
+ EXPECT_DOUBLE_EQ(a12, computeAngularChecked(t(iv1), t(iv2)));
+ EXPECT_DOUBLE_EQ(a13, computeAngularChecked(t(iv1), t(iv3)));
+ EXPECT_DOUBLE_EQ(a14, computeAngularChecked(t(iv1), t(p4)));
+ EXPECT_DOUBLE_EQ(a24, computeAngularChecked(t(iv2), t(p4)));
+ EXPECT_DOUBLE_EQ(a34, computeAngularChecked(t(iv3), t(p4)));
+ EXPECT_DOUBLE_EQ(a25, computeAngularChecked(t(iv2), t(iv5)));
+ EXPECT_DOUBLE_EQ(a16, computeAngularChecked(t(iv1), t(iv6)));
+ EXPECT_DOUBLE_EQ(a26, computeAngularChecked(t(iv2), t(iv6)));
+ EXPECT_DOUBLE_EQ(a36, computeAngularChecked(t(iv3), t(iv6)));
+ EXPECT_DOUBLE_EQ(a66, computeAngularChecked(t(iv6), t(iv6)));
}
TEST(DistanceFunctionsTest, innerproduct_gives_expected_score)
@@ -177,7 +213,7 @@ TEST(DistanceFunctionsTest, innerproduct_gives_expected_score)
EXPECT_DOUBLE_EQ(i12, 1.0);
EXPECT_DOUBLE_EQ(i13, 1.0);
EXPECT_DOUBLE_EQ(i23, 1.0);
-
+
double i14 = innerproduct->calc(t(p1), t(p4));
double i24 = innerproduct->calc(t(p2), t(p4));
EXPECT_DOUBLE_EQ(i14, 0.5);
@@ -238,7 +274,7 @@ TEST(DistanceFunctionsTest, hamming_gives_expected_score)
double d25 = hamming->calc(t(points[2]), t(points[5]));
EXPECT_EQ(d25, 1.0);
EXPECT_DOUBLE_EQ(hamming->to_rawscore(d25), 1.0/(1.0 + 1.0));
-
+
double threshold = hamming->convert_threshold(0.25);
EXPECT_DOUBLE_EQ(threshold, 0.25);
threshold = hamming->convert_threshold(0.5);
diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
index 263bf91877a..6c6a1b34097 100644
--- a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
@@ -4,6 +4,7 @@
using vespalib::typify_invoke;
using vespalib::eval::TypifyCellType;
+using vespalib::eval::TypedCells;
namespace search::tensor {
@@ -49,4 +50,74 @@ AngularDistance::calc(const vespalib::eval::TypedCells& lhs,
template class AngularDistanceHW<float>;
template class AngularDistanceHW<double>;
+
+template<typename FloatType> // FloatType: cell type used for the stored vector and for the dot products
+class BoundAngularDistance : public BoundDistanceFunction { // distance (1 - cosine similarity) against a left-hand vector bound at construction
+private:
+ const vespalib::hwaccelrated::IAccelrated & _computer; // supplies dotProduct()
+ mutable TemporaryVectorStore<FloatType> _tmpSpace; // mutable so that the const calc() can call convertRhs()
+ const vespalib::ConstArrayRef<FloatType> _lhs; // what _tmpSpace.storeLhs() returned for the bound vector
+ double _lhs_norm_sq; // dot product of _lhs with itself, computed once in the constructor
+public:
+ BoundAngularDistance(const vespalib::eval::TypedCells& lhs) // NOTE(review): single-argument constructor is not marked explicit
+ : BoundDistanceFunction(vespalib::eval::get_cell_type<FloatType>()),
+ _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()),
+ _tmpSpace(lhs.size),
+ _lhs(_tmpSpace.storeLhs(lhs))
+ {
+ auto a = &_lhs[0]; // NOTE(review): presumably lhs is never empty; verify against callers
+ _lhs_norm_sq = _computer.dotProduct(a, a, lhs.size);
+ }
+ double calc(const vespalib::eval::TypedCells& rhs) const override { // returns 1 - cosine similarity between the bound vector and rhs
+ size_t sz = _lhs.size();
+ vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs);
+ assert(sz == rhs_vector.size()); // both vectors must have the same number of cells
+ auto a = &_lhs[0];
+ auto b = &rhs_vector[0];
+ double b_norm_sq = _computer.dotProduct(b, b, sz);
+ double squared_norms = _lhs_norm_sq * b_norm_sq;
+ double dot_product = _computer.dotProduct(a, b, sz);
+ double div = (squared_norms > 0) ? sqrt(squared_norms) : 1.0; // divide by 1.0 when the product of the squared norms is not positive
+ double cosine_similarity = dot_product / div;
+ double distance = 1.0 - cosine_similarity; // in range [0,2]
+ return distance;
+ }
+ double convert_threshold(double threshold) const override { // threshold is passed to cos(); returns it on the 1 - cos(angle) scale used by calc()
+ double cosine_similarity = cos(threshold);
+ return 1.0 - cosine_similarity;
+ }
+ double to_rawscore(double distance) const override { // converts a calc() distance to the score 1 / (1 + angle)
+ double cosine_similarity = 1.0 - distance;
+ // should be in range [-1,1] but roundoff may cause problems:
+ cosine_similarity = std::min(1.0, cosine_similarity);
+ cosine_similarity = std::max(-1.0, cosine_similarity);
+ double angle_distance = acos(cosine_similarity); // in range [0,pi]
+ double score = 1.0 / (1.0 + angle_distance);
+ return score;
+ }
+ double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override { // the limit argument is ignored; the full distance is always computed
+ return calc(rhs);
+ }
+};
+
+template class BoundAngularDistance<float>;
+template class BoundAngularDistance<double>;
+
+template <typename FloatType> // returns a new BoundAngularDistance<FloatType> constructed from lhs
+BoundDistanceFunction::UP
+AngularDistanceFunctionFactory<FloatType>::for_query_vector(const vespalib::eval::TypedCells& lhs) {
+ using DFT = BoundAngularDistance<FloatType>;
+ return std::make_unique<DFT>(lhs);
+}
+
+template <typename FloatType> // same body as for_query_vector(): returns a new BoundAngularDistance<FloatType> constructed from lhs
+BoundDistanceFunction::UP
+AngularDistanceFunctionFactory<FloatType>::for_insertion_vector(const vespalib::eval::TypedCells& lhs) {
+ using DFT = BoundAngularDistance<FloatType>;
+ return std::make_unique<DFT>(lhs);
+}
+
+template class AngularDistanceFunctionFactory<float>;
+template class AngularDistanceFunctionFactory<double>;
+
}
diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.h b/searchlib/src/vespa/searchlib/tensor/angular_distance.h
index e3b21f9546e..4f10c25bdde 100644
--- a/searchlib/src/vespa/searchlib/tensor/angular_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.h
@@ -3,6 +3,8 @@
#pragma once
#include "distance_function.h"
+#include "bound_distance_function.h"
+#include "distance_function_factory.h"
#include <vespa/eval/eval/typed_cells.h>
#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
#include <cmath>
@@ -73,4 +75,15 @@ private:
const vespalib::hwaccelrated::IAccelrated & _computer;
};
+template <typename FloatType> // FloatType selects the cell type reported to the DistanceFunctionFactory base
+class AngularDistanceFunctionFactory : public DistanceFunctionFactory { // factory for distance functions bound to a given vector
+public:
+ AngularDistanceFunctionFactory()
+ : DistanceFunctionFactory(vespalib::eval::get_cell_type<FloatType>())
+ {}
+
+ BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override; // defined in angular_distance.cpp
+ BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override; // defined in angular_distance.cpp
+};
+
}
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
index f96715bcf60..3d2beec2d19 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
@@ -93,6 +93,12 @@ std::unique_ptr<DistanceFunctionFactory>
make_distance_function_factory(search::attribute::DistanceMetric variant,
vespalib::eval::CellType cell_type)
{
+ if (variant == DistanceMetric::Angular) { // the angular metric gets its own templated factory
+ if (cell_type == CellType::DOUBLE) {
+ return std::make_unique<AngularDistanceFunctionFactory<double>>();
+ }
+ return std::make_unique<AngularDistanceFunctionFactory<float>>(); // every cell type other than DOUBLE uses the float factory
+ }
auto df = make_distance_function(variant, cell_type); // all other metrics: wrap the plain distance function
return std::make_unique<SimpleDistanceFunctionFactory>(std::move(df));
}