Merge pull request #26893 from vespa-engine/arnej/remove-unused-distance-functions-3

remove unused distance functions
author: Arne H Juul <arnej27959@users.noreply.github.com> 2023-04-27 23:20:10 +0200
committer: GitHub <noreply@github.com> 2023-04-27 23:20:10 +0200
commit: 9c04252f9c8573bdbb787b5e3a0f671487eb412e (patch)
tree: 2c111deed492fa8cb09ede189e2ca080c60c590d
parent: 27cff4355a2f2bced9da6357ec6c1d7024ba70ff (diff)
parent: 40d65770f45b6d4a16d19668487291c02597fa29 (diff)
21 files changed, 56 insertions, 590 deletions
diff --git a/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp b/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp
index fd07529795a..b9599a0c75d 100644
--- a/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp
+++ b/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp
@@ -29,7 +29,6 @@ using search::attribute::DistanceMetric;
 using search::feature_t;
 using search::tensor::DenseTensorAttribute;
 using search::tensor::DistanceCalculator;
-using search::tensor::DistanceFunction;
 using search::tensor::SerializedFastValueAttribute;
 using search::tensor::TensorAttribute;
 using vespalib::eval::CellType;
@@ -49,9 +48,6 @@ vespalib::string denseSpecDouble("tensor(x[2])");
 vespalib::string denseSpecFloat("tensor<float>(x[2])");
 vespalib::string mixed_spec("tensor(m{},x[2])");
 
-DistanceFunction::UP euclid_d = search::tensor::make_distance_function(DistanceMetric::Euclidean, CellType::DOUBLE);
-DistanceFunction::UP euclid_f = search::tensor::make_distance_function(DistanceMetric::Euclidean, CellType::FLOAT);
-
 std::unique_ptr<Value> createTensor(const TensorSpec &spec) {
     return SimpleValue::from_spec(spec);
 }
@@ -119,14 +115,6 @@ struct Fixture {
         auto t = createTensor(_typeSpec, v1, v2);
         setTensor(docId, *t);
     }
-
-    const DistanceFunction &dist_fun() const {
-        if (_cfg.tensorType().cell_type() == CellType::FLOAT) {
-            return *euclid_f;
-        } else {
-            return *euclid_d;
-        }
-    }
 };
 
 template <bool strict>
@@ -136,9 +124,11 @@ SimpleResult find_matches(Fixture &env, const Value &qtv, double threshold = std
     auto &attr = *(env._attr);
 
     auto dff = search::tensor::make_distance_function_factory(DistanceMetric::Euclidean, qtv.cells().type);
-    DistanceCalculator dist_calc(attr, dff->for_query_vector(qtv.cells()));
+    auto df = dff->for_query_vector(qtv.cells());
+    threshold = df->convert_threshold(threshold);
+    DistanceCalculator dist_calc(attr, std::move(df));
     NearestNeighborDistanceHeap dh(2);
-    dh.set_distance_threshold(env.dist_fun().convert_threshold(threshold));
+    dh.set_distance_threshold(threshold);
     const GlobalFilter &filter = *env._global_filter;
     auto search = NearestNeighborIterator::create(strict, tfmd, dist_calc, dh, filter);
     if (strict) {
diff --git a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
index 9b8ad0d26ce..a1b29c90986 100644
--- a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
+++ b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
@@ -76,10 +76,6 @@ namespace { const double sq_root_half = std::sqrt(0.5); }
 
 TEST(DistanceFunctionsTest, euclidean_gives_expected_score)
 {
-    auto ct = vespalib::eval::CellType::DOUBLE;
-
-    auto euclid = make_distance_function(DistanceMetric::Euclidean, ct);
-
     std::vector<double> p0{0.0, 0.0, 0.0};
     std::vector<double> p1{1.0, 0.0, 0.0};
     std::vector<double> p2{0.0, 1.0, 0.0};
@@ -92,6 +88,9 @@ TEST(DistanceFunctionsTest, euclidean_gives_expected_score)
     EXPECT_FLOAT_EQ(n4, 1.0);
     double d12 = computeEuclideanChecked(t(p1), t(p2));
     EXPECT_EQ(d12, 2.0);
+
+    EuclideanDistanceFunctionFactory<double> dff;
+    auto euclid = dff.for_query_vector(t(p0));
     EXPECT_DOUBLE_EQ(euclid->to_rawscore(d12), 1.0/(1.0 + sqrt(2.0)));
     double threshold = euclid->convert_threshold(8.0);
     EXPECT_EQ(threshold, 64.0);
@@ -142,10 +141,6 @@ TEST(DistanceFunctionsTest, euclidean_gives_expected_score)
 
 TEST(DistanceFunctionsTest, euclidean_int8_smoketest)
 {
-    auto ct = vespalib::eval::CellType::INT8;
-
-    auto euclid = make_distance_function(DistanceMetric::Euclidean, ct);
-
     std::vector<Int8Float> p0{0.0, 0.0, 0.0};
     std::vector<Int8Float> p1{1.0, 0.0, 0.0};
     std::vector<Int8Float> p5{0.0,-1.0, 0.0};
@@ -345,60 +340,10 @@ TEST(DistanceFunctionsTest, prenormalized_angular_gives_expected_score)
     EXPECT_DOUBLE_EQ(threshold, 1.0);
 }
 
-TEST(DistanceFunctionsTest, innerproduct_gives_expected_score)
-{
-    auto ct = vespalib::eval::CellType::DOUBLE;
-
-    auto innerproduct = make_distance_function(DistanceMetric::InnerProduct, ct);
-
-    std::vector<double> p0{0.0, 0.0, 0.0};
-    std::vector<double> p1{1.0, 0.0, 0.0};
-    std::vector<double> p2{0.0, 1.0, 0.0};
-    std::vector<double> p3{0.0, 0.0, 1.0};
-    std::vector<double> p4{0.5, 0.5, sq_root_half};
-    std::vector<double> p5{0.0,-1.0, 0.0};
-    std::vector<double> p6{1.0, 2.0, 2.0};
-
-    double i12 = innerproduct->calc(t(p1), t(p2));
-    double i13 = innerproduct->calc(t(p1), t(p3));
-    double i23 = innerproduct->calc(t(p2), t(p3));
-    EXPECT_DOUBLE_EQ(i12, 1.0);
-    EXPECT_DOUBLE_EQ(i13, 1.0);
-    EXPECT_DOUBLE_EQ(i23, 1.0);
-
-    double i14 = innerproduct->calc(t(p1), t(p4));
-    double i24 = innerproduct->calc(t(p2), t(p4));
-    EXPECT_DOUBLE_EQ(i14, 0.5);
-    EXPECT_DOUBLE_EQ(i24, 0.5);
-    double i34 = innerproduct->calc(t(p3), t(p4));
-    EXPECT_FLOAT_EQ(i34, 1.0 - sq_root_half);
-
-    double i25 = innerproduct->calc(t(p2), t(p5));
-    EXPECT_DOUBLE_EQ(i25, 2.0);
-
-    double i44 = innerproduct->calc(t(p4), t(p4));
-    EXPECT_GE(i44, 0.0);
-    EXPECT_LT(i44, 0.000001);
-
-    double i66 = innerproduct->calc(t(p6), t(p6));
-    EXPECT_GE(i66, 0.0);
-    EXPECT_LT(i66, 0.000001);
-
-    double threshold = innerproduct->convert_threshold(0.25);
-    EXPECT_DOUBLE_EQ(threshold, 0.25);
-    threshold = innerproduct->convert_threshold(0.5);
-    EXPECT_DOUBLE_EQ(threshold, 0.5);
-    threshold = innerproduct->convert_threshold(1.0);
-    EXPECT_DOUBLE_EQ(threshold, 1.0);
-}
 
 TEST(DistanceFunctionsTest, hamming_gives_expected_score)
 {
-    static HammingDistanceFunctionFactory<Int8Float> dff;
-    auto ct = vespalib::eval::CellType::DOUBLE;
-
-    auto hamming = make_distance_function(DistanceMetric::Hamming, ct);
-
+    static HammingDistanceFunctionFactory<double> dff;
     std::vector<std::vector<double>>
         points{{0.0, 0.0, 0.0},
                {1.0, 0.0, 0.0},
@@ -407,31 +352,30 @@ TEST(DistanceFunctionsTest, hamming_gives_expected_score)
                {0.5, 0.5, 0.5},
                {0.0,-1.0, 1.0},
                {1.0, 1.0, 1.0}};
+    auto hamming = dff.for_query_vector(t(points[0]));
     for (const auto & p : points) {
-        double h0 = hamming->calc(t(p), t(p));
-        EXPECT_EQ(h0, 0.0);
-        EXPECT_EQ(hamming->to_rawscore(h0), 1.0);
         auto dist_fun = dff.for_query_vector(t(p));
-        EXPECT_EQ(dist_fun->calc(t(p)), 0.0);
+        double h0 = dist_fun->calc(t(p));
+        EXPECT_EQ(h0, 0.0);
         EXPECT_EQ(dist_fun->to_rawscore(h0), 1.0);
     }
-    double d12 = hamming->calc(t(points[1]), t(points[2]));
+    double d12 = dff.for_query_vector(t(points[1]))->calc(t(points[2]));
     EXPECT_EQ(d12, 3.0);
     EXPECT_DOUBLE_EQ(hamming->to_rawscore(d12), 1.0/(1.0 + 3.0));
 
-    double d16 = hamming->calc(t(points[1]), t(points[6]));
+    double d16 = dff.for_query_vector(t(points[1]))->calc(t(points[6]));
     EXPECT_EQ(d16, 2.0);
     EXPECT_DOUBLE_EQ(hamming->to_rawscore(d16), 1.0/(1.0 + 2.0));
 
-    double d23 = hamming->calc(t(points[2]), t(points[3]));
+    double d23 = dff.for_query_vector(t(points[2]))->calc(t(points[3]));
     EXPECT_EQ(d23, 3.0);
     EXPECT_DOUBLE_EQ(hamming->to_rawscore(d23), 1.0/(1.0 + 3.0));
 
-    double d24 = hamming->calc(t(points[2]), t(points[4]));
+    double d24 = dff.for_query_vector(t(points[2]))->calc(t(points[4]));
     EXPECT_EQ(d24, 3.0);
     EXPECT_DOUBLE_EQ(hamming->to_rawscore(d24), 1.0/(1.0 + 3.0));
 
-    double d25 = hamming->calc(t(points[2]), t(points[5]));
+    double d25 = dff.for_query_vector(t(points[2]))->calc(t(points[5]));
     EXPECT_EQ(d25, 1.0);
     EXPECT_DOUBLE_EQ(hamming->to_rawscore(d25), 1.0/(1.0 + 1.0));
 
@@ -445,8 +389,8 @@ TEST(DistanceFunctionsTest, hamming_gives_expected_score)
     std::vector<Int8Float> bytes_a = { 0, 1, 2, 4, 8, 16, 32, 64, -128,  0, 1, 2, 4, 8, 16, 32, 64, -128, 0, 1, 2 };
     std::vector<Int8Float> bytes_b = { 1, 2, 2, 4, 8, 16, 32, 65, -128,  0, 1, 0, 4, 8, 16, 32, 64, -128, 0, 1, -1 };
     // expect diff:                    1  2                    1               1                                7
-    EXPECT_EQ(hamming->calc(TypedCells(bytes_a), TypedCells(bytes_b)), 12.0);
-    auto dist_fun = dff.for_query_vector(TypedCells(bytes_a));
+    HammingDistanceFunctionFactory<Int8Float> factory_int8;
+    auto dist_fun = factory_int8.for_query_vector(TypedCells(bytes_a));
     EXPECT_EQ(dist_fun->calc(TypedCells(bytes_b)), 12.0);
 }
 
diff --git a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
index 0dcd77ec392..b2e5f8863c3 100644
--- a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
@@ -133,8 +133,6 @@ public:
     }
 };
 
-using FloatSqEuclideanDistance = SquaredEuclideanDistanceHW<float>;
-
 template <typename IndexType>
 class Stressor : public ::testing::Test {
 private:
diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
index 2e874ffa4ae..362bceaf0da 100644
--- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
@@ -25,7 +25,6 @@ vespa_add_library(searchlib_tensor OBJECT
     hnsw_single_best_neighbors.cpp
     imported_tensor_attribute_vector.cpp
     imported_tensor_attribute_vector_read_guard.cpp
-    inner_product_distance.cpp
     inv_log_level_generator.cpp
     large_subspaces_buffer_type.cpp
     nearest_neighbor_index.cpp
diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
index a7ae02bb9f4..ab8f2b30df9 100644
--- a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
@@ -41,17 +41,6 @@ struct CalcAngular {
 
 }
 
-double
-AngularDistance::calc(const vespalib::eval::TypedCells& lhs,
-                      const vespalib::eval::TypedCells& rhs) const
-{
-    return typify_invoke<2,TypifyCellType,CalcAngular>(lhs.type, rhs.type, lhs, rhs);
-}
-
-template class AngularDistanceHW<float>;
-template class AngularDistanceHW<double>;
-
-
 template<typename FloatType>
 class BoundAngularDistance : public BoundDistanceFunction {
 private:
diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.h b/searchlib/src/vespa/searchlib/tensor/angular_distance.h
index caed5d8c216..50d3e617cf4 100644
--- a/searchlib/src/vespa/searchlib/tensor/angular_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.h
@@ -13,69 +13,10 @@ namespace search::tensor {
 
 /**
  * Calculates angular distance between vectors
- */
-class AngularDistance : public DistanceFunction {
-public:
-    AngularDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {}
-    double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override;
-    double convert_threshold(double threshold) const override {
-        double cosine_similarity = cos(threshold);
-        return 1.0 - cosine_similarity;
-    }
-    double to_rawscore(double distance) const override {
-        double cosine_similarity = 1.0 - distance;
-        // should be in the range [-1,1] but roundoff may cause problems:
-        cosine_similarity = std::min(1.0, cosine_similarity);
-        cosine_similarity = std::max(-1.0, cosine_similarity);
-        double angle_distance = acos(cosine_similarity); // in range [0,pi]
-        double score = 1.0 / (1.0 + angle_distance);
-        return score;
-    }
-    double calc_with_limit(const vespalib::eval::TypedCells& lhs,
-                           const vespalib::eval::TypedCells& rhs,
-                           double /*limit*/) const override
-    {
-        return calc(lhs, rhs);
-    }
-};
-
-/**
- * Calculates angular distance between vectors
  * Will use instruction optimal for the cpu it is running on
- * when both vectors have the expected cell type.
+ * after converting both vectors to an optimal cell type.
  */
 template <typename FloatType>
-class AngularDistanceHW : public AngularDistance {
-public:
-    AngularDistanceHW()
-      : AngularDistance(vespalib::eval::get_cell_type<FloatType>()),
-        _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator())
-    {
-        assert(expected_cell_type() == vespalib::eval::get_cell_type<FloatType>());
-    }
-    double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override {
-        constexpr vespalib::eval::CellType expected = vespalib::eval::get_cell_type<FloatType>();
-        assert(lhs.type == expected && rhs.type == expected);
-        auto lhs_vector = lhs.typify<FloatType>();
-        auto rhs_vector = rhs.typify<FloatType>();
-        size_t sz = lhs_vector.size();
-        assert(sz == rhs_vector.size());
-        auto a = lhs_vector.data();
-        auto b = rhs_vector.data();
-        double a_norm_sq = _computer.dotProduct(a, a, sz);
-        double b_norm_sq = _computer.dotProduct(b, b, sz);
-        double squared_norms = a_norm_sq * b_norm_sq;
-        double dot_product = _computer.dotProduct(a, b, sz);
-        double div = (squared_norms > 0) ? sqrt(squared_norms) : 1.0;
-        double cosine_similarity = dot_product / div;
-        double distance = 1.0 - cosine_similarity; // in range [0,2]
-        return distance;
-    }
-private:
-    const vespalib::hwaccelrated::IAccelrated & _computer;
-};
-
-template <typename FloatType>
 class AngularDistanceFunctionFactory : public DistanceFunctionFactory {
 public:
     AngularDistanceFunctionFactory() = default;
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function.h b/searchlib/src/vespa/searchlib/tensor/distance_function.h
index 443191a272c..31a837d17f4 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function.h
@@ -20,34 +20,4 @@ public:
     virtual double to_rawscore(double distance) const = 0;
 };
 
-/**
- * Interface used to calculate the distance between two n-dimensional vectors.
- *
- * The vectors must be of same size and same cell type (float or double).
- * The actual implementation must know which type the vectors are.
- */
-class DistanceFunction : public DistanceConverter {
-private:
-    vespalib::eval::CellType _expect_cell_type;
-public:
-    using UP = std::unique_ptr<DistanceFunction>;
-
-    DistanceFunction(vespalib::eval::CellType expected) : _expect_cell_type(expected) {}
-
-    virtual ~DistanceFunction() = default;
-
-    // input (query) vectors must be converted to this cell type:
-    vespalib::eval::CellType expected_cell_type() const {
-        return _expect_cell_type;
-    }
-
-    // calculate internal distance (comparable)
-    virtual double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const = 0;
-
-    // calculate internal distance, early return allowed if > limit
-    virtual double calc_with_limit(const vespalib::eval::TypedCells& lhs,
-                                   const vespalib::eval::TypedCells& rhs,
-                                   double limit) const = 0;
-};
-
 }
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
index 3a4bd94de23..bec0f430a51 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
@@ -15,79 +15,6 @@ using vespalib::eval::ValueType;
 
 namespace search::tensor {
 
-DistanceFunction::UP
-make_distance_function(DistanceMetric variant, CellType cell_type)
-{
-    switch (variant) {
-    case DistanceMetric::Euclidean:
-        switch (cell_type) {
-        case CellType::FLOAT:  return std::make_unique<SquaredEuclideanDistanceHW<float>>();
-        case CellType::DOUBLE: return std::make_unique<SquaredEuclideanDistanceHW<double>>();
-        case CellType::INT8:   return std::make_unique<SquaredEuclideanDistanceHW<vespalib::eval::Int8Float>>();
-        default:               return std::make_unique<SquaredEuclideanDistance>(CellType::FLOAT);
-        }
-    case DistanceMetric::Angular:
-        switch (cell_type) {
-        case CellType::FLOAT:  return std::make_unique<AngularDistanceHW<float>>();
-        case CellType::DOUBLE: return std::make_unique<AngularDistanceHW<double>>();
-        default:               return std::make_unique<AngularDistance>(CellType::FLOAT);
-        }
-    case DistanceMetric::GeoDegrees:
-        return std::make_unique<GeoDegreesDistance>(CellType::DOUBLE);
-    case DistanceMetric::PrenormalizedAngular:
-    case DistanceMetric::InnerProduct:
-        switch (cell_type) {
-        case CellType::FLOAT:  return std::make_unique<InnerProductDistanceHW<float>>();
-        case CellType::DOUBLE: return std::make_unique<InnerProductDistanceHW<double>>();
-        default:               return std::make_unique<InnerProductDistance>(CellType::FLOAT);
-        }
-    case DistanceMetric::Hamming:
-        return std::make_unique<HammingDistance>(cell_type);
-    }
-    // not reached:
-    return DistanceFunction::UP();
-}
-
-
-class SimpleBoundDistanceFunction : public BoundDistanceFunction {
-    const vespalib::eval::TypedCells _lhs;
-    const DistanceFunction &_df;
-public:
-    SimpleBoundDistanceFunction(const vespalib::eval::TypedCells& lhs,
-                                const DistanceFunction &df)
-        : _lhs(lhs),
-          _df(df)
-        {}
-
-    double calc(const vespalib::eval::TypedCells& rhs) const override {
-        return _df.calc(_lhs, rhs);
-    }
-    double convert_threshold(double threshold) const override {
-        return _df.convert_threshold(threshold);
-    }
-    double to_rawscore(double distance) const override {
-        return _df.to_rawscore(distance);
-    }
-    double calc_with_limit(const vespalib::eval::TypedCells& rhs, double limit) const override {
-        return _df.calc_with_limit(_lhs, rhs, limit);
-    }
-};
-
-class SimpleDistanceFunctionFactory : public DistanceFunctionFactory {
-    DistanceFunction::UP _df;
-public:
-    SimpleDistanceFunctionFactory(DistanceFunction::UP df)
-        : _df(std::move(df))
-        {}
-
-    BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override {
-        return std::make_unique<SimpleBoundDistanceFunction>(lhs, *_df);
-    }
-    BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override {
-        return std::make_unique<SimpleBoundDistanceFunction>(lhs, *_df);
-    }
-};
-
 std::unique_ptr<DistanceFunctionFactory>
 make_distance_function_factory(search::attribute::DistanceMetric variant,
                                vespalib::eval::CellType cell_type)
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h
index 433c8e1962a..a69c7cff739 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h
@@ -23,13 +23,9 @@ struct DistanceFunctionFactory {
 };
 
 /**
- * Create a distance function object customized for the given metric
+ * Create a distance function factory customized for the given metric
  * variant and (attribute) cell type.
  **/
-DistanceFunction::UP
-make_distance_function(search::attribute::DistanceMetric variant,
-                       vespalib::eval::CellType cell_type);
-
 DistanceFunctionFactory::UP
 make_distance_function_factory(search::attribute::DistanceMetric variant,
                                vespalib::eval::CellType cell_type);
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_functions.h b/searchlib/src/vespa/searchlib/tensor/distance_functions.h
index 2300dba2db1..ec401b9313d 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_functions.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_functions.h
@@ -7,5 +7,4 @@
 #include "euclidean_distance.h"
 #include "geo_degrees_distance.h"
 #include "hamming_distance.h"
-#include "inner_product_distance.h"
 #include "prenormalized_angular_distance.h"
diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
index a98b37cb6cc..1b8924ab351 100644
--- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
@@ -30,25 +30,6 @@ struct CalcEuclidean {
 
 }
 
-double
-SquaredEuclideanDistance::calc(const vespalib::eval::TypedCells& lhs,
-                               const vespalib::eval::TypedCells& rhs) const
-{
-    return typify_invoke<2,TypifyCellType,CalcEuclidean>(lhs.type, rhs.type, lhs, rhs);
-}
-
-double
-SquaredEuclideanDistance::calc_with_limit(const vespalib::eval::TypedCells& lhs,
-                                          const vespalib::eval::TypedCells& rhs,
-                                          double) const
-{
-    // maybe optimize this:
-    return typify_invoke<2,TypifyCellType,CalcEuclidean>(lhs.type, rhs.type, lhs, rhs);
-}
-
-template class SquaredEuclideanDistanceHW<float>;
-template class SquaredEuclideanDistanceHW<double>;
-
 using vespalib::eval::Int8Float;
 using vespalib::BFloat16;
 
diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
index 06295caee9e..fc663c50ccd 100644
--- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
@@ -12,75 +12,10 @@ namespace search::tensor {
 
 /**
  * Calculates the square of the standard Euclidean distance.
- */
-class SquaredEuclideanDistance : public DistanceFunction {
-public:
-    SquaredEuclideanDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {}
-    double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override;
-    double calc_with_limit(const vespalib::eval::TypedCells& lhs,
-                           const vespalib::eval::TypedCells& rhs,
-                           double limit) const override;
-    double convert_threshold(double threshold) const override {
-        return threshold*threshold;
-    }
-    double to_rawscore(double distance) const override {
-        double d = sqrt(distance);
-        double score = 1.0 / (1.0 + d);
-        return score;
-    }
-};
-
-/**
- * Calculates the square of the standard Euclidean distance.
  * Will use instruction optimal for the cpu it is running on
- * when both vectors have the expected cell type.
+ * after converting both vectors to an optimal cell type.
  */
 template <typename FloatType>
-class SquaredEuclideanDistanceHW : public SquaredEuclideanDistance {
-public:
-    SquaredEuclideanDistanceHW()
-      : SquaredEuclideanDistance(vespalib::eval::get_cell_type<FloatType>()),
-        _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator())
-    {
-        assert(expected_cell_type() == vespalib::eval::get_cell_type<FloatType>());
-    }
-
-    static const double *cast(const double * p) { return p; }
-    static const float *cast(const float * p) { return p; }
-    static const int8_t *cast(const vespalib::eval::Int8Float * p) { return reinterpret_cast<const int8_t *>(p); }
-    double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override {
-        constexpr vespalib::eval::CellType expected = vespalib::eval::get_cell_type<FloatType>();
-        assert(lhs.type == expected && rhs.type == expected);
-        auto lhs_vector = lhs.typify<FloatType>();
-        auto rhs_vector = rhs.typify<FloatType>();
-        size_t sz = lhs_vector.size();
-        assert(sz == rhs_vector.size());
-        return _computer.squaredEuclideanDistance(cast(&lhs_vector[0]), cast(&rhs_vector[0]), sz);
-    }
-
-    double calc_with_limit(const vespalib::eval::TypedCells& lhs,
-                           const vespalib::eval::TypedCells& rhs,
-                           double limit) const override
-    {
-        constexpr vespalib::eval::CellType expected = vespalib::eval::get_cell_type<FloatType>();
-        assert(lhs.type == expected && rhs.type == expected);
-        auto lhs_vector = lhs.typify<FloatType>();
-        auto rhs_vector = rhs.typify<FloatType>();
-        double sum = 0.0;
-        size_t sz = lhs_vector.size();
-        assert(sz == rhs_vector.size());
-        for (size_t i = 0; i < sz && sum <= limit; ++i) {
-            double diff = lhs_vector[i] - rhs_vector[i];
-            sum += diff*diff;
-        }
-        return sum;
-    }
-private:
-    const vespalib::hwaccelrated::IAccelrated & _computer;
-};
-
-
-template <typename FloatType>
 class EuclideanDistanceFunctionFactory : public DistanceFunctionFactory {
 public:
     EuclideanDistanceFunctionFactory() = default;
diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp
index 38ba8205c90..0212830efb6 100644
--- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp
@@ -8,54 +8,28 @@ using vespalib::eval::TypifyCellType;
 
 namespace search::tensor {
 
-namespace {
-
-struct CalcGeoDegrees {
-    template <typename LCT, typename RCT>
-    static double invoke(const vespalib::eval::TypedCells& lhs,
-                         const vespalib::eval::TypedCells& rhs)
-    {
-        auto lhs_vector = lhs.unsafe_typify<LCT>();
-        auto rhs_vector = rhs.unsafe_typify<RCT>();
-
-        assert(2 == lhs_vector.size());
-        assert(2 == rhs_vector.size());
-        // convert to radians:
-        double lat_A = lhs_vector[0] * GeoDegreesDistance::degrees_to_radians;
-        double lat_B = rhs_vector[0] * GeoDegreesDistance::degrees_to_radians;
-        double lon_A = lhs_vector[1] * GeoDegreesDistance::degrees_to_radians;
-        double lon_B = rhs_vector[1] * GeoDegreesDistance::degrees_to_radians;
-
-        double lat_diff = lat_A - lat_B;
-        double lon_diff = lon_A - lon_B;
-
-        // haversines of differences:
-        double hav_lat = GeoDegreesDistance::hav(lat_diff);
-        double hav_lon = GeoDegreesDistance::hav(lon_diff);
-
-        // haversine of central angle between the two points:
-        double hav_central_angle = hav_lat + cos(lat_A)*cos(lat_B)*hav_lon;
-        return hav_central_angle;
-    }
-};
-
-}
-
-double
-GeoDegreesDistance::calc(const vespalib::eval::TypedCells& lhs,
-                         const vespalib::eval::TypedCells& rhs) const
-{
-    return typify_invoke<2,TypifyCellType,CalcGeoDegrees>(lhs.type, rhs.type, lhs, rhs);
-}
-
-using vespalib::eval::TypedCells;
-
+/**
+ * Calculates great-circle distance between Latitude/Longitude pairs,
+ * measured in degrees.  Output distance is measured in kilometers.
+ * Uses the haversine formula directly from:
+ * https://en.wikipedia.org/wiki/Haversine_formula
+ **/
 class BoundGeoDistance : public BoundDistanceFunction {
 private:
     mutable TemporaryVectorStore<double> _tmpSpace;
     const vespalib::ConstArrayRef<double> _lh_vector;
-    static GeoDegreesDistance _g_d_helper;
 public:
+    // in km, as defined by IUGG, see:
+    // https://en.wikipedia.org/wiki/Earth_radius#Mean_radius
+    static constexpr double earth_mean_radius = 6371.0088;
+    static constexpr double degrees_to_radians = M_PI / 180.0;
+
+    // haversine function:
+    static double haversine(double angle) {
+        double s = sin(0.5*angle);
+        return s*s;
+    }
+
     BoundGeoDistance(const vespalib::eval::TypedCells& lhs)
         : _tmpSpace(lhs.size),
           _lh_vector(_tmpSpace.storeLhs(lhs))
@@ -65,27 +39,33 @@ public:
         assert(2 == _lh_vector.size());
         assert(2 == rhs_vector.size());
         // convert to radians:
-        double lat_A = _lh_vector[0] * GeoDegreesDistance::degrees_to_radians;
-        double lat_B = rhs_vector[0] * GeoDegreesDistance::degrees_to_radians;
-        double lon_A = _lh_vector[1] * GeoDegreesDistance::degrees_to_radians;
-        double lon_B = rhs_vector[1] * GeoDegreesDistance::degrees_to_radians;
+        double lat_A = _lh_vector[0] * degrees_to_radians;
+        double lat_B = rhs_vector[0] * degrees_to_radians;
+        double lon_A = _lh_vector[1] * degrees_to_radians;
+        double lon_B = rhs_vector[1] * degrees_to_radians;
 
         double lat_diff = lat_A - lat_B;
         double lon_diff = lon_A - lon_B;
 
         // haversines of differences:
-        double hav_lat = GeoDegreesDistance::hav(lat_diff);
-        double hav_lon = GeoDegreesDistance::hav(lon_diff);
+        double hav_lat = haversine(lat_diff);
+        double hav_lon = haversine(lon_diff);
 
         // haversine of central angle between the two points:
         double hav_central_angle = hav_lat + cos(lat_A)*cos(lat_B)*hav_lon;
         return hav_central_angle;
     }
     double convert_threshold(double threshold) const override {
-        return _g_d_helper.convert_threshold(threshold);
+        double half_angle = threshold / (2 * earth_mean_radius);
+        double rt_hav = sin(half_angle);
+        return rt_hav * rt_hav;
     }
     double to_rawscore(double distance) const override {
-        return _g_d_helper.to_rawscore(distance);
+        double hav_diff = sqrt(distance);
+        // distance in kilometers:
+        double d = 2 * asin(hav_diff) * earth_mean_radius;
+        // km to rawscore:
+        return 1.0 / (1.0 + d);
     }
     double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override {
         return calc(rhs);
diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h
index 56f75f6daed..801ee56aacf 100644
--- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h
@@ -13,44 +13,9 @@ namespace search::tensor {
 
 /**
  * Calculates great-circle distance between Latitude/Longitude pairs,
- * measured in degrees.  Output distance is measured in meters.
- * Uses the haversine formula directly from:
- * https://en.wikipedia.org/wiki/Haversine_formula
+ * where input is given as degrees.
+ * Output distance is measured in kilometers.
  **/
-class GeoDegreesDistance : public DistanceFunction {
-public:
-    // in km, as defined by IUGG, see:
-    // https://en.wikipedia.org/wiki/Earth_radius#Mean_radius
-    static constexpr double earth_mean_radius = 6371.0088;
-    static constexpr double degrees_to_radians = M_PI / 180.0;
-
-    GeoDegreesDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {}
-    // haversine function:
-    static double hav(double angle) {
-        double s = sin(0.5*angle);
-        return s*s;
-    }
-    double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override;
-    double convert_threshold(double threshold) const override {
-        double half_angle = threshold / (2 * earth_mean_radius);
-        double rt_hav = sin(half_angle);
-        return rt_hav * rt_hav;
-    }
-    double to_rawscore(double distance) const override {
-        double hav_diff = sqrt(distance);
-        // distance in kilometers:
-        double d = 2 * asin(hav_diff) * earth_mean_radius;
-        // km to rawscore:
-        return 1.0 / (1.0 + d);
-    }
-    double calc_with_limit(const vespalib::eval::TypedCells& lhs,
-                           const vespalib::eval::TypedCells& rhs,
-                           double /*limit*/) const override
-    {
-        return calc(lhs, rhs);
-    }
-};
-
 class GeoDistanceFunctionFactory : public DistanceFunctionFactory {
 public:
     GeoDistanceFunctionFactory() = default;
diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
index f4f6842715f..5b6528ef403 100644
--- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
@@ -30,29 +30,6 @@ struct CalcHamming {
 
 }
 
-double
-HammingDistance::calc(const vespalib::eval::TypedCells& lhs,
-                      const vespalib::eval::TypedCells& rhs) const
-{
-    constexpr auto expected = vespalib::eval::CellType::INT8;
-    if (__builtin_expect((lhs.type == expected && rhs.type == expected), true)) {
-        size_t sz = lhs.size;
-        assert(sz == rhs.size);
-        return (double) vespalib::binary_hamming_distance(lhs.data, rhs.data, sz);
-    } else {
-        return typify_invoke<2,TypifyCellType,CalcHamming>(lhs.type, rhs.type, lhs, rhs);
-    }
-}
-
-double
-HammingDistance::calc_with_limit(const vespalib::eval::TypedCells& lhs,
-                                 const vespalib::eval::TypedCells& rhs,
-                                 double) const
-{
-    // consider optimizing:
-    return calc(lhs, rhs);
-}
-
 using vespalib::eval::Int8Float;
 
 template<typename FloatType>
diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
index 3022e0da503..47b022afc13 100644
--- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
@@ -16,20 +16,6 @@ namespace search::tensor {
  * or (for int8 cells, aka binary data only)
  * "number of bits that are different"
  */
-class HammingDistance final : public DistanceFunction {
-public:
-    HammingDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {}
-    double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override;
-    double convert_threshold(double threshold) const override {
-        return threshold;
-    }
-    double to_rawscore(double distance) const override {
-        double score = 1.0 / (1.0 + distance);
-        return score;
-    }
-    double calc_with_limit(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs, double) const override;
-};
-
 template <typename FloatType>
 class HammingDistanceFunctionFactory : public DistanceFunctionFactory {
 public:
diff --git a/searchlib/src/vespa/searchlib/tensor/inner_product_distance.cpp b/searchlib/src/vespa/searchlib/tensor/inner_product_distance.cpp
deleted file mode 100644
index 2e8c970ba91..00000000000
--- a/searchlib/src/vespa/searchlib/tensor/inner_product_distance.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "inner_product_distance.h"
-
-using vespalib::typify_invoke;
-using vespalib::eval::TypifyCellType;
-
-namespace search::tensor {
-
-namespace {
-
-struct CalcInnerProduct {
-    template <typename LCT, typename RCT>
-    static double invoke(const vespalib::eval::TypedCells& lhs,
-                         const vespalib::eval::TypedCells& rhs)
-    {
-        auto lhs_vector = lhs.unsafe_typify<LCT>();
-        auto rhs_vector = rhs.unsafe_typify<RCT>();
-        size_t sz = lhs_vector.size();
-        assert(sz == rhs_vector.size());
-        double dot_product = 0.0;
-        for (size_t i = 0; i < sz; ++i) {
-            double a = lhs_vector[i];
-            double b = rhs_vector[i];
-            dot_product += a*b;
-        }
-        double score = 1.0 - dot_product; // in range [0,2]
-        return std::max(0.0, score);
-    }
-};
-
-}
-
-double
-InnerProductDistance::calc(const vespalib::eval::TypedCells& lhs,
-                           const vespalib::eval::TypedCells& rhs) const
-{
-    return typify_invoke<2,TypifyCellType,CalcInnerProduct>(lhs.type, rhs.type, lhs, rhs);
-}
-
-template class InnerProductDistanceHW<float>;
-template class InnerProductDistanceHW<double>;
-
-}
diff --git a/searchlib/src/vespa/searchlib/tensor/inner_product_distance.h b/searchlib/src/vespa/searchlib/tensor/inner_product_distance.h
deleted file mode 100644
index 135bb186fd4..00000000000
--- a/searchlib/src/vespa/searchlib/tensor/inner_product_distance.h
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include "distance_function.h"
-#include <vespa/eval/eval/typed_cells.h>
-#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
-#include <cmath>
-
-namespace search::tensor {
-
-/**
- * Calculates inner-product "distance" between vectors with assumed norm 1.
- * Should give same ordering as Angular distance, but is less expensive.
- */
-class InnerProductDistance : public DistanceFunction {
-public:
-    InnerProductDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {}
-    double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override;
-    double convert_threshold(double threshold) const override {
-        return threshold;
-    }
-    double to_rawscore(double distance) const override {
-        double score = 1.0 / (1.0 + distance);
-        return score;
-    }
-    double calc_with_limit(const vespalib::eval::TypedCells& lhs,
-                           const vespalib::eval::TypedCells& rhs,
-                           double /*limit*/) const override
-    {
-        return calc(lhs, rhs);
-    }
-};
-
-/**
- * Calculates inner-product "distance" between vectors with assumed norm 1.
- * Should give same ordering as Angular distance, but is less expensive.
- * Will use instruction optimal for the cpu it is running on
- * when both vectors have the expected cell type.
- */
-template <typename FloatType>
-class InnerProductDistanceHW : public InnerProductDistance {
-public:
-    InnerProductDistanceHW()
-      : InnerProductDistance(vespalib::eval::get_cell_type<FloatType>()),
-        _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator())
-    {
-        assert(expected_cell_type() == vespalib::eval::get_cell_type<FloatType>());
-    }
-    double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override {
-        constexpr vespalib::eval::CellType expected = vespalib::eval::get_cell_type<FloatType>();
-        assert(lhs.type == expected && rhs.type == expected);
-        auto lhs_vector = lhs.typify<FloatType>();
-        auto rhs_vector = rhs.typify<FloatType>();
-        size_t sz = lhs_vector.size();
-        assert(sz == rhs_vector.size());
-        double score = 1.0 - _computer.dotProduct(lhs_vector.data(), rhs_vector.data(), sz);
-        return std::max(0.0, score);
-    }
-private:
-    const vespalib::hwaccelrated::IAccelrated & _computer;
-};
-
-}
diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h
index be1699c36ad..b91efbddb3e 100644
--- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h
@@ -11,7 +11,7 @@
 namespace search::tensor {
 
 /**
- * Calculates inner-product "distance" between vectors with assumed norm 1.
+ * Calculates inner-product "distance" between vectors assuming a common norm.
  * Should give same ordering as Angular distance, but is less expensive.
  */
 template <typename FloatType>
diff --git a/streamingvisitors/src/tests/nearest_neighbor_field_searcher/nearest_neighbor_field_searcher_test.cpp b/streamingvisitors/src/tests/nearest_neighbor_field_searcher/nearest_neighbor_field_searcher_test.cpp
index b64d477fd4c..2cd9017653f 100644
--- a/streamingvisitors/src/tests/nearest_neighbor_field_searcher/nearest_neighbor_field_searcher_test.cpp
+++ b/streamingvisitors/src/tests/nearest_neighbor_field_searcher/nearest_neighbor_field_searcher_test.cpp
@@ -57,7 +57,6 @@ public:
     vsm::test::MockFieldSearcherEnv env;
     ValueType tensor_type;
     TensorDataType data_type;
-    SquaredEuclideanDistance dist_func;
     vsm::FieldIdT field_id;
     NearestNeighborFieldSearcher searcher;
     MockQuery query;
@@ -66,7 +65,6 @@ public:
         : env(),
           tensor_type(ValueType::from_spec("tensor(x[2])")),
           data_type(tensor_type),
-          dist_func(CellType::DOUBLE),
           field_id(2),
           searcher(field_id, DistanceMetric::Euclidean),
           query()
@@ -97,7 +95,7 @@ public:
         expect_match(exp_square_distance, node);
     }
     void expect_match(double exp_square_distance, const NearestNeighborQueryNode& node) {
-        double exp_raw_score = dist_func.to_rawscore(exp_square_distance);
+        double exp_raw_score = 1.0 / (1.0 + std::sqrt(exp_square_distance));
         EXPECT_TRUE(node.evaluate());
         EXPECT_DOUBLE_EQ(exp_square_distance, node.get_distance().value());
         EXPECT_DOUBLE_EQ(exp_raw_score, node.get_raw_score().value());
diff --git a/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp
index db4ee12438e..d4aee8ad652 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp
@@ -21,7 +21,6 @@ using search::attribute::Config;
 using search::fef::QueryValue;
 using search::tensor::DistanceCalculator;
 using search::tensor::TensorExtAttribute;
-using search::tensor::make_distance_function;
 using vespalib::eval::ValueType;
 
 namespace {
author	Arne H Juul <arnej27959@users.noreply.github.com>	2023-04-27 23:20:10 +0200
committer	GitHub <noreply@github.com>	2023-04-27 23:20:10 +0200
commit	9c04252f9c8573bdbb787b5e3a0f671487eb412e (patch)
tree	2c111deed492fa8cb09ede189e2ca080c60c590d
parent	27cff4355a2f2bced9da6357ec6c1d7024ba70ff (diff)
parent	40d65770f45b6d4a16d19668487291c02597fa29 (diff)