summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArne H Juul <arnej27959@users.noreply.github.com>2023-04-27 23:20:10 +0200
committerGitHub <noreply@github.com>2023-04-27 23:20:10 +0200
commit9c04252f9c8573bdbb787b5e3a0f671487eb412e (patch)
tree2c111deed492fa8cb09ede189e2ca080c60c590d
parent27cff4355a2f2bced9da6357ec6c1d7024ba70ff (diff)
parent40d65770f45b6d4a16d19668487291c02597fa29 (diff)
Merge pull request #26893 from vespa-engine/arnej/remove-unused-distance-functions-3
remove unused distance functions
-rw-r--r--searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp18
-rw-r--r--searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp84
-rw-r--r--searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/CMakeLists.txt1
-rw-r--r--searchlib/src/vespa/searchlib/tensor/angular_distance.cpp11
-rw-r--r--searchlib/src/vespa/searchlib/tensor/angular_distance.h61
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_function.h30
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp73
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_function_factory.h6
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_functions.h1
-rw-r--r--searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp19
-rw-r--r--searchlib/src/vespa/searchlib/tensor/euclidean_distance.h67
-rw-r--r--searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp82
-rw-r--r--searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h39
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp23
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hamming_distance.h14
-rw-r--r--searchlib/src/vespa/searchlib/tensor/inner_product_distance.cpp44
-rw-r--r--searchlib/src/vespa/searchlib/tensor/inner_product_distance.h64
-rw-r--r--searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h2
-rw-r--r--streamingvisitors/src/tests/nearest_neighbor_field_searcher/nearest_neighbor_field_searcher_test.cpp4
-rw-r--r--streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp1
21 files changed, 56 insertions, 590 deletions
diff --git a/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp b/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp
index fd07529795a..b9599a0c75d 100644
--- a/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp
+++ b/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp
@@ -29,7 +29,6 @@ using search::attribute::DistanceMetric;
using search::feature_t;
using search::tensor::DenseTensorAttribute;
using search::tensor::DistanceCalculator;
-using search::tensor::DistanceFunction;
using search::tensor::SerializedFastValueAttribute;
using search::tensor::TensorAttribute;
using vespalib::eval::CellType;
@@ -49,9 +48,6 @@ vespalib::string denseSpecDouble("tensor(x[2])");
vespalib::string denseSpecFloat("tensor<float>(x[2])");
vespalib::string mixed_spec("tensor(m{},x[2])");
-DistanceFunction::UP euclid_d = search::tensor::make_distance_function(DistanceMetric::Euclidean, CellType::DOUBLE);
-DistanceFunction::UP euclid_f = search::tensor::make_distance_function(DistanceMetric::Euclidean, CellType::FLOAT);
-
std::unique_ptr<Value> createTensor(const TensorSpec &spec) {
return SimpleValue::from_spec(spec);
}
@@ -119,14 +115,6 @@ struct Fixture {
auto t = createTensor(_typeSpec, v1, v2);
setTensor(docId, *t);
}
-
- const DistanceFunction &dist_fun() const {
- if (_cfg.tensorType().cell_type() == CellType::FLOAT) {
- return *euclid_f;
- } else {
- return *euclid_d;
- }
- }
};
template <bool strict>
@@ -136,9 +124,11 @@ SimpleResult find_matches(Fixture &env, const Value &qtv, double threshold = std
auto &attr = *(env._attr);
auto dff = search::tensor::make_distance_function_factory(DistanceMetric::Euclidean, qtv.cells().type);
- DistanceCalculator dist_calc(attr, dff->for_query_vector(qtv.cells()));
+ auto df = dff->for_query_vector(qtv.cells());
+ threshold = df->convert_threshold(threshold);
+ DistanceCalculator dist_calc(attr, std::move(df));
NearestNeighborDistanceHeap dh(2);
- dh.set_distance_threshold(env.dist_fun().convert_threshold(threshold));
+ dh.set_distance_threshold(threshold);
const GlobalFilter &filter = *env._global_filter;
auto search = NearestNeighborIterator::create(strict, tfmd, dist_calc, dh, filter);
if (strict) {
diff --git a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
index 9b8ad0d26ce..a1b29c90986 100644
--- a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
+++ b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
@@ -76,10 +76,6 @@ namespace { const double sq_root_half = std::sqrt(0.5); }
TEST(DistanceFunctionsTest, euclidean_gives_expected_score)
{
- auto ct = vespalib::eval::CellType::DOUBLE;
-
- auto euclid = make_distance_function(DistanceMetric::Euclidean, ct);
-
std::vector<double> p0{0.0, 0.0, 0.0};
std::vector<double> p1{1.0, 0.0, 0.0};
std::vector<double> p2{0.0, 1.0, 0.0};
@@ -92,6 +88,9 @@ TEST(DistanceFunctionsTest, euclidean_gives_expected_score)
EXPECT_FLOAT_EQ(n4, 1.0);
double d12 = computeEuclideanChecked(t(p1), t(p2));
EXPECT_EQ(d12, 2.0);
+
+ EuclideanDistanceFunctionFactory<double> dff;
+ auto euclid = dff.for_query_vector(t(p0));
EXPECT_DOUBLE_EQ(euclid->to_rawscore(d12), 1.0/(1.0 + sqrt(2.0)));
double threshold = euclid->convert_threshold(8.0);
EXPECT_EQ(threshold, 64.0);
@@ -142,10 +141,6 @@ TEST(DistanceFunctionsTest, euclidean_gives_expected_score)
TEST(DistanceFunctionsTest, euclidean_int8_smoketest)
{
- auto ct = vespalib::eval::CellType::INT8;
-
- auto euclid = make_distance_function(DistanceMetric::Euclidean, ct);
-
std::vector<Int8Float> p0{0.0, 0.0, 0.0};
std::vector<Int8Float> p1{1.0, 0.0, 0.0};
std::vector<Int8Float> p5{0.0,-1.0, 0.0};
@@ -345,60 +340,10 @@ TEST(DistanceFunctionsTest, prenormalized_angular_gives_expected_score)
EXPECT_DOUBLE_EQ(threshold, 1.0);
}
-TEST(DistanceFunctionsTest, innerproduct_gives_expected_score)
-{
- auto ct = vespalib::eval::CellType::DOUBLE;
-
- auto innerproduct = make_distance_function(DistanceMetric::InnerProduct, ct);
-
- std::vector<double> p0{0.0, 0.0, 0.0};
- std::vector<double> p1{1.0, 0.0, 0.0};
- std::vector<double> p2{0.0, 1.0, 0.0};
- std::vector<double> p3{0.0, 0.0, 1.0};
- std::vector<double> p4{0.5, 0.5, sq_root_half};
- std::vector<double> p5{0.0,-1.0, 0.0};
- std::vector<double> p6{1.0, 2.0, 2.0};
-
- double i12 = innerproduct->calc(t(p1), t(p2));
- double i13 = innerproduct->calc(t(p1), t(p3));
- double i23 = innerproduct->calc(t(p2), t(p3));
- EXPECT_DOUBLE_EQ(i12, 1.0);
- EXPECT_DOUBLE_EQ(i13, 1.0);
- EXPECT_DOUBLE_EQ(i23, 1.0);
-
- double i14 = innerproduct->calc(t(p1), t(p4));
- double i24 = innerproduct->calc(t(p2), t(p4));
- EXPECT_DOUBLE_EQ(i14, 0.5);
- EXPECT_DOUBLE_EQ(i24, 0.5);
- double i34 = innerproduct->calc(t(p3), t(p4));
- EXPECT_FLOAT_EQ(i34, 1.0 - sq_root_half);
-
- double i25 = innerproduct->calc(t(p2), t(p5));
- EXPECT_DOUBLE_EQ(i25, 2.0);
-
- double i44 = innerproduct->calc(t(p4), t(p4));
- EXPECT_GE(i44, 0.0);
- EXPECT_LT(i44, 0.000001);
-
- double i66 = innerproduct->calc(t(p6), t(p6));
- EXPECT_GE(i66, 0.0);
- EXPECT_LT(i66, 0.000001);
-
- double threshold = innerproduct->convert_threshold(0.25);
- EXPECT_DOUBLE_EQ(threshold, 0.25);
- threshold = innerproduct->convert_threshold(0.5);
- EXPECT_DOUBLE_EQ(threshold, 0.5);
- threshold = innerproduct->convert_threshold(1.0);
- EXPECT_DOUBLE_EQ(threshold, 1.0);
-}
TEST(DistanceFunctionsTest, hamming_gives_expected_score)
{
- static HammingDistanceFunctionFactory<Int8Float> dff;
- auto ct = vespalib::eval::CellType::DOUBLE;
-
- auto hamming = make_distance_function(DistanceMetric::Hamming, ct);
-
+ static HammingDistanceFunctionFactory<double> dff;
std::vector<std::vector<double>>
points{{0.0, 0.0, 0.0},
{1.0, 0.0, 0.0},
@@ -407,31 +352,30 @@ TEST(DistanceFunctionsTest, hamming_gives_expected_score)
{0.5, 0.5, 0.5},
{0.0,-1.0, 1.0},
{1.0, 1.0, 1.0}};
+ auto hamming = dff.for_query_vector(t(points[0]));
for (const auto & p : points) {
- double h0 = hamming->calc(t(p), t(p));
- EXPECT_EQ(h0, 0.0);
- EXPECT_EQ(hamming->to_rawscore(h0), 1.0);
auto dist_fun = dff.for_query_vector(t(p));
- EXPECT_EQ(dist_fun->calc(t(p)), 0.0);
+ double h0 = dist_fun->calc(t(p));
+ EXPECT_EQ(h0, 0.0);
EXPECT_EQ(dist_fun->to_rawscore(h0), 1.0);
}
- double d12 = hamming->calc(t(points[1]), t(points[2]));
+ double d12 = dff.for_query_vector(t(points[1]))->calc(t(points[2]));
EXPECT_EQ(d12, 3.0);
EXPECT_DOUBLE_EQ(hamming->to_rawscore(d12), 1.0/(1.0 + 3.0));
- double d16 = hamming->calc(t(points[1]), t(points[6]));
+ double d16 = dff.for_query_vector(t(points[1]))->calc(t(points[6]));
EXPECT_EQ(d16, 2.0);
EXPECT_DOUBLE_EQ(hamming->to_rawscore(d16), 1.0/(1.0 + 2.0));
- double d23 = hamming->calc(t(points[2]), t(points[3]));
+ double d23 = dff.for_query_vector(t(points[2]))->calc(t(points[3]));
EXPECT_EQ(d23, 3.0);
EXPECT_DOUBLE_EQ(hamming->to_rawscore(d23), 1.0/(1.0 + 3.0));
- double d24 = hamming->calc(t(points[2]), t(points[4]));
+ double d24 = dff.for_query_vector(t(points[2]))->calc(t(points[4]));
EXPECT_EQ(d24, 3.0);
EXPECT_DOUBLE_EQ(hamming->to_rawscore(d24), 1.0/(1.0 + 3.0));
- double d25 = hamming->calc(t(points[2]), t(points[5]));
+ double d25 = dff.for_query_vector(t(points[2]))->calc(t(points[5]));
EXPECT_EQ(d25, 1.0);
EXPECT_DOUBLE_EQ(hamming->to_rawscore(d25), 1.0/(1.0 + 1.0));
@@ -445,8 +389,8 @@ TEST(DistanceFunctionsTest, hamming_gives_expected_score)
std::vector<Int8Float> bytes_a = { 0, 1, 2, 4, 8, 16, 32, 64, -128, 0, 1, 2, 4, 8, 16, 32, 64, -128, 0, 1, 2 };
std::vector<Int8Float> bytes_b = { 1, 2, 2, 4, 8, 16, 32, 65, -128, 0, 1, 0, 4, 8, 16, 32, 64, -128, 0, 1, -1 };
// expect diff: 1 2 1 1 7
- EXPECT_EQ(hamming->calc(TypedCells(bytes_a), TypedCells(bytes_b)), 12.0);
- auto dist_fun = dff.for_query_vector(TypedCells(bytes_a));
+ HammingDistanceFunctionFactory<Int8Float> factory_int8;
+ auto dist_fun = factory_int8.for_query_vector(TypedCells(bytes_a));
EXPECT_EQ(dist_fun->calc(TypedCells(bytes_b)), 12.0);
}
diff --git a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
index 0dcd77ec392..b2e5f8863c3 100644
--- a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
@@ -133,8 +133,6 @@ public:
}
};
-using FloatSqEuclideanDistance = SquaredEuclideanDistanceHW<float>;
-
template <typename IndexType>
class Stressor : public ::testing::Test {
private:
diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
index 2e874ffa4ae..362bceaf0da 100644
--- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
@@ -25,7 +25,6 @@ vespa_add_library(searchlib_tensor OBJECT
hnsw_single_best_neighbors.cpp
imported_tensor_attribute_vector.cpp
imported_tensor_attribute_vector_read_guard.cpp
- inner_product_distance.cpp
inv_log_level_generator.cpp
large_subspaces_buffer_type.cpp
nearest_neighbor_index.cpp
diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
index a7ae02bb9f4..ab8f2b30df9 100644
--- a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
@@ -41,17 +41,6 @@ struct CalcAngular {
}
-double
-AngularDistance::calc(const vespalib::eval::TypedCells& lhs,
- const vespalib::eval::TypedCells& rhs) const
-{
- return typify_invoke<2,TypifyCellType,CalcAngular>(lhs.type, rhs.type, lhs, rhs);
-}
-
-template class AngularDistanceHW<float>;
-template class AngularDistanceHW<double>;
-
-
template<typename FloatType>
class BoundAngularDistance : public BoundDistanceFunction {
private:
diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.h b/searchlib/src/vespa/searchlib/tensor/angular_distance.h
index caed5d8c216..50d3e617cf4 100644
--- a/searchlib/src/vespa/searchlib/tensor/angular_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.h
@@ -13,69 +13,10 @@ namespace search::tensor {
/**
* Calculates angular distance between vectors
- */
-class AngularDistance : public DistanceFunction {
-public:
- AngularDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {}
- double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override;
- double convert_threshold(double threshold) const override {
- double cosine_similarity = cos(threshold);
- return 1.0 - cosine_similarity;
- }
- double to_rawscore(double distance) const override {
- double cosine_similarity = 1.0 - distance;
- // should be in the range [-1,1] but roundoff may cause problems:
- cosine_similarity = std::min(1.0, cosine_similarity);
- cosine_similarity = std::max(-1.0, cosine_similarity);
- double angle_distance = acos(cosine_similarity); // in range [0,pi]
- double score = 1.0 / (1.0 + angle_distance);
- return score;
- }
- double calc_with_limit(const vespalib::eval::TypedCells& lhs,
- const vespalib::eval::TypedCells& rhs,
- double /*limit*/) const override
- {
- return calc(lhs, rhs);
- }
-};
-
-/**
- * Calculates angular distance between vectors
* Will use instruction optimal for the cpu it is running on
- * when both vectors have the expected cell type.
+ * after converting both vectors to an optimal cell type.
*/
template <typename FloatType>
-class AngularDistanceHW : public AngularDistance {
-public:
- AngularDistanceHW()
- : AngularDistance(vespalib::eval::get_cell_type<FloatType>()),
- _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator())
- {
- assert(expected_cell_type() == vespalib::eval::get_cell_type<FloatType>());
- }
- double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override {
- constexpr vespalib::eval::CellType expected = vespalib::eval::get_cell_type<FloatType>();
- assert(lhs.type == expected && rhs.type == expected);
- auto lhs_vector = lhs.typify<FloatType>();
- auto rhs_vector = rhs.typify<FloatType>();
- size_t sz = lhs_vector.size();
- assert(sz == rhs_vector.size());
- auto a = lhs_vector.data();
- auto b = rhs_vector.data();
- double a_norm_sq = _computer.dotProduct(a, a, sz);
- double b_norm_sq = _computer.dotProduct(b, b, sz);
- double squared_norms = a_norm_sq * b_norm_sq;
- double dot_product = _computer.dotProduct(a, b, sz);
- double div = (squared_norms > 0) ? sqrt(squared_norms) : 1.0;
- double cosine_similarity = dot_product / div;
- double distance = 1.0 - cosine_similarity; // in range [0,2]
- return distance;
- }
-private:
- const vespalib::hwaccelrated::IAccelrated & _computer;
-};
-
-template <typename FloatType>
class AngularDistanceFunctionFactory : public DistanceFunctionFactory {
public:
AngularDistanceFunctionFactory() = default;
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function.h b/searchlib/src/vespa/searchlib/tensor/distance_function.h
index 443191a272c..31a837d17f4 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function.h
@@ -20,34 +20,4 @@ public:
virtual double to_rawscore(double distance) const = 0;
};
-/**
- * Interface used to calculate the distance between two n-dimensional vectors.
- *
- * The vectors must be of same size and same cell type (float or double).
- * The actual implementation must know which type the vectors are.
- */
-class DistanceFunction : public DistanceConverter {
-private:
- vespalib::eval::CellType _expect_cell_type;
-public:
- using UP = std::unique_ptr<DistanceFunction>;
-
- DistanceFunction(vespalib::eval::CellType expected) : _expect_cell_type(expected) {}
-
- virtual ~DistanceFunction() = default;
-
- // input (query) vectors must be converted to this cell type:
- vespalib::eval::CellType expected_cell_type() const {
- return _expect_cell_type;
- }
-
- // calculate internal distance (comparable)
- virtual double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const = 0;
-
- // calculate internal distance, early return allowed if > limit
- virtual double calc_with_limit(const vespalib::eval::TypedCells& lhs,
- const vespalib::eval::TypedCells& rhs,
- double limit) const = 0;
-};
-
}
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
index 3a4bd94de23..bec0f430a51 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
@@ -15,79 +15,6 @@ using vespalib::eval::ValueType;
namespace search::tensor {
-DistanceFunction::UP
-make_distance_function(DistanceMetric variant, CellType cell_type)
-{
- switch (variant) {
- case DistanceMetric::Euclidean:
- switch (cell_type) {
- case CellType::FLOAT: return std::make_unique<SquaredEuclideanDistanceHW<float>>();
- case CellType::DOUBLE: return std::make_unique<SquaredEuclideanDistanceHW<double>>();
- case CellType::INT8: return std::make_unique<SquaredEuclideanDistanceHW<vespalib::eval::Int8Float>>();
- default: return std::make_unique<SquaredEuclideanDistance>(CellType::FLOAT);
- }
- case DistanceMetric::Angular:
- switch (cell_type) {
- case CellType::FLOAT: return std::make_unique<AngularDistanceHW<float>>();
- case CellType::DOUBLE: return std::make_unique<AngularDistanceHW<double>>();
- default: return std::make_unique<AngularDistance>(CellType::FLOAT);
- }
- case DistanceMetric::GeoDegrees:
- return std::make_unique<GeoDegreesDistance>(CellType::DOUBLE);
- case DistanceMetric::PrenormalizedAngular:
- case DistanceMetric::InnerProduct:
- switch (cell_type) {
- case CellType::FLOAT: return std::make_unique<InnerProductDistanceHW<float>>();
- case CellType::DOUBLE: return std::make_unique<InnerProductDistanceHW<double>>();
- default: return std::make_unique<InnerProductDistance>(CellType::FLOAT);
- }
- case DistanceMetric::Hamming:
- return std::make_unique<HammingDistance>(cell_type);
- }
- // not reached:
- return DistanceFunction::UP();
-}
-
-
-class SimpleBoundDistanceFunction : public BoundDistanceFunction {
- const vespalib::eval::TypedCells _lhs;
- const DistanceFunction &_df;
-public:
- SimpleBoundDistanceFunction(const vespalib::eval::TypedCells& lhs,
- const DistanceFunction &df)
- : _lhs(lhs),
- _df(df)
- {}
-
- double calc(const vespalib::eval::TypedCells& rhs) const override {
- return _df.calc(_lhs, rhs);
- }
- double convert_threshold(double threshold) const override {
- return _df.convert_threshold(threshold);
- }
- double to_rawscore(double distance) const override {
- return _df.to_rawscore(distance);
- }
- double calc_with_limit(const vespalib::eval::TypedCells& rhs, double limit) const override {
- return _df.calc_with_limit(_lhs, rhs, limit);
- }
-};
-
-class SimpleDistanceFunctionFactory : public DistanceFunctionFactory {
- DistanceFunction::UP _df;
-public:
- SimpleDistanceFunctionFactory(DistanceFunction::UP df)
- : _df(std::move(df))
- {}
-
- BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override {
- return std::make_unique<SimpleBoundDistanceFunction>(lhs, *_df);
- }
- BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override {
- return std::make_unique<SimpleBoundDistanceFunction>(lhs, *_df);
- }
-};
-
std::unique_ptr<DistanceFunctionFactory>
make_distance_function_factory(search::attribute::DistanceMetric variant,
vespalib::eval::CellType cell_type)
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h
index 433c8e1962a..a69c7cff739 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h
@@ -23,13 +23,9 @@ struct DistanceFunctionFactory {
};
/**
- * Create a distance function object customized for the given metric
+ * Create a distance function factory customized for the given metric
* variant and (attribute) cell type.
**/
-DistanceFunction::UP
-make_distance_function(search::attribute::DistanceMetric variant,
- vespalib::eval::CellType cell_type);
-
DistanceFunctionFactory::UP
make_distance_function_factory(search::attribute::DistanceMetric variant,
vespalib::eval::CellType cell_type);
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_functions.h b/searchlib/src/vespa/searchlib/tensor/distance_functions.h
index 2300dba2db1..ec401b9313d 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_functions.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_functions.h
@@ -7,5 +7,4 @@
#include "euclidean_distance.h"
#include "geo_degrees_distance.h"
#include "hamming_distance.h"
-#include "inner_product_distance.h"
#include "prenormalized_angular_distance.h"
diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
index a98b37cb6cc..1b8924ab351 100644
--- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
@@ -30,25 +30,6 @@ struct CalcEuclidean {
}
-double
-SquaredEuclideanDistance::calc(const vespalib::eval::TypedCells& lhs,
- const vespalib::eval::TypedCells& rhs) const
-{
- return typify_invoke<2,TypifyCellType,CalcEuclidean>(lhs.type, rhs.type, lhs, rhs);
-}
-
-double
-SquaredEuclideanDistance::calc_with_limit(const vespalib::eval::TypedCells& lhs,
- const vespalib::eval::TypedCells& rhs,
- double) const
-{
- // maybe optimize this:
- return typify_invoke<2,TypifyCellType,CalcEuclidean>(lhs.type, rhs.type, lhs, rhs);
-}
-
-template class SquaredEuclideanDistanceHW<float>;
-template class SquaredEuclideanDistanceHW<double>;
-
using vespalib::eval::Int8Float;
using vespalib::BFloat16;
diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
index 06295caee9e..fc663c50ccd 100644
--- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
@@ -12,75 +12,10 @@ namespace search::tensor {
/**
* Calculates the square of the standard Euclidean distance.
- */
-class SquaredEuclideanDistance : public DistanceFunction {
-public:
- SquaredEuclideanDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {}
- double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override;
- double calc_with_limit(const vespalib::eval::TypedCells& lhs,
- const vespalib::eval::TypedCells& rhs,
- double limit) const override;
- double convert_threshold(double threshold) const override {
- return threshold*threshold;
- }
- double to_rawscore(double distance) const override {
- double d = sqrt(distance);
- double score = 1.0 / (1.0 + d);
- return score;
- }
-};
-
-/**
- * Calculates the square of the standard Euclidean distance.
* Will use instruction optimal for the cpu it is running on
- * when both vectors have the expected cell type.
+ * after converting both vectors to an optimal cell type.
*/
template <typename FloatType>
-class SquaredEuclideanDistanceHW : public SquaredEuclideanDistance {
-public:
- SquaredEuclideanDistanceHW()
- : SquaredEuclideanDistance(vespalib::eval::get_cell_type<FloatType>()),
- _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator())
- {
- assert(expected_cell_type() == vespalib::eval::get_cell_type<FloatType>());
- }
-
- static const double *cast(const double * p) { return p; }
- static const float *cast(const float * p) { return p; }
- static const int8_t *cast(const vespalib::eval::Int8Float * p) { return reinterpret_cast<const int8_t *>(p); }
- double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override {
- constexpr vespalib::eval::CellType expected = vespalib::eval::get_cell_type<FloatType>();
- assert(lhs.type == expected && rhs.type == expected);
- auto lhs_vector = lhs.typify<FloatType>();
- auto rhs_vector = rhs.typify<FloatType>();
- size_t sz = lhs_vector.size();
- assert(sz == rhs_vector.size());
- return _computer.squaredEuclideanDistance(cast(&lhs_vector[0]), cast(&rhs_vector[0]), sz);
- }
-
- double calc_with_limit(const vespalib::eval::TypedCells& lhs,
- const vespalib::eval::TypedCells& rhs,
- double limit) const override
- {
- constexpr vespalib::eval::CellType expected = vespalib::eval::get_cell_type<FloatType>();
- assert(lhs.type == expected && rhs.type == expected);
- auto lhs_vector = lhs.typify<FloatType>();
- auto rhs_vector = rhs.typify<FloatType>();
- double sum = 0.0;
- size_t sz = lhs_vector.size();
- assert(sz == rhs_vector.size());
- for (size_t i = 0; i < sz && sum <= limit; ++i) {
- double diff = lhs_vector[i] - rhs_vector[i];
- sum += diff*diff;
- }
- return sum;
- }
-private:
- const vespalib::hwaccelrated::IAccelrated & _computer;
-};
-
-
-template <typename FloatType>
class EuclideanDistanceFunctionFactory : public DistanceFunctionFactory {
public:
EuclideanDistanceFunctionFactory() = default;
diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp
index 38ba8205c90..0212830efb6 100644
--- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp
@@ -8,54 +8,28 @@ using vespalib::eval::TypifyCellType;
namespace search::tensor {
-namespace {
-
-struct CalcGeoDegrees {
- template <typename LCT, typename RCT>
- static double invoke(const vespalib::eval::TypedCells& lhs,
- const vespalib::eval::TypedCells& rhs)
- {
- auto lhs_vector = lhs.unsafe_typify<LCT>();
- auto rhs_vector = rhs.unsafe_typify<RCT>();
-
- assert(2 == lhs_vector.size());
- assert(2 == rhs_vector.size());
- // convert to radians:
- double lat_A = lhs_vector[0] * GeoDegreesDistance::degrees_to_radians;
- double lat_B = rhs_vector[0] * GeoDegreesDistance::degrees_to_radians;
- double lon_A = lhs_vector[1] * GeoDegreesDistance::degrees_to_radians;
- double lon_B = rhs_vector[1] * GeoDegreesDistance::degrees_to_radians;
-
- double lat_diff = lat_A - lat_B;
- double lon_diff = lon_A - lon_B;
-
- // haversines of differences:
- double hav_lat = GeoDegreesDistance::hav(lat_diff);
- double hav_lon = GeoDegreesDistance::hav(lon_diff);
-
- // haversine of central angle between the two points:
- double hav_central_angle = hav_lat + cos(lat_A)*cos(lat_B)*hav_lon;
- return hav_central_angle;
- }
-};
-
-}
-
-double
-GeoDegreesDistance::calc(const vespalib::eval::TypedCells& lhs,
- const vespalib::eval::TypedCells& rhs) const
-{
- return typify_invoke<2,TypifyCellType,CalcGeoDegrees>(lhs.type, rhs.type, lhs, rhs);
-}
-
-using vespalib::eval::TypedCells;
-
+/**
+ * Calculates great-circle distance between Latitude/Longitude pairs,
+ * measured in degrees. Output distance is measured in kilometers.
+ * Uses the haversine formula directly from:
+ * https://en.wikipedia.org/wiki/Haversine_formula
+ **/
class BoundGeoDistance : public BoundDistanceFunction {
private:
mutable TemporaryVectorStore<double> _tmpSpace;
const vespalib::ConstArrayRef<double> _lh_vector;
- static GeoDegreesDistance _g_d_helper;
public:
+ // in km, as defined by IUGG, see:
+ // https://en.wikipedia.org/wiki/Earth_radius#Mean_radius
+ static constexpr double earth_mean_radius = 6371.0088;
+ static constexpr double degrees_to_radians = M_PI / 180.0;
+
+ // haversine function:
+ static double haversine(double angle) {
+ double s = sin(0.5*angle);
+ return s*s;
+ }
+
BoundGeoDistance(const vespalib::eval::TypedCells& lhs)
: _tmpSpace(lhs.size),
_lh_vector(_tmpSpace.storeLhs(lhs))
@@ -65,27 +39,33 @@ public:
assert(2 == _lh_vector.size());
assert(2 == rhs_vector.size());
// convert to radians:
- double lat_A = _lh_vector[0] * GeoDegreesDistance::degrees_to_radians;
- double lat_B = rhs_vector[0] * GeoDegreesDistance::degrees_to_radians;
- double lon_A = _lh_vector[1] * GeoDegreesDistance::degrees_to_radians;
- double lon_B = rhs_vector[1] * GeoDegreesDistance::degrees_to_radians;
+ double lat_A = _lh_vector[0] * degrees_to_radians;
+ double lat_B = rhs_vector[0] * degrees_to_radians;
+ double lon_A = _lh_vector[1] * degrees_to_radians;
+ double lon_B = rhs_vector[1] * degrees_to_radians;
double lat_diff = lat_A - lat_B;
double lon_diff = lon_A - lon_B;
// haversines of differences:
- double hav_lat = GeoDegreesDistance::hav(lat_diff);
- double hav_lon = GeoDegreesDistance::hav(lon_diff);
+ double hav_lat = haversine(lat_diff);
+ double hav_lon = haversine(lon_diff);
// haversine of central angle between the two points:
double hav_central_angle = hav_lat + cos(lat_A)*cos(lat_B)*hav_lon;
return hav_central_angle;
}
double convert_threshold(double threshold) const override {
- return _g_d_helper.convert_threshold(threshold);
+ double half_angle = threshold / (2 * earth_mean_radius);
+ double rt_hav = sin(half_angle);
+ return rt_hav * rt_hav;
}
double to_rawscore(double distance) const override {
- return _g_d_helper.to_rawscore(distance);
+ double hav_diff = sqrt(distance);
+ // distance in kilometers:
+ double d = 2 * asin(hav_diff) * earth_mean_radius;
+ // km to rawscore:
+ return 1.0 / (1.0 + d);
}
double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override {
return calc(rhs);
diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h
index 56f75f6daed..801ee56aacf 100644
--- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h
@@ -13,44 +13,9 @@ namespace search::tensor {
/**
* Calculates great-circle distance between Latitude/Longitude pairs,
- * measured in degrees. Output distance is measured in meters.
- * Uses the haversine formula directly from:
- * https://en.wikipedia.org/wiki/Haversine_formula
+ * where input is given as degrees.
+ * Output distance is measured in kilometers.
**/
-class GeoDegreesDistance : public DistanceFunction {
-public:
- // in km, as defined by IUGG, see:
- // https://en.wikipedia.org/wiki/Earth_radius#Mean_radius
- static constexpr double earth_mean_radius = 6371.0088;
- static constexpr double degrees_to_radians = M_PI / 180.0;
-
- GeoDegreesDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {}
- // haversine function:
- static double hav(double angle) {
- double s = sin(0.5*angle);
- return s*s;
- }
- double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override;
- double convert_threshold(double threshold) const override {
- double half_angle = threshold / (2 * earth_mean_radius);
- double rt_hav = sin(half_angle);
- return rt_hav * rt_hav;
- }
- double to_rawscore(double distance) const override {
- double hav_diff = sqrt(distance);
- // distance in kilometers:
- double d = 2 * asin(hav_diff) * earth_mean_radius;
- // km to rawscore:
- return 1.0 / (1.0 + d);
- }
- double calc_with_limit(const vespalib::eval::TypedCells& lhs,
- const vespalib::eval::TypedCells& rhs,
- double /*limit*/) const override
- {
- return calc(lhs, rhs);
- }
-};
-
class GeoDistanceFunctionFactory : public DistanceFunctionFactory {
public:
GeoDistanceFunctionFactory() = default;
diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
index f4f6842715f..5b6528ef403 100644
--- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
@@ -30,29 +30,6 @@ struct CalcHamming {
}
-double
-HammingDistance::calc(const vespalib::eval::TypedCells& lhs,
- const vespalib::eval::TypedCells& rhs) const
-{
- constexpr auto expected = vespalib::eval::CellType::INT8;
- if (__builtin_expect((lhs.type == expected && rhs.type == expected), true)) {
- size_t sz = lhs.size;
- assert(sz == rhs.size);
- return (double) vespalib::binary_hamming_distance(lhs.data, rhs.data, sz);
- } else {
- return typify_invoke<2,TypifyCellType,CalcHamming>(lhs.type, rhs.type, lhs, rhs);
- }
-}
-
-double
-HammingDistance::calc_with_limit(const vespalib::eval::TypedCells& lhs,
- const vespalib::eval::TypedCells& rhs,
- double) const
-{
- // consider optimizing:
- return calc(lhs, rhs);
-}
-
using vespalib::eval::Int8Float;
template<typename FloatType>
diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
index 3022e0da503..47b022afc13 100644
--- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
@@ -16,20 +16,6 @@ namespace search::tensor {
* or (for int8 cells, aka binary data only)
* "number of bits that are different"
*/
-class HammingDistance final : public DistanceFunction {
-public:
- HammingDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {}
- double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override;
- double convert_threshold(double threshold) const override {
- return threshold;
- }
- double to_rawscore(double distance) const override {
- double score = 1.0 / (1.0 + distance);
- return score;
- }
- double calc_with_limit(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs, double) const override;
-};
-
template <typename FloatType>
class HammingDistanceFunctionFactory : public DistanceFunctionFactory {
public:
diff --git a/searchlib/src/vespa/searchlib/tensor/inner_product_distance.cpp b/searchlib/src/vespa/searchlib/tensor/inner_product_distance.cpp
deleted file mode 100644
index 2e8c970ba91..00000000000
--- a/searchlib/src/vespa/searchlib/tensor/inner_product_distance.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "inner_product_distance.h"
-
-using vespalib::typify_invoke;
-using vespalib::eval::TypifyCellType;
-
-namespace search::tensor {
-
-namespace {
-
-struct CalcInnerProduct {
- template <typename LCT, typename RCT>
- static double invoke(const vespalib::eval::TypedCells& lhs,
- const vespalib::eval::TypedCells& rhs)
- {
- auto lhs_vector = lhs.unsafe_typify<LCT>();
- auto rhs_vector = rhs.unsafe_typify<RCT>();
- size_t sz = lhs_vector.size();
- assert(sz == rhs_vector.size());
- double dot_product = 0.0;
- for (size_t i = 0; i < sz; ++i) {
- double a = lhs_vector[i];
- double b = rhs_vector[i];
- dot_product += a*b;
- }
- double score = 1.0 - dot_product; // in range [0,2]
- return std::max(0.0, score);
- }
-};
-
-}
-
-double
-InnerProductDistance::calc(const vespalib::eval::TypedCells& lhs,
- const vespalib::eval::TypedCells& rhs) const
-{
- return typify_invoke<2,TypifyCellType,CalcInnerProduct>(lhs.type, rhs.type, lhs, rhs);
-}
-
-template class InnerProductDistanceHW<float>;
-template class InnerProductDistanceHW<double>;
-
-}
diff --git a/searchlib/src/vespa/searchlib/tensor/inner_product_distance.h b/searchlib/src/vespa/searchlib/tensor/inner_product_distance.h
deleted file mode 100644
index 135bb186fd4..00000000000
--- a/searchlib/src/vespa/searchlib/tensor/inner_product_distance.h
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include "distance_function.h"
-#include <vespa/eval/eval/typed_cells.h>
-#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
-#include <cmath>
-
-namespace search::tensor {
-
-/**
- * Calculates inner-product "distance" between vectors with assumed norm 1.
- * Should give same ordering as Angular distance, but is less expensive.
- */
-class InnerProductDistance : public DistanceFunction {
-public:
- InnerProductDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {}
- double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override;
- double convert_threshold(double threshold) const override {
- return threshold;
- }
- double to_rawscore(double distance) const override {
- double score = 1.0 / (1.0 + distance);
- return score;
- }
- double calc_with_limit(const vespalib::eval::TypedCells& lhs,
- const vespalib::eval::TypedCells& rhs,
- double /*limit*/) const override
- {
- return calc(lhs, rhs);
- }
-};
-
-/**
- * Calculates inner-product "distance" between vectors with assumed norm 1.
- * Should give same ordering as Angular distance, but is less expensive.
- * Will use instruction optimal for the cpu it is running on
- * when both vectors have the expected cell type.
- */
-template <typename FloatType>
-class InnerProductDistanceHW : public InnerProductDistance {
-public:
- InnerProductDistanceHW()
- : InnerProductDistance(vespalib::eval::get_cell_type<FloatType>()),
- _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator())
- {
- assert(expected_cell_type() == vespalib::eval::get_cell_type<FloatType>());
- }
- double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override {
- constexpr vespalib::eval::CellType expected = vespalib::eval::get_cell_type<FloatType>();
- assert(lhs.type == expected && rhs.type == expected);
- auto lhs_vector = lhs.typify<FloatType>();
- auto rhs_vector = rhs.typify<FloatType>();
- size_t sz = lhs_vector.size();
- assert(sz == rhs_vector.size());
- double score = 1.0 - _computer.dotProduct(lhs_vector.data(), rhs_vector.data(), sz);
- return std::max(0.0, score);
- }
-private:
- const vespalib::hwaccelrated::IAccelrated & _computer;
-};
-
-}
diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h
index be1699c36ad..b91efbddb3e 100644
--- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h
@@ -11,7 +11,7 @@
namespace search::tensor {
/**
- * Calculates inner-product "distance" between vectors with assumed norm 1.
+ * Calculates inner-product "distance" between vectors assuming a common norm.
* Should give same ordering as Angular distance, but is less expensive.
*/
template <typename FloatType>
diff --git a/streamingvisitors/src/tests/nearest_neighbor_field_searcher/nearest_neighbor_field_searcher_test.cpp b/streamingvisitors/src/tests/nearest_neighbor_field_searcher/nearest_neighbor_field_searcher_test.cpp
index b64d477fd4c..2cd9017653f 100644
--- a/streamingvisitors/src/tests/nearest_neighbor_field_searcher/nearest_neighbor_field_searcher_test.cpp
+++ b/streamingvisitors/src/tests/nearest_neighbor_field_searcher/nearest_neighbor_field_searcher_test.cpp
@@ -57,7 +57,6 @@ public:
vsm::test::MockFieldSearcherEnv env;
ValueType tensor_type;
TensorDataType data_type;
- SquaredEuclideanDistance dist_func;
vsm::FieldIdT field_id;
NearestNeighborFieldSearcher searcher;
MockQuery query;
@@ -66,7 +65,6 @@ public:
: env(),
tensor_type(ValueType::from_spec("tensor(x[2])")),
data_type(tensor_type),
- dist_func(CellType::DOUBLE),
field_id(2),
searcher(field_id, DistanceMetric::Euclidean),
query()
@@ -97,7 +95,7 @@ public:
expect_match(exp_square_distance, node);
}
void expect_match(double exp_square_distance, const NearestNeighborQueryNode& node) {
- double exp_raw_score = dist_func.to_rawscore(exp_square_distance);
+ double exp_raw_score = 1.0 / (1.0 + std::sqrt(exp_square_distance));
EXPECT_TRUE(node.evaluate());
EXPECT_DOUBLE_EQ(exp_square_distance, node.get_distance().value());
EXPECT_DOUBLE_EQ(exp_raw_score, node.get_raw_score().value());
diff --git a/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp
index db4ee12438e..d4aee8ad652 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp
@@ -21,7 +21,6 @@ using search::attribute::Config;
using search::fef::QueryValue;
using search::tensor::DistanceCalculator;
using search::tensor::TensorExtAttribute;
-using search::tensor::make_distance_function;
using vespalib::eval::ValueType;
namespace {