summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArne Juul <arnej@verizonmedia.com>2021-04-12 11:22:25 +0000
committerArne Juul <arnej@verizonmedia.com>2021-04-12 11:22:25 +0000
commit05391413632841596bd0cd8e40389a185461a0af (patch)
tree4ed11de18d5a2ace1f4822e6edfb039e554ab4eb
parent791c4b163669d5ef8ea671be1efacb89655d3935 (diff)
fix NNS distance for new cell types
This reverts commit f167fe4362c5e4e20a7605b99205cfbee77c569a.
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp2
-rw-r--r--searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp6
-rw-r--r--searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp3
-rw-r--r--searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp3
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp28
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/CMakeLists.txt6
-rw-r--r--searchlib/src/vespa/searchlib/tensor/angular_distance.cpp52
-rw-r--r--searchlib/src/vespa/searchlib/tensor/angular_distance.h76
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_function.h2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp61
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_functions.cpp22
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_functions.h247
-rw-r--r--searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp51
-rw-r--r--searchlib/src/vespa/searchlib/tensor/euclidean_distance.h59
-rw-r--r--searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp50
-rw-r--r--searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h53
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp61
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hamming_distance.h38
-rw-r--r--searchlib/src/vespa/searchlib/tensor/inner_product_distance.cpp44
-rw-r--r--searchlib/src/vespa/searchlib/tensor/inner_product_distance.h64
21 files changed, 609 insertions, 321 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
index f1d910f2635..2a509031e24 100644
--- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -230,7 +230,7 @@ public:
const search::tensor::DistanceFunction *distance_function() const override {
- static search::tensor::SquaredEuclideanDistance<double> my_dist_fun;
+ static search::tensor::SquaredEuclideanDistance my_dist_fun;
return &my_dist_fun;
}
};
diff --git a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
index ee0a2aff80e..6f8c9b4c885 100644
--- a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
+++ b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
@@ -10,6 +10,7 @@
LOG_SETUP("distance_function_test");
using namespace search::tensor;
+using vespalib::eval::Int8Float;
using vespalib::eval::TypedCells;
using search::attribute::DistanceMetric;
@@ -212,6 +213,11 @@ TEST(DistanceFunctionsTest, hamming_gives_expected_score)
EXPECT_DOUBLE_EQ(threshold, 0.5);
threshold = hamming->convert_threshold(1.0);
EXPECT_DOUBLE_EQ(threshold, 1.0);
+
+ std::vector<Int8Float> bytes_a = { 0, 1, 2, 4, 8, 16, 32, 64, -128, 0, 1, 2, 4, 8, 16, 32, 64, -128, 0, 1, 2 };
+ std::vector<Int8Float> bytes_b = { 1, 2, 2, 4, 8, 16, 32, 65, -128, 0, 1, 0, 4, 8, 16, 32, 64, -128, 0, 1, -1 };
+ // expect diff: 1 2 1 1 7
+ EXPECT_EQ(hamming->calc(TypedCells(bytes_a), TypedCells(bytes_b)), 12.0);
}
TEST(GeoDegreesTest, gives_expected_score)
diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
index 20dc55df329..6ffe118aa65 100644
--- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
@@ -51,7 +51,6 @@ struct LevelGenerator : public RandomLevelGenerator {
};
using FloatVectors = MyDocVectorAccess<float>;
-using FloatSqEuclideanDistance = SquaredEuclideanDistance<float>;
using HnswIndexUP = std::unique_ptr<HnswIndex>;
class HnswIndexTest : public ::testing::Test {
@@ -79,7 +78,7 @@ public:
void init(bool heuristic_select_neighbors) {
auto generator = std::make_unique<LevelGenerator>();
level_generator = generator.get();
- index = std::make_unique<HnswIndex>(vectors, std::make_unique<FloatSqEuclideanDistance>(),
+ index = std::make_unique<HnswIndex>(vectors, std::make_unique<SquaredEuclideanDistance>(),
std::move(generator),
HnswIndex::Config(5, 2, 10, 0, heuristic_select_neighbors));
}
diff --git a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
index 8e3bb95a776..7acdb4df983 100644
--- a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp
@@ -117,7 +117,6 @@ public:
}
};
-using FloatSqEuclideanDistance = SquaredEuclideanDistance<float>;
using HnswIndexUP = std::unique_ptr<HnswIndex>;
class Stressor : public ::testing::Test {
@@ -232,7 +231,7 @@ public:
void init() {
uint32_t m = 16;
- index = std::make_unique<HnswIndex>(vectors, std::make_unique<FloatSqEuclideanDistance>(),
+ index = std::make_unique<HnswIndex>(vectors, std::make_unique<SquaredEuclideanDistance>(),
std::make_unique<InvLogLevelGenerator>(m),
HnswIndex::Config(2*m, m, 200, 10, true));
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp
index 4ec23b993b6..8012c48a04a 100644
--- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp
@@ -65,9 +65,12 @@ NearestNeighborBlueprint::NearestNeighborBlueprint(const queryeval::FieldSpec& f
{
auto lct = _query_tensor->cells().type;
auto rct = _attr_tensor.getTensorType().cell_type();
- using MyTypify = vespalib::eval::TypifyCellType;
- auto fixup_fun = vespalib::typify_invoke<2,MyTypify,ConvertCellsSelector>(lct, rct);
- fixup_fun(_query_tensor, _attr_tensor.getTensorType());
+ if (rct == vespalib::eval::CellType::FLOAT || rct == vespalib::eval::CellType::DOUBLE) {
+ // avoid downcasting to bfloat16 etc, that is just extra work
+ using MyTypify = vespalib::eval::TypifyCellType;
+ auto fixup_fun = vespalib::typify_invoke<2,MyTypify,ConvertCellsSelector>(lct, rct);
+ fixup_fun(_query_tensor, _attr_tensor.getTensorType());
+ }
_fallback_dist_fun = search::tensor::make_distance_function(_attr_tensor.distance_metric(), rct);
_dist_fun = _fallback_dist_fun.get();
assert(_dist_fun);
@@ -123,18 +126,13 @@ NearestNeighborBlueprint::perform_top_k()
{
auto nns_index = _attr_tensor.nearest_neighbor_index();
if (_approximate && nns_index) {
- auto lhs_type = _query_tensor->type();
- auto rhs_type = _attr_tensor.getTensorType();
- // different cell types should be converted already
- if (lhs_type == rhs_type) {
- auto lhs = _query_tensor->cells();
- uint32_t k = _target_num_hits;
- if (_global_filter->has_filter()) {
- auto filter = _global_filter->filter();
- _found_hits = nns_index->find_top_k_with_filter(k, lhs, *filter, k + _explore_additional_hits, _distance_threshold);
- } else {
- _found_hits = nns_index->find_top_k(k, lhs, k + _explore_additional_hits, _distance_threshold);
- }
+ auto lhs = _query_tensor->cells();
+ uint32_t k = _target_num_hits;
+ if (_global_filter->has_filter()) {
+ auto filter = _global_filter->filter();
+ _found_hits = nns_index->find_top_k_with_filter(k, lhs, *filter, k + _explore_additional_hits, _distance_threshold);
+ } else {
+ _found_hits = nns_index->find_top_k(k, lhs, k + _explore_additional_hits, _distance_threshold);
}
}
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp
index d85da49c2f7..6379a06e6d6 100644
--- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_iterator.cpp
@@ -16,7 +16,7 @@ bool
is_compatible(const vespalib::eval::ValueType& lhs,
const vespalib::eval::ValueType& rhs)
{
- return (lhs == rhs);
+ return (lhs.dimensions() == rhs.dimensions());
}
}
diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
index 8af77a57a44..46bafa189e1 100644
--- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
@@ -1,6 +1,7 @@
# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
vespa_add_library(searchlib_tensor OBJECT
SOURCES
+ angular_distance.cpp
default_nearest_neighbor_index_factory.cpp
dense_tensor_attribute.cpp
dense_tensor_attribute_saver.cpp
@@ -9,13 +10,16 @@ vespa_add_library(searchlib_tensor OBJECT
direct_tensor_saver.cpp
direct_tensor_store.cpp
distance_function_factory.cpp
- distance_functions.cpp
+ euclidean_distance.cpp
+ geo_degrees_distance.cpp
+ hamming_distance.cpp
hnsw_graph.cpp
hnsw_index.cpp
hnsw_index_loader.cpp
hnsw_index_saver.cpp
imported_tensor_attribute_vector.cpp
imported_tensor_attribute_vector_read_guard.cpp
+ inner_product_distance.cpp
inv_log_level_generator.cpp
nearest_neighbor_index.cpp
nearest_neighbor_index_saver.cpp
diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
new file mode 100644
index 00000000000..21b2622283c
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
@@ -0,0 +1,52 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "angular_distance.h"
+
+using vespalib::typify_invoke;
+using vespalib::eval::TypifyCellType;
+
+namespace search::tensor {
+
+namespace {
+
+struct CalcAngular {
+ template <typename LCT, typename RCT>
+ static double invoke(const vespalib::eval::TypedCells& lhs,
+ const vespalib::eval::TypedCells& rhs)
+ {
+ auto lhs_vector = lhs.unsafe_typify<LCT>();
+ auto rhs_vector = rhs.unsafe_typify<RCT>();
+
+ size_t sz = lhs_vector.size();
+ assert(sz == rhs_vector.size());
+ double a_norm_sq = 0.0;
+ double b_norm_sq = 0.0;
+ double dot_product = 0.0;
+ for (size_t i = 0; i < sz; ++i) {
+ double a = lhs_vector[i];
+ double b = rhs_vector[i];
+ a_norm_sq += a*a;
+ b_norm_sq += b*b;
+ dot_product += a*b;
+ }
+ double squared_norms = a_norm_sq * b_norm_sq;
+ double div = (squared_norms > 0) ? sqrt(squared_norms) : 1.0;
+ double cosine_similarity = dot_product / div;
+ double distance = 1.0 - cosine_similarity; // in range [0,2]
+ return std::max(0.0, distance);
+ }
+};
+
+}
+
+double
+AngularDistance::calc(const vespalib::eval::TypedCells& lhs,
+ const vespalib::eval::TypedCells& rhs) const
+{
+ return typify_invoke<2,TypifyCellType,CalcAngular>(lhs.type, rhs.type, lhs, rhs);
+}
+
+template class AngularDistanceHW<float>;
+template class AngularDistanceHW<double>;
+
+}
diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.h b/searchlib/src/vespa/searchlib/tensor/angular_distance.h
new file mode 100644
index 00000000000..c480ba2879e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.h
@@ -0,0 +1,76 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "distance_function.h"
+#include <vespa/eval/eval/typed_cells.h>
+#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
+#include <cmath>
+
+namespace search::tensor {
+
+/**
+ * Calculates angular distance between vectors
+ */
+class AngularDistance : public DistanceFunction {
+public:
+ AngularDistance() {}
+ double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override;
+ double convert_threshold(double threshold) const override {
+ double cosine_similarity = cos(threshold);
+ return 1.0 - cosine_similarity;
+ }
+ double to_rawscore(double distance) const override {
+ double cosine_similarity = 1.0 - distance;
+ // should be in in range [-1,1] but roundoff may cause problems:
+ cosine_similarity = std::min(1.0, cosine_similarity);
+ cosine_similarity = std::max(-1.0, cosine_similarity);
+ double angle_distance = acos(cosine_similarity); // in range [0,pi]
+ double score = 1.0 / (1.0 + angle_distance);
+ return score;
+ }
+ double calc_with_limit(const vespalib::eval::TypedCells& lhs,
+ const vespalib::eval::TypedCells& rhs,
+ double /*limit*/) const override
+ {
+ return calc(lhs, rhs);
+ }
+};
+
+/**
+ * Calculates angular distance between vectors
+ * Will use instruction optimal for the cpu it is running on
+ * when both vectors have the expected cell type.
+ */
+template <typename FloatType>
+class AngularDistanceHW : public AngularDistance {
+public:
+ AngularDistanceHW()
+ : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator())
+ {}
+ double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override {
+ constexpr vespalib::eval::CellType expected = vespalib::eval::get_cell_type<FloatType>();
+ if (__builtin_expect((lhs.type == expected && rhs.type == expected), true)) {
+ auto lhs_vector = lhs.unsafe_typify<FloatType>();
+ auto rhs_vector = rhs.unsafe_typify<FloatType>();
+ size_t sz = lhs_vector.size();
+ assert(sz == rhs_vector.size());
+ auto a = &lhs_vector[0];
+ auto b = &rhs_vector[0];
+ double a_norm_sq = _computer.dotProduct(a, a, sz);
+ double b_norm_sq = _computer.dotProduct(b, b, sz);
+ double squared_norms = a_norm_sq * b_norm_sq;
+ double dot_product = _computer.dotProduct(a, b, sz);
+ double div = (squared_norms > 0) ? sqrt(squared_norms) : 1.0;
+ double cosine_similarity = dot_product / div;
+ double distance = 1.0 - cosine_similarity; // in range [0,2]
+ return distance;
+ } else {
+ return AngularDistance::calc(lhs, rhs);
+ }
+ }
+private:
+ const vespalib::hwaccelrated::IAccelrated & _computer;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function.h b/searchlib/src/vespa/searchlib/tensor/distance_function.h
index 724f83b6129..08f90fec041 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function.h
@@ -1,4 +1,4 @@
-// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
index 81b27b56258..1d58c01fd99 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
@@ -2,6 +2,10 @@
#include "distance_function_factory.h"
#include "distance_functions.h"
+#include <vespa/vespalib/util/typify.h>
+#include <vespa/log/log.h>
+
+LOG_SETUP(".searchlib.tensor.distance_function_factory");
using search::attribute::DistanceMetric;
using vespalib::eval::CellType;
@@ -13,41 +17,28 @@ DistanceFunction::UP
make_distance_function(DistanceMetric variant, CellType cell_type)
{
switch (variant) {
- case DistanceMetric::Euclidean:
- if (cell_type == CellType::FLOAT) {
- return std::make_unique<SquaredEuclideanDistance<float>>();
- } else {
- return std::make_unique<SquaredEuclideanDistance<double>>();
- }
- break;
- case DistanceMetric::Angular:
- if (cell_type == CellType::FLOAT) {
- return std::make_unique<AngularDistance<float>>();
- } else {
- return std::make_unique<AngularDistance<double>>();
- }
- break;
- case DistanceMetric::GeoDegrees:
- if (cell_type == CellType::FLOAT) {
- return std::make_unique<GeoDegreesDistance<float>>();
- } else {
- return std::make_unique<GeoDegreesDistance<double>>();
- }
- break;
- case DistanceMetric::InnerProduct:
- if (cell_type == CellType::FLOAT) {
- return std::make_unique<InnerProductDistance<float>>();
- } else {
- return std::make_unique<InnerProductDistance<double>>();
- }
- break;
- case DistanceMetric::Hamming:
- if (cell_type == CellType::FLOAT) {
- return std::make_unique<HammingDistance<float>>();
- } else {
- return std::make_unique<HammingDistance<double>>();
- }
- break;
+ case DistanceMetric::Euclidean:
+ switch (cell_type) {
+ case CellType::FLOAT: return std::make_unique<SquaredEuclideanDistanceHW<float>>();
+ case CellType::DOUBLE: return std::make_unique<SquaredEuclideanDistanceHW<double>>();
+ default: return std::make_unique<SquaredEuclideanDistance>();
+ }
+ case DistanceMetric::Angular:
+ switch (cell_type) {
+ case CellType::FLOAT: return std::make_unique<AngularDistanceHW<float>>();
+ case CellType::DOUBLE: return std::make_unique<AngularDistanceHW<double>>();
+ default: return std::make_unique<AngularDistance>();
+ }
+ case DistanceMetric::GeoDegrees:
+ return std::make_unique<GeoDegreesDistance>();
+ case DistanceMetric::InnerProduct:
+ switch (cell_type) {
+ case CellType::FLOAT: return std::make_unique<InnerProductDistanceHW<float>>();
+ case CellType::DOUBLE: return std::make_unique<InnerProductDistanceHW<double>>();
+ default: return std::make_unique<InnerProductDistance>();
+ }
+ case DistanceMetric::Hamming:
+ return std::make_unique<HammingDistance>();
}
// not reached:
return DistanceFunction::UP();
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_functions.cpp b/searchlib/src/vespa/searchlib/tensor/distance_functions.cpp
deleted file mode 100644
index 8cf3a95db19..00000000000
--- a/searchlib/src/vespa/searchlib/tensor/distance_functions.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "distance_functions.h"
-
-namespace search::tensor {
-
-template class SquaredEuclideanDistance<float>;
-template class SquaredEuclideanDistance<double>;
-
-template class AngularDistance<float>;
-template class AngularDistance<double>;
-
-template class InnerProductDistance<float>;
-template class InnerProductDistance<double>;
-
-template class GeoDegreesDistance<float>;
-template class GeoDegreesDistance<double>;
-
-template class HammingDistance<float>;
-template class HammingDistance<double>;
-
-}
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_functions.h b/searchlib/src/vespa/searchlib/tensor/distance_functions.h
index 2557a51e0d7..8ad8c07bd3c 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_functions.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_functions.h
@@ -1,245 +1,10 @@
-// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
#include "distance_function.h"
-#include <vespa/eval/eval/typed_cells.h>
-#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
-#include <cmath>
-
-namespace search::tensor {
-
-/**
- * Calculates the square of the standard Euclidean distance.
- * Will use instruction optimal for the cpu it is running on.
- */
-template <typename FloatType>
-class SquaredEuclideanDistance : public DistanceFunction {
-public:
- SquaredEuclideanDistance()
- : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator())
- {}
- double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override {
- auto lhs_vector = lhs.typify<FloatType>();
- auto rhs_vector = rhs.typify<FloatType>();
- size_t sz = lhs_vector.size();
- assert(sz == rhs_vector.size());
- return _computer.squaredEuclideanDistance(&lhs_vector[0], &rhs_vector[0], sz);
- }
- double convert_threshold(double threshold) const override {
- return threshold*threshold;
- }
- double to_rawscore(double distance) const override {
- double d = sqrt(distance);
- double score = 1.0 / (1.0 + d);
- return score;
- }
- double calc_with_limit(const vespalib::eval::TypedCells& lhs,
- const vespalib::eval::TypedCells& rhs,
- double limit) const override
- {
- auto lhs_vector = lhs.typify<FloatType>();
- auto rhs_vector = rhs.typify<FloatType>();
- double sum = 0.0;
- size_t sz = lhs_vector.size();
- assert(sz == rhs_vector.size());
- for (size_t i = 0; i < sz && sum <= limit; ++i) {
- double diff = lhs_vector[i] - rhs_vector[i];
- sum += diff*diff;
- }
- return sum;
- }
-
- const vespalib::hwaccelrated::IAccelrated & _computer;
-};
-
-/**
- * Calculates angular distance between vectors
- */
-template <typename FloatType>
-class AngularDistance : public DistanceFunction {
-public:
- AngularDistance()
- : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator())
- {}
- double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override {
- auto lhs_vector = lhs.typify<FloatType>();
- auto rhs_vector = rhs.typify<FloatType>();
- size_t sz = lhs_vector.size();
- assert(sz == rhs_vector.size());
- auto a = &lhs_vector[0];
- auto b = &rhs_vector[0];
- double a_norm_sq = _computer.dotProduct(a, a, sz);
- double b_norm_sq = _computer.dotProduct(b, b, sz);
- double squared_norms = a_norm_sq * b_norm_sq;
- double dot_product = _computer.dotProduct(a, b, sz);
- double div = (squared_norms > 0) ? sqrt(squared_norms) : 1.0;
- double cosine_similarity = dot_product / div;
- double distance = 1.0 - cosine_similarity; // in range [0,2]
- return distance;
- }
- double convert_threshold(double threshold) const override {
- double cosine_similarity = cos(threshold);
- return 1.0 - cosine_similarity;
- }
- double to_rawscore(double distance) const override {
- double cosine_similarity = 1.0 - distance;
- // should be in in range [-1,1] but roundoff may cause problems:
- cosine_similarity = std::min(1.0, cosine_similarity);
- cosine_similarity = std::max(-1.0, cosine_similarity);
- double angle_distance = acos(cosine_similarity); // in range [0,pi]
- double score = 1.0 / (1.0 + angle_distance);
- return score;
- }
- double calc_with_limit(const vespalib::eval::TypedCells& lhs,
- const vespalib::eval::TypedCells& rhs,
- double /*limit*/) const override
- {
- return calc(lhs, rhs);
- }
-
- const vespalib::hwaccelrated::IAccelrated & _computer;
-};
-
-/**
- * Calculates inner-product "distance" between vectors with assumed norm 1.
- * Should give same ordering as Angular distance, but is less expensive.
- */
-template <typename FloatType>
-class InnerProductDistance : public DistanceFunction {
-public:
- InnerProductDistance()
- : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator())
- {}
- double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override {
- auto lhs_vector = lhs.typify<FloatType>();
- auto rhs_vector = rhs.typify<FloatType>();
- size_t sz = lhs_vector.size();
- assert(sz == rhs_vector.size());
- double score = 1.0 - _computer.dotProduct(&lhs_vector[0], &rhs_vector[0], sz);
- return std::max(0.0, score);
- }
- double convert_threshold(double threshold) const override {
- return threshold;
- }
- double to_rawscore(double distance) const override {
- double score = 1.0 / (1.0 + distance);
- return score;
- }
- double calc_with_limit(const vespalib::eval::TypedCells& lhs,
- const vespalib::eval::TypedCells& rhs,
- double /*limit*/) const override
- {
- return calc(lhs, rhs);
- }
-
- const vespalib::hwaccelrated::IAccelrated & _computer;
-};
-
-/**
- * Calculates great-circle distance between Latitude/Longitude pairs,
- * measured in degrees. Output distance is measured in meters.
- * Uses the haversine formula directly from:
- * https://en.wikipedia.org/wiki/Haversine_formula
- **/
-template <typename FloatType>
-class GeoDegreesDistance : public DistanceFunction {
-public:
- // in km, as defined by IUGG, see:
- // https://en.wikipedia.org/wiki/Earth_radius#Mean_radius
- static constexpr double earth_mean_radius = 6371.0088;
- static constexpr double degrees_to_radians = M_PI / 180.0;
-
- GeoDegreesDistance() {}
- // haversine function:
- static double hav(double angle) {
- double s = sin(0.5*angle);
- return s*s;
- }
- double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override {
- auto lhs_vector = lhs.typify<FloatType>();
- auto rhs_vector = rhs.typify<FloatType>();
- assert(2 == lhs_vector.size());
- assert(2 == rhs_vector.size());
- // convert to radians:
- double lat_A = lhs_vector[0] * degrees_to_radians;
- double lat_B = rhs_vector[0] * degrees_to_radians;
- double lon_A = lhs_vector[1] * degrees_to_radians;
- double lon_B = rhs_vector[1] * degrees_to_radians;
-
- double lat_diff = lat_A - lat_B;
- double lon_diff = lon_A - lon_B;
-
- // haversines of differences:
- double hav_lat = hav(lat_diff);
- double hav_lon = hav(lon_diff);
-
- // haversine of central angle between the two points:
- double hav_central_angle = hav_lat + cos(lat_A)*cos(lat_B)*hav_lon;
- return hav_central_angle;
- }
- double convert_threshold(double threshold) const override {
- double half_angle = threshold / (2 * earth_mean_radius);
- double rt_hav = sin(half_angle);
- return rt_hav * rt_hav;
- }
- double to_rawscore(double distance) const override {
- double hav_diff = sqrt(distance);
- // distance in kilometers:
- double d = 2 * asin(hav_diff) * earth_mean_radius;
- // km to rawscore:
- return 1.0 / (1.0 + d);
- }
- double calc_with_limit(const vespalib::eval::TypedCells& lhs,
- const vespalib::eval::TypedCells& rhs,
- double /*limit*/) const override
- {
- return calc(lhs, rhs);
- }
-
-};
-
-/**
- * Calculates the Hamming distance defined as
- * "number of cells where the values are different"
- */
-template <typename FloatType>
-class HammingDistance : public DistanceFunction {
-public:
- HammingDistance() {}
- double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override {
- auto lhs_vector = lhs.typify<FloatType>();
- auto rhs_vector = rhs.typify<FloatType>();
- size_t sz = lhs_vector.size();
- assert(sz == rhs_vector.size());
- size_t sum = 0;
- for (size_t i = 0; i < sz; ++i) {
- sum += (lhs_vector[i] == rhs_vector[i]) ? 0 : 1;
- }
- return (double)sum;
- }
- double convert_threshold(double threshold) const override {
- return threshold;
- }
- double to_rawscore(double distance) const override {
- double score = 1.0 / (1.0 + distance);
- return score;
- }
- double calc_with_limit(const vespalib::eval::TypedCells& lhs,
- const vespalib::eval::TypedCells& rhs,
- double limit) const override
- {
- auto lhs_vector = lhs.typify<FloatType>();
- auto rhs_vector = rhs.typify<FloatType>();
- size_t sz = lhs_vector.size();
- assert(sz == rhs_vector.size());
- size_t sum = 0;
- for (size_t i = 0; i < sz && sum <= limit; ++i) {
- sum += (lhs_vector[i] == rhs_vector[i]) ? 0 : 1;
- }
- return (double)sum;
- }
-};
-
-
-}
+#include "angular_distance.h"
+#include "euclidean_distance.h"
+#include "geo_degrees_distance.h"
+#include "hamming_distance.h"
+#include "inner_product_distance.h"
diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
new file mode 100644
index 00000000000..223ceeed940
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
@@ -0,0 +1,51 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "euclidean_distance.h"
+
+using vespalib::typify_invoke;
+using vespalib::eval::TypifyCellType;
+
+namespace search::tensor {
+
+namespace {
+
+struct CalcEuclidean {
+ template <typename LCT, typename RCT>
+ static double invoke(const vespalib::eval::TypedCells& lhs,
+ const vespalib::eval::TypedCells& rhs)
+ {
+ auto lhs_vector = lhs.unsafe_typify<LCT>();
+ auto rhs_vector = rhs.unsafe_typify<RCT>();
+ double sum = 0.0;
+ size_t sz = lhs_vector.size();
+ assert(sz == rhs_vector.size());
+ for (size_t i = 0; i < sz; ++i) {
+ double diff = lhs_vector[i] - rhs_vector[i];
+ sum += diff*diff;
+ }
+ return sum;
+ }
+};
+
+}
+
+double
+SquaredEuclideanDistance::calc(const vespalib::eval::TypedCells& lhs,
+ const vespalib::eval::TypedCells& rhs) const
+{
+ return typify_invoke<2,TypifyCellType,CalcEuclidean>(lhs.type, rhs.type, lhs, rhs);
+}
+
+double
+SquaredEuclideanDistance::calc_with_limit(const vespalib::eval::TypedCells& lhs,
+ const vespalib::eval::TypedCells& rhs,
+ double) const
+{
+ // maybe optimize this:
+ return typify_invoke<2,TypifyCellType,CalcEuclidean>(lhs.type, rhs.type, lhs, rhs);
+}
+
+template class SquaredEuclideanDistanceHW<float>;
+template class SquaredEuclideanDistanceHW<double>;
+
+}
diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
new file mode 100644
index 00000000000..6d4d982834f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
@@ -0,0 +1,59 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "distance_function.h"
+#include <vespa/eval/eval/typed_cells.h>
+#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
+#include <cmath>
+
+namespace search::tensor {
+
+/**
+ * Calculates the square of the standard Euclidean distance.
+ */
+class SquaredEuclideanDistance : public DistanceFunction {
+public:
+ SquaredEuclideanDistance() {}
+ double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override;
+ double calc_with_limit(const vespalib::eval::TypedCells& lhs,
+ const vespalib::eval::TypedCells& rhs,
+ double limit) const override;
+ double convert_threshold(double threshold) const override {
+ return threshold*threshold;
+ }
+ double to_rawscore(double distance) const override {
+ double d = sqrt(distance);
+ double score = 1.0 / (1.0 + d);
+ return score;
+ }
+};
+
+/**
+ * Calculates the square of the standard Euclidean distance.
+ * Will use instruction optimal for the cpu it is running on
+ * when both vectors have the expected cell type.
+ */
+template <typename FloatType>
+class SquaredEuclideanDistanceHW : public SquaredEuclideanDistance {
+public:
+ SquaredEuclideanDistanceHW()
+ : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator())
+ {}
+ double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override {
+ constexpr vespalib::eval::CellType expected = vespalib::eval::get_cell_type<FloatType>();
+ if (__builtin_expect((lhs.type == expected && rhs.type == expected), true)) {
+ auto lhs_vector = lhs.unsafe_typify<FloatType>();
+ auto rhs_vector = rhs.unsafe_typify<FloatType>();
+ size_t sz = lhs_vector.size();
+ assert(sz == rhs_vector.size());
+ return _computer.squaredEuclideanDistance(&lhs_vector[0], &rhs_vector[0], sz);
+ } else {
+ return SquaredEuclideanDistance::calc(lhs, rhs);
+ }
+ }
+private:
+ const vespalib::hwaccelrated::IAccelrated & _computer;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp
new file mode 100644
index 00000000000..946caa0d2cb
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp
@@ -0,0 +1,50 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "geo_degrees_distance.h"
+
+using vespalib::typify_invoke;
+using vespalib::eval::TypifyCellType;
+
+namespace search::tensor {
+
+namespace {
+
+struct CalcGeoDegrees {
+ template <typename LCT, typename RCT>
+ static double invoke(const vespalib::eval::TypedCells& lhs,
+ const vespalib::eval::TypedCells& rhs)
+ {
+ auto lhs_vector = lhs.unsafe_typify<LCT>();
+ auto rhs_vector = rhs.unsafe_typify<RCT>();
+
+ assert(2 == lhs_vector.size());
+ assert(2 == rhs_vector.size());
+ // convert to radians:
+ double lat_A = lhs_vector[0] * GeoDegreesDistance::degrees_to_radians;
+ double lat_B = rhs_vector[0] * GeoDegreesDistance::degrees_to_radians;
+ double lon_A = lhs_vector[1] * GeoDegreesDistance::degrees_to_radians;
+ double lon_B = rhs_vector[1] * GeoDegreesDistance::degrees_to_radians;
+
+ double lat_diff = lat_A - lat_B;
+ double lon_diff = lon_A - lon_B;
+
+ // haversines of differences:
+ double hav_lat = GeoDegreesDistance::hav(lat_diff);
+ double hav_lon = GeoDegreesDistance::hav(lon_diff);
+
+ // haversine of central angle between the two points:
+ double hav_central_angle = hav_lat + cos(lat_A)*cos(lat_B)*hav_lon;
+ return hav_central_angle;
+ }
+};
+
+}
+
+double
+GeoDegreesDistance::calc(const vespalib::eval::TypedCells& lhs,
+ const vespalib::eval::TypedCells& rhs) const
+{
+ return typify_invoke<2,TypifyCellType,CalcGeoDegrees>(lhs.type, rhs.type, lhs, rhs);
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h
new file mode 100644
index 00000000000..b8b9bec50d5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h
@@ -0,0 +1,53 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "distance_function.h"
+#include <vespa/eval/eval/typed_cells.h>
+#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
+#include <vespa/vespalib/util/typify.h>
+#include <cmath>
+
+namespace search::tensor {
+
+/**
+ * Calculates great-circle distance between Latitude/Longitude pairs,
+ * measured in degrees. Output distance is measured in meters.
+ * Uses the haversine formula directly from:
+ * https://en.wikipedia.org/wiki/Haversine_formula
+ **/
+class GeoDegreesDistance : public DistanceFunction {
+public:
+ // in km, as defined by IUGG, see:
+ // https://en.wikipedia.org/wiki/Earth_radius#Mean_radius
+ static constexpr double earth_mean_radius = 6371.0088;
+ static constexpr double degrees_to_radians = M_PI / 180.0;
+
+ GeoDegreesDistance() {}
+ // haversine function:
+ static double hav(double angle) {
+ double s = sin(0.5*angle);
+ return s*s;
+ }
+ double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override;
+ double convert_threshold(double threshold) const override {
+ double half_angle = threshold / (2 * earth_mean_radius);
+ double rt_hav = sin(half_angle);
+ return rt_hav * rt_hav;
+ }
+ double to_rawscore(double distance) const override {
+ double hav_diff = sqrt(distance);
+ // distance in kilometers:
+ double d = 2 * asin(hav_diff) * earth_mean_radius;
+ // km to rawscore:
+ return 1.0 / (1.0 + d);
+ }
+ double calc_with_limit(const vespalib::eval::TypedCells& lhs,
+ const vespalib::eval::TypedCells& rhs,
+ double /*limit*/) const override
+ {
+ return calc(lhs, rhs);
+ }
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
new file mode 100644
index 00000000000..30ba979a6d8
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
@@ -0,0 +1,61 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "hamming_distance.h"
+
+using vespalib::typify_invoke;
+using vespalib::eval::TypifyCellType;
+
+namespace search::tensor {
+
+namespace {
+
+struct CalcHamming {
+ template <typename LCT, typename RCT>
+ static double invoke(const vespalib::eval::TypedCells& lhs,
+ const vespalib::eval::TypedCells& rhs)
+ {
+ auto lhs_vector = lhs.unsafe_typify<LCT>();
+ auto rhs_vector = rhs.unsafe_typify<RCT>();
+ size_t sz = lhs_vector.size();
+ assert(sz == rhs_vector.size());
+ size_t sum = 0;
+ for (size_t i = 0; i < sz; ++i) {
+ sum += (lhs_vector[i] == rhs_vector[i]) ? 0 : 1;
+ }
+ return (double)sum;
+ }
+};
+
+}
+
+double
+HammingDistance::calc(const vespalib::eval::TypedCells& lhs,
+ const vespalib::eval::TypedCells& rhs) const
+{
+ constexpr auto expected = vespalib::eval::CellType::INT8;
+ if (__builtin_expect((lhs.type == expected && rhs.type == expected), true)) {
+ const uint64_t *words_a = static_cast<const uint64_t *>(lhs.data);
+ const uint64_t *words_b = static_cast<const uint64_t *>(rhs.data);
+ size_t sum = 0;
+ size_t sz = lhs.size;
+ assert(sz == rhs.size);
+ size_t i = 0;
+ for (; i * 8 < sz; ++i) {
+ uint64_t xor_bits = words_a[i] ^ words_b[i];
+ sum += __builtin_popcountl(xor_bits);
+ }
+ if (__builtin_expect((i * 8 < sz), false)) {
+ const uint8_t *bytes_a = static_cast<const uint8_t *>(lhs.data);
+ const uint8_t *bytes_b = static_cast<const uint8_t *>(rhs.data);
+ for (i *= 8; i < sz; ++i) {
+ uint64_t xor_bits = bytes_a[i] ^ bytes_b[i];
+ sum += __builtin_popcountl(xor_bits);
+ }
+ }
+ return (double)sum;
+ } else {
+ return typify_invoke<2,TypifyCellType,CalcHamming>(lhs.type, rhs.type, lhs, rhs);
+ }
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
new file mode 100644
index 00000000000..c2cd7af3863
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
@@ -0,0 +1,38 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "distance_function.h"
+#include <vespa/eval/eval/typed_cells.h>
+#include <vespa/vespalib/util/typify.h>
+#include <cmath>
+
+namespace search::tensor {
+
+/**
+ * Calculates the Hamming distance defined as
+ * "number of cells where the values are different"
+ * or (for int8 cells, aka binary data only)
+ * "number of bits that are different"
+ */
+class HammingDistance : public DistanceFunction {
+public:
+ HammingDistance() {}
+ double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override;
+ double convert_threshold(double threshold) const override {
+ return threshold;
+ }
+ double to_rawscore(double distance) const override {
+ double score = 1.0 / (1.0 + distance);
+ return score;
+ }
+ double calc_with_limit(const vespalib::eval::TypedCells& lhs,
+ const vespalib::eval::TypedCells& rhs,
+ double) const override
+ {
+ // consider optimizing:
+ return calc(lhs, rhs);
+ }
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/tensor/inner_product_distance.cpp b/searchlib/src/vespa/searchlib/tensor/inner_product_distance.cpp
new file mode 100644
index 00000000000..8a48c6c936a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/inner_product_distance.cpp
@@ -0,0 +1,44 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "inner_product_distance.h"
+
+using vespalib::typify_invoke;
+using vespalib::eval::TypifyCellType;
+
+namespace search::tensor {
+
+namespace {
+
+struct CalcInnerProduct {
+ template <typename LCT, typename RCT>
+ static double invoke(const vespalib::eval::TypedCells& lhs,
+ const vespalib::eval::TypedCells& rhs)
+ {
+ auto lhs_vector = lhs.unsafe_typify<LCT>();
+ auto rhs_vector = rhs.unsafe_typify<RCT>();
+ size_t sz = lhs_vector.size();
+ assert(sz == rhs_vector.size());
+ double dot_product = 0.0;
+ for (size_t i = 0; i < sz; ++i) {
+ double a = lhs_vector[i];
+ double b = rhs_vector[i];
+ dot_product += a*b;
+ }
+ double score = 1.0 - dot_product; // in range [0,2]
+ return std::max(0.0, score);
+ }
+};
+
+}
+
+double
+InnerProductDistance::calc(const vespalib::eval::TypedCells& lhs,
+ const vespalib::eval::TypedCells& rhs) const
+{
+ return typify_invoke<2,TypifyCellType,CalcInnerProduct>(lhs.type, rhs.type, lhs, rhs);
+}
+
+template class InnerProductDistanceHW<float>;
+template class InnerProductDistanceHW<double>;
+
+}
diff --git a/searchlib/src/vespa/searchlib/tensor/inner_product_distance.h b/searchlib/src/vespa/searchlib/tensor/inner_product_distance.h
new file mode 100644
index 00000000000..cb60d18c0f5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/inner_product_distance.h
@@ -0,0 +1,64 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "distance_function.h"
+#include <vespa/eval/eval/typed_cells.h>
+#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
+#include <cmath>
+
+namespace search::tensor {
+
+/**
+ * Calculates inner-product "distance" between vectors with assumed norm 1.
+ * Should give same ordering as Angular distance, but is less expensive.
+ */
+class InnerProductDistance : public DistanceFunction {
+public:
+ InnerProductDistance() {}
+ double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override;
+ double convert_threshold(double threshold) const override {
+ return threshold;
+ }
+ double to_rawscore(double distance) const override {
+ double score = 1.0 / (1.0 + distance);
+ return score;
+ }
+ double calc_with_limit(const vespalib::eval::TypedCells& lhs,
+ const vespalib::eval::TypedCells& rhs,
+ double /*limit*/) const override
+ {
+ return calc(lhs, rhs);
+ }
+};
+
+/**
+ * Calculates inner-product "distance" between vectors with assumed norm 1.
+ * Should give same ordering as Angular distance, but is less expensive.
+ * Will use instruction optimal for the cpu it is running on
+ * when both vectors have the expected cell type.
+ */
+template <typename FloatType>
+class InnerProductDistanceHW : public InnerProductDistance {
+public:
+ InnerProductDistanceHW()
+ : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator())
+ {}
+ double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override {
+ constexpr vespalib::eval::CellType expected = vespalib::eval::get_cell_type<FloatType>();
+ if (__builtin_expect((lhs.type == expected && rhs.type == expected), true)) {
+ auto lhs_vector = lhs.unsafe_typify<FloatType>();
+ auto rhs_vector = rhs.unsafe_typify<FloatType>();
+ size_t sz = lhs_vector.size();
+ assert(sz == rhs_vector.size());
+ double score = 1.0 - _computer.dotProduct(&lhs_vector[0], &rhs_vector[0], sz);
+ return std::max(0.0, score);
+ } else {
+ return InnerProductDistance::calc(lhs, rhs);
+ }
+ }
+private:
+ const vespalib::hwaccelrated::IAccelrated & _computer;
+};
+
+}