diff options
22 files changed, 57 insertions, 594 deletions
diff --git a/container-core/src/main/java/com/yahoo/jdisc/http/server/jetty/HttpRequestDispatch.java b/container-core/src/main/java/com/yahoo/jdisc/http/server/jetty/HttpRequestDispatch.java index 4cdbab5f89b..f9a2e3bc1a6 100644 --- a/container-core/src/main/java/com/yahoo/jdisc/http/server/jetty/HttpRequestDispatch.java +++ b/container-core/src/main/java/com/yahoo/jdisc/http/server/jetty/HttpRequestDispatch.java @@ -17,14 +17,11 @@ import jakarta.servlet.AsyncEvent; import jakarta.servlet.AsyncListener; import jakarta.servlet.http.HttpServletRequest; import jakarta.servlet.http.HttpServletResponse; -import org.eclipse.jetty.http2.HTTP2Session; -import org.eclipse.jetty.http2.frames.GoAwayFrame; import org.eclipse.jetty.http2.server.HTTP2ServerConnection; import org.eclipse.jetty.io.Connection; import org.eclipse.jetty.io.EofException; import org.eclipse.jetty.server.HttpConnection; import org.eclipse.jetty.server.Request; -import org.eclipse.jetty.util.Callback; import java.io.IOException; import java.nio.charset.StandardCharsets; @@ -182,7 +179,7 @@ class HttpRequestDispatch { // Graceful shutdown implies a GOAWAY frame with 'Error Code' = 'NO_ERROR' and 'Last-Stream-ID' = 2^31-1. // In-flight requests will be allowed to complete before connection is terminated. // See https://datatracker.ietf.org/doc/html/rfc9113#name-goaway for details - ((HTTP2Session)http2.getSession()).goAway(GoAwayFrame.GRACEFUL, Callback.NOOP); + http2.getSession().shutdown(); } } diff --git a/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp b/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp index fd07529795a..b9599a0c75d 100644 --- a/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp +++ b/searchlib/src/tests/queryeval/nearest_neighbor/nearest_neighbor_test.cpp @@ -29,7 +29,6 @@ using search::attribute::DistanceMetric; using search::feature_t; using search::tensor::DenseTensorAttribute; using search::tensor::DistanceCalculator; -using search::tensor::DistanceFunction; using search::tensor::SerializedFastValueAttribute; using search::tensor::TensorAttribute; using vespalib::eval::CellType; @@ -49,9 +48,6 @@ vespalib::string denseSpecDouble("tensor(x[2])"); vespalib::string denseSpecFloat("tensor<float>(x[2])"); vespalib::string mixed_spec("tensor(m{},x[2])"); -DistanceFunction::UP euclid_d = search::tensor::make_distance_function(DistanceMetric::Euclidean, CellType::DOUBLE); -DistanceFunction::UP euclid_f = search::tensor::make_distance_function(DistanceMetric::Euclidean, CellType::FLOAT); - std::unique_ptr<Value> createTensor(const TensorSpec &spec) { return SimpleValue::from_spec(spec); } @@ -119,14 +115,6 @@ struct Fixture { auto t = createTensor(_typeSpec, v1, v2); setTensor(docId, *t); } - - const DistanceFunction &dist_fun() const { - if (_cfg.tensorType().cell_type() == CellType::FLOAT) { - return *euclid_f; - } else { - return *euclid_d; - } - } }; template <bool strict> @@ -136,9 +124,11 @@ SimpleResult find_matches(Fixture &env, const Value &qtv, double threshold = std auto &attr = *(env._attr); auto dff = search::tensor::make_distance_function_factory(DistanceMetric::Euclidean, qtv.cells().type); - DistanceCalculator dist_calc(attr, dff->for_query_vector(qtv.cells())); + auto df = dff->for_query_vector(qtv.cells()); + threshold = df->convert_threshold(threshold); + DistanceCalculator dist_calc(attr, std::move(df)); NearestNeighborDistanceHeap dh(2); - dh.set_distance_threshold(env.dist_fun().convert_threshold(threshold)); + dh.set_distance_threshold(threshold); const GlobalFilter &filter = *env._global_filter; auto search = NearestNeighborIterator::create(strict, tfmd, dist_calc, dh, filter); if (strict) { diff --git a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp index 9b8ad0d26ce..a1b29c90986 100644 --- a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp +++ b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp @@ -76,10 +76,6 @@ namespace { const double sq_root_half = std::sqrt(0.5); } TEST(DistanceFunctionsTest, euclidean_gives_expected_score) { - auto ct = vespalib::eval::CellType::DOUBLE; - - auto euclid = make_distance_function(DistanceMetric::Euclidean, ct); - std::vector<double> p0{0.0, 0.0, 0.0}; std::vector<double> p1{1.0, 0.0, 0.0}; std::vector<double> p2{0.0, 1.0, 0.0}; @@ -92,6 +88,9 @@ TEST(DistanceFunctionsTest, euclidean_gives_expected_score) EXPECT_FLOAT_EQ(n4, 1.0); double d12 = computeEuclideanChecked(t(p1), t(p2)); EXPECT_EQ(d12, 2.0); + + EuclideanDistanceFunctionFactory<double> dff; + auto euclid = dff.for_query_vector(t(p0)); EXPECT_DOUBLE_EQ(euclid->to_rawscore(d12), 1.0/(1.0 + sqrt(2.0))); double threshold = euclid->convert_threshold(8.0); EXPECT_EQ(threshold, 64.0); @@ -142,10 +141,6 @@ TEST(DistanceFunctionsTest, euclidean_gives_expected_score) TEST(DistanceFunctionsTest, euclidean_int8_smoketest) { - auto ct = vespalib::eval::CellType::INT8; - - auto euclid = make_distance_function(DistanceMetric::Euclidean, ct); - std::vector<Int8Float> p0{0.0, 0.0, 0.0}; std::vector<Int8Float> p1{1.0, 0.0, 0.0}; std::vector<Int8Float> p5{0.0,-1.0, 0.0}; @@ -345,60 +340,10 @@ TEST(DistanceFunctionsTest, prenormalized_angular_gives_expected_score) EXPECT_DOUBLE_EQ(threshold, 1.0); } -TEST(DistanceFunctionsTest, innerproduct_gives_expected_score) -{ - auto ct = vespalib::eval::CellType::DOUBLE; - - auto innerproduct = make_distance_function(DistanceMetric::InnerProduct, ct); - - std::vector<double> p0{0.0, 0.0, 0.0}; - std::vector<double> p1{1.0, 0.0, 0.0}; - std::vector<double> p2{0.0, 1.0, 0.0}; - std::vector<double> p3{0.0, 0.0, 1.0}; - std::vector<double> p4{0.5, 0.5, sq_root_half}; - std::vector<double> p5{0.0,-1.0, 0.0}; - std::vector<double> p6{1.0, 2.0, 2.0}; - - double i12 = innerproduct->calc(t(p1), t(p2)); - double i13 = innerproduct->calc(t(p1), t(p3)); - double i23 = innerproduct->calc(t(p2), t(p3)); - EXPECT_DOUBLE_EQ(i12, 1.0); - EXPECT_DOUBLE_EQ(i13, 1.0); - EXPECT_DOUBLE_EQ(i23, 1.0); - - double i14 = innerproduct->calc(t(p1), t(p4)); - double i24 = innerproduct->calc(t(p2), t(p4)); - EXPECT_DOUBLE_EQ(i14, 0.5); - EXPECT_DOUBLE_EQ(i24, 0.5); - double i34 = innerproduct->calc(t(p3), t(p4)); - EXPECT_FLOAT_EQ(i34, 1.0 - sq_root_half); - - double i25 = innerproduct->calc(t(p2), t(p5)); - EXPECT_DOUBLE_EQ(i25, 2.0); - - double i44 = innerproduct->calc(t(p4), t(p4)); - EXPECT_GE(i44, 0.0); - EXPECT_LT(i44, 0.000001); - - double i66 = innerproduct->calc(t(p6), t(p6)); - EXPECT_GE(i66, 0.0); - EXPECT_LT(i66, 0.000001); - - double threshold = innerproduct->convert_threshold(0.25); - EXPECT_DOUBLE_EQ(threshold, 0.25); - threshold = innerproduct->convert_threshold(0.5); - EXPECT_DOUBLE_EQ(threshold, 0.5); - threshold = innerproduct->convert_threshold(1.0); - EXPECT_DOUBLE_EQ(threshold, 1.0); -} TEST(DistanceFunctionsTest, hamming_gives_expected_score) { - static HammingDistanceFunctionFactory<Int8Float> dff; - auto ct = vespalib::eval::CellType::DOUBLE; - - auto hamming = make_distance_function(DistanceMetric::Hamming, ct); - + static HammingDistanceFunctionFactory<double> dff; std::vector<std::vector<double>> points{{0.0, 0.0, 0.0}, {1.0, 0.0, 0.0}, @@ -407,31 +352,30 @@ TEST(DistanceFunctionsTest, hamming_gives_expected_score) {0.5, 0.5, 0.5}, {0.0,-1.0, 1.0}, {1.0, 1.0, 1.0}}; + auto hamming = dff.for_query_vector(t(points[0])); for (const auto & p : points) { - double h0 = hamming->calc(t(p), t(p)); - EXPECT_EQ(h0, 0.0); - EXPECT_EQ(hamming->to_rawscore(h0), 1.0); auto dist_fun = dff.for_query_vector(t(p)); - EXPECT_EQ(dist_fun->calc(t(p)), 0.0); + double h0 = dist_fun->calc(t(p)); + EXPECT_EQ(h0, 0.0); EXPECT_EQ(dist_fun->to_rawscore(h0), 1.0); } - double d12 = hamming->calc(t(points[1]), t(points[2])); + double d12 = dff.for_query_vector(t(points[1]))->calc(t(points[2])); EXPECT_EQ(d12, 3.0); EXPECT_DOUBLE_EQ(hamming->to_rawscore(d12), 1.0/(1.0 + 3.0)); - double d16 = hamming->calc(t(points[1]), t(points[6])); + double d16 = dff.for_query_vector(t(points[1]))->calc(t(points[6])); EXPECT_EQ(d16, 2.0); EXPECT_DOUBLE_EQ(hamming->to_rawscore(d16), 1.0/(1.0 + 2.0)); - double d23 = hamming->calc(t(points[2]), t(points[3])); + double d23 = dff.for_query_vector(t(points[2]))->calc(t(points[3])); EXPECT_EQ(d23, 3.0); EXPECT_DOUBLE_EQ(hamming->to_rawscore(d23), 1.0/(1.0 + 3.0)); - double d24 = hamming->calc(t(points[2]), t(points[4])); + double d24 = dff.for_query_vector(t(points[2]))->calc(t(points[4])); EXPECT_EQ(d24, 3.0); EXPECT_DOUBLE_EQ(hamming->to_rawscore(d24), 1.0/(1.0 + 3.0)); - double d25 = hamming->calc(t(points[2]), t(points[5])); + double d25 = dff.for_query_vector(t(points[2]))->calc(t(points[5])); EXPECT_EQ(d25, 1.0); EXPECT_DOUBLE_EQ(hamming->to_rawscore(d25), 1.0/(1.0 + 1.0)); @@ -445,8 +389,8 @@ TEST(DistanceFunctionsTest, hamming_gives_expected_score) std::vector<Int8Float> bytes_a = { 0, 1, 2, 4, 8, 16, 32, 64, -128, 0, 1, 2, 4, 8, 16, 32, 64, -128, 0, 1, 2 }; std::vector<Int8Float> bytes_b = { 1, 2, 2, 4, 8, 16, 32, 65, -128, 0, 1, 0, 4, 8, 16, 32, 64, -128, 0, 1, -1 }; // expect diff: 1 2 1 1 7 - EXPECT_EQ(hamming->calc(TypedCells(bytes_a), TypedCells(bytes_b)), 12.0); - auto dist_fun = dff.for_query_vector(TypedCells(bytes_a)); + HammingDistanceFunctionFactory<Int8Float> factory_int8; + auto dist_fun = factory_int8.for_query_vector(TypedCells(bytes_a)); EXPECT_EQ(dist_fun->calc(TypedCells(bytes_b)), 12.0); } diff --git a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp index 0dcd77ec392..b2e5f8863c3 100644 --- a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp @@ -133,8 +133,6 @@ public: } }; -using FloatSqEuclideanDistance = SquaredEuclideanDistanceHW<float>; - template <typename IndexType> class Stressor : public ::testing::Test { private: diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt index 2e874ffa4ae..362bceaf0da 100644 --- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt @@ -25,7 +25,6 @@ vespa_add_library(searchlib_tensor OBJECT hnsw_single_best_neighbors.cpp imported_tensor_attribute_vector.cpp imported_tensor_attribute_vector_read_guard.cpp - inner_product_distance.cpp inv_log_level_generator.cpp large_subspaces_buffer_type.cpp nearest_neighbor_index.cpp diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp index a7ae02bb9f4..ab8f2b30df9 100644 --- a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp @@ -41,17 +41,6 @@ struct CalcAngular { } -double -AngularDistance::calc(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs) const -{ - return typify_invoke<2,TypifyCellType,CalcAngular>(lhs.type, rhs.type, lhs, rhs); -} - -template class AngularDistanceHW<float>; -template class AngularDistanceHW<double>; - - template<typename FloatType> class BoundAngularDistance : public BoundDistanceFunction { private: diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.h b/searchlib/src/vespa/searchlib/tensor/angular_distance.h index caed5d8c216..50d3e617cf4 100644 --- a/searchlib/src/vespa/searchlib/tensor/angular_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.h @@ -13,69 +13,10 @@ namespace search::tensor { /** * Calculates angular distance between vectors - */ -class AngularDistance : public DistanceFunction { -public: - AngularDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {} - double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override; - double convert_threshold(double threshold) const override { - double cosine_similarity = cos(threshold); - return 1.0 - cosine_similarity; - } - double to_rawscore(double distance) const override { - double cosine_similarity = 1.0 - distance; - // should be in the range [-1,1] but roundoff may cause problems: - cosine_similarity = std::min(1.0, cosine_similarity); - cosine_similarity = std::max(-1.0, cosine_similarity); - double angle_distance = acos(cosine_similarity); // in range [0,pi] - double score = 1.0 / (1.0 + angle_distance); - return score; - } - double calc_with_limit(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs, - double /*limit*/) const override - { - return calc(lhs, rhs); - } -}; - -/** - * Calculates angular distance between vectors * Will use instruction optimal for the cpu it is running on - * when both vectors have the expected cell type. + * after converting both vectors to an optimal cell type. */ template <typename FloatType> -class AngularDistanceHW : public AngularDistance { -public: - AngularDistanceHW() - : AngularDistance(vespalib::eval::get_cell_type<FloatType>()), - _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()) - { - assert(expected_cell_type() == vespalib::eval::get_cell_type<FloatType>()); - } - double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override { - constexpr vespalib::eval::CellType expected = vespalib::eval::get_cell_type<FloatType>(); - assert(lhs.type == expected && rhs.type == expected); - auto lhs_vector = lhs.typify<FloatType>(); - auto rhs_vector = rhs.typify<FloatType>(); - size_t sz = lhs_vector.size(); - assert(sz == rhs_vector.size()); - auto a = lhs_vector.data(); - auto b = rhs_vector.data(); - double a_norm_sq = _computer.dotProduct(a, a, sz); - double b_norm_sq = _computer.dotProduct(b, b, sz); - double squared_norms = a_norm_sq * b_norm_sq; - double dot_product = _computer.dotProduct(a, b, sz); - double div = (squared_norms > 0) ? sqrt(squared_norms) : 1.0; - double cosine_similarity = dot_product / div; - double distance = 1.0 - cosine_similarity; // in range [0,2] - return distance; - } -private: - const vespalib::hwaccelrated::IAccelrated & _computer; -}; - -template <typename FloatType> class AngularDistanceFunctionFactory : public DistanceFunctionFactory { public: AngularDistanceFunctionFactory() = default; diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function.h b/searchlib/src/vespa/searchlib/tensor/distance_function.h index 443191a272c..31a837d17f4 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_function.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_function.h @@ -20,34 +20,4 @@ public: virtual double to_rawscore(double distance) const = 0; }; -/** - * Interface used to calculate the distance between two n-dimensional vectors. - * - * The vectors must be of same size and same cell type (float or double). - * The actual implementation must know which type the vectors are. - */ -class DistanceFunction : public DistanceConverter { -private: - vespalib::eval::CellType _expect_cell_type; -public: - using UP = std::unique_ptr<DistanceFunction>; - - DistanceFunction(vespalib::eval::CellType expected) : _expect_cell_type(expected) {} - - virtual ~DistanceFunction() = default; - - // input (query) vectors must be converted to this cell type: - vespalib::eval::CellType expected_cell_type() const { - return _expect_cell_type; - } - - // calculate internal distance (comparable) - virtual double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const = 0; - - // calculate internal distance, early return allowed if > limit - virtual double calc_with_limit(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs, - double limit) const = 0; -}; - } diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp index 3a4bd94de23..bec0f430a51 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp +++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp @@ -15,79 +15,6 @@ using vespalib::eval::ValueType; namespace search::tensor { -DistanceFunction::UP -make_distance_function(DistanceMetric variant, CellType cell_type) -{ - switch (variant) { - case DistanceMetric::Euclidean: - switch (cell_type) { - case CellType::FLOAT: return std::make_unique<SquaredEuclideanDistanceHW<float>>(); - case CellType::DOUBLE: return std::make_unique<SquaredEuclideanDistanceHW<double>>(); - case CellType::INT8: return std::make_unique<SquaredEuclideanDistanceHW<vespalib::eval::Int8Float>>(); - default: return std::make_unique<SquaredEuclideanDistance>(CellType::FLOAT); - } - case DistanceMetric::Angular: - switch (cell_type) { - case CellType::FLOAT: return std::make_unique<AngularDistanceHW<float>>(); - case CellType::DOUBLE: return std::make_unique<AngularDistanceHW<double>>(); - default: return std::make_unique<AngularDistance>(CellType::FLOAT); - } - case DistanceMetric::GeoDegrees: - return std::make_unique<GeoDegreesDistance>(CellType::DOUBLE); - case DistanceMetric::PrenormalizedAngular: - case DistanceMetric::InnerProduct: - switch (cell_type) { - case CellType::FLOAT: return std::make_unique<InnerProductDistanceHW<float>>(); - case CellType::DOUBLE: return std::make_unique<InnerProductDistanceHW<double>>(); - default: return std::make_unique<InnerProductDistance>(CellType::FLOAT); - } - case DistanceMetric::Hamming: - return std::make_unique<HammingDistance>(cell_type); - } - // not reached: - return DistanceFunction::UP(); -} - - -class SimpleBoundDistanceFunction : public BoundDistanceFunction { - const vespalib::eval::TypedCells _lhs; - const DistanceFunction &_df; -public: - SimpleBoundDistanceFunction(const vespalib::eval::TypedCells& lhs, - const DistanceFunction &df) - : _lhs(lhs), - _df(df) - {} - - double calc(const vespalib::eval::TypedCells& rhs) const override { - return _df.calc(_lhs, rhs); - } - double convert_threshold(double threshold) const override { - return _df.convert_threshold(threshold); - } - double to_rawscore(double distance) const override { - return _df.to_rawscore(distance); - } - double calc_with_limit(const vespalib::eval::TypedCells& rhs, double limit) const override { - return _df.calc_with_limit(_lhs, rhs, limit); - } -}; - -class SimpleDistanceFunctionFactory : public DistanceFunctionFactory { - DistanceFunction::UP _df; -public: - SimpleDistanceFunctionFactory(DistanceFunction::UP df) - : _df(std::move(df)) - {} - - BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override { - return std::make_unique<SimpleBoundDistanceFunction>(lhs, *_df); - } - BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override { - return std::make_unique<SimpleBoundDistanceFunction>(lhs, *_df); - } -}; - std::unique_ptr<DistanceFunctionFactory> make_distance_function_factory(search::attribute::DistanceMetric variant, vespalib::eval::CellType cell_type) diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h index 433c8e1962a..a69c7cff739 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h @@ -23,13 +23,9 @@ struct DistanceFunctionFactory { }; /** - * Create a distance function object customized for the given metric + * Create a distance function factory customized for the given metric * variant and (attribute) cell type. **/ -DistanceFunction::UP -make_distance_function(search::attribute::DistanceMetric variant, - vespalib::eval::CellType cell_type); - DistanceFunctionFactory::UP make_distance_function_factory(search::attribute::DistanceMetric variant, vespalib::eval::CellType cell_type); diff --git a/searchlib/src/vespa/searchlib/tensor/distance_functions.h b/searchlib/src/vespa/searchlib/tensor/distance_functions.h index 2300dba2db1..ec401b9313d 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_functions.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_functions.h @@ -7,5 +7,4 @@ #include "euclidean_distance.h" #include "geo_degrees_distance.h" #include "hamming_distance.h" -#include "inner_product_distance.h" #include "prenormalized_angular_distance.h" diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp index a98b37cb6cc..1b8924ab351 100644 --- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp @@ -30,25 +30,6 @@ struct CalcEuclidean { } -double -SquaredEuclideanDistance::calc(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs) const -{ - return typify_invoke<2,TypifyCellType,CalcEuclidean>(lhs.type, rhs.type, lhs, rhs); -} - -double -SquaredEuclideanDistance::calc_with_limit(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs, - double) const -{ - // maybe optimize this: - return typify_invoke<2,TypifyCellType,CalcEuclidean>(lhs.type, rhs.type, lhs, rhs); -} - -template class SquaredEuclideanDistanceHW<float>; -template class SquaredEuclideanDistanceHW<double>; - using vespalib::eval::Int8Float; using vespalib::BFloat16; diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h index 06295caee9e..fc663c50ccd 100644 --- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h @@ -12,75 +12,10 @@ namespace search::tensor { /** * Calculates the square of the standard Euclidean distance. - */ -class SquaredEuclideanDistance : public DistanceFunction { -public: - SquaredEuclideanDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {} - double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override; - double calc_with_limit(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs, - double limit) const override; - double convert_threshold(double threshold) const override { - return threshold*threshold; - } - double to_rawscore(double distance) const override { - double d = sqrt(distance); - double score = 1.0 / (1.0 + d); - return score; - } -}; - -/** - * Calculates the square of the standard Euclidean distance. * Will use instruction optimal for the cpu it is running on - * when both vectors have the expected cell type. + * after converting both vectors to an optimal cell type. */ template <typename FloatType> -class SquaredEuclideanDistanceHW : public SquaredEuclideanDistance { -public: - SquaredEuclideanDistanceHW() - : SquaredEuclideanDistance(vespalib::eval::get_cell_type<FloatType>()), - _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()) - { - assert(expected_cell_type() == vespalib::eval::get_cell_type<FloatType>()); - } - - static const double *cast(const double * p) { return p; } - static const float *cast(const float * p) { return p; } - static const int8_t *cast(const vespalib::eval::Int8Float * p) { return reinterpret_cast<const int8_t *>(p); } - double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override { - constexpr vespalib::eval::CellType expected = vespalib::eval::get_cell_type<FloatType>(); - assert(lhs.type == expected && rhs.type == expected); - auto lhs_vector = lhs.typify<FloatType>(); - auto rhs_vector = rhs.typify<FloatType>(); - size_t sz = lhs_vector.size(); - assert(sz == rhs_vector.size()); - return _computer.squaredEuclideanDistance(cast(&lhs_vector[0]), cast(&rhs_vector[0]), sz); - } - - double calc_with_limit(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs, - double limit) const override - { - constexpr vespalib::eval::CellType expected = vespalib::eval::get_cell_type<FloatType>(); - assert(lhs.type == expected && rhs.type == expected); - auto lhs_vector = lhs.typify<FloatType>(); - auto rhs_vector = rhs.typify<FloatType>(); - double sum = 0.0; - size_t sz = lhs_vector.size(); - assert(sz == rhs_vector.size()); - for (size_t i = 0; i < sz && sum <= limit; ++i) { - double diff = lhs_vector[i] - rhs_vector[i]; - sum += diff*diff; - } - return sum; - } -private: - const vespalib::hwaccelrated::IAccelrated & _computer; -}; - - -template <typename FloatType> class EuclideanDistanceFunctionFactory : public DistanceFunctionFactory { public: EuclideanDistanceFunctionFactory() = default; diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp index 38ba8205c90..0212830efb6 100644 --- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp @@ -8,54 +8,28 @@ using vespalib::eval::TypifyCellType; namespace search::tensor { -namespace { - -struct CalcGeoDegrees { - template <typename LCT, typename RCT> - static double invoke(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs) - { - auto lhs_vector = lhs.unsafe_typify<LCT>(); - auto rhs_vector = rhs.unsafe_typify<RCT>(); - - assert(2 == lhs_vector.size()); - assert(2 == rhs_vector.size()); - // convert to radians: - double lat_A = lhs_vector[0] * GeoDegreesDistance::degrees_to_radians; - double lat_B = rhs_vector[0] * GeoDegreesDistance::degrees_to_radians; - double lon_A = lhs_vector[1] * GeoDegreesDistance::degrees_to_radians; - double lon_B = rhs_vector[1] * GeoDegreesDistance::degrees_to_radians; - - double lat_diff = lat_A - lat_B; - double lon_diff = lon_A - lon_B; - - // haversines of differences: - double hav_lat = GeoDegreesDistance::hav(lat_diff); - double hav_lon = GeoDegreesDistance::hav(lon_diff); - - // haversine of central angle between the two points: - double hav_central_angle = hav_lat + cos(lat_A)*cos(lat_B)*hav_lon; - return hav_central_angle; - } -}; - -} - -double -GeoDegreesDistance::calc(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs) const -{ - return typify_invoke<2,TypifyCellType,CalcGeoDegrees>(lhs.type, rhs.type, lhs, rhs); -} - -using vespalib::eval::TypedCells; - +/** + * Calculates great-circle distance between Latitude/Longitude pairs, + * measured in degrees. Output distance is measured in kilometers. + * Uses the haversine formula directly from: + * https://en.wikipedia.org/wiki/Haversine_formula + **/ class BoundGeoDistance : public BoundDistanceFunction { private: mutable TemporaryVectorStore<double> _tmpSpace; const vespalib::ConstArrayRef<double> _lh_vector; - static GeoDegreesDistance _g_d_helper; public: + // in km, as defined by IUGG, see: + // https://en.wikipedia.org/wiki/Earth_radius#Mean_radius + static constexpr double earth_mean_radius = 6371.0088; + static constexpr double degrees_to_radians = M_PI / 180.0; + + // haversine function: + static double haversine(double angle) { + double s = sin(0.5*angle); + return s*s; + } + BoundGeoDistance(const vespalib::eval::TypedCells& lhs) : _tmpSpace(lhs.size), _lh_vector(_tmpSpace.storeLhs(lhs)) @@ -65,27 +39,33 @@ public: assert(2 == _lh_vector.size()); assert(2 == rhs_vector.size()); // convert to radians: - double lat_A = _lh_vector[0] * GeoDegreesDistance::degrees_to_radians; - double lat_B = rhs_vector[0] * GeoDegreesDistance::degrees_to_radians; - double lon_A = _lh_vector[1] * GeoDegreesDistance::degrees_to_radians; - double lon_B = rhs_vector[1] * GeoDegreesDistance::degrees_to_radians; + double lat_A = _lh_vector[0] * degrees_to_radians; + double lat_B = rhs_vector[0] * degrees_to_radians; + double lon_A = _lh_vector[1] * degrees_to_radians; + double lon_B = rhs_vector[1] * degrees_to_radians; double lat_diff = lat_A - lat_B; double lon_diff = lon_A - lon_B; // haversines of differences: - double hav_lat = GeoDegreesDistance::hav(lat_diff); - double hav_lon = GeoDegreesDistance::hav(lon_diff); + double hav_lat = haversine(lat_diff); + double hav_lon = haversine(lon_diff); // haversine of central angle between the two points: double hav_central_angle = hav_lat + cos(lat_A)*cos(lat_B)*hav_lon; return hav_central_angle; } double convert_threshold(double threshold) const override { - return _g_d_helper.convert_threshold(threshold); + double half_angle = threshold / (2 * earth_mean_radius); + double rt_hav = sin(half_angle); + return rt_hav * rt_hav; } double to_rawscore(double distance) const override { - return _g_d_helper.to_rawscore(distance); + double hav_diff = sqrt(distance); + // distance in kilometers: + double d = 2 * asin(hav_diff) * earth_mean_radius; + // km to rawscore: + return 1.0 / (1.0 + d); } double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override { return calc(rhs); diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h index 56f75f6daed..801ee56aacf 100644 --- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h @@ -13,44 +13,9 @@ namespace search::tensor { /** * Calculates great-circle distance between Latitude/Longitude pairs, - * measured in degrees. Output distance is measured in meters. - * Uses the haversine formula directly from: - * https://en.wikipedia.org/wiki/Haversine_formula + * where input is given as degrees. + * Output distance is measured in kilometers. **/ -class GeoDegreesDistance : public DistanceFunction { -public: - // in km, as defined by IUGG, see: - // https://en.wikipedia.org/wiki/Earth_radius#Mean_radius - static constexpr double earth_mean_radius = 6371.0088; - static constexpr double degrees_to_radians = M_PI / 180.0; - - GeoDegreesDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {} - // haversine function: - static double hav(double angle) { - double s = sin(0.5*angle); - return s*s; - } - double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override; - double convert_threshold(double threshold) const override { - double half_angle = threshold / (2 * earth_mean_radius); - double rt_hav = sin(half_angle); - return rt_hav * rt_hav; - } - double to_rawscore(double distance) const override { - double hav_diff = sqrt(distance); - // distance in kilometers: - double d = 2 * asin(hav_diff) * earth_mean_radius; - // km to rawscore: - return 1.0 / (1.0 + d); - } - double calc_with_limit(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs, - double /*limit*/) const override - { - return calc(lhs, rhs); - } -}; - class GeoDistanceFunctionFactory : public DistanceFunctionFactory { public: GeoDistanceFunctionFactory() = default; diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp index f4f6842715f..5b6528ef403 100644 --- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp @@ -30,29 +30,6 @@ struct CalcHamming { } -double -HammingDistance::calc(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs) const -{ - constexpr auto expected = vespalib::eval::CellType::INT8; - if (__builtin_expect((lhs.type == expected && rhs.type == expected), true)) { - size_t sz = lhs.size; - assert(sz == rhs.size); - return (double) vespalib::binary_hamming_distance(lhs.data, rhs.data, sz); - } else { - return typify_invoke<2,TypifyCellType,CalcHamming>(lhs.type, rhs.type, lhs, rhs); - } -} - -double -HammingDistance::calc_with_limit(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs, - double) const -{ - // consider optimizing: - return calc(lhs, rhs); -} - using vespalib::eval::Int8Float; template<typename FloatType> diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h index 3022e0da503..47b022afc13 100644 --- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h @@ -16,20 +16,6 @@ namespace search::tensor { * or (for int8 cells, aka binary data only) * "number of bits that are different" */ -class HammingDistance final : public DistanceFunction { -public: - HammingDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {} - double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override; - double convert_threshold(double threshold) const override { - return threshold; - } - double to_rawscore(double distance) const override { - double score = 1.0 / (1.0 + distance); - return score; - } - double calc_with_limit(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs, double) const override; -}; - template <typename FloatType> class HammingDistanceFunctionFactory : public DistanceFunctionFactory { public: diff --git a/searchlib/src/vespa/searchlib/tensor/inner_product_distance.cpp b/searchlib/src/vespa/searchlib/tensor/inner_product_distance.cpp deleted file mode 100644 index 2e8c970ba91..00000000000 --- a/searchlib/src/vespa/searchlib/tensor/inner_product_distance.cpp +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "inner_product_distance.h" - -using vespalib::typify_invoke; -using vespalib::eval::TypifyCellType; - -namespace search::tensor { - -namespace { - -struct CalcInnerProduct { - template <typename LCT, typename RCT> - static double invoke(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs) - { - auto lhs_vector = lhs.unsafe_typify<LCT>(); - auto rhs_vector = rhs.unsafe_typify<RCT>(); - size_t sz = lhs_vector.size(); - assert(sz == rhs_vector.size()); - double dot_product = 0.0; - for (size_t i = 0; i < sz; ++i) { - double a = lhs_vector[i]; - double b = rhs_vector[i]; - dot_product += a*b; - } - double score = 1.0 - dot_product; // in range [0,2] - return std::max(0.0, score); - } -}; - -} - -double -InnerProductDistance::calc(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs) const -{ - return typify_invoke<2,TypifyCellType,CalcInnerProduct>(lhs.type, rhs.type, lhs, rhs); -} - -template class InnerProductDistanceHW<float>; -template class InnerProductDistanceHW<double>; - -} diff --git a/searchlib/src/vespa/searchlib/tensor/inner_product_distance.h b/searchlib/src/vespa/searchlib/tensor/inner_product_distance.h deleted file mode 100644 index 135bb186fd4..00000000000 --- a/searchlib/src/vespa/searchlib/tensor/inner_product_distance.h +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include "distance_function.h" -#include <vespa/eval/eval/typed_cells.h> -#include <vespa/vespalib/hwaccelrated/iaccelrated.h> -#include <cmath> - -namespace search::tensor { - -/** - * Calculates inner-product "distance" between vectors with assumed norm 1. - * Should give same ordering as Angular distance, but is less expensive. - */ -class InnerProductDistance : public DistanceFunction { -public: - InnerProductDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {} - double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override; - double convert_threshold(double threshold) const override { - return threshold; - } - double to_rawscore(double distance) const override { - double score = 1.0 / (1.0 + distance); - return score; - } - double calc_with_limit(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs, - double /*limit*/) const override - { - return calc(lhs, rhs); - } -}; - -/** - * Calculates inner-product "distance" between vectors with assumed norm 1. - * Should give same ordering as Angular distance, but is less expensive. - * Will use instruction optimal for the cpu it is running on - * when both vectors have the expected cell type. - */ -template <typename FloatType> -class InnerProductDistanceHW : public InnerProductDistance { -public: - InnerProductDistanceHW() - : InnerProductDistance(vespalib::eval::get_cell_type<FloatType>()), - _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()) - { - assert(expected_cell_type() == vespalib::eval::get_cell_type<FloatType>()); - } - double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override { - constexpr vespalib::eval::CellType expected = vespalib::eval::get_cell_type<FloatType>(); - assert(lhs.type == expected && rhs.type == expected); - auto lhs_vector = lhs.typify<FloatType>(); - auto rhs_vector = rhs.typify<FloatType>(); - size_t sz = lhs_vector.size(); - assert(sz == rhs_vector.size()); - double score = 1.0 - _computer.dotProduct(lhs_vector.data(), rhs_vector.data(), sz); - return std::max(0.0, score); - } -private: - const vespalib::hwaccelrated::IAccelrated & _computer; -}; - -} diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h index be1699c36ad..b91efbddb3e 100644 --- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h @@ -11,7 +11,7 @@ namespace search::tensor { /** - * Calculates inner-product "distance" between vectors with assumed norm 1. + * Calculates inner-product "distance" between vectors assuming a common norm. * Should give same ordering as Angular distance, but is less expensive. */ template <typename FloatType> diff --git a/streamingvisitors/src/tests/nearest_neighbor_field_searcher/nearest_neighbor_field_searcher_test.cpp b/streamingvisitors/src/tests/nearest_neighbor_field_searcher/nearest_neighbor_field_searcher_test.cpp index b64d477fd4c..2cd9017653f 100644 --- a/streamingvisitors/src/tests/nearest_neighbor_field_searcher/nearest_neighbor_field_searcher_test.cpp +++ b/streamingvisitors/src/tests/nearest_neighbor_field_searcher/nearest_neighbor_field_searcher_test.cpp @@ -57,7 +57,6 @@ public: vsm::test::MockFieldSearcherEnv env; ValueType tensor_type; TensorDataType data_type; - SquaredEuclideanDistance dist_func; vsm::FieldIdT field_id; NearestNeighborFieldSearcher searcher; MockQuery query; @@ -66,7 +65,6 @@ public: : env(), tensor_type(ValueType::from_spec("tensor(x[2])")), data_type(tensor_type), - dist_func(CellType::DOUBLE), field_id(2), searcher(field_id, DistanceMetric::Euclidean), query() @@ -97,7 +95,7 @@ public: expect_match(exp_square_distance, node); } void expect_match(double exp_square_distance, const NearestNeighborQueryNode& node) { - double exp_raw_score = dist_func.to_rawscore(exp_square_distance); + double exp_raw_score = 1.0 / (1.0 + std::sqrt(exp_square_distance)); EXPECT_TRUE(node.evaluate()); EXPECT_DOUBLE_EQ(exp_square_distance, node.get_distance().value()); EXPECT_DOUBLE_EQ(exp_raw_score, node.get_raw_score().value()); diff --git a/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp index db4ee12438e..d4aee8ad652 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/nearest_neighbor_field_searcher.cpp @@ -21,7 +21,6 @@ using search::attribute::Config; using search::fef::QueryValue; using search::tensor::DistanceCalculator; using search::tensor::TensorExtAttribute; -using search::tensor::make_distance_function; using vespalib::eval::ValueType; namespace { |