From 581b6400479b5353bd78255804b486d91568c476 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Wed, 17 Apr 2024 06:13:03 +0000 Subject: Transfer TypedCells as value param as that is more efficient than reference for 16 byte structs. --- .../src/vespa/searchlib/tensor/angular_distance.cpp | 19 ++++++++++--------- .../src/vespa/searchlib/tensor/angular_distance.h | 9 ++------- .../vespa/searchlib/tensor/bound_distance_function.h | 14 +++++--------- .../src/vespa/searchlib/tensor/distance_calculator.h | 11 ++++++----- .../src/vespa/searchlib/tensor/distance_function.h | 13 ++++--------- .../searchlib/tensor/distance_function_factory.cpp | 10 +--------- .../searchlib/tensor/distance_function_factory.h | 10 +++++----- .../vespa/searchlib/tensor/euclidean_distance.cpp | 20 +++++++++++--------- .../src/vespa/searchlib/tensor/euclidean_distance.h | 10 +++------- .../vespa/searchlib/tensor/geo_degrees_distance.cpp | 15 ++++++++------- .../vespa/searchlib/tensor/geo_degrees_distance.h | 9 ++------- .../src/vespa/searchlib/tensor/hamming_distance.cpp | 18 +++++++++--------- .../src/vespa/searchlib/tensor/hamming_distance.h | 8 ++------ .../searchlib/tensor/mips_distance_transform.cpp | 19 +++++++++---------- .../vespa/searchlib/tensor/mips_distance_transform.h | 11 +++++------ .../tensor/prenormalized_angular_distance.cpp | 15 ++++++++------- .../tensor/prenormalized_angular_distance.h | 8 ++------ 17 files changed, 92 insertions(+), 127 deletions(-) (limited to 'searchlib') diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp index 14953011e22..9d08bedf078 100644 --- a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp @@ -2,7 +2,9 @@ #include "angular_distance.h" #include "temporary_vector_store.h" +#include #include +#include using vespalib::typify_invoke; using vespalib::eval::TypifyCellType; @@ -14,8 +16,7 @@ 
namespace { struct CalcAngular { template - static double invoke(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs) + static double invoke(TypedCells lhs, TypedCells rhs) { auto lhs_vector = lhs.unsafe_typify(); auto rhs_vector = rhs.unsafe_typify(); @@ -50,7 +51,7 @@ private: const vespalib::ConstArrayRef _lhs; double _lhs_norm_sq; public: - BoundAngularDistance(const vespalib::eval::TypedCells& lhs) + explicit BoundAngularDistance(TypedCells lhs) : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()), _tmpSpace(lhs.size), _lhs(_tmpSpace.storeLhs(lhs)) @@ -58,7 +59,7 @@ public: auto a = _lhs.data(); _lhs_norm_sq = _computer.dotProduct(a, a, lhs.size); } - double calc(const vespalib::eval::TypedCells& rhs) const override { + double calc(TypedCells rhs) const noexcept override { size_t sz = _lhs.size(); vespalib::ConstArrayRef rhs_vector = _tmpSpace.convertRhs(rhs); assert(sz == rhs_vector.size()); @@ -72,7 +73,7 @@ public: double distance = 1.0 - cosine_similarity; // in range [0,2] return distance; } - double convert_threshold(double threshold) const override { + double convert_threshold(double threshold) const noexcept override { if (threshold < 0.0) { return 0.0; } @@ -82,7 +83,7 @@ public: double cosine_similarity = cos(threshold); return 1.0 - cosine_similarity; } - double to_rawscore(double distance) const override { + double to_rawscore(double distance) const noexcept override { double cosine_similarity = 1.0 - distance; // should be in the range [-1,1] but roundoff may cause problems: cosine_similarity = std::min(1.0, cosine_similarity); @@ -91,7 +92,7 @@ public: double score = 1.0 / (1.0 + angle_distance); return score; } - double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override { + double calc_with_limit(TypedCells rhs, double) const noexcept override { return calc(rhs); } }; @@ -101,14 +102,14 @@ template class BoundAngularDistance; template BoundDistanceFunction::UP 
-AngularDistanceFunctionFactory::for_query_vector(const vespalib::eval::TypedCells& lhs) { +AngularDistanceFunctionFactory::for_query_vector(TypedCells lhs) { using DFT = BoundAngularDistance; return std::make_unique(lhs); } template BoundDistanceFunction::UP -AngularDistanceFunctionFactory::for_insertion_vector(const vespalib::eval::TypedCells& lhs) { +AngularDistanceFunctionFactory::for_insertion_vector(TypedCells lhs) { using DFT = BoundAngularDistance; return std::make_unique(lhs); } diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.h b/searchlib/src/vespa/searchlib/tensor/angular_distance.h index f5e8589fe6a..5e0a060e060 100644 --- a/searchlib/src/vespa/searchlib/tensor/angular_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.h @@ -2,12 +2,7 @@ #pragma once -#include "distance_function.h" -#include "bound_distance_function.h" #include "distance_function_factory.h" -#include -#include -#include namespace search::tensor { @@ -20,8 +15,8 @@ template class AngularDistanceFunctionFactory : public DistanceFunctionFactory { public: AngularDistanceFunctionFactory() = default; - BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override; - BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override; + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h b/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h index 2865f5f55e6..a9d0c880625 100644 --- a/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h +++ b/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h @@ -2,13 +2,8 @@ #pragma once -#include -#include -#include -#include #include "distance_function.h" - -namespace vespalib::eval { struct TypedCells; } +#include namespace search::tensor { @@ -22,16 +17,17 @@ 
namespace search::tensor { class BoundDistanceFunction : public DistanceConverter { public: using UP = std::unique_ptr; + using TypedCells = vespalib::eval::TypedCells; - BoundDistanceFunction() = default; + BoundDistanceFunction() noexcept = default; ~BoundDistanceFunction() override = default; // calculate internal distance (comparable) - virtual double calc(const vespalib::eval::TypedCells& rhs) const = 0; + virtual double calc(TypedCells rhs) const noexcept = 0; // calculate internal distance, early return allowed if > limit - virtual double calc_with_limit(const vespalib::eval::TypedCells& rhs, double limit) const = 0; + virtual double calc_with_limit(TypedCells rhs, double limit) const noexcept = 0; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/distance_calculator.h b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h index 836d9e9d287..9dbd12650cb 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_calculator.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h @@ -5,6 +5,7 @@ #include "distance_function_factory.h" #include "i_tensor_attribute.h" #include "vector_bundle.h" +#include #include namespace vespalib::eval { struct Value; } @@ -32,7 +33,7 @@ public: ~DistanceCalculator(); const tensor::ITensorAttribute& attribute_tensor() const { return _attr_tensor; } - const vespalib::eval::Value& query_tensor() const { + const vespalib::eval::Value& query_tensor() const noexcept{ assert(_query_tensor != nullptr); return *_query_tensor; } @@ -40,7 +41,7 @@ public: bool has_single_subspace() const noexcept { return _attr_tensor.getTensorType().is_dense(); } template - double calc_raw_score(uint32_t docid) const { + double calc_raw_score(uint32_t docid) const noexcept { if (has_single_subspace) { auto cells = _attr_tensor.get_vector(docid, 0); double min_rawscore = _dist_fun->min_rawscore(); @@ -62,7 +63,7 @@ public: } template - double calc_with_limit(uint32_t docid, double limit) const { + double calc_with_limit(uint32_t 
docid, double limit) const noexcept { if (has_single_subspace) { auto cells = _attr_tensor.get_vector(docid, 0); if (cells.size == 0) [[unlikely]] { @@ -80,7 +81,7 @@ public: } } - void calc_closest_subspace(VectorBundle vectors, std::optional& closest_subspace, double& best_distance) { + void calc_closest_subspace(VectorBundle vectors, std::optional& closest_subspace, double& best_distance) noexcept { for (uint32_t i = 0; i < vectors.subspaces(); ++i) { double distance = _dist_fun->calc(vectors.cells(i)); if (!closest_subspace.has_value() || distance < best_distance) { @@ -90,7 +91,7 @@ public: } } - std::optional calc_closest_subspace(VectorBundle vectors) { + std::optional calc_closest_subspace(VectorBundle vectors) noexcept { double best_distance = 0.0; std::optional closest_subspace; calc_closest_subspace(vectors, closest_subspace, best_distance); diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function.h b/searchlib/src/vespa/searchlib/tensor/distance_function.h index c2e8305038c..9a2db8dfac0 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_function.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_function.h @@ -2,11 +2,6 @@ #pragma once -#include -#include - -namespace vespalib::eval { struct TypedCells; } - namespace search::tensor { class DistanceConverter { @@ -16,25 +11,25 @@ public: /** * Convert threshold (external distance units) to internal units. */ - virtual double convert_threshold(double threshold) const = 0; + virtual double convert_threshold(double threshold) const noexcept = 0; /** * Convert internal distance to rawscore (also used as closeness). */ - virtual double to_rawscore(double distance) const = 0; + virtual double to_rawscore(double distance) const noexcept = 0; /** * Convert rawscore to external distance. * Override this when the rawscore is NOT defined as (1.0 / (1.0 + external_distance)). 
*/ - virtual double to_distance(double rawscore) const { + virtual double to_distance(double rawscore) const noexcept { return (1.0 / rawscore) - 1.0; } /** * The minimum rawscore (also used as closeness) that this distance function can return. */ - virtual double min_rawscore() const { + virtual double min_rawscore() const noexcept { return 0.0; } }; diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp index 4749a8549a6..ed08df5866e 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp +++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp @@ -3,22 +3,14 @@ #include "distance_function_factory.h" #include "distance_functions.h" #include "mips_distance_transform.h" -#include -#include -#include -#include - -LOG_SETUP(".searchlib.tensor.distance_function_factory"); using search::attribute::DistanceMetric; using vespalib::eval::CellType; -using vespalib::eval::ValueType; namespace search::tensor { std::unique_ptr -make_distance_function_factory(search::attribute::DistanceMetric variant, - vespalib::eval::CellType cell_type) +make_distance_function_factory(DistanceMetric variant, CellType cell_type) { switch (variant) { case DistanceMetric::Angular: diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h index 829ed7fae13..356366d6a77 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h @@ -4,7 +4,6 @@ #include "distance_function.h" #include "bound_distance_function.h" -#include #include namespace search::tensor { @@ -15,10 +14,11 @@ namespace search::tensor { * for one particular vector in the distance function object. 
*/ struct DistanceFunctionFactory { - DistanceFunctionFactory() = default; - virtual ~DistanceFunctionFactory() {} - virtual BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) = 0; - virtual BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) = 0; + using TypedCells = vespalib::eval::TypedCells; + DistanceFunctionFactory() noexcept = default; + virtual ~DistanceFunctionFactory() = default; + virtual BoundDistanceFunction::UP for_query_vector(TypedCells lhs) = 0; + virtual BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) = 0; using UP = std::unique_ptr; }; diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp index 3efc8c3a5ea..a0b55795055 100644 --- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp @@ -2,9 +2,12 @@ #include "euclidean_distance.h" #include "temporary_vector_store.h" +#include +#include using vespalib::typify_invoke; using vespalib::eval::TypifyCellType; +using vespalib::eval::TypedCells; namespace search::tensor { @@ -12,8 +15,7 @@ namespace { struct CalcEuclidean { template - static double invoke(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs) + static double invoke(TypedCells lhs, TypedCells rhs) { auto lhs_vector = lhs.unsafe_typify(); auto rhs_vector = rhs.unsafe_typify(); @@ -44,12 +46,12 @@ private: static const float *cast(const float * p) { return p; } static const int8_t *cast(const Int8Float * p) { return reinterpret_cast(p); } public: - BoundEuclideanDistance(const vespalib::eval::TypedCells& lhs) + explicit BoundEuclideanDistance(TypedCells lhs) : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()), _tmpSpace(lhs.size), _lhs_vector(_tmpSpace.storeLhs(lhs)) {} - double calc(const vespalib::eval::TypedCells& rhs) const override { + double calc(TypedCells rhs) 
const noexcept override { size_t sz = _lhs_vector.size(); vespalib::ConstArrayRef rhs_vector = _tmpSpace.convertRhs(rhs); assert(sz == rhs_vector.size()); @@ -57,15 +59,15 @@ public: auto b = rhs_vector.data(); return _computer.squaredEuclideanDistance(cast(a), cast(b), sz); } - double convert_threshold(double threshold) const override { + double convert_threshold(double threshold) const noexcept override { return threshold*threshold; } - double to_rawscore(double distance) const override { + double to_rawscore(double distance) const noexcept override { double d = sqrt(distance); double score = 1.0 / (1.0 + d); return score; } - double calc_with_limit(const vespalib::eval::TypedCells& rhs, double limit) const override { + double calc_with_limit(TypedCells rhs, double limit) const noexcept override { vespalib::ConstArrayRef rhs_vector = rhs.typify(); double sum = 0.0; size_t sz = _lhs_vector.size(); @@ -85,14 +87,14 @@ template class BoundEuclideanDistance; template BoundDistanceFunction::UP -EuclideanDistanceFunctionFactory::for_query_vector(const vespalib::eval::TypedCells& lhs) { +EuclideanDistanceFunctionFactory::for_query_vector(TypedCells lhs) { using DFT = BoundEuclideanDistance; return std::make_unique(lhs); } template BoundDistanceFunction::UP -EuclideanDistanceFunctionFactory::for_insertion_vector(const vespalib::eval::TypedCells& lhs) { +EuclideanDistanceFunctionFactory::for_insertion_vector(TypedCells lhs) { using DFT = BoundEuclideanDistance; return std::make_unique(lhs); } diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h index 42097f8b39b..8c39a12bf86 100644 --- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h @@ -2,11 +2,7 @@ #pragma once -#include "distance_function.h" #include "distance_function_factory.h" -#include -#include -#include namespace search::tensor { @@ -18,9 +14,9 @@ namespace 
search::tensor { template class EuclideanDistanceFunctionFactory : public DistanceFunctionFactory { public: - EuclideanDistanceFunctionFactory() = default; - BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override; - BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override; + EuclideanDistanceFunctionFactory() noexcept = default; + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp index 7b6c40c643e..99da4b65255 100644 --- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp @@ -3,6 +3,7 @@ #include "geo_degrees_distance.h" #include "temporary_vector_store.h" #include +#include using vespalib::typify_invoke; using vespalib::eval::TypifyCellType; @@ -31,11 +32,11 @@ public: return s*s; } - BoundGeoDistance(const vespalib::eval::TypedCells& lhs) + explicit BoundGeoDistance(TypedCells lhs) : _tmpSpace(lhs.size), _lh_vector(_tmpSpace.storeLhs(lhs)) {} - double calc(const vespalib::eval::TypedCells& rhs) const override { + double calc(TypedCells rhs) const noexcept override { vespalib::ConstArrayRef rhs_vector = _tmpSpace.convertRhs(rhs); assert(2 == _lh_vector.size()); assert(2 == rhs_vector.size()); @@ -56,7 +57,7 @@ public: double hav_central_angle = hav_lat + cos(lat_A)*cos(lat_B)*hav_lon; return hav_central_angle; } - double convert_threshold(double threshold) const override { + double convert_threshold(double threshold) const noexcept override { if (threshold < 0.0) { return 0.0; } @@ -68,25 +69,25 @@ public: double rt_hav = sin(half_angle); return rt_hav * rt_hav; } - double to_rawscore(double distance) const override { + double to_rawscore(double distance) const noexcept override 
{ double hav_diff = sqrt(distance); // distance in kilometers: double d = 2 * asin(hav_diff) * earth_mean_radius; // km to rawscore: return 1.0 / (1.0 + d); } - double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override { + double calc_with_limit(TypedCells rhs, double) const noexcept override { return calc(rhs); } }; BoundDistanceFunction::UP -GeoDistanceFunctionFactory::for_query_vector(const vespalib::eval::TypedCells& lhs) { +GeoDistanceFunctionFactory::for_query_vector(TypedCells lhs) { return std::make_unique(lhs); } BoundDistanceFunction::UP -GeoDistanceFunctionFactory::for_insertion_vector(const vespalib::eval::TypedCells& lhs) { +GeoDistanceFunctionFactory::for_insertion_vector(TypedCells lhs) { return std::make_unique(lhs); } diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h index f1af976b91f..1464898421b 100644 --- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h @@ -2,12 +2,7 @@ #pragma once -#include "distance_function.h" #include "distance_function_factory.h" -#include -#include -#include -#include namespace search::tensor { @@ -19,8 +14,8 @@ namespace search::tensor { class GeoDistanceFunctionFactory : public DistanceFunctionFactory { public: GeoDistanceFunctionFactory() = default; - BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override; - BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override; + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp index 659e2cae372..2358733f1da 100644 --- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp +++ 
b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp @@ -6,6 +6,7 @@ using vespalib::typify_invoke; using vespalib::eval::TypifyCellType; +using vespalib::eval::TypedCells; namespace search::tensor { @@ -13,8 +14,7 @@ namespace { struct CalcHamming { template - static double invoke(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs) + static double invoke(TypedCells lhs, TypedCells rhs) { auto lhs_vector = lhs.unsafe_typify(); auto rhs_vector = rhs.unsafe_typify(); @@ -38,11 +38,11 @@ private: mutable TemporaryVectorStore _tmpSpace; const vespalib::ConstArrayRef _lhs_vector; public: - explicit BoundHammingDistance(const vespalib::eval::TypedCells& lhs) + explicit BoundHammingDistance(TypedCells lhs) : _tmpSpace(lhs.size), _lhs_vector(_tmpSpace.storeLhs(lhs)) {} - double calc(const vespalib::eval::TypedCells& rhs) const override { + double calc(TypedCells rhs) const noexcept override { size_t sz = _lhs_vector.size(); vespalib::ConstArrayRef rhs_vector = _tmpSpace.convertRhs(rhs); assert(sz == rhs_vector.size()); @@ -58,13 +58,13 @@ public: return (double)sum; } } - double convert_threshold(double threshold) const override { + double convert_threshold(double threshold) const noexcept override { return threshold; } - double to_rawscore(double distance) const override { + double to_rawscore(double distance) const noexcept override { return 1.0 / (1.0 + distance); } - double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override { + double calc_with_limit(TypedCells rhs, double) const noexcept override { // consider optimizing: return calc(rhs); } @@ -72,14 +72,14 @@ public: template BoundDistanceFunction::UP -HammingDistanceFunctionFactory::for_query_vector(const vespalib::eval::TypedCells& lhs) { +HammingDistanceFunctionFactory::for_query_vector(TypedCells lhs) { using DFT = BoundHammingDistance; return std::make_unique(lhs); } template BoundDistanceFunction::UP 
-HammingDistanceFunctionFactory::for_insertion_vector(const vespalib::eval::TypedCells& lhs) { +HammingDistanceFunctionFactory::for_insertion_vector(TypedCells lhs) { using DFT = BoundHammingDistance; return std::make_unique(lhs); } diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h index 32e2be99214..6e7f96e1e2f 100644 --- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h @@ -2,11 +2,7 @@ #pragma once -#include "distance_function.h" #include "distance_function_factory.h" -#include -#include -#include namespace search::tensor { @@ -20,8 +16,8 @@ template class HammingDistanceFunctionFactory : public DistanceFunctionFactory { public: HammingDistanceFunctionFactory() = default; - BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override; - BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override; + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp index 3645c511b01..c3a9720a1b3 100644 --- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp +++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp @@ -4,7 +4,6 @@ #include "temporary_vector_store.h" #include #include -#include #include using vespalib::eval::Int8Float; @@ -24,7 +23,7 @@ class BoundMipsDistanceFunction : public BoundDistanceFunction { static const float *cast(const float * p) { return p; } static const int8_t *cast(const Int8Float * p) { return reinterpret_cast(p); } public: - BoundMipsDistanceFunction(const vespalib::eval::TypedCells& lhs, MaximumSquaredNormStore& sq_norm_store) + BoundMipsDistanceFunction(TypedCells lhs, 
MaximumSquaredNormStore& sq_norm_store) : BoundDistanceFunction(), _tmpSpace(lhs.size), _lhs_vector(_tmpSpace.storeLhs(lhs)), @@ -44,7 +43,7 @@ public: return _lhs_extra_dim; } - double calc(const vespalib::eval::TypedCells &rhs) const override { + double calc(TypedCells rhs) const noexcept override { vespalib::ConstArrayRef rhs_vector = _tmpSpace.convertRhs(rhs); const FloatType * a = _lhs_vector.data(); const FloatType * b = rhs_vector.data(); @@ -58,32 +57,32 @@ public: } return -dp; } - double convert_threshold(double threshold) const override { + double convert_threshold(double threshold) const noexcept override { return threshold; } - double to_rawscore(double distance) const override { + double to_rawscore(double distance) const noexcept override { return -distance; } - double to_distance(double rawscore) const override { + double to_distance(double rawscore) const noexcept override { return -rawscore; } - double min_rawscore() const override { + double min_rawscore() const noexcept override { return std::numeric_limits::lowest(); } - double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override { + double calc_with_limit(TypedCells rhs, double) const noexcept override { return calc(rhs); } }; template BoundDistanceFunction::UP -MipsDistanceFunctionFactory::for_query_vector(const vespalib::eval::TypedCells& lhs) { +MipsDistanceFunctionFactory::for_query_vector(TypedCells lhs) { return std::make_unique>(lhs, *_sq_norm_store); } template BoundDistanceFunction::UP -MipsDistanceFunctionFactory::for_insertion_vector(const vespalib::eval::TypedCells& lhs) { +MipsDistanceFunctionFactory::for_insertion_vector(TypedCells lhs) { return std::make_unique>(lhs, *_sq_norm_store); }; diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h index 63b2a83c1b5..67a6eb58de0 100644 --- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h +++ 
b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h @@ -45,7 +45,7 @@ public: : _sq_norm_store(std::make_shared()) { } - ~MipsDistanceFunctionFactoryBase() = default; + ~MipsDistanceFunctionFactoryBase() override = default; MaximumSquaredNormStore& get_max_squared_norm_store() noexcept { return *_sq_norm_store; } }; @@ -59,12 +59,11 @@ public: template class MipsDistanceFunctionFactory : public MipsDistanceFunctionFactoryBase { public: - MipsDistanceFunctionFactory() : MipsDistanceFunctionFactoryBase() { } - ~MipsDistanceFunctionFactory() = default; + MipsDistanceFunctionFactory() noexcept = default; + ~MipsDistanceFunctionFactory() override = default; - BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override; - - BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override; + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp index 931fd3edb06..c94eb6d3cad 100644 --- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp @@ -2,6 +2,7 @@ #include "prenormalized_angular_distance.h" #include "temporary_vector_store.h" +#include using vespalib::typify_invoke; using vespalib::eval::TypifyCellType; @@ -16,7 +17,7 @@ private: const vespalib::ConstArrayRef _lhs; double _lhs_norm_sq; public: - BoundPrenormalizedAngularDistance(const vespalib::eval::TypedCells& lhs) + explicit BoundPrenormalizedAngularDistance(TypedCells lhs) : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()), _tmpSpace(lhs.size), _lhs(_tmpSpace.storeLhs(lhs)) @@ -27,7 +28,7 @@ public: _lhs_norm_sq = 1.0; } } - double calc(const vespalib::eval::TypedCells& rhs) 
const override { + double calc(TypedCells rhs) const noexcept override { size_t sz = _lhs.size(); vespalib::ConstArrayRef rhs_vector = _tmpSpace.convertRhs(rhs); assert(sz == rhs_vector.size()); @@ -37,13 +38,13 @@ public: double distance = _lhs_norm_sq - dot_product; return distance; } - double convert_threshold(double threshold) const override { + double convert_threshold(double threshold) const noexcept override { double cosine_similarity = 1.0 - threshold; double dot_product = cosine_similarity * _lhs_norm_sq; double distance = _lhs_norm_sq - dot_product; return distance; } - double to_rawscore(double distance) const override { + double to_rawscore(double distance) const noexcept override { double dot_product = _lhs_norm_sq - distance; double cosine_similarity = dot_product / _lhs_norm_sq; // should be in in range [-1,1] but roundoff may cause problems: @@ -53,7 +54,7 @@ public: double score = 1.0 / (1.0 + cosine_distance); return score; } - double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override { + double calc_with_limit(TypedCells rhs, double) const noexcept override { return calc(rhs); } }; @@ -63,14 +64,14 @@ template class BoundPrenormalizedAngularDistance; template BoundDistanceFunction::UP -PrenormalizedAngularDistanceFunctionFactory::for_query_vector(const vespalib::eval::TypedCells& lhs) { +PrenormalizedAngularDistanceFunctionFactory::for_query_vector(TypedCells lhs) { using DFT = BoundPrenormalizedAngularDistance; return std::make_unique(lhs); } template BoundDistanceFunction::UP -PrenormalizedAngularDistanceFunctionFactory::for_insertion_vector(const vespalib::eval::TypedCells& lhs) { +PrenormalizedAngularDistanceFunctionFactory::for_insertion_vector(TypedCells lhs) { using DFT = BoundPrenormalizedAngularDistance; return std::make_unique(lhs); } diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h index 
0f647547e08..7e3a8c2c676 100644 --- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h @@ -2,11 +2,7 @@ #pragma once -#include "distance_function.h" -#include "bound_distance_function.h" #include "distance_function_factory.h" -#include -#include namespace search::tensor { @@ -18,8 +14,8 @@ template class PrenormalizedAngularDistanceFunctionFactory : public DistanceFunctionFactory { public: PrenormalizedAngularDistanceFunctionFactory() = default; - BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override; - BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override; + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override; }; } -- cgit v1.2.3 From 907b967f5baf097a3323d9ef69e787d2d68ede25 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Wed, 17 Apr 2024 07:45:46 +0000 Subject: Add final to allow better code generation of calc_with_limit. 
--- .../vespa/searchlib/tensor/angular_distance.cpp | 33 +--------------------- .../vespa/searchlib/tensor/euclidean_distance.cpp | 23 +-------------- .../searchlib/tensor/geo_degrees_distance.cpp | 4 +-- .../vespa/searchlib/tensor/hamming_distance.cpp | 27 ++---------------- .../searchlib/tensor/mips_distance_transform.cpp | 2 +- .../tensor/prenormalized_angular_distance.cpp | 2 +- .../searchlib/tensor/temporary_vector_store.cpp | 12 +++----- .../searchlib/tensor/temporary_vector_store.h | 12 ++++---- 8 files changed, 18 insertions(+), 97 deletions(-) (limited to 'searchlib') diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp index 9d08bedf078..07e490f4575 100644 --- a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp @@ -12,39 +12,8 @@ using vespalib::eval::TypedCells; namespace search::tensor { -namespace { - -struct CalcAngular { - template - static double invoke(TypedCells lhs, TypedCells rhs) - { - auto lhs_vector = lhs.unsafe_typify(); - auto rhs_vector = rhs.unsafe_typify(); - - size_t sz = lhs_vector.size(); - assert(sz == rhs_vector.size()); - double a_norm_sq = 0.0; - double b_norm_sq = 0.0; - double dot_product = 0.0; - for (size_t i = 0; i < sz; ++i) { - double a = lhs_vector[i]; - double b = rhs_vector[i]; - a_norm_sq += a*a; - b_norm_sq += b*b; - dot_product += a*b; - } - double squared_norms = a_norm_sq * b_norm_sq; - double div = (squared_norms > 0) ? 
sqrt(squared_norms) : 1.0; - double cosine_similarity = dot_product / div; - double distance = 1.0 - cosine_similarity; // in range [0,2] - return std::max(0.0, distance); - } -}; - -} - template -class BoundAngularDistance : public BoundDistanceFunction { +class BoundAngularDistance final : public BoundDistanceFunction { private: const vespalib::hwaccelrated::IAccelrated & _computer; mutable TemporaryVectorStore _tmpSpace; diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp index a0b55795055..6a730132ad1 100644 --- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp @@ -11,32 +11,11 @@ using vespalib::eval::TypedCells; namespace search::tensor { -namespace { - -struct CalcEuclidean { - template - static double invoke(TypedCells lhs, TypedCells rhs) - { - auto lhs_vector = lhs.unsafe_typify(); - auto rhs_vector = rhs.unsafe_typify(); - double sum = 0.0; - size_t sz = lhs_vector.size(); - assert(sz == rhs_vector.size()); - for (size_t i = 0; i < sz; ++i) { - double diff = lhs_vector[i] - rhs_vector[i]; - sum += diff*diff; - } - return sum; - } -}; - -} - using vespalib::eval::Int8Float; using vespalib::BFloat16; template -class BoundEuclideanDistance : public BoundDistanceFunction { +class BoundEuclideanDistance final : public BoundDistanceFunction { using FloatType = std::conditional_t::value,float,AttributeCellType>; private: const vespalib::hwaccelrated::IAccelrated & _computer; diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp index 99da4b65255..f5484f40271 100644 --- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp @@ -16,7 +16,7 @@ namespace search::tensor { * Uses the haversine formula directly from: * 
https://en.wikipedia.org/wiki/Haversine_formula **/ -class BoundGeoDistance : public BoundDistanceFunction { +class BoundGeoDistance final : public BoundDistanceFunction { private: mutable TemporaryVectorStore _tmpSpace; const vespalib::ConstArrayRef _lh_vector; @@ -27,7 +27,7 @@ public: static constexpr double degrees_to_radians = M_PI / 180.0; // haversine function: - static double haversine(double angle) { + static double haversine(double angle) noexcept { double s = sin(0.5*angle); return s*s; } diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp index 2358733f1da..779cd742b85 100644 --- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp @@ -10,30 +10,10 @@ using vespalib::eval::TypedCells; namespace search::tensor { -namespace { - -struct CalcHamming { - template - static double invoke(TypedCells lhs, TypedCells rhs) - { - auto lhs_vector = lhs.unsafe_typify(); - auto rhs_vector = rhs.unsafe_typify(); - size_t sz = lhs_vector.size(); - assert(sz == rhs_vector.size()); - size_t sum = 0; - for (size_t i = 0; i < sz; ++i) { - sum += (lhs_vector[i] == rhs_vector[i]) ? 
0 : 1; - } - return (double)sum; - } -}; - -} - using vespalib::eval::Int8Float; template -class BoundHammingDistance : public BoundDistanceFunction { +class BoundHammingDistance final : public BoundDistanceFunction { private: mutable TemporaryVectorStore _tmpSpace; const vespalib::ConstArrayRef _lhs_vector; @@ -46,10 +26,9 @@ public: size_t sz = _lhs_vector.size(); vespalib::ConstArrayRef rhs_vector = _tmpSpace.convertRhs(rhs); assert(sz == rhs_vector.size()); - auto a = _lhs_vector.data(); - auto b = rhs_vector.data(); + if constexpr (std::is_same::value) { - return (double) vespalib::binary_hamming_distance(a, b, sz); + return (double) vespalib::binary_hamming_distance(_lhs_vector.data(), rhs_vector.data(), sz); } else { size_t sum = 0; for (size_t i = 0; i < sz; ++i) { diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp index c3a9720a1b3..c42242d8dc8 100644 --- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp +++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp @@ -11,7 +11,7 @@ using vespalib::eval::Int8Float; namespace search::tensor { template -class BoundMipsDistanceFunction : public BoundDistanceFunction { +class BoundMipsDistanceFunction final : public BoundDistanceFunction { mutable TemporaryVectorStore _tmpSpace; const vespalib::ConstArrayRef _lhs_vector; const vespalib::hwaccelrated::IAccelrated & _computer; diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp index c94eb6d3cad..267f91bb4e0 100644 --- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp @@ -10,7 +10,7 @@ using vespalib::eval::TypifyCellType; namespace search::tensor { template -class BoundPrenormalizedAngularDistance : public BoundDistanceFunction { +class 
BoundPrenormalizedAngularDistance final : public BoundDistanceFunction { private: const vespalib::hwaccelrated::IAccelrated & _computer; mutable TemporaryVectorStore _tmpSpace; diff --git a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp index ff07f245de4..b1018555212 100644 --- a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp +++ b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp @@ -2,10 +2,6 @@ #include "temporary_vector_store.h" -#include - -LOG_SETUP(".searchlib.tensor.temporary_vector_store"); - using vespalib::ConstArrayRef; using vespalib::ArrayRef; using vespalib::eval::CellType; @@ -17,7 +13,7 @@ namespace { template ConstArrayRef -convert_cells(ArrayRef space, TypedCells cells) +convert_cells(ArrayRef space, TypedCells cells) noexcept { assert(cells.size == space.size()); auto old_cells = cells.typify(); @@ -32,7 +28,7 @@ convert_cells(ArrayRef space, TypedCells cells) template struct ConvertCellsSelector { - template static auto invoke(ArrayRef dst, TypedCells src) { + template static auto invoke(ArrayRef dst, TypedCells src) noexcept { return convert_cells(dst, src); } }; @@ -41,8 +37,8 @@ struct ConvertCellsSelector template ConstArrayRef -TemporaryVectorStore::internal_convert(TypedCells cells, size_t offset) { - LOG_ASSERT(cells.size * 2 == _tmpSpace.size()); +TemporaryVectorStore::internal_convert(TypedCells cells, size_t offset) noexcept { + assert(cells.size * 2 == _tmpSpace.size()); ArrayRef where(_tmpSpace.data() + offset, cells.size); using MyTypify = vespalib::eval::TypifyCellType; using MySelector = ConvertCellsSelector; diff --git a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h index ad5bdf3ed3a..3dc237c85a4 100644 --- a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h +++ b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h 
@@ -2,10 +2,7 @@ #pragma once -#include -#include #include -#include namespace search::tensor { @@ -13,14 +10,15 @@ namespace search::tensor { template class TemporaryVectorStore { private: + using TypedCells = vespalib::eval::TypedCells; std::vector _tmpSpace; - vespalib::ConstArrayRef internal_convert(vespalib::eval::TypedCells cells, size_t offset); + vespalib::ConstArrayRef internal_convert(TypedCells cells, size_t offset) noexcept; public: - TemporaryVectorStore(size_t vectorSize) : _tmpSpace(vectorSize * 2) {} - vespalib::ConstArrayRef storeLhs(vespalib::eval::TypedCells cells) { + explicit TemporaryVectorStore(size_t vectorSize) noexcept : _tmpSpace(vectorSize * 2) {} + vespalib::ConstArrayRef storeLhs(TypedCells cells) noexcept { return internal_convert(cells, 0); } - vespalib::ConstArrayRef convertRhs(vespalib::eval::TypedCells cells) { + vespalib::ConstArrayRef convertRhs(TypedCells cells) { if (vespalib::eval::get_cell_type() == cells.type) [[likely]] { return cells.unsafe_typify(); } else { -- cgit v1.2.3 From 63366582242f9b20434fbce2d9d3ea1848334a13 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Wed, 17 Apr 2024 08:01:40 +0000 Subject: Drop assert for hamming --- searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'searchlib') diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp index 779cd742b85..0be920b9c03 100644 --- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp @@ -25,11 +25,10 @@ public: double calc(TypedCells rhs) const noexcept override { size_t sz = _lhs_vector.size(); vespalib::ConstArrayRef rhs_vector = _tmpSpace.convertRhs(rhs); - assert(sz == rhs_vector.size()); - if constexpr (std::is_same::value) { return (double) vespalib::binary_hamming_distance(_lhs_vector.data(), rhs_vector.data(), sz); } else { + 
assert(sz == rhs_vector.size()); size_t sum = 0; for (size_t i = 0; i < sz; ++i) { sum += (_lhs_vector[i] == rhs_vector[i]) ? 0 : 1; -- cgit v1.2.3