diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2024-04-18 12:52:10 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-18 12:52:10 +0200 |
commit | 08c76594bc7e7182901960778044827f475d5bdc (patch) | |
tree | 0f46a7a957a70458e43244f2102ce9071f130d97 | |
parent | 871f1e9d2a1ad5f2371d1a406699ca9854f92d35 (diff) | |
parent | 63366582242f9b20434fbce2d9d3ea1848334a13 (diff) |
Merge pull request #30950 from vespa-engine/balder/typedcells-as-value-param
Balder/typedcells as value param
20 files changed, 107 insertions, 223 deletions
diff --git a/eval/src/vespa/eval/instruction/dense_hamming_distance.cpp b/eval/src/vespa/eval/instruction/dense_hamming_distance.cpp index 81f25241d3d..94f0a313f2e 100644 --- a/eval/src/vespa/eval/instruction/dense_hamming_distance.cpp +++ b/eval/src/vespa/eval/instruction/dense_hamming_distance.cpp @@ -3,7 +3,6 @@ #include "dense_hamming_distance.h" #include <vespa/eval/eval/operation.h> #include <vespa/eval/eval/value.h> -#include <vespa/eval/eval/hamming_distance.h> #include <vespa/vespalib/util/binary_hamming_distance.h> #include <vespa/log/log.h> diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp index 14953011e22..07e490f4575 100644 --- a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp @@ -2,7 +2,9 @@ #include "angular_distance.h" #include "temporary_vector_store.h" +#include <vespa/vespalib/hwaccelrated/iaccelrated.h> #include <numbers> +#include <cmath> using vespalib::typify_invoke; using vespalib::eval::TypifyCellType; @@ -10,47 +12,15 @@ using vespalib::eval::TypedCells; namespace search::tensor { -namespace { - -struct CalcAngular { - template <typename LCT, typename RCT> - static double invoke(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs) - { - auto lhs_vector = lhs.unsafe_typify<LCT>(); - auto rhs_vector = rhs.unsafe_typify<RCT>(); - - size_t sz = lhs_vector.size(); - assert(sz == rhs_vector.size()); - double a_norm_sq = 0.0; - double b_norm_sq = 0.0; - double dot_product = 0.0; - for (size_t i = 0; i < sz; ++i) { - double a = lhs_vector[i]; - double b = rhs_vector[i]; - a_norm_sq += a*a; - b_norm_sq += b*b; - dot_product += a*b; - } - double squared_norms = a_norm_sq * b_norm_sq; - double div = (squared_norms > 0) ? sqrt(squared_norms) : 1.0; - double cosine_similarity = dot_product / div; - double distance = 1.0 - cosine_similarity; // in range [0,2] - return std::max(0.0, distance); - } -}; - -} - template<typename FloatType> -class BoundAngularDistance : public BoundDistanceFunction { +class BoundAngularDistance final : public BoundDistanceFunction { private: const vespalib::hwaccelrated::IAccelrated & _computer; mutable TemporaryVectorStore<FloatType> _tmpSpace; const vespalib::ConstArrayRef<FloatType> _lhs; double _lhs_norm_sq; public: - BoundAngularDistance(const vespalib::eval::TypedCells& lhs) + explicit BoundAngularDistance(TypedCells lhs) : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()), _tmpSpace(lhs.size), _lhs(_tmpSpace.storeLhs(lhs)) @@ -58,7 +28,7 @@ public: auto a = _lhs.data(); _lhs_norm_sq = _computer.dotProduct(a, a, lhs.size); } - double calc(const vespalib::eval::TypedCells& rhs) const override { + double calc(TypedCells rhs) const noexcept override { size_t sz = _lhs.size(); vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs); assert(sz == rhs_vector.size()); @@ -72,7 +42,7 @@ public: double distance = 1.0 - cosine_similarity; // in range [0,2] return distance; } - double convert_threshold(double threshold) const override { + double convert_threshold(double threshold) const noexcept override { if (threshold < 0.0) { return 0.0; } @@ -82,7 +52,7 @@ public: double cosine_similarity = cos(threshold); return 1.0 - cosine_similarity; } - double to_rawscore(double distance) const override { + double to_rawscore(double distance) const noexcept override { double cosine_similarity = 1.0 - distance; // should be in the range [-1,1] but roundoff may cause problems: cosine_similarity = std::min(1.0, cosine_similarity); @@ -91,7 +61,7 @@ public: double score = 1.0 / (1.0 + angle_distance); return score; } - double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override { + double calc_with_limit(TypedCells rhs, double) const noexcept override { return calc(rhs); } }; @@ -101,14 +71,14 @@ template class BoundAngularDistance<double>; template <typename FloatType> BoundDistanceFunction::UP -AngularDistanceFunctionFactory<FloatType>::for_query_vector(const vespalib::eval::TypedCells& lhs) { +AngularDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) { using DFT = BoundAngularDistance<FloatType>; return std::make_unique<DFT>(lhs); } template <typename FloatType> BoundDistanceFunction::UP -AngularDistanceFunctionFactory<FloatType>::for_insertion_vector(const vespalib::eval::TypedCells& lhs) { +AngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) { using DFT = BoundAngularDistance<FloatType>; return std::make_unique<DFT>(lhs); } diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.h b/searchlib/src/vespa/searchlib/tensor/angular_distance.h index f5e8589fe6a..5e0a060e060 100644 --- a/searchlib/src/vespa/searchlib/tensor/angular_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.h @@ -2,12 +2,7 @@ #pragma once -#include "distance_function.h" -#include "bound_distance_function.h" #include "distance_function_factory.h" -#include <vespa/eval/eval/typed_cells.h> -#include <vespa/vespalib/hwaccelrated/iaccelrated.h> -#include <cmath> namespace search::tensor { @@ -20,8 +15,8 @@ template <typename FloatType> class AngularDistanceFunctionFactory : public DistanceFunctionFactory { public: AngularDistanceFunctionFactory() = default; - BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override; - BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override; + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h b/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h index 2865f5f55e6..a9d0c880625 100644 --- a/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h +++ b/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h @@ -2,13 +2,8 @@ #pragma once -#include <memory> -#include <vespa/eval/eval/cell_type.h> -#include <vespa/eval/eval/typed_cells.h> -#include <vespa/vespalib/util/arrayref.h> #include "distance_function.h" - -namespace vespalib::eval { struct TypedCells; } +#include <vespa/eval/eval/typed_cells.h> namespace search::tensor { @@ -22,16 +17,17 @@ namespace search::tensor { class BoundDistanceFunction : public DistanceConverter { public: using UP = std::unique_ptr<BoundDistanceFunction>; + using TypedCells = vespalib::eval::TypedCells; - BoundDistanceFunction() = default; + BoundDistanceFunction() noexcept = default; ~BoundDistanceFunction() override = default; // calculate internal distance (comparable) - virtual double calc(const vespalib::eval::TypedCells& rhs) const = 0; + virtual double calc(TypedCells rhs) const noexcept = 0; // calculate internal distance, early return allowed if > limit - virtual double calc_with_limit(const vespalib::eval::TypedCells& rhs, double limit) const = 0; + virtual double calc_with_limit(TypedCells rhs, double limit) const noexcept = 0; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/distance_calculator.h b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h index 836d9e9d287..9dbd12650cb 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_calculator.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h @@ -5,6 +5,7 @@ #include "distance_function_factory.h" #include "i_tensor_attribute.h" #include "vector_bundle.h" +#include <vespa/eval/eval/value_type.h> #include <optional> namespace vespalib::eval { struct Value; } @@ -32,7 +33,7 @@ public: ~DistanceCalculator(); const tensor::ITensorAttribute& attribute_tensor() const { return _attr_tensor; } - const vespalib::eval::Value& query_tensor() const { + const vespalib::eval::Value& query_tensor() const noexcept{ assert(_query_tensor != nullptr); return *_query_tensor; } @@ -40,7 +41,7 @@ public: bool has_single_subspace() const noexcept { return _attr_tensor.getTensorType().is_dense(); } template<bool has_single_subspace> - double calc_raw_score(uint32_t docid) const { + double calc_raw_score(uint32_t docid) const noexcept { if (has_single_subspace) { auto cells = _attr_tensor.get_vector(docid, 0); double min_rawscore = _dist_fun->min_rawscore(); @@ -62,7 +63,7 @@ public: } template<bool has_single_subspace> - double calc_with_limit(uint32_t docid, double limit) const { + double calc_with_limit(uint32_t docid, double limit) const noexcept { if (has_single_subspace) { auto cells = _attr_tensor.get_vector(docid, 0); if (cells.size == 0) [[unlikely]] { @@ -80,7 +81,7 @@ public: } } - void calc_closest_subspace(VectorBundle vectors, std::optional<uint32_t>& closest_subspace, double& best_distance) { + void calc_closest_subspace(VectorBundle vectors, std::optional<uint32_t>& closest_subspace, double& best_distance) noexcept { for (uint32_t i = 0; i < vectors.subspaces(); ++i) { double distance = _dist_fun->calc(vectors.cells(i)); if (!closest_subspace.has_value() || distance < best_distance) { @@ -90,7 +91,7 @@ public: } } - std::optional<uint32_t> calc_closest_subspace(VectorBundle vectors) { + std::optional<uint32_t> calc_closest_subspace(VectorBundle vectors) noexcept { double best_distance = 0.0; std::optional<uint32_t> closest_subspace; calc_closest_subspace(vectors, closest_subspace, best_distance); diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function.h b/searchlib/src/vespa/searchlib/tensor/distance_function.h index c2e8305038c..9a2db8dfac0 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_function.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_function.h @@ -2,11 +2,6 @@ #pragma once -#include <memory> -#include <vespa/eval/eval/cell_type.h> - -namespace vespalib::eval { struct TypedCells; } - namespace search::tensor { class DistanceConverter { @@ -16,25 +11,25 @@ public: /** * Convert threshold (external distance units) to internal units. */ - virtual double convert_threshold(double threshold) const = 0; + virtual double convert_threshold(double threshold) const noexcept = 0; /** * Convert internal distance to rawscore (also used as closeness). */ - virtual double to_rawscore(double distance) const = 0; + virtual double to_rawscore(double distance) const noexcept = 0; /** * Convert rawscore to external distance. * Override this when the rawscore is NOT defined as (1.0 / (1.0 + external_distance)). */ - virtual double to_distance(double rawscore) const { + virtual double to_distance(double rawscore) const noexcept { return (1.0 / rawscore) - 1.0; } /** * The minimum rawscore (also used as closeness) that this distance function can return. */ - virtual double min_rawscore() const { + virtual double min_rawscore() const noexcept { return 0.0; } }; diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp index 4749a8549a6..ed08df5866e 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp +++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp @@ -3,22 +3,14 @@ #include "distance_function_factory.h" #include "distance_functions.h" #include "mips_distance_transform.h" -#include <vespa/vespalib/util/typify.h> -#include <vespa/vespalib/util/array.h> -#include <vespa/vespalib/util/arrayref.h> -#include <vespa/log/log.h> - -LOG_SETUP(".searchlib.tensor.distance_function_factory"); using search::attribute::DistanceMetric; using vespalib::eval::CellType; -using vespalib::eval::ValueType; namespace search::tensor { std::unique_ptr<DistanceFunctionFactory> -make_distance_function_factory(search::attribute::DistanceMetric variant, - vespalib::eval::CellType cell_type) +make_distance_function_factory(DistanceMetric variant, CellType cell_type) { switch (variant) { case DistanceMetric::Angular: diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h index 829ed7fae13..356366d6a77 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.h @@ -4,7 +4,6 @@ #include "distance_function.h" #include "bound_distance_function.h" -#include <vespa/eval/eval/value_type.h> #include <vespa/searchcommon/attribute/distance_metric.h> namespace search::tensor { @@ -15,10 +14,11 @@ namespace search::tensor { * for one particular vector in the distance function object. */ struct DistanceFunctionFactory { - DistanceFunctionFactory() = default; - virtual ~DistanceFunctionFactory() {} - virtual BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) = 0; - virtual BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) = 0; + using TypedCells = vespalib::eval::TypedCells; + DistanceFunctionFactory() noexcept = default; + virtual ~DistanceFunctionFactory() = default; + virtual BoundDistanceFunction::UP for_query_vector(TypedCells lhs) = 0; + virtual BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) = 0; using UP = std::unique_ptr<DistanceFunctionFactory>; }; diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp index 3efc8c3a5ea..6a730132ad1 100644 --- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp @@ -2,39 +2,20 @@ #include "euclidean_distance.h" #include "temporary_vector_store.h" +#include <vespa/vespalib/hwaccelrated/iaccelrated.h> +#include <cmath> using vespalib::typify_invoke; using vespalib::eval::TypifyCellType; +using vespalib::eval::TypedCells; namespace search::tensor { -namespace { - -struct CalcEuclidean { - template <typename LCT, typename RCT> - static double invoke(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs) - { - auto lhs_vector = lhs.unsafe_typify<LCT>(); - auto rhs_vector = rhs.unsafe_typify<RCT>(); - double sum = 0.0; - size_t sz = lhs_vector.size(); - assert(sz == rhs_vector.size()); - for (size_t i = 0; i < sz; ++i) { - double diff = lhs_vector[i] - rhs_vector[i]; - sum += diff*diff; - } - return sum; - } -}; - -} - using vespalib::eval::Int8Float; using vespalib::BFloat16; template<typename AttributeCellType> -class BoundEuclideanDistance : public BoundDistanceFunction { +class BoundEuclideanDistance final : public BoundDistanceFunction { using FloatType = std::conditional_t<std::is_same<AttributeCellType,BFloat16>::value,float,AttributeCellType>; private: const vespalib::hwaccelrated::IAccelrated & _computer; @@ -44,12 +25,12 @@ private: static const float *cast(const float * p) { return p; } static const int8_t *cast(const Int8Float * p) { return reinterpret_cast<const int8_t *>(p); } public: - BoundEuclideanDistance(const vespalib::eval::TypedCells& lhs) + explicit BoundEuclideanDistance(TypedCells lhs) : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()), _tmpSpace(lhs.size), _lhs_vector(_tmpSpace.storeLhs(lhs)) {} - double calc(const vespalib::eval::TypedCells& rhs) const override { + double calc(TypedCells rhs) const noexcept override { size_t sz = _lhs_vector.size(); vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs); assert(sz == rhs_vector.size()); @@ -57,15 +38,15 @@ public: auto b = rhs_vector.data(); return _computer.squaredEuclideanDistance(cast(a), cast(b), sz); } - double convert_threshold(double threshold) const override { + double convert_threshold(double threshold) const noexcept override { return threshold*threshold; } - double to_rawscore(double distance) const override { + double to_rawscore(double distance) const noexcept override { double d = sqrt(distance); double score = 1.0 / (1.0 + d); return score; } - double calc_with_limit(const vespalib::eval::TypedCells& rhs, double limit) const override { + double calc_with_limit(TypedCells rhs, double limit) const noexcept override { vespalib::ConstArrayRef<AttributeCellType> rhs_vector = rhs.typify<AttributeCellType>(); double sum = 0.0; size_t sz = _lhs_vector.size(); @@ -85,14 +66,14 @@ template class BoundEuclideanDistance<double>; template <typename FloatType> BoundDistanceFunction::UP -EuclideanDistanceFunctionFactory<FloatType>::for_query_vector(const vespalib::eval::TypedCells& lhs) { +EuclideanDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) { using DFT = BoundEuclideanDistance<FloatType>; return std::make_unique<DFT>(lhs); } template <typename FloatType> BoundDistanceFunction::UP -EuclideanDistanceFunctionFactory<FloatType>::for_insertion_vector(const vespalib::eval::TypedCells& lhs) { +EuclideanDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) { using DFT = BoundEuclideanDistance<FloatType>; return std::make_unique<DFT>(lhs); } diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h index 42097f8b39b..8c39a12bf86 100644 --- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h @@ -2,11 +2,7 @@ #pragma once -#include "distance_function.h" #include "distance_function_factory.h" -#include <vespa/eval/eval/typed_cells.h> -#include <vespa/vespalib/hwaccelrated/iaccelrated.h> -#include <cmath> namespace search::tensor { @@ -18,9 +14,9 @@ namespace search::tensor { template <typename FloatType> class EuclideanDistanceFunctionFactory : public DistanceFunctionFactory { public: - EuclideanDistanceFunctionFactory() = default; - BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override; - BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override; + EuclideanDistanceFunctionFactory() noexcept = default; + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp index 7b6c40c643e..f5484f40271 100644 --- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.cpp @@ -3,6 +3,7 @@ #include "geo_degrees_distance.h" #include "temporary_vector_store.h" #include <numbers> +#include <cmath> using vespalib::typify_invoke; using vespalib::eval::TypifyCellType; @@ -15,7 +16,7 @@ namespace search::tensor { * Uses the haversine formula directly from: * https://en.wikipedia.org/wiki/Haversine_formula **/ -class BoundGeoDistance : public BoundDistanceFunction { +class BoundGeoDistance final : public BoundDistanceFunction { private: mutable TemporaryVectorStore<double> _tmpSpace; const vespalib::ConstArrayRef<double> _lh_vector; @@ -26,16 +27,16 @@ public: static constexpr double degrees_to_radians = M_PI / 180.0; // haversine function: - static double haversine(double angle) { + static double haversine(double angle) noexcept { double s = sin(0.5*angle); return s*s; } - BoundGeoDistance(const vespalib::eval::TypedCells& lhs) + explicit BoundGeoDistance(TypedCells lhs) : _tmpSpace(lhs.size), _lh_vector(_tmpSpace.storeLhs(lhs)) {} - double calc(const vespalib::eval::TypedCells& rhs) const override { + double calc(TypedCells rhs) const noexcept override { vespalib::ConstArrayRef<double> rhs_vector = _tmpSpace.convertRhs(rhs); assert(2 == _lh_vector.size()); assert(2 == rhs_vector.size()); @@ -56,7 +57,7 @@ public: double hav_central_angle = hav_lat + cos(lat_A)*cos(lat_B)*hav_lon; return hav_central_angle; } - double convert_threshold(double threshold) const override { + double convert_threshold(double threshold) const noexcept override { if (threshold < 0.0) { return 0.0; } @@ -68,25 +69,25 @@ public: double rt_hav = sin(half_angle); return rt_hav * rt_hav; } - double to_rawscore(double distance) const override { + double to_rawscore(double distance) const noexcept override { double hav_diff = sqrt(distance); // distance in kilometers: double d = 2 * asin(hav_diff) * earth_mean_radius; // km to rawscore: return 1.0 / (1.0 + d); } - double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override { + double calc_with_limit(TypedCells rhs, double) const noexcept override { return calc(rhs); } }; BoundDistanceFunction::UP -GeoDistanceFunctionFactory::for_query_vector(const vespalib::eval::TypedCells& lhs) { +GeoDistanceFunctionFactory::for_query_vector(TypedCells lhs) { return std::make_unique<BoundGeoDistance>(lhs); } BoundDistanceFunction::UP -GeoDistanceFunctionFactory::for_insertion_vector(const vespalib::eval::TypedCells& lhs) { +GeoDistanceFunctionFactory::for_insertion_vector(TypedCells lhs) { return std::make_unique<BoundGeoDistance>(lhs); } diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h index f1af976b91f..1464898421b 100644 --- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h @@ -2,12 +2,7 @@ #pragma once -#include "distance_function.h" #include "distance_function_factory.h" -#include <vespa/eval/eval/typed_cells.h> -#include <vespa/vespalib/hwaccelrated/iaccelrated.h> -#include <vespa/vespalib/util/typify.h> -#include <cmath> namespace search::tensor { @@ -19,8 +14,8 @@ namespace search::tensor { class GeoDistanceFunctionFactory : public DistanceFunctionFactory { public: GeoDistanceFunctionFactory() = default; - BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override; - BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override; + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp index 659e2cae372..0be920b9c03 100644 --- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp @@ -6,51 +6,29 @@ using vespalib::typify_invoke; using vespalib::eval::TypifyCellType; +using vespalib::eval::TypedCells; namespace search::tensor { -namespace { - -struct CalcHamming { - template <typename LCT, typename RCT> - static double invoke(const vespalib::eval::TypedCells& lhs, - const vespalib::eval::TypedCells& rhs) - { - auto lhs_vector = lhs.unsafe_typify<LCT>(); - auto rhs_vector = rhs.unsafe_typify<RCT>(); - size_t sz = lhs_vector.size(); - assert(sz == rhs_vector.size()); - size_t sum = 0; - for (size_t i = 0; i < sz; ++i) { - sum += (lhs_vector[i] == rhs_vector[i]) ? 0 : 1; - } - return (double)sum; - } -}; - -} - using vespalib::eval::Int8Float; template<typename FloatType> -class BoundHammingDistance : public BoundDistanceFunction { +class BoundHammingDistance final : public BoundDistanceFunction { private: mutable TemporaryVectorStore<FloatType> _tmpSpace; const vespalib::ConstArrayRef<FloatType> _lhs_vector; public: - explicit BoundHammingDistance(const vespalib::eval::TypedCells& lhs) + explicit BoundHammingDistance(TypedCells lhs) : _tmpSpace(lhs.size), _lhs_vector(_tmpSpace.storeLhs(lhs)) {} - double calc(const vespalib::eval::TypedCells& rhs) const override { + double calc(TypedCells rhs) const noexcept override { size_t sz = _lhs_vector.size(); vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs); - assert(sz == rhs_vector.size()); - auto a = _lhs_vector.data(); - auto b = rhs_vector.data(); if constexpr (std::is_same<Int8Float, FloatType>::value) { - return (double) vespalib::binary_hamming_distance(a, b, sz); + return (double) vespalib::binary_hamming_distance(_lhs_vector.data(), rhs_vector.data(), sz); } else { + assert(sz == rhs_vector.size()); size_t sum = 0; for (size_t i = 0; i < sz; ++i) { sum += (_lhs_vector[i] == rhs_vector[i]) ? 0 : 1; @@ -58,13 +36,13 @@ public: return (double)sum; } } - double convert_threshold(double threshold) const override { + double convert_threshold(double threshold) const noexcept override { return threshold; } - double to_rawscore(double distance) const override { + double to_rawscore(double distance) const noexcept override { return 1.0 / (1.0 + distance); } - double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override { + double calc_with_limit(TypedCells rhs, double) const noexcept override { // consider optimizing: return calc(rhs); } @@ -72,14 +50,14 @@ public: template <typename FloatType> BoundDistanceFunction::UP -HammingDistanceFunctionFactory<FloatType>::for_query_vector(const vespalib::eval::TypedCells& lhs) { +HammingDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) { using DFT = BoundHammingDistance<FloatType>; return std::make_unique<DFT>(lhs); } template <typename FloatType> BoundDistanceFunction::UP -HammingDistanceFunctionFactory<FloatType>::for_insertion_vector(const vespalib::eval::TypedCells& lhs) { +HammingDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) { using DFT = BoundHammingDistance<FloatType>; return std::make_unique<DFT>(lhs); } diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h index 32e2be99214..6e7f96e1e2f 100644 --- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h @@ -2,11 +2,7 @@ #pragma once -#include "distance_function.h" #include "distance_function_factory.h" -#include <vespa/eval/eval/typed_cells.h> -#include <vespa/vespalib/util/typify.h> -#include <cmath> namespace search::tensor { @@ -20,8 +16,8 @@ template <typename FloatType> class HammingDistanceFunctionFactory : public DistanceFunctionFactory { public: HammingDistanceFunctionFactory() = default; - BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override; - BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override; + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp index 3645c511b01..c42242d8dc8 100644 --- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp +++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp @@ -4,7 +4,6 @@ #include "temporary_vector_store.h" #include <vespa/vespalib/hwaccelrated/iaccelrated.h> #include <cmath> -#include <mutex> #include <variant> using vespalib::eval::Int8Float; @@ -12,7 +11,7 @@ using vespalib::eval::Int8Float; namespace search::tensor { template<typename FloatType, bool extra_dim> -class BoundMipsDistanceFunction : public BoundDistanceFunction { +class BoundMipsDistanceFunction final : public BoundDistanceFunction { mutable TemporaryVectorStore<FloatType> _tmpSpace; const vespalib::ConstArrayRef<FloatType> _lhs_vector; const vespalib::hwaccelrated::IAccelrated & _computer; @@ -24,7 +23,7 @@ class BoundMipsDistanceFunction : public BoundDistanceFunction { static const float *cast(const float * p) { return p; } static const int8_t *cast(const Int8Float * p) { return reinterpret_cast<const int8_t *>(p); } public: - BoundMipsDistanceFunction(const vespalib::eval::TypedCells& lhs, MaximumSquaredNormStore& sq_norm_store) + BoundMipsDistanceFunction(TypedCells lhs, MaximumSquaredNormStore& sq_norm_store) : BoundDistanceFunction(), _tmpSpace(lhs.size), _lhs_vector(_tmpSpace.storeLhs(lhs)), @@ -44,7 +43,7 @@ public: return _lhs_extra_dim; } - double calc(const vespalib::eval::TypedCells &rhs) const override { + double calc(TypedCells rhs) const noexcept override { vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs); const FloatType * a = _lhs_vector.data(); const FloatType * b = rhs_vector.data(); @@ -58,32 +57,32 @@ public: } return -dp; } - double convert_threshold(double threshold) const override { + double convert_threshold(double threshold) const noexcept override { return threshold; } - double to_rawscore(double distance) const override { + double to_rawscore(double distance) const noexcept override { return -distance; } - double to_distance(double rawscore) const override { + double to_distance(double rawscore) const noexcept override { return -rawscore; } - double min_rawscore() const override { + double min_rawscore() const noexcept override { return std::numeric_limits<double>::lowest(); } - double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override { + double calc_with_limit(TypedCells rhs, double) const noexcept override { return calc(rhs); } }; template<typename FloatType> BoundDistanceFunction::UP -MipsDistanceFunctionFactory<FloatType>::for_query_vector(const vespalib::eval::TypedCells& lhs) { +MipsDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) { return std::make_unique<BoundMipsDistanceFunction<FloatType, false>>(lhs, *_sq_norm_store); } template<typename FloatType> BoundDistanceFunction::UP -MipsDistanceFunctionFactory<FloatType>::for_insertion_vector(const vespalib::eval::TypedCells& lhs) { +MipsDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) { return std::make_unique<BoundMipsDistanceFunction<FloatType, true>>(lhs, *_sq_norm_store); }; diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h index 63b2a83c1b5..67a6eb58de0 100644 --- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h +++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h @@ -45,7 +45,7 @@ public: : _sq_norm_store(std::make_shared<MaximumSquaredNormStore>()) { } - ~MipsDistanceFunctionFactoryBase() = default; + ~MipsDistanceFunctionFactoryBase() override = default; MaximumSquaredNormStore& get_max_squared_norm_store() noexcept { return *_sq_norm_store; } }; @@ -59,12 +59,11 @@ public: template<typename FloatType> class MipsDistanceFunctionFactory : public MipsDistanceFunctionFactoryBase { public: - MipsDistanceFunctionFactory() : MipsDistanceFunctionFactoryBase() { } - ~MipsDistanceFunctionFactory() = default; + MipsDistanceFunctionFactory() noexcept = default; + ~MipsDistanceFunctionFactory() override = default; - BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override; - - BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override; + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp index 931fd3edb06..267f91bb4e0 100644 --- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp @@ -2,6 +2,7 @@ #include "prenormalized_angular_distance.h" #include "temporary_vector_store.h" +#include <vespa/vespalib/hwaccelrated/iaccelrated.h> using vespalib::typify_invoke; using vespalib::eval::TypifyCellType; @@ -9,14 +10,14 @@ using vespalib::eval::TypifyCellType; namespace search::tensor { template<typename FloatType> -class BoundPrenormalizedAngularDistance : public BoundDistanceFunction { +class BoundPrenormalizedAngularDistance final : public BoundDistanceFunction { private: const vespalib::hwaccelrated::IAccelrated & _computer; mutable TemporaryVectorStore<FloatType> _tmpSpace; const vespalib::ConstArrayRef<FloatType> _lhs; double _lhs_norm_sq; public: - BoundPrenormalizedAngularDistance(const vespalib::eval::TypedCells& lhs) + explicit BoundPrenormalizedAngularDistance(TypedCells lhs) : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()), _tmpSpace(lhs.size), _lhs(_tmpSpace.storeLhs(lhs)) @@ -27,7 +28,7 @@ public: _lhs_norm_sq = 1.0; } } - double calc(const vespalib::eval::TypedCells& rhs) const override { + double calc(TypedCells rhs) const noexcept override { size_t sz = _lhs.size(); vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs); assert(sz == rhs_vector.size()); @@ -37,13 +38,13 @@ public: double distance = _lhs_norm_sq - dot_product; return distance; } - double convert_threshold(double threshold) const override { + double convert_threshold(double threshold) const noexcept override { double cosine_similarity = 1.0 - threshold; double dot_product = cosine_similarity * _lhs_norm_sq; double distance = _lhs_norm_sq - dot_product; return distance; } - double to_rawscore(double distance) const override { + double to_rawscore(double distance) const noexcept override { double dot_product = _lhs_norm_sq - distance; double cosine_similarity = dot_product / _lhs_norm_sq; // should be in in range [-1,1] but roundoff may cause problems: @@ -53,7 +54,7 @@ public: double score = 1.0 / (1.0 + cosine_distance); return score; } - double calc_with_limit(const vespalib::eval::TypedCells& rhs, double) const override { + double calc_with_limit(TypedCells rhs, double) const noexcept override { return calc(rhs); } }; @@ -63,14 +64,14 @@ template class BoundPrenormalizedAngularDistance<double>; template <typename FloatType> BoundDistanceFunction::UP -PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_query_vector(const vespalib::eval::TypedCells& lhs) { +PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) { using DFT = BoundPrenormalizedAngularDistance<FloatType>; return std::make_unique<DFT>(lhs); } template <typename FloatType> BoundDistanceFunction::UP -PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_insertion_vector(const vespalib::eval::TypedCells& lhs) { +PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) { using DFT = BoundPrenormalizedAngularDistance<FloatType>; return std::make_unique<DFT>(lhs); } diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h index 0f647547e08..7e3a8c2c676 100644 --- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h @@ -2,11 +2,7 @@ #pragma once -#include "distance_function.h" -#include "bound_distance_function.h" #include "distance_function_factory.h" -#include <vespa/eval/eval/typed_cells.h> -#include <vespa/vespalib/hwaccelrated/iaccelrated.h> namespace search::tensor { @@ -18,8 +14,8 @@ template <typename FloatType> class PrenormalizedAngularDistanceFunctionFactory : public DistanceFunctionFactory { public: PrenormalizedAngularDistanceFunctionFactory() = default; - BoundDistanceFunction::UP for_query_vector(const vespalib::eval::TypedCells& lhs) override; - BoundDistanceFunction::UP for_insertion_vector(const vespalib::eval::TypedCells& lhs) override; + BoundDistanceFunction::UP for_query_vector(TypedCells lhs) override; + BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) override; }; } diff --git a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp index ff07f245de4..b1018555212 100644 --- a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp +++ b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp @@ -2,10 +2,6 @@ #include "temporary_vector_store.h" -#include <vespa/log/log.h> - -LOG_SETUP(".searchlib.tensor.temporary_vector_store"); - using vespalib::ConstArrayRef; using vespalib::ArrayRef; using vespalib::eval::CellType; @@ -17,7 +13,7 @@ namespace { template<typename FromType, typename ToType> ConstArrayRef<ToType> -convert_cells(ArrayRef<ToType> space, TypedCells cells) +convert_cells(ArrayRef<ToType> space, TypedCells cells) noexcept { assert(cells.size == space.size()); auto old_cells = cells.typify<FromType>(); @@ -32,7 +28,7 @@ convert_cells(ArrayRef<ToType> space, TypedCells cells) template <typename ToType> struct ConvertCellsSelector { - template <typename FromType> static auto invoke(ArrayRef<ToType> dst, TypedCells src) { + template <typename FromType> static auto invoke(ArrayRef<ToType> dst, TypedCells src) noexcept { return convert_cells<FromType, ToType>(dst, src); } }; @@ -41,8 +37,8 @@ struct ConvertCellsSelector template <typename FloatType> ConstArrayRef<FloatType> -TemporaryVectorStore<FloatType>::internal_convert(TypedCells cells, size_t offset) { - LOG_ASSERT(cells.size * 2 == _tmpSpace.size()); +TemporaryVectorStore<FloatType>::internal_convert(TypedCells cells, size_t offset) noexcept { + assert(cells.size * 2 == _tmpSpace.size()); ArrayRef<FloatType> where(_tmpSpace.data() + offset, cells.size); using MyTypify = vespalib::eval::TypifyCellType; using MySelector = ConvertCellsSelector<FloatType>; diff --git a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h index ad5bdf3ed3a..3dc237c85a4 100644 --- a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h +++ b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h @@ -2,10 +2,7 @@ #pragma once -#include <memory> -#include <vespa/eval/eval/cell_type.h> #include <vespa/eval/eval/typed_cells.h> -#include <vespa/vespalib/util/arrayref.h> namespace search::tensor { @@ -13,14 +10,15 @@ namespace search::tensor { template <typename FloatType> class TemporaryVectorStore { private: + using TypedCells = vespalib::eval::TypedCells; std::vector<FloatType> _tmpSpace; - vespalib::ConstArrayRef<FloatType> internal_convert(vespalib::eval::TypedCells cells, size_t offset); + vespalib::ConstArrayRef<FloatType> internal_convert(TypedCells cells, size_t offset) noexcept; public: - TemporaryVectorStore(size_t vectorSize) : _tmpSpace(vectorSize * 2) {} - vespalib::ConstArrayRef<FloatType> storeLhs(vespalib::eval::TypedCells cells) { + explicit TemporaryVectorStore(size_t vectorSize) noexcept : _tmpSpace(vectorSize * 2) {} + vespalib::ConstArrayRef<FloatType> storeLhs(TypedCells cells) noexcept { return internal_convert(cells, 0); } - vespalib::ConstArrayRef<FloatType> convertRhs(vespalib::eval::TypedCells cells) { + vespalib::ConstArrayRef<FloatType> convertRhs(TypedCells cells) { if (vespalib::eval::get_cell_type<FloatType>() == cells.type) [[likely]] { return cells.unsafe_typify<FloatType>(); } else { |