From d083381fadef275687e7af2a566acac1b2ebc56b Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Mon, 12 Apr 2021 14:16:54 +0000 Subject: let the distance function decide how it wants the query tensor converted --- .../tensorattribute/tensorattribute_test.cpp | 2 +- .../tests/tensor/hnsw_index/hnsw_index_test.cpp | 2 +- .../src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp | 2 +- .../queryeval/nearest_neighbor_blueprint.cpp | 52 +++++++++++----------- .../src/vespa/searchlib/tensor/angular_distance.h | 38 ++++++++-------- .../src/vespa/searchlib/tensor/distance_function.h | 11 +++++ .../searchlib/tensor/distance_function_factory.cpp | 10 ++--- .../vespa/searchlib/tensor/euclidean_distance.h | 20 ++++----- .../vespa/searchlib/tensor/geo_degrees_distance.h | 2 +- .../src/vespa/searchlib/tensor/hamming_distance.h | 2 +- .../searchlib/tensor/inner_product_distance.h | 22 +++++---- 11 files changed, 85 insertions(+), 78 deletions(-) (limited to 'searchlib') diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index 2a509031e24..9621b93fd37 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -230,7 +230,7 @@ public: const search::tensor::DistanceFunction *distance_function() const override { - static search::tensor::SquaredEuclideanDistance my_dist_fun; + static search::tensor::SquaredEuclideanDistance my_dist_fun(vespalib::eval::CellType::DOUBLE); return &my_dist_fun; } }; diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp index 6ffe118aa65..3c6668dd410 100644 --- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp @@ -78,7 +78,7 @@ public: void init(bool heuristic_select_neighbors) { auto generator = std::make_unique(); level_generator = generator.get(); - index = std::make_unique(vectors, std::make_unique(), + index = std::make_unique(vectors, std::make_unique(vespalib::eval::CellType::DOUBLE), std::move(generator), HnswIndex::Config(5, 2, 10, 0, heuristic_select_neighbors)); } diff --git a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp index 7acdb4df983..090f0306fa7 100644 --- a/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/stress_hnsw_mt.cpp @@ -231,7 +231,7 @@ public: void init() { uint32_t m = 16; - index = std::make_unique(vectors, std::make_unique(), + index = std::make_unique(vectors, std::make_unique(vespalib::eval::CellType::DOUBLE), std::make_unique(m), HnswIndex::Config(2*m, m, 200, 10, true)); } diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp index 8012c48a04a..243cf4d3911 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp @@ -4,7 +4,7 @@ #include "nearest_neighbor_blueprint.h" #include "nearest_neighbor_iterator.h" #include "nns_index_iterator.h" -#include +#include #include #include #include @@ -12,35 +12,36 @@ LOG_SETUP(".searchlib.queryeval.nearest_neighbor_blueprint"); -using vespalib::eval::DenseCellsValue; +using vespalib::eval::CellType; +using vespalib::eval::FastValueBuilderFactory; +using vespalib::eval::TypedCells; using vespalib::eval::Value; +using vespalib::eval::ValueType; namespace search::queryeval { namespace { template -void -convert_cells(std::unique_ptr &original, const vespalib::eval::ValueType &want_type) +std::unique_ptr +convert_cells(const ValueType &new_type, TypedCells cells) { - if constexpr (std::is_same::value) { - return; - } else { - auto old_cells = original->cells().typify(); - std::vector new_cells; - new_cells.reserve(old_cells.size()); - for (LCT value : old_cells) { - RCT conv(value); - new_cells.push_back(conv); - } - original = std::make_unique>(want_type, std::move(new_cells)); + auto old_cells = cells.typify(); + auto builder = FastValueBuilderFactory::get().create_value_builder(new_type); + auto new_cells = builder->add_subspace(); + assert(old_cells.size() == new_cells.size()); + auto p = new_cells.begin(); + for (LCT value : old_cells) { + RCT conv(value); + *p++ = conv; } + return builder->build(std::move(builder)); } struct ConvertCellsSelector { template - static auto invoke() { return convert_cells; } + static auto invoke(const ValueType &new_type, TypedCells old_cells) { return convert_cells(new_type, old_cells); } }; } // namespace @@ -63,15 +64,8 @@ NearestNeighborBlueprint::NearestNeighborBlueprint(const queryeval::FieldSpec& f _found_hits(), _global_filter(GlobalFilter::create()) { - auto lct = _query_tensor->cells().type; - auto rct = _attr_tensor.getTensorType().cell_type(); - if (rct == vespalib::eval::CellType::FLOAT || rct == vespalib::eval::CellType::DOUBLE) { - // avoid downcasting to bfloat16 etc, that is just extra work - using MyTypify = vespalib::eval::TypifyCellType; - auto fixup_fun = vespalib::typify_invoke<2,MyTypify,ConvertCellsSelector>(lct, rct); - fixup_fun(_query_tensor, _attr_tensor.getTensorType()); - } - _fallback_dist_fun = search::tensor::make_distance_function(_attr_tensor.distance_metric(), rct); + CellType attr_ct = _attr_tensor.getTensorType().cell_type(); + _fallback_dist_fun = search::tensor::make_distance_function(_attr_tensor.distance_metric(), attr_ct); _dist_fun = _fallback_dist_fun.get(); assert(_dist_fun); auto nns_index = _attr_tensor.nearest_neighbor_index(); @@ -79,6 +73,14 @@ NearestNeighborBlueprint::NearestNeighborBlueprint(const queryeval::FieldSpec& f _dist_fun = nns_index->distance_function(); assert(_dist_fun); } + auto query_ct = _query_tensor->cells().type; + CellType want_ct = _dist_fun->expected_cell_type(); + if (query_ct != want_ct) { + ValueType new_type = ValueType::make_type(want_ct, _query_tensor->type().dimensions()); + using MyTypify = vespalib::eval::TypifyCellType; + TypedCells old_cells = _query_tensor->cells(); + _query_tensor = vespalib::typify_invoke<2,MyTypify,ConvertCellsSelector>(query_ct, want_ct, new_type, old_cells); + } if (distance_threshold < std::numeric_limits::max()) { _distance_threshold = _dist_fun->convert_threshold(distance_threshold); _distance_heap.set_distance_threshold(_distance_threshold); diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.h b/searchlib/src/vespa/searchlib/tensor/angular_distance.h index c480ba2879e..2c8643e7747 100644 --- a/searchlib/src/vespa/searchlib/tensor/angular_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.h @@ -14,7 +14,7 @@ namespace search::tensor { */ class AngularDistance : public DistanceFunction { public: - AngularDistance() {} + AngularDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {} double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override; double convert_threshold(double threshold) const override { double cosine_similarity = cos(threshold); @@ -46,28 +46,26 @@ template class AngularDistanceHW : public AngularDistance { public: AngularDistanceHW() - : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()) + : AngularDistance(vespalib::eval::get_cell_type()), + _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()) {} double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override { constexpr vespalib::eval::CellType expected = vespalib::eval::get_cell_type(); - if (__builtin_expect((lhs.type == expected && rhs.type == expected), true)) { - auto lhs_vector = lhs.unsafe_typify(); - auto rhs_vector = rhs.unsafe_typify(); - size_t sz = lhs_vector.size(); - assert(sz == rhs_vector.size()); - auto a = &lhs_vector[0]; - auto b = &rhs_vector[0]; - double a_norm_sq = _computer.dotProduct(a, a, sz); - double b_norm_sq = _computer.dotProduct(b, b, sz); - double squared_norms = a_norm_sq * b_norm_sq; - double dot_product = _computer.dotProduct(a, b, sz); - double div = (squared_norms > 0) ? sqrt(squared_norms) : 1.0; - double cosine_similarity = dot_product / div; - double distance = 1.0 - cosine_similarity; // in range [0,2] - return distance; - } else { - return AngularDistance::calc(lhs, rhs); - } + assert(lhs.type == expected && rhs.type == expected); + auto lhs_vector = lhs.typify(); + auto rhs_vector = rhs.typify(); + size_t sz = lhs_vector.size(); + assert(sz == rhs_vector.size()); + auto a = &lhs_vector[0]; + auto b = &rhs_vector[0]; + double a_norm_sq = _computer.dotProduct(a, a, sz); + double b_norm_sq = _computer.dotProduct(b, b, sz); + double squared_norms = a_norm_sq * b_norm_sq; + double dot_product = _computer.dotProduct(a, b, sz); + double div = (squared_norms > 0) ? sqrt(squared_norms) : 1.0; + double cosine_similarity = dot_product / div; + double distance = 1.0 - cosine_similarity; // in range [0,2] + return distance; } private: const vespalib::hwaccelrated::IAccelrated & _computer; diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function.h b/searchlib/src/vespa/searchlib/tensor/distance_function.h index 08f90fec041..531d7faf2b3 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_function.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_function.h @@ -3,6 +3,7 @@ #pragma once #include +#include namespace vespalib::eval { struct TypedCells; } @@ -15,10 +16,20 @@ namespace search::tensor { * The actual implementation must know which type the vectors are. */ class DistanceFunction { +protected: + vespalib::eval::CellType expect_cell_type; public: using UP = std::unique_ptr; + + DistanceFunction(vespalib::eval::CellType expected) : expect_cell_type(expected) {} + virtual ~DistanceFunction() {} + // input (query) vectors must be converted to this cell type: + vespalib::eval::CellType expected_cell_type() const { + return expect_cell_type; + } + // calculate internal distance (comparable) virtual double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const = 0; diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp index 1d58c01fd99..8ae9441ff11 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp +++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp @@ -21,24 +21,24 @@ make_distance_function(DistanceMetric variant, CellType cell_type) switch (cell_type) { case CellType::FLOAT: return std::make_unique>(); case CellType::DOUBLE: return std::make_unique>(); - default: return std::make_unique(); + default: return std::make_unique(CellType::FLOAT); } case DistanceMetric::Angular: switch (cell_type) { case CellType::FLOAT: return std::make_unique>(); case CellType::DOUBLE: return std::make_unique>(); - default: return std::make_unique(); + default: return std::make_unique(CellType::FLOAT); } case DistanceMetric::GeoDegrees: - return std::make_unique(); + return std::make_unique(CellType::DOUBLE); case DistanceMetric::InnerProduct: switch (cell_type) { case CellType::FLOAT: return std::make_unique>(); case CellType::DOUBLE: return std::make_unique>(); - default: return std::make_unique(); + default: return std::make_unique(CellType::FLOAT); } case DistanceMetric::Hamming: - return std::make_unique(); + return std::make_unique(cell_type); } // not reached: return DistanceFunction::UP(); diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h index 6d4d982834f..380c6b2add2 100644 --- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h @@ -14,7 +14,7 @@ namespace search::tensor { */ class SquaredEuclideanDistance : public DistanceFunction { public: - SquaredEuclideanDistance() {} + SquaredEuclideanDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {} double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override; double calc_with_limit(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs, @@ -38,19 +38,17 @@ template class SquaredEuclideanDistanceHW : public SquaredEuclideanDistance { public: SquaredEuclideanDistanceHW() - : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()) + : SquaredEuclideanDistance(vespalib::eval::get_cell_type()), + _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()) {} double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override { constexpr vespalib::eval::CellType expected = vespalib::eval::get_cell_type(); - if (__builtin_expect((lhs.type == expected && rhs.type == expected), true)) { - auto lhs_vector = lhs.unsafe_typify(); - auto rhs_vector = rhs.unsafe_typify(); - size_t sz = lhs_vector.size(); - assert(sz == rhs_vector.size()); - return _computer.squaredEuclideanDistance(&lhs_vector[0], &rhs_vector[0], sz); - } else { - return SquaredEuclideanDistance::calc(lhs, rhs); - } + assert(lhs.type == expected && rhs.type == expected); + auto lhs_vector = lhs.typify(); + auto rhs_vector = rhs.typify(); + size_t sz = lhs_vector.size(); + assert(sz == rhs_vector.size()); + return _computer.squaredEuclideanDistance(&lhs_vector[0], &rhs_vector[0], sz); } private: const vespalib::hwaccelrated::IAccelrated & _computer; diff --git a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h index b8b9bec50d5..7ce69ef8aae 100644 --- a/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/geo_degrees_distance.h @@ -23,7 +23,7 @@ public: static constexpr double earth_mean_radius = 6371.0088; static constexpr double degrees_to_radians = M_PI / 180.0; - GeoDegreesDistance() {} + GeoDegreesDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {} // haversine function: static double hav(double angle) { double s = sin(0.5*angle); diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h index c2cd7af3863..d92671e4922 100644 --- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h @@ -17,7 +17,7 @@ namespace search::tensor { */ class HammingDistance : public DistanceFunction { public: - HammingDistance() {} + HammingDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {} double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override; double convert_threshold(double threshold) const override { return threshold; diff --git a/searchlib/src/vespa/searchlib/tensor/inner_product_distance.h b/searchlib/src/vespa/searchlib/tensor/inner_product_distance.h index cb60d18c0f5..a6ecb4eb7bb 100644 --- a/searchlib/src/vespa/searchlib/tensor/inner_product_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/inner_product_distance.h @@ -15,7 +15,7 @@ namespace search::tensor { */ class InnerProductDistance : public DistanceFunction { public: - InnerProductDistance() {} + InnerProductDistance(vespalib::eval::CellType expected) : DistanceFunction(expected) {} double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override; double convert_threshold(double threshold) const override { return threshold; @@ -42,20 +42,18 @@ template class InnerProductDistanceHW : public InnerProductDistance { public: InnerProductDistanceHW() - : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()) + : InnerProductDistance(vespalib::eval::get_cell_type()), + _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()) {} double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override { constexpr vespalib::eval::CellType expected = vespalib::eval::get_cell_type(); - if (__builtin_expect((lhs.type == expected && rhs.type == expected), true)) { - auto lhs_vector = lhs.unsafe_typify(); - auto rhs_vector = rhs.unsafe_typify(); - size_t sz = lhs_vector.size(); - assert(sz == rhs_vector.size()); - double score = 1.0 - _computer.dotProduct(&lhs_vector[0], &rhs_vector[0], sz); - return std::max(0.0, score); - } else { - return InnerProductDistance::calc(lhs, rhs); - } + assert(lhs.type == expected && rhs.type == expected); + auto lhs_vector = lhs.typify(); + auto rhs_vector = rhs.typify(); + size_t sz = lhs_vector.size(); + assert(sz == rhs_vector.size()); + double score = 1.0 - _computer.dotProduct(&lhs_vector[0], &rhs_vector[0], sz); + return std::max(0.0, score); } private: const vespalib::hwaccelrated::IAccelrated & _computer; -- cgit v1.2.3