diff options
Diffstat (limited to 'searchlib')
10 files changed, 87 insertions, 29 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index 841d7f92b62..6ca7d298ee2 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -25,6 +25,7 @@ #include <vespa/vespalib/testkit/test_kit.h> #include <vespa/vespalib/util/mmap_file_allocator_factory.h> #include <vespa/searchlib/util/bufferwriter.h> +#include <vespa/vespalib/util/fake_doom.h> #include <vespa/vespalib/util/threadstackexecutor.h> #include <vespa/document/base/exceptions.h> #include <vespa/eval/eval/fast_value.h> @@ -301,23 +302,27 @@ public: std::vector<Neighbor> find_top_k(uint32_t k, const search::tensor::BoundDistanceFunction &df, uint32_t explore_k, + const vespalib::Doom& doom, double distance_threshold) const override { (void) k; (void) df; (void) explore_k; + (void) doom; (void) distance_threshold; return std::vector<Neighbor>(); } std::vector<Neighbor> find_top_k_with_filter(uint32_t k, const search::tensor::BoundDistanceFunction &df, const GlobalFilter& filter, uint32_t explore_k, + const vespalib::Doom& doom, double distance_threshold) const override { (void) k; (void) df; (void) explore_k; (void) filter; + (void) doom; (void) distance_threshold; return std::vector<Neighbor>(); } @@ -1291,10 +1296,12 @@ template <typename ParentT> class NearestNeighborBlueprintFixtureBase : public ParentT { private: std::unique_ptr<Value> _query_tensor; + vespalib::FakeDoom _no_doom; public: NearestNeighborBlueprintFixtureBase() - : _query_tensor() + : _query_tensor(), + _no_doom() { this->set_tensor(1, vec_2d(1, 1)); this->set_tensor(2, vec_2d(2, 2)); @@ -1321,7 +1328,7 @@ public: create_query_tensor(vec_2d(17, 42))), 3, approximate, 5, 100100.25, - global_filter_lower_limit, 1.0); + global_filter_lower_limit, 1.0, _no_doom.get_doom()); EXPECT_EQUAL(11u, bp->getState().estimate().estHits); EXPECT_EQUAL(100100.25 * 100100.25, bp->get_distance_threshold()); return bp; diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp index 4d759132114..f59c16c76f9 100644 --- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp @@ -17,6 +17,7 @@ #include <vespa/vespalib/datastore/compaction_spec.h> #include <vespa/vespalib/datastore/compaction_strategy.h> #include <vespa/vespalib/gtest/gtest.h> +#include <vespa/vespalib/util/fake_doom.h> #include <vespa/vespalib/util/generationhandler.h> #include <vespa/vespalib/data/slime/slime.h> #include <type_traits> @@ -90,13 +91,15 @@ public: LevelGenerator* level_generator; GenerationHandler gen_handler; std::unique_ptr<IndexType> index; + std::unique_ptr<vespalib::FakeDoom> _doom; HnswIndexTest() : vectors(), global_filter(GlobalFilter::create()), level_generator(), gen_handler(), - index() + index(), + _doom(std::make_unique<vespalib::FakeDoom>()) { vectors.set(1, {2, 2}).set(2, {3, 2}).set(3, {2, 3}) .set(4, {1, 2}).set(5, {8, 3}).set(6, {7, 2}) @@ -173,8 +176,8 @@ public: vespalib::eval::TypedCells qv_cells(qv_ref); auto df = index->distance_function_factory().for_query_vector(qv_cells); auto got_by_docid = (global_filter->is_active()) ? - index->find_top_k_with_filter(k, *df, *global_filter, explore_k, 10000.0) : - index->find_top_k(k, *df, explore_k, 10000.0); + index->find_top_k_with_filter(k, *df, *global_filter, explore_k, _doom->get_doom(), 10000.0) : + index->find_top_k(k, *df, explore_k, _doom->get_doom(), 10000.0); std::vector<uint32_t> act; act.reserve(got_by_docid.size()); for (auto& hit : got_by_docid) { @@ -186,7 +189,7 @@ public: uint32_t k = 3; auto qv = vectors.get_vector(docid, 0); auto df = index->distance_function_factory().for_query_vector(qv); - auto rv = index->top_k_candidates(*df, k, global_filter->ptr_if_active()).peek(); + auto rv = index->top_k_candidates(*df, k, global_filter->ptr_if_active(), _doom->get_doom()).peek(); std::sort(rv.begin(), rv.end(), LesserDistance()); size_t idx = 0; for (const auto & hit : rv) { @@ -197,25 +200,27 @@ public: if (exp_hits.size() == k) { std::vector<uint32_t> expected_by_docid = exp_hits; std::sort(expected_by_docid.begin(), expected_by_docid.end()); - auto got_by_docid = index->find_top_k(k, *df, k, 100100.25); + auto got_by_docid = index->find_top_k(k, *df, k, _doom->get_doom(), 100100.25); for (idx = 0; idx < k; ++idx) { EXPECT_EQ(expected_by_docid[idx], got_by_docid[idx].docid); } } - check_with_distance_threshold(docid); + if (!exp_hits.empty()) { + check_with_distance_threshold(docid); + } } void check_with_distance_threshold(uint32_t docid) { auto qv = vectors.get_vector(docid, 0); auto df = index->distance_function_factory().for_query_vector(qv); uint32_t k = 3; - auto rv = index->top_k_candidates(*df, k, global_filter->ptr_if_active()).peek(); + auto rv = index->top_k_candidates(*df, k, global_filter->ptr_if_active(), _doom->get_doom()).peek(); std::sort(rv.begin(), rv.end(), LesserDistance()); EXPECT_EQ(rv.size(), 3); EXPECT_LE(rv[0].distance, rv[1].distance); double thr = (rv[0].distance + rv[1].distance) * 0.5; auto got_by_docid = (global_filter->is_active()) - ? index->find_top_k_with_filter(k, *df, *global_filter, k, thr) - : index->find_top_k(k, *df, k, thr); + ? index->find_top_k_with_filter(k, *df, *global_filter, k, _doom->get_doom(), thr) + : index->find_top_k(k, *df, k, _doom->get_doom(), thr); EXPECT_EQ(got_by_docid.size(), 1); EXPECT_EQ(got_by_docid[0].docid, index->get_docid(rv[0].nodeid)); for (const auto & hit : got_by_docid) { @@ -262,6 +267,12 @@ public: HnswIndexLoader<VectorBufferReader, IndexType::index_type> loader(graph, id_mapping, std::make_unique<VectorBufferReader>(data)); while (loader.load_next()) {} } + void reset_doom() { + _doom = std::make_unique<vespalib::FakeDoom>(); + } + void reset_doom(vespalib::steady_time::duration time_to_doom) { + _doom = std::make_unique<vespalib::FakeDoom>(time_to_doom); + } static constexpr bool is_single = std::is_same_v<IndexType, HnswIndex<HnswIndexType::SINGLE>>; }; @@ -334,6 +345,8 @@ TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_in_level_0_graph_with_simple_selec this->expect_top_3(7, {3, 2}); this->expect_top_3(8, {4, 3}); this->expect_top_3(9, {3, 2}); + this->reset_doom(-1s); + this->expect_top_3(2, {}); } TYPED_TEST(HnswIndexTest, 2d_vectors_inserted_and_removed) @@ -824,6 +837,10 @@ TEST_F(HnswMultiIndexTest, duplicate_docid_is_removed) this->expect_top_3_by_docid("{2, 0}", {2, 0}, {1, 2, 4}); this->expect_top_3_by_docid("{2, 1}", {2, 1}, {2, 3, 4}); this->expect_top_3_by_docid("{2, 2}", {2, 2}, {1, 3, 4}); + this->reset_doom(-1s); // 1s beyond doom => no hits + this->expect_top_3_by_docid("{2, 2}", {2, 2}, {}); + this->reset_doom(); + this->expect_top_3_by_docid("{2, 2}", {2, 2}, {1, 3, 4}); auto filter = std::make_shared<MyGlobalFilter>(GlobalFilter::create({1, 2}, 3)); global_filter = filter; this->expect_top_3_by_docid("{2,2}", {2, 2}, {1, 2}); diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 7a622030d98..62f76b5cee0 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -835,7 +835,8 @@ public: n.get_explore_additional_hits(), n.get_distance_threshold(), getRequestContext().get_attribute_blueprint_params().global_filter_lower_limit, - getRequestContext().get_attribute_blueprint_params().global_filter_upper_limit)); + getRequestContext().get_attribute_blueprint_params().global_filter_upper_limit, + getRequestContext().getDoom())); } catch (const vespalib::IllegalArgumentException& ex) { return fail_nearest_neighbor_term(n, ex.getMessage()); diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp index 7c307a1e35f..87ddb8b6edc 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp @@ -14,6 +14,8 @@ LOG_SETUP(".searchlib.queryeval.nearest_neighbor_blueprint"); using vespalib::eval::Value; +namespace vespalib { class Doom; } + namespace search::queryeval { namespace { @@ -40,7 +42,8 @@ NearestNeighborBlueprint::NearestNeighborBlueprint(const queryeval::FieldSpec& f uint32_t explore_additional_hits, double distance_threshold, double global_filter_lower_limit, - double global_filter_upper_limit) + double global_filter_upper_limit, + const vespalib::Doom& doom) : ComplexLeafBlueprint(field), _distance_calc(std::move(distance_calc)), _attr_tensor(_distance_calc->attribute_tensor()), @@ -58,7 +61,8 @@ NearestNeighborBlueprint::NearestNeighborBlueprint(const queryeval::FieldSpec& f _global_filter(GlobalFilter::create()), _global_filter_set(false), _global_filter_hits(), - _global_filter_hit_ratio() + _global_filter_hit_ratio(), + _doom(doom) { if (distance_threshold < std::numeric_limits<double>::max()) { _distance_threshold = _distance_calc->function().convert_threshold(distance_threshold); @@ -109,10 +113,10 @@ NearestNeighborBlueprint::perform_top_k(const search::tensor::NearestNeighborInd uint32_t k = _adjusted_target_hits; const auto &df = _distance_calc->function(); if (_global_filter->is_active()) { - _found_hits = nns_index->find_top_k_with_filter(k, df, *_global_filter, k + _explore_additional_hits, _distance_threshold); + _found_hits = nns_index->find_top_k_with_filter(k, df, *_global_filter, k + _explore_additional_hits, _doom, _distance_threshold); _algorithm = Algorithm::INDEX_TOP_K_WITH_FILTER; } else { - _found_hits = nns_index->find_top_k(k, df, k + _explore_additional_hits, _distance_threshold); + _found_hits = nns_index->find_top_k(k, df, k + _explore_additional_hits, _doom, _distance_threshold); _algorithm = Algorithm::INDEX_TOP_K; } } diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h index 3defb34cffd..f88cdd5adb1 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h @@ -45,6 +45,7 @@ private: bool _global_filter_set; std::optional<uint32_t> _global_filter_hits; std::optional<double> _global_filter_hit_ratio; + const vespalib::Doom& _doom; void perform_top_k(const search::tensor::NearestNeighborIndex* nns_index); public: @@ -53,7 +54,8 @@ public: uint32_t target_hits, bool approximate, uint32_t explore_additional_hits, double distance_threshold, double global_filter_lower_limit, - double global_filter_upper_limit); + double global_filter_upper_limit, + const vespalib::Doom& doom); NearestNeighborBlueprint(const NearestNeighborBlueprint&) = delete; NearestNeighborBlueprint& operator=(const NearestNeighborBlueprint&) = delete; ~NearestNeighborBlueprint(); diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index d4b8c753431..748a747d515 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -18,6 +18,7 @@ #include <vespa/vespalib/data/slime/inserter.h> #include <vespa/vespalib/datastore/array_store.hpp> #include <vespa/vespalib/datastore/compaction_strategy.h> +#include <vespa/vespalib/util/doom.h> #include <vespa/vespalib/util/memory_allocator.h> #include <vespa/vespalib/util/size_literals.h> #include <vespa/vespalib/util/time.h> @@ -373,12 +374,19 @@ HnswIndex<type>::search_layer_helper( const BoundDistanceFunction &df, uint32_t neighbors_to_find, BestNeighbors& best_neighbors, uint32_t level, const GlobalFilter *filter, - uint32_t nodeid_limit, uint32_t estimated_visited_nodes) const + uint32_t nodeid_limit, const vespalib::Doom* const doom, + uint32_t estimated_visited_nodes) const { NearestPriQ candidates; GlobalFilterWrapper<type> filter_wrapper(filter); filter_wrapper.clamp_nodeid_limit(nodeid_limit); VisitedTracker visited(nodeid_limit, estimated_visited_nodes); + if (doom != nullptr && doom->soft_doom()) { + while (!best_neighbors.empty()) { + best_neighbors.pop(); + } + return; + } for (const auto &entry : best_neighbors.peek()) { if (entry.nodeid >= nodeid_limit) { continue; @@ -423,6 +431,9 @@ HnswIndex<type>::search_layer_helper( } } } + if (doom != nullptr && doom->soft_doom()) { + break; + } } } @@ -432,14 +443,15 @@ void HnswIndex<type>::search_layer( const BoundDistanceFunction &df, uint32_t neighbors_to_find, - BestNeighbors& best_neighbors, uint32_t level, const GlobalFilter *filter) const + BestNeighbors& best_neighbors, uint32_t level, + const vespalib::Doom* const doom, const GlobalFilter *filter) const { uint32_t nodeid_limit = _graph.nodes_size.load(std::memory_order_acquire); uint32_t estimated_visited_nodes = estimate_visited_nodes(level, nodeid_limit, neighbors_to_find, filter); if (estimated_visited_nodes >= nodeid_limit / 128) { - search_layer_helper<BitVectorVisitedTracker>(df, neighbors_to_find, best_neighbors, level, filter, nodeid_limit, estimated_visited_nodes); + search_layer_helper<BitVectorVisitedTracker>(df, neighbors_to_find, best_neighbors, level, filter, nodeid_limit, doom, estimated_visited_nodes); } else { - search_layer_helper<HashSetVisitedTracker>(df, neighbors_to_find, best_neighbors, level, filter, nodeid_limit, estimated_visited_nodes); + search_layer_helper<HashSetVisitedTracker>(df, neighbors_to_find, best_neighbors, level, filter, nodeid_limit, doom, estimated_visited_nodes); } } @@ -522,7 +534,7 @@ HnswIndex<type>::internal_prepare_add_node(PreparedAddDoc& op, TypedCells input_ search_level = std::min(node_max_level, search_level); // Find neighbors of the added document in each level it should exist in. while (search_level >= 0) { - search_layer(*df, _cfg.neighbors_to_explore_at_construction(), best_neighbors, search_level); + search_layer(*df, _cfg.neighbors_to_explore_at_construction(), best_neighbors, search_level, nullptr); auto neighbors = select_neighbors(best_neighbors.peek(), _cfg.max_links_on_inserts()); auto& links = connections[search_level]; links.reserve(neighbors.used.size()); @@ -887,9 +899,10 @@ HnswIndex<type>::top_k_by_docid( uint32_t k, const BoundDistanceFunction &df, const GlobalFilter *filter, uint32_t explore_k, + const vespalib::Doom& doom, double distance_threshold) const { - SearchBestNeighbors candidates = top_k_candidates(df, std::max(k, explore_k), filter); + SearchBestNeighbors candidates = top_k_candidates(df, std::max(k, explore_k), filter, doom); auto result = candidates.get_neighbors(k, distance_threshold); std::sort(result.begin(), result.end(), NeighborsByDocId()); return result; @@ -901,9 +914,10 @@ HnswIndex<type>::find_top_k( uint32_t k, const BoundDistanceFunction &df, uint32_t explore_k, + const vespalib::Doom& doom, double distance_threshold) const { - return top_k_by_docid(k, df, nullptr, explore_k, distance_threshold); + return top_k_by_docid(k, df, nullptr, explore_k, doom, distance_threshold); } template <HnswIndexType type> @@ -912,16 +926,18 @@ HnswIndex<type>::find_top_k_with_filter( uint32_t k, const BoundDistanceFunction &df, const GlobalFilter &filter, uint32_t explore_k, + const vespalib::Doom& doom, double distance_threshold) const { - return top_k_by_docid(k, df, &filter, explore_k, distance_threshold); + return top_k_by_docid(k, df, &filter, explore_k, doom, distance_threshold); } template <HnswIndexType type> typename HnswIndex<type>::SearchBestNeighbors HnswIndex<type>::top_k_candidates( const BoundDistanceFunction &df, - uint32_t k, const GlobalFilter *filter) const + uint32_t k, const GlobalFilter *filter, + const vespalib::Doom& doom) const { SearchBestNeighbors best_neighbors; auto entry = _graph.get_entry_node(); @@ -939,7 +955,7 @@ HnswIndex<type>::top_k_candidates( --search_level; } best_neighbors.push(entry_point); - search_layer(df, k, best_neighbors, 0, filter); + search_layer(df, k, best_neighbors, 0, &doom, filter); return best_neighbors; } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h index 20ab0dbea92..1ea8d1be558 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h @@ -170,12 +170,15 @@ protected: void search_layer_helper(const BoundDistanceFunction &df, uint32_t neighbors_to_find, BestNeighbors& best_neighbors, uint32_t level, const GlobalFilter *filter, uint32_t nodeid_limit, + const vespalib::Doom* const doom, uint32_t estimated_visited_nodes) const; template <class BestNeighbors> void search_layer(const BoundDistanceFunction &df, uint32_t neighbors_to_find, BestNeighbors& best_neighbors, - uint32_t level, const GlobalFilter *filter = nullptr) const; + uint32_t level, const vespalib::Doom* const doom, + const GlobalFilter *filter = nullptr) const; std::vector<Neighbor> top_k_by_docid(uint32_t k, const BoundDistanceFunction &df, const GlobalFilter *filter, uint32_t explore_k, + const vespalib::Doom& doom, double distance_threshold) const; internal::PreparedAddDoc internal_prepare_add(uint32_t docid, VectorBundle input_vectors, @@ -217,19 +220,22 @@ public: uint32_t k, const BoundDistanceFunction &df, uint32_t explore_k, + const vespalib::Doom& doom, double distance_threshold) const override; std::vector<Neighbor> find_top_k_with_filter( uint32_t k, const BoundDistanceFunction &df, const GlobalFilter &filter, uint32_t explore_k, + const vespalib::Doom& doom, double distance_threshold) const override; DistanceFunctionFactory &distance_function_factory() const override { return *_distance_ff; } SearchBestNeighbors top_k_candidates( const BoundDistanceFunction &df, - uint32_t k, const GlobalFilter *filter) const; + uint32_t k, const GlobalFilter *filter, + const vespalib::Doom& doom) const; uint32_t get_entry_nodeid() const { return _graph.get_entry_node().nodeid; } int32_t get_entry_level() const { return _graph.get_entry_node().level; } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_multi_best_neighbors.h b/searchlib/src/vespa/searchlib/tensor/hnsw_multi_best_neighbors.h index de707999f11..67eff4b33c7 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_multi_best_neighbors.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_multi_best_neighbors.h @@ -57,6 +57,7 @@ public: _candidates.pop(); } const HnswCandidateVector& peek() const { return _candidates.peek(); } + bool empty() const { return _candidates.empty(); } const HnswCandidate& top() const { return _candidates.top(); } size_t size() const { return _docids.size(); } void emplace(uint32_t nodeid, uint32_t docid, EntryRef ref, double distance) { diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_single_best_neighbors.h b/searchlib/src/vespa/searchlib/tensor/hnsw_single_best_neighbors.h index e7c0a7fded6..acb14f79b7a 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_single_best_neighbors.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_single_best_neighbors.h @@ -25,6 +25,7 @@ public: void push(const HnswCandidate& candidate) { _candidates.push(candidate); } void pop() { _candidates.pop(); } const HnswCandidateVector& peek() const { return _candidates.peek(); } + bool empty() const { return _candidates.empty(); } const HnswCandidate& top() const { return _candidates.top(); } size_t size() const { return _candidates.size(); } void emplace(uint32_t nodeid, uint32_t docid, EntryRef ref, double distance) { _candidates.emplace(nodeid, docid, ref, distance); } diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h index 9cd1065a356..a11be086697 100644 --- a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h +++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h @@ -14,6 +14,7 @@ class FastOS_FileInterface; +namespace vespalib { class Doom; } namespace vespalib { class GenericHeader; } namespace vespalib::datastore { class CompactionSpec; @@ -101,6 +102,7 @@ public: virtual std::vector<Neighbor> find_top_k(uint32_t k, const BoundDistanceFunction &df, uint32_t explore_k, + const vespalib::Doom& doom, double distance_threshold) const = 0; // only return neighbors where the corresponding filter bit is set @@ -108,6 +110,7 @@ public: const BoundDistanceFunction &df, const GlobalFilter &filter, uint32_t explore_k, + const vespalib::Doom& doom, double distance_threshold) const = 0; virtual DistanceFunctionFactory &distance_function_factory() const = 0; |