diff options
author | Arne Juul <arnej@verizonmedia.com> | 2020-02-20 13:42:40 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2020-02-20 14:25:24 +0000 |
commit | cb8d51a519fe5328e22e9b081d8915286ee63482 (patch) | |
tree | 9ab41675cd4601a93315d169d4e6426ce6615394 /searchlib | |
parent | 3ab530c774be833992bbec327dfd43a5ee7fa33a (diff) |
use NearestNeighborIndex when available
* update find_top_k API in NearestNeighborIndex to return
vector with both docids and distances.
* constify find_top_k
* when possible, call find_top_k from NearestNeighborBlueprint,
and use result from that call in iterator.
Diffstat (limited to 'searchlib')
7 files changed, 45 insertions, 14 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index 644230cb340..5089743a54a 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -119,11 +119,11 @@ public: auto vector = _vectors.get_vector(docid).typify<double>(); _removes.emplace_back(docid, DoubleVector(vector.begin(), vector.end())); } - std::vector<uint32_t> find_top_k(uint32_t k, vespalib::tensor::TypedCells vector, uint32_t explore_k) override { + std::vector<Neighbor> find_top_k(uint32_t k, vespalib::tensor::TypedCells vector, uint32_t explore_k) const override { (void) k; (void) vector; (void) explore_k; - return std::vector<uint32_t>(); + return std::vector<Neighbor>(); } }; diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp index 8be6263221a..9a37c24aa7d 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp @@ -3,6 +3,7 @@ #include "emptysearch.h" #include "nearest_neighbor_blueprint.h" #include "nearest_neighbor_iterator.h" +#include "nns_index_iterator.h" #include <vespa/searchlib/fef/termfieldmatchdataarray.h> #include <vespa/eval/tensor/dense/dense_tensor_view.h> #include <vespa/searchlib/tensor/dense_tensor_attribute.h> @@ -17,20 +18,46 @@ NearestNeighborBlueprint::NearestNeighborBlueprint(const queryeval::FieldSpec& f _attr_tensor(attr_tensor), _query_tensor(std::move(query_tensor)), _target_num_hits(target_num_hits), - _distance_heap(target_num_hits) + _distance_heap(target_num_hits), + _found_hits() { setEstimate(HitEstimate(_attr_tensor.getNumDocs(), false)); } NearestNeighborBlueprint::~NearestNeighborBlueprint() = default; +void +NearestNeighborBlueprint::perform_top_k() +{ + auto nns_index = _attr_tensor.nearest_neighbor_index(); + if (nns_index) { + auto lhs_type = _query_tensor->fast_type(); + auto rhs_type = _attr_tensor.getTensorType(); + if (lhs_type == rhs_type) { + auto lhs = _query_tensor->cellsRef(); + uint32_t k = _target_num_hits; + uint32_t explore_k = k + 100; // XXX hardcoded for now + _found_hits = nns_index->find_top_k(k, lhs, explore_k); + } + } +} + +void +NearestNeighborBlueprint::fetchPostings(const ExecuteInfo &execInfo) { + if (execInfo.isStrict()) { + perform_top_k(); + } +} + std::unique_ptr<SearchIterator> NearestNeighborBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray& tfmda, bool strict) const { assert(tfmda.size() == 1); fef::TermFieldMatchData &tfmd = *tfmda[0]; // always search in only one field + if (strict && ! _found_hits.empty()) { + return NnsIndexIterator::create(strict, tfmd, _found_hits); + } const vespalib::tensor::DenseTensorView &qT = *_query_tensor; - return NearestNeighborIterator::create(strict, tfmd, qT, _attr_tensor, _distance_heap); } diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h index 019f8e31842..ab4413c487a 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h @@ -3,6 +3,7 @@ #include "blueprint.h" #include "nearest_neighbor_distance_heap.h" +#include <vespa/searchlib/tensor/nearest_neighbor_index.h> namespace vespalib::tensor { class DenseTensorView; } namespace search::tensor { class DenseTensorAttribute; } @@ -21,7 +22,9 @@ private: std::unique_ptr<vespalib::tensor::DenseTensorView> _query_tensor; uint32_t _target_num_hits; mutable NearestNeighborDistanceHeap _distance_heap; + std::vector<search::tensor::NearestNeighborIndex::Neighbor> _found_hits; + void perform_top_k(); public: NearestNeighborBlueprint(const queryeval::FieldSpec& field, const tensor::DenseTensorAttribute& attr_tensor, @@ -38,6 +41,7 @@ public: bool strict) const override; void visitMembers(vespalib::ObjectVisitor& visitor) const override; bool always_needs_unpack() const override; + void fetchPostings(const ExecuteInfo &execInfo) override; }; } diff --git a/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.cpp b/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.cpp index 222f02d1941..0cf1be63dab 100644 --- a/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.cpp @@ -41,7 +41,7 @@ public: } else { _idx = _hits.size(); } - } + } setAtEnd(); } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index 0d90e4e822a..0d308206761 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -174,7 +174,7 @@ HnswIndex::calc_distance(const TypedCells& lhs, uint32_t rhs_docid) const } HnswCandidate -HnswIndex::find_nearest_in_layer(const TypedCells& input, const HnswCandidate& entry_point, uint32_t level) +HnswIndex::find_nearest_in_layer(const TypedCells& input, const HnswCandidate& entry_point, uint32_t level) const { HnswCandidate nearest = entry_point; bool keep_searching = true; @@ -192,7 +192,7 @@ HnswIndex::find_nearest_in_layer(const TypedCells& input, const HnswCandidate& e } void -HnswIndex::search_layer(const TypedCells& input, uint32_t neighbors_to_find, FurthestPriQ& best_neighbors, uint32_t level) +HnswIndex::search_layer(const TypedCells& input, uint32_t neighbors_to_find, FurthestPriQ& best_neighbors, uint32_t level) const { NearestPriQ candidates; // TODO: Add proper handling of visited set. @@ -319,7 +319,7 @@ struct NeighborsByDocId { }; std::vector<NearestNeighborIndex::Neighbor> -HnswIndex::find_top_k(uint32_t k, TypedCells vector, uint32_t explore_k) +HnswIndex::find_top_k(uint32_t k, TypedCells vector, uint32_t explore_k) const { std::vector<Neighbor> result; FurthestPriQ candidates = top_k_candidates(vector, std::max(k, explore_k)); @@ -335,7 +335,7 @@ HnswIndex::find_top_k(uint32_t k, TypedCells vector, uint32_t explore_k) } FurthestPriQ -HnswIndex::top_k_candidates(const TypedCells &vector, uint32_t k) +HnswIndex::top_k_candidates(const TypedCells &vector, uint32_t k) const { FurthestPriQ best_neighbors; if (_entry_level < 0) { diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h index ae9927be7a8..800b88923b5 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h @@ -123,8 +123,8 @@ protected: /** * Performs a greedy search in the given layer to find the candidate that is nearest the input vector. */ - HnswCandidate find_nearest_in_layer(const TypedCells& input, const HnswCandidate& entry_point, uint32_t level); - void search_layer(const TypedCells& input, uint32_t neighbors_to_find, FurthestPriQ& found_neighbors, uint32_t level); + HnswCandidate find_nearest_in_layer(const TypedCells& input, const HnswCandidate& entry_point, uint32_t level) const; + void search_layer(const TypedCells& input, uint32_t neighbors_to_find, FurthestPriQ& found_neighbors, uint32_t level) const; public: HnswIndex(const DocVectorAccess& vectors, DistanceFunction::UP distance_func, @@ -135,8 +135,8 @@ public: void add_document(uint32_t docid) override; void remove_document(uint32_t docid) override; - std::vector<Neighbor> find_top_k(uint32_t k, TypedCells vector, uint32_t explore_k) override; - FurthestPriQ top_k_candidates(const TypedCells &vector, uint32_t k); + std::vector<Neighbor> find_top_k(uint32_t k, TypedCells vector, uint32_t explore_k) const override; + FurthestPriQ top_k_candidates(const TypedCells &vector, uint32_t k) const; // TODO: Add support for generation handling and cleanup (transfer_hold_lists, trim_hold_lists) diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h index 57e8beff84e..f933af0147e 100644 --- a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h +++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h @@ -26,7 +26,7 @@ public: virtual void remove_document(uint32_t docid) = 0; virtual std::vector<Neighbor> find_top_k(uint32_t k, vespalib::tensor::TypedCells vector, - uint32_t explore_k) = 0; + uint32_t explore_k) const = 0; }; } |