aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArne Juul <arnej@verizonmedia.com>2020-02-20 13:42:40 +0000
committerArne Juul <arnej@verizonmedia.com>2020-02-20 14:25:24 +0000
commitcb8d51a519fe5328e22e9b081d8915286ee63482 (patch)
tree9ab41675cd4601a93315d169d4e6426ce6615394
parent3ab530c774be833992bbec327dfd43a5ee7fa33a (diff)
use NearestNeighborIndex when available
* update find_top_k API in NearestNeighborIndex to return vector with both docids and distances. * constify find_top_k * when possible, call find_top_k from NearestNeighborBlueprint, and use result from that call in iterator.
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp31
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h4
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp8
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.h8
-rw-r--r--searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h2
7 files changed, 45 insertions, 14 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
index 644230cb340..5089743a54a 100644
--- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -119,11 +119,11 @@ public:
auto vector = _vectors.get_vector(docid).typify<double>();
_removes.emplace_back(docid, DoubleVector(vector.begin(), vector.end()));
}
- std::vector<uint32_t> find_top_k(uint32_t k, vespalib::tensor::TypedCells vector, uint32_t explore_k) override {
+ std::vector<Neighbor> find_top_k(uint32_t k, vespalib::tensor::TypedCells vector, uint32_t explore_k) const override {
(void) k;
(void) vector;
(void) explore_k;
- return std::vector<uint32_t>();
+ return std::vector<Neighbor>();
}
};
diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp
index 8be6263221a..9a37c24aa7d 100644
--- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.cpp
@@ -3,6 +3,7 @@
#include "emptysearch.h"
#include "nearest_neighbor_blueprint.h"
#include "nearest_neighbor_iterator.h"
+#include "nns_index_iterator.h"
#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
#include <vespa/eval/tensor/dense/dense_tensor_view.h>
#include <vespa/searchlib/tensor/dense_tensor_attribute.h>
@@ -17,20 +18,46 @@ NearestNeighborBlueprint::NearestNeighborBlueprint(const queryeval::FieldSpec& f
_attr_tensor(attr_tensor),
_query_tensor(std::move(query_tensor)),
_target_num_hits(target_num_hits),
- _distance_heap(target_num_hits)
+ _distance_heap(target_num_hits),
+ _found_hits()
{
setEstimate(HitEstimate(_attr_tensor.getNumDocs(), false));
}
NearestNeighborBlueprint::~NearestNeighborBlueprint() = default;
+void
+NearestNeighborBlueprint::perform_top_k()
+{
+ auto nns_index = _attr_tensor.nearest_neighbor_index();
+ if (nns_index) {
+ auto lhs_type = _query_tensor->fast_type();
+ auto rhs_type = _attr_tensor.getTensorType();
+ if (lhs_type == rhs_type) {
+ auto lhs = _query_tensor->cellsRef();
+ uint32_t k = _target_num_hits;
+ uint32_t explore_k = k + 100; // XXX hardcoded for now
+ _found_hits = nns_index->find_top_k(k, lhs, explore_k);
+ }
+ }
+}
+
+void
+NearestNeighborBlueprint::fetchPostings(const ExecuteInfo &execInfo) {
+ if (execInfo.isStrict()) {
+ perform_top_k();
+ }
+}
+
std::unique_ptr<SearchIterator>
NearestNeighborBlueprint::createLeafSearch(const search::fef::TermFieldMatchDataArray& tfmda, bool strict) const
{
assert(tfmda.size() == 1);
fef::TermFieldMatchData &tfmd = *tfmda[0]; // always search in only one field
+ if (strict && ! _found_hits.empty()) {
+ return NnsIndexIterator::create(strict, tfmd, _found_hits);
+ }
const vespalib::tensor::DenseTensorView &qT = *_query_tensor;
-
return NearestNeighborIterator::create(strict, tfmd, qT, _attr_tensor, _distance_heap);
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h
index 019f8e31842..ab4413c487a 100644
--- a/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/nearest_neighbor_blueprint.h
@@ -3,6 +3,7 @@
#include "blueprint.h"
#include "nearest_neighbor_distance_heap.h"
+#include <vespa/searchlib/tensor/nearest_neighbor_index.h>
namespace vespalib::tensor { class DenseTensorView; }
namespace search::tensor { class DenseTensorAttribute; }
@@ -21,7 +22,9 @@ private:
std::unique_ptr<vespalib::tensor::DenseTensorView> _query_tensor;
uint32_t _target_num_hits;
mutable NearestNeighborDistanceHeap _distance_heap;
+ std::vector<search::tensor::NearestNeighborIndex::Neighbor> _found_hits;
+ void perform_top_k();
public:
NearestNeighborBlueprint(const queryeval::FieldSpec& field,
const tensor::DenseTensorAttribute& attr_tensor,
@@ -38,6 +41,7 @@ public:
bool strict) const override;
void visitMembers(vespalib::ObjectVisitor& visitor) const override;
bool always_needs_unpack() const override;
+ void fetchPostings(const ExecuteInfo &execInfo) override;
};
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.cpp b/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.cpp
index 222f02d1941..0cf1be63dab 100644
--- a/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.cpp
@@ -41,7 +41,7 @@ public:
} else {
_idx = _hits.size();
}
- }
+ }
setAtEnd();
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index 0d90e4e822a..0d308206761 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -174,7 +174,7 @@ HnswIndex::calc_distance(const TypedCells& lhs, uint32_t rhs_docid) const
}
HnswCandidate
-HnswIndex::find_nearest_in_layer(const TypedCells& input, const HnswCandidate& entry_point, uint32_t level)
+HnswIndex::find_nearest_in_layer(const TypedCells& input, const HnswCandidate& entry_point, uint32_t level) const
{
HnswCandidate nearest = entry_point;
bool keep_searching = true;
@@ -192,7 +192,7 @@ HnswIndex::find_nearest_in_layer(const TypedCells& input, const HnswCandidate& e
}
void
-HnswIndex::search_layer(const TypedCells& input, uint32_t neighbors_to_find, FurthestPriQ& best_neighbors, uint32_t level)
+HnswIndex::search_layer(const TypedCells& input, uint32_t neighbors_to_find, FurthestPriQ& best_neighbors, uint32_t level) const
{
NearestPriQ candidates;
// TODO: Add proper handling of visited set.
@@ -319,7 +319,7 @@ struct NeighborsByDocId {
};
std::vector<NearestNeighborIndex::Neighbor>
-HnswIndex::find_top_k(uint32_t k, TypedCells vector, uint32_t explore_k)
+HnswIndex::find_top_k(uint32_t k, TypedCells vector, uint32_t explore_k) const
{
std::vector<Neighbor> result;
FurthestPriQ candidates = top_k_candidates(vector, std::max(k, explore_k));
@@ -335,7 +335,7 @@ HnswIndex::find_top_k(uint32_t k, TypedCells vector, uint32_t explore_k)
}
FurthestPriQ
-HnswIndex::top_k_candidates(const TypedCells &vector, uint32_t k)
+HnswIndex::top_k_candidates(const TypedCells &vector, uint32_t k) const
{
FurthestPriQ best_neighbors;
if (_entry_level < 0) {
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
index ae9927be7a8..800b88923b5 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
@@ -123,8 +123,8 @@ protected:
/**
* Performs a greedy search in the given layer to find the candidate that is nearest the input vector.
*/
- HnswCandidate find_nearest_in_layer(const TypedCells& input, const HnswCandidate& entry_point, uint32_t level);
- void search_layer(const TypedCells& input, uint32_t neighbors_to_find, FurthestPriQ& found_neighbors, uint32_t level);
+ HnswCandidate find_nearest_in_layer(const TypedCells& input, const HnswCandidate& entry_point, uint32_t level) const;
+ void search_layer(const TypedCells& input, uint32_t neighbors_to_find, FurthestPriQ& found_neighbors, uint32_t level) const;
public:
HnswIndex(const DocVectorAccess& vectors, DistanceFunction::UP distance_func,
@@ -135,8 +135,8 @@ public:
void add_document(uint32_t docid) override;
void remove_document(uint32_t docid) override;
- std::vector<Neighbor> find_top_k(uint32_t k, TypedCells vector, uint32_t explore_k) override;
- FurthestPriQ top_k_candidates(const TypedCells &vector, uint32_t k);
+ std::vector<Neighbor> find_top_k(uint32_t k, TypedCells vector, uint32_t explore_k) const override;
+ FurthestPriQ top_k_candidates(const TypedCells &vector, uint32_t k) const;
// TODO: Add support for generation handling and cleanup (transfer_hold_lists, trim_hold_lists)
diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
index 57e8beff84e..f933af0147e 100644
--- a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
@@ -26,7 +26,7 @@ public:
virtual void remove_document(uint32_t docid) = 0;
virtual std::vector<Neighbor> find_top_k(uint32_t k,
vespalib::tensor::TypedCells vector,
- uint32_t explore_k) = 0;
+ uint32_t explore_k) const = 0;
};
}