summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorArne Juul <arnej@verizonmedia.com>2020-02-20 09:19:50 +0000
committerArne Juul <arnej@verizonmedia.com>2020-02-20 09:20:48 +0000
commit4b216edf783258ae9555ee32a12bd0b7fedc5ea1 (patch)
treed0e8823b4bbea2ebec505d259e345b10ee988d10 /searchlib
parent2a79ad8137963346ce8de808ec7287cb1c11dbd6 (diff)
add search iterator using result from find_top_k
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp6
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt1
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.cpp65
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.h21
-rw-r--r--searchlib/src/vespa/searchlib/tensor/CMakeLists.txt3
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp16
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.h2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.cpp3
-rw-r--r--searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h12
9 files changed, 120 insertions, 9 deletions
diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
index c6246bb8434..c481492f85f 100644
--- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
@@ -100,8 +100,10 @@ public:
if (exp_hits.size() == k) {
std::vector<uint32_t> expected_by_docid = exp_hits;
std::sort(expected_by_docid.begin(), expected_by_docid.end());
- std::vector<uint32_t> got_by_docid = index->find_top_k(k, qv, k);
- EXPECT_EQ(expected_by_docid, got_by_docid);
+ auto got_by_docid = index->find_top_k(k, qv, k);
+ for (idx = 0; idx < k; ++idx) {
+ EXPECT_EQ(expected_by_docid[idx], got_by_docid[idx].docid);
+ }
}
}
};
diff --git a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt
index de2919443ff..0dcb0393473 100644
--- a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt
@@ -32,6 +32,7 @@ vespa_add_library(searchlib_queryeval OBJECT
nearest_neighbor_blueprint.cpp
nearest_neighbor_iterator.cpp
nearsearch.cpp
+ nns_index_iterator.cpp
orsearch.cpp
predicate_blueprint.cpp
predicate_search.cpp
diff --git a/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.cpp b/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.cpp
new file mode 100644
index 00000000000..222f02d1941
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.cpp
@@ -0,0 +1,65 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "nns_index_iterator.h"
+#include <vespa/searchlib/tensor/nearest_neighbor_index.h>
+#include <cmath>
+
+using Hit = search::tensor::NearestNeighborIndex::Neighbor;
+
+namespace search::queryeval {
+
+class NeighborVectorIterator : public NnsIndexIterator
+{
+private:
+ fef::TermFieldMatchData &_tfmd;
+ const std::vector<Hit> &_hits;
+ uint32_t _idx;
+ double _last_sq_dist;
+public:
+ NeighborVectorIterator(fef::TermFieldMatchData &tfmd,
+ const std::vector<Hit> &hits)
+ : _tfmd(tfmd),
+ _hits(hits),
+ _idx(0),
+ _last_sq_dist(0.0)
+ {}
+
+ void initRange(uint32_t begin_id, uint32_t end_id) override {
+ SearchIterator::initRange(begin_id, end_id);
+ _idx = 0;
+ }
+
+ void doSeek(uint32_t docId) override {
+ while (_idx < _hits.size()) {
+ uint32_t hit_id = _hits[_idx].docid;
+ if (hit_id < docId) {
+ ++_idx;
+ } else if (hit_id < getEndId()) {
+ setDocId(hit_id);
+ _last_sq_dist = _hits[_idx].distance;
+ return;
+ } else {
+ _idx = _hits.size();
+ }
+ }
+ setAtEnd();
+ }
+
+ void doUnpack(uint32_t docId) override {
+ _tfmd.setRawScore(docId, sqrt(_last_sq_dist));
+ }
+
+ Trinary is_strict() const override { return Trinary::True; }
+};
+
+std::unique_ptr<NnsIndexIterator>
+NnsIndexIterator::create(
+ bool strict,
+ fef::TermFieldMatchData &tfmd,
+ const std::vector<Hit> &hits)
+{
+ assert(strict);
+ return std::make_unique<NeighborVectorIterator>(tfmd, hits);
+}
+
+} // namespace
diff --git a/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.h b/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.h
new file mode 100644
index 00000000000..62fa49aac46
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/queryeval/nns_index_iterator.h
@@ -0,0 +1,21 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "searchiterator.h"
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/tensor/nearest_neighbor_index.h>
+
+namespace search::queryeval {
+
+class NnsIndexIterator : public SearchIterator
+{
+public:
+ using Hit = search::tensor::NearestNeighborIndex::Neighbor;
+ static std::unique_ptr<NnsIndexIterator> create(
+ bool strict,
+ fef::TermFieldMatchData &tfmd,
+ const std::vector<Hit> &hits);
+};
+
+} // namespace
diff --git a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
index 9175168248c..7b8fbff4681 100644
--- a/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/tensor/CMakeLists.txt
@@ -5,12 +5,13 @@ vespa_add_library(searchlib_tensor OBJECT
dense_tensor_attribute_saver.cpp
dense_tensor_store.cpp
generic_tensor_attribute.cpp
+ generic_tensor_attribute_saver.cpp
generic_tensor_store.cpp
hnsw_index.cpp
imported_tensor_attribute_vector.cpp
imported_tensor_attribute_vector_read_guard.cpp
+ nearest_neighbor_index.cpp
tensor_attribute.cpp
- generic_tensor_attribute_saver.cpp
tensor_store.cpp
DEPENDS
)
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index be53b758841..dd4b6483290 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -310,19 +310,27 @@ HnswIndex::remove_document(uint32_t docid)
_node_refs[docid].store_release(invalid);
}
-std::vector<uint32_t>
+struct NeighborsByDocId {
+ bool operator() (const NearestNeighborIndex::Neighbor &lhs,
+ const NearestNeighborIndex::Neighbor &rhs)
+ {
+ return (lhs.docid < rhs.docid);
+ }
+};
+
+std::vector<NearestNeighborIndex::Neighbor>
HnswIndex::find_top_k(uint32_t k, TypedCells vector, uint32_t explore_k)
{
- std::vector<uint32_t> result;
+ std::vector<Neighbor> result;
FurthestPriQ candidates = top_k_candidates(vector, std::max(k, explore_k));
while (candidates.size() > k) {
candidates.pop();
}
result.reserve(candidates.size());
for (const HnswCandidate & hit : candidates.peek()) {
- result.emplace_back(hit.docid);
+ result.emplace_back(hit.docid, hit.distance);
}
- std::sort(result.begin(), result.end());
+ std::sort(result.begin(), result.end(), NeighborsByDocId());
return result;
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
index 814148072ca..af6f3b0e6a6 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
@@ -134,7 +134,7 @@ public:
void add_document(uint32_t docid) override;
void remove_document(uint32_t docid) override;
- std::vector<uint32_t> find_top_k(uint32_t k, TypedCells vector, uint32_t explore_k) override;
+ std::vector<Neighbor> find_top_k(uint32_t k, TypedCells vector, uint32_t explore_k) override;
FurthestPriQ top_k_candidates(const TypedCells &vector, uint32_t k);
// TODO: Add support for generation handling and cleanup (transfer_hold_lists, trim_hold_lists)
diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.cpp b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.cpp
new file mode 100644
index 00000000000..f31230af381
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.cpp
@@ -0,0 +1,3 @@
+// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "nearest_neighbor_index.h"
diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
index 2ae322fe76e..57e8beff84e 100644
--- a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index.h
@@ -13,10 +13,20 @@ namespace search::tensor {
*/
class NearestNeighborIndex {
public:
+ struct Neighbor {
+ uint32_t docid;
+ double distance;
+ Neighbor(uint32_t id, double dist)
+ : docid(id), distance(dist)
+ {}
+ Neighbor() : docid(0), distance(0.0) {}
+ };
virtual ~NearestNeighborIndex() {}
virtual void add_document(uint32_t docid) = 0;
virtual void remove_document(uint32_t docid) = 0;
- virtual std::vector<uint32_t> find_top_k(uint32_t k, vespalib::tensor::TypedCells vector, uint32_t explore_k) = 0;
+ virtual std::vector<Neighbor> find_top_k(uint32_t k,
+ vespalib::tensor::TypedCells vector,
+ uint32_t explore_k) = 0;
};
}