summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorGeir Storli <geirst@yahooinc.com>2022-10-26 13:49:08 +0200
committerGitHub <noreply@github.com>2022-10-26 13:49:08 +0200
commit5c279f0c7f0f22931be2226b2b9dfd82ce1da4e3 (patch)
tree40d2e570a57789db06778f892cb9ebc069525aff /searchlib
parent4bc54df04097788c49f31f1cfe1d0446c26d8c42 (diff)
parent9e7cd01437f96c8089a8bf242f7dc1aafd575143 (diff)
Merge pull request #24585 from vespa-engine/toregge/use-nodeid-instead-of-docid-to-identify-an-hnsw-graph-node
Use nodeid instead of docid to identify an HNSW graph node.
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp20
-rw-r--r--searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp34
-rw-r--r--searchlib/src/vespa/searchlib/tensor/bitvector_visited_tracker.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/bitvector_visited_tracker.h10
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hash_set_visited_tracker.h6
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp42
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_graph.h54
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp224
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.h34
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.h4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.hpp18
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp6
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.h2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h10
14 files changed, 242 insertions, 226 deletions
diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
index 9fccad1d2d4..3a50601abbd 100644
--- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
@@ -118,21 +118,21 @@ public:
CompactionStrategy compaction_strategy;
return index->update_stat(compaction_strategy);
}
- void expect_entry_point(uint32_t exp_docid, uint32_t exp_level) {
- EXPECT_EQ(exp_docid, index->get_entry_docid());
+ void expect_entry_point(uint32_t exp_nodeid, uint32_t exp_level) {
+ EXPECT_EQ(exp_nodeid, index->get_entry_nodeid());
EXPECT_EQ(exp_level, index->get_entry_level());
}
- void expect_level_0(uint32_t docid, const HnswNode::LinkArray& exp_links) {
- auto node = index->get_node(docid);
+ void expect_level_0(uint32_t nodeid, const HnswNode::LinkArray& exp_links) {
+ auto node = index->get_node(nodeid);
ASSERT_EQ(1, node.size());
EXPECT_EQ(exp_links, node.level(0));
}
- void expect_empty_level_0(uint32_t docid) {
- auto node = index->get_node(docid);
+ void expect_empty_level_0(uint32_t nodeid) {
+ auto node = index->get_node(nodeid);
EXPECT_TRUE(node.empty());
}
- void expect_levels(uint32_t docid, const HnswNode::LevelArray& exp_levels) {
- auto act_node = index->get_node(docid);
+ void expect_levels(uint32_t nodeid, const HnswNode::LevelArray& exp_levels) {
+ auto act_node = index->get_node(nodeid);
ASSERT_EQ(exp_levels.size(), act_node.size());
EXPECT_EQ(exp_levels, act_node.levels());
}
@@ -144,7 +144,7 @@ public:
size_t idx = 0;
for (const auto & hit : rv) {
if (idx < exp_hits.size()) {
- EXPECT_EQ(hit.docid, exp_hits[idx++]);
+ EXPECT_EQ(index->get_docid(hit.nodeid), exp_hits[idx++]);
}
}
if (exp_hits.size() == k) {
@@ -169,7 +169,7 @@ public:
? index->find_top_k_with_filter(k, qv, *global_filter, k, thr)
: index->find_top_k(k, qv, k, thr);
EXPECT_EQ(got_by_docid.size(), 1);
- EXPECT_EQ(got_by_docid[0].docid, rv[0].docid);
+ EXPECT_EQ(got_by_docid[0].docid, index->get_docid(rv[0].nodeid));
for (const auto & hit : got_by_docid) {
LOG(debug, "from docid=%u found docid=%u dist=%g (threshold %g)\n",
docid, hit.docid, hit.distance, thr);
diff --git a/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp b/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp
index add7801e03a..ea22aaabaff 100644
--- a/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp
+++ b/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp
@@ -52,12 +52,12 @@ using V = std::vector<uint32_t>;
void populate(HnswGraph &graph) {
// no 0
- graph.make_node_for_document(1, 1);
- auto er = graph.make_node_for_document(2, 2);
+ graph.make_node(1, 1);
+ auto er = graph.make_node(2, 2);
// no 3
- graph.make_node_for_document(4, 2);
- graph.make_node_for_document(5, 0);
- graph.make_node_for_document(6, 1);
+ graph.make_node(4, 2);
+ graph.make_node(5, 0);
+ graph.make_node(6, 1);
graph.set_link_array(1, 0, V{2, 4, 6});
graph.set_link_array(2, 0, V{1, 4, 6});
@@ -69,9 +69,9 @@ void populate(HnswGraph &graph) {
}
void modify(HnswGraph &graph) {
- graph.remove_node_for_document(2);
- graph.remove_node_for_document(6);
- graph.make_node_for_document(7, 2);
+ graph.remove_node(2);
+ graph.remove_node(6);
+ graph.make_node(7, 2);
graph.set_link_array(1, 0, V{7, 4});
graph.set_link_array(4, 0, V{7, 2});
@@ -88,24 +88,24 @@ public:
HnswGraph original;
HnswGraph copy;
- void expect_empty_d(uint32_t docid) const {
- EXPECT_FALSE(copy.acquire_node_ref(docid).valid());
+ void expect_empty_d(uint32_t nodeid) const {
+ EXPECT_FALSE(copy.acquire_node_ref(nodeid).valid());
}
- void expect_level_0(uint32_t docid, const V& exp_links) const {
- auto levels = copy.acquire_level_array(docid);
+ void expect_level_0(uint32_t nodeid, const V& exp_links) const {
+ auto levels = copy.acquire_level_array(nodeid);
EXPECT_GE(levels.size(), 1);
- auto links = copy.acquire_link_array(docid, 0);
+ auto links = copy.acquire_link_array(nodeid, 0);
EXPECT_EQ(exp_links.size(), links.size());
for (size_t i = 0; i < exp_links.size() && i < links.size(); ++i) {
EXPECT_EQ(exp_links[i], links[i]);
}
}
- void expect_level_1(uint32_t docid, const V& exp_links) const {
- auto levels = copy.acquire_level_array(docid);
+ void expect_level_1(uint32_t nodeid, const V& exp_links) const {
+ auto levels = copy.acquire_level_array(nodeid);
EXPECT_EQ(2, levels.size());
- auto links = copy.acquire_link_array(docid, 1);
+ auto links = copy.acquire_link_array(nodeid, 1);
EXPECT_EQ(exp_links.size(), links.size());
for (size_t i = 0; i < exp_links.size() && i < links.size(); ++i) {
EXPECT_EQ(exp_links[i], links[i]);
@@ -126,7 +126,7 @@ public:
void expect_copy_as_populated() const {
EXPECT_EQ(copy.size(), 7);
auto entry = copy.get_entry_node();
- EXPECT_EQ(entry.docid, 2);
+ EXPECT_EQ(entry.nodeid, 2);
EXPECT_EQ(entry.level, 1);
expect_empty_d(0);
diff --git a/searchlib/src/vespa/searchlib/tensor/bitvector_visited_tracker.cpp b/searchlib/src/vespa/searchlib/tensor/bitvector_visited_tracker.cpp
index 524cc2b9cd5..e456e87dc96 100644
--- a/searchlib/src/vespa/searchlib/tensor/bitvector_visited_tracker.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/bitvector_visited_tracker.cpp
@@ -4,8 +4,8 @@
namespace search::tensor {
-BitVectorVisitedTracker::BitVectorVisitedTracker(const HnswIndex&, uint32_t doc_id_limit, uint32_t)
- : _visited(doc_id_limit)
+BitVectorVisitedTracker::BitVectorVisitedTracker(const HnswIndex&, uint32_t nodeid_limit, uint32_t)
+ : _visited(nodeid_limit)
{
}
diff --git a/searchlib/src/vespa/searchlib/tensor/bitvector_visited_tracker.h b/searchlib/src/vespa/searchlib/tensor/bitvector_visited_tracker.h
index bbd49aa0482..ecdd957187f 100644
--- a/searchlib/src/vespa/searchlib/tensor/bitvector_visited_tracker.h
+++ b/searchlib/src/vespa/searchlib/tensor/bitvector_visited_tracker.h
@@ -16,14 +16,14 @@ class BitVectorVisitedTracker
{
search::AllocatedBitVector _visited;
public:
- BitVectorVisitedTracker(const HnswIndex&, uint32_t doc_id_limit, uint32_t);
+ BitVectorVisitedTracker(const HnswIndex&, uint32_t nodeid_limit, uint32_t);
~BitVectorVisitedTracker();
- void mark(uint32_t doc_id) { _visited.setBit(doc_id); }
- bool try_mark(uint32_t doc_id) {
- if (_visited.testBit(doc_id)) {
+ void mark(uint32_t nodeid) { _visited.setBit(nodeid); }
+ bool try_mark(uint32_t nodeid) {
+ if (_visited.testBit(nodeid)) {
return false;
} else {
- _visited.setBit(doc_id);
+ _visited.setBit(nodeid);
return true;
}
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hash_set_visited_tracker.h b/searchlib/src/vespa/searchlib/tensor/hash_set_visited_tracker.h
index c2a14235f47..30434d1029d 100644
--- a/searchlib/src/vespa/searchlib/tensor/hash_set_visited_tracker.h
+++ b/searchlib/src/vespa/searchlib/tensor/hash_set_visited_tracker.h
@@ -18,9 +18,9 @@ class HashSetVisitedTracker
public:
HashSetVisitedTracker(const HnswIndex&, uint32_t, uint32_t estimated_visited_nodes);
~HashSetVisitedTracker();
- void mark(uint32_t doc_id) { _visited.insert(doc_id); }
- bool try_mark(uint32_t doc_id) {
- return _visited.insert(doc_id).second;
+ void mark(uint32_t nodeid) { _visited.insert(nodeid); }
+ bool try_mark(uint32_t nodeid) {
+ return _visited.insert(nodeid).second;
}
};
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp
index 56af2ed3b35..6d8b143cff9 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp
@@ -11,7 +11,7 @@ HnswGraph::HnswGraph()
node_refs_size(1u),
nodes(HnswIndex::make_default_node_store_config(), {}),
links(HnswIndex::make_default_link_store_config(), {}),
- entry_docid_and_level()
+ entry_nodeid_and_level()
{
node_refs.ensure_size(1, AtomicEntryRef());
EntryNode entry;
@@ -21,36 +21,36 @@ HnswGraph::HnswGraph()
HnswGraph::~HnswGraph() = default;
HnswGraph::NodeRef
-HnswGraph::make_node_for_document(uint32_t docid, uint32_t num_levels)
+HnswGraph::make_node(uint32_t nodeid, uint32_t num_levels)
{
- node_refs.ensure_size(docid + 1, AtomicEntryRef());
+ node_refs.ensure_size(nodeid + 1, AtomicEntryRef());
// A document cannot be added twice.
- assert(!get_node_ref(docid).valid());
+ assert(!get_node_ref(nodeid).valid());
// Note: The level array instance lives as long as the document is present in the index.
std::vector<AtomicEntryRef> levels(num_levels, AtomicEntryRef());
auto node_ref = nodes.add(levels);
- node_refs[docid].store_release(node_ref);
- if (docid >= node_refs_size.load(std::memory_order_relaxed)) {
- node_refs_size.store(docid + 1, std::memory_order_release);
+ node_refs[nodeid].store_release(node_ref);
+ if (nodeid >= node_refs_size.load(std::memory_order_relaxed)) {
+ node_refs_size.store(nodeid + 1, std::memory_order_release);
}
return node_ref;
}
void
-HnswGraph::remove_node_for_document(uint32_t docid)
+HnswGraph::remove_node(uint32_t nodeid)
{
- auto node_ref = get_node_ref(docid);
+ auto node_ref = get_node_ref(nodeid);
assert(node_ref.valid());
auto levels = nodes.get(node_ref);
vespalib::datastore::EntryRef invalid;
- node_refs[docid].store_release(invalid);
+ node_refs[nodeid].store_release(invalid);
// Ensure data referenced through the old ref can be recycled:
nodes.remove(node_ref);
for (size_t i = 0; i < levels.size(); ++i) {
auto old_links_ref = levels[i].load_relaxed();
links.remove(old_links_ref);
}
- if (docid + 1 == node_refs_size.load(std::memory_order_relaxed)) {
+ if (nodeid + 1 == node_refs_size.load(std::memory_order_relaxed)) {
trim_node_refs_size();
}
}
@@ -58,18 +58,18 @@ HnswGraph::remove_node_for_document(uint32_t docid)
void
HnswGraph::trim_node_refs_size()
{
- uint32_t check_doc_id = node_refs_size.load(std::memory_order_relaxed) - 1;
- while (check_doc_id > 0u && !get_node_ref(check_doc_id).valid()) {
- --check_doc_id;
+ uint32_t check_nodeid = node_refs_size.load(std::memory_order_relaxed) - 1;
+ while (check_nodeid > 0u && !get_node_ref(check_nodeid).valid()) {
+ --check_nodeid;
}
- node_refs_size.store(check_doc_id + 1, std::memory_order_release);
+ node_refs_size.store(check_nodeid + 1, std::memory_order_release);
}
void
-HnswGraph::set_link_array(uint32_t docid, uint32_t level, const LinkArrayRef& new_links)
+HnswGraph::set_link_array(uint32_t nodeid, uint32_t level, const LinkArrayRef& new_links)
{
auto new_links_ref = links.add(new_links);
- auto node_ref = get_node_ref(docid);
+ auto node_ref = get_node_ref(nodeid);
assert(node_ref.valid());
auto levels = nodes.get_writable(node_ref);
assert(level < levels.size());
@@ -112,15 +112,15 @@ void
HnswGraph::set_entry_node(EntryNode node) {
uint64_t value = node.level;
value <<= 32;
- value |= node.docid;
+ value |= node.nodeid;
if (node.node_ref.valid()) {
assert(node.level >= 0);
- assert(node.docid > 0);
+ assert(node.nodeid > 0);
} else {
assert(node.level == -1);
- assert(node.docid == 0);
+ assert(node.nodeid == 0);
}
- entry_docid_and_level.store(value, std::memory_order_release);
+ entry_nodeid_and_level.store(value, std::memory_order_release);
}
} // namespace
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
index 9e3850bd470..60ba7d55f7f 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
@@ -43,27 +43,27 @@ struct HnswGraph {
NodeStore nodes;
LinkStore links;
- std::atomic<uint64_t> entry_docid_and_level;
+ std::atomic<uint64_t> entry_nodeid_and_level;
HnswGraph();
~HnswGraph();
- NodeRef make_node_for_document(uint32_t docid, uint32_t num_levels);
+ NodeRef make_node(uint32_t nodeid, uint32_t num_levels);
- void remove_node_for_document(uint32_t docid);
+ void remove_node(uint32_t nodeid);
void trim_node_refs_size();
- NodeRef get_node_ref(uint32_t docid) const {
- return node_refs.get_elem_ref(docid).load_relaxed(); // Called from writer only
+ NodeRef get_node_ref(uint32_t nodeid) const {
+ return node_refs.get_elem_ref(nodeid).load_relaxed(); // Called from writer only
}
- NodeRef acquire_node_ref(uint32_t docid) const {
- return node_refs.acquire_elem_ref(docid).load_acquire();
+ NodeRef acquire_node_ref(uint32_t nodeid) const {
+ return node_refs.acquire_elem_ref(nodeid).load_acquire();
}
- bool still_valid(uint32_t docid, NodeRef node_ref) const {
- return node_ref.valid() && (acquire_node_ref(docid) == node_ref);
+ bool still_valid(uint32_t nodeid, NodeRef node_ref) const {
+ return node_ref.valid() && (acquire_node_ref(nodeid) == node_ref);
}
LevelArrayRef get_level_array(NodeRef node_ref) const {
@@ -73,13 +73,13 @@ struct HnswGraph {
return LevelArrayRef();
}
- LevelArrayRef get_level_array(uint32_t docid) const {
- auto node_ref = get_node_ref(docid);
+ LevelArrayRef get_level_array(uint32_t nodeid) const {
+ auto node_ref = get_node_ref(nodeid);
return get_level_array(node_ref);
}
- LevelArrayRef acquire_level_array(uint32_t docid) const {
- auto node_ref = acquire_node_ref(docid);
+ LevelArrayRef acquire_level_array(uint32_t nodeid) const {
+ auto node_ref = acquire_node_ref(nodeid);
return get_level_array(node_ref);
}
@@ -93,13 +93,13 @@ struct HnswGraph {
return LinkArrayRef();
}
- LinkArrayRef get_link_array(uint32_t docid, uint32_t level) const {
- auto levels = get_level_array(docid);
+ LinkArrayRef get_link_array(uint32_t nodeid, uint32_t level) const {
+ auto levels = get_level_array(nodeid);
return get_link_array(levels, level);
}
- LinkArrayRef acquire_link_array(uint32_t docid, uint32_t level) const {
- auto levels = acquire_level_array(docid);
+ LinkArrayRef acquire_link_array(uint32_t nodeid, uint32_t level) const {
+ auto levels = acquire_level_array(nodeid);
return get_link_array(levels, level);
}
@@ -108,19 +108,19 @@ struct HnswGraph {
return get_link_array(levels, level);
}
- void set_link_array(uint32_t docid, uint32_t level, const LinkArrayRef& new_links);
+ void set_link_array(uint32_t nodeid, uint32_t level, const LinkArrayRef& new_links);
struct EntryNode {
- uint32_t docid;
+ uint32_t nodeid;
NodeRef node_ref;
int32_t level;
EntryNode()
- : docid(0), // Note that docid 0 is reserved and never used
+ : nodeid(0), // Note that nodeid 0 is reserved and never used
node_ref(),
level(-1)
{}
- EntryNode(uint32_t docid_in, NodeRef node_ref_in, int32_t level_in)
- : docid(docid_in),
+ EntryNode(uint32_t nodeid_in, NodeRef node_ref_in, int32_t level_in)
+ : nodeid(nodeid_in),
node_ref(node_ref_in),
level(level_in)
{}
@@ -129,24 +129,24 @@ struct HnswGraph {
void set_entry_node(EntryNode node);
uint64_t get_entry_atomic() const {
- return entry_docid_and_level.load(std::memory_order_acquire);
+ return entry_nodeid_and_level.load(std::memory_order_acquire);
}
EntryNode get_entry_node() const {
EntryNode entry;
while (true) {
uint64_t value = get_entry_atomic();
- entry.docid = (uint32_t)value;
- entry.node_ref = acquire_node_ref(entry.docid);
+ entry.nodeid = (uint32_t)value;
+ entry.node_ref = acquire_node_ref(entry.nodeid);
entry.level = (int32_t)(value >> 32);
- if ((entry.docid == 0)
+ if ((entry.nodeid == 0)
&& (entry.level == -1)
&& ! entry.node_ref.valid())
{
// invalid in every way
return entry;
}
- if ((entry.docid > 0)
+ if ((entry.nodeid > 0)
&& (entry.level > -1)
&& entry.node_ref.valid()
&& (get_entry_atomic() == value))
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index a10ff1c48dc..89b4f62146c 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -86,7 +86,7 @@ bool
HnswIndex::have_closer_distance(HnswCandidate candidate, const HnswCandidateVector& result) const
{
for (const auto & neighbor : result) {
- double dist = calc_distance(candidate.docid, neighbor.docid);
+ double dist = calc_distance(candidate.nodeid, neighbor.nodeid);
if (dist < candidate.distance) {
return true;
}
@@ -104,7 +104,7 @@ HnswIndex::select_neighbors_simple(const HnswCandidateVector& neighbors, uint32_
if (result.used.size() < max_links) {
result.used.push_back(candidate);
} else {
- result.unused.push_back(candidate.docid);
+ result.unused.push_back(candidate.nodeid);
}
}
return result;
@@ -122,7 +122,7 @@ HnswIndex::select_neighbors_heuristic(const HnswCandidateVector& neighbors, uint
auto candidate = nearest.top();
nearest.pop();
if (have_closer_distance(candidate, result.used)) {
- result.unused.push_back(candidate.docid);
+ result.unused.push_back(candidate.nodeid);
continue;
}
result.used.push_back(candidate);
@@ -130,7 +130,7 @@ HnswIndex::select_neighbors_heuristic(const HnswCandidateVector& neighbors, uint
while (!nearest.empty()) {
candidate = nearest.top();
nearest.pop();
- result.unused.push_back(candidate.docid);
+ result.unused.push_back(candidate.nodeid);
}
}
}
@@ -148,40 +148,40 @@ HnswIndex::select_neighbors(const HnswCandidateVector& neighbors, uint32_t max_l
}
void
-HnswIndex::shrink_if_needed(uint32_t docid, uint32_t level)
+HnswIndex::shrink_if_needed(uint32_t nodeid, uint32_t level)
{
- auto old_links = _graph.get_link_array(docid, level);
+ auto old_links = _graph.get_link_array(nodeid, level);
uint32_t max_links = max_links_for_level(level);
if (old_links.size() > max_links) {
HnswCandidateVector neighbors;
neighbors.reserve(old_links.size());
- for (uint32_t neighbor_docid : old_links) {
- double dist = calc_distance(docid, neighbor_docid);
- neighbors.emplace_back(neighbor_docid, dist);
+ for (uint32_t neighbor_nodeid : old_links) {
+ double dist = calc_distance(nodeid, neighbor_nodeid);
+ neighbors.emplace_back(neighbor_nodeid, dist);
}
auto split = select_neighbors(neighbors, max_links);
LinkArray new_links;
new_links.reserve(split.used.size());
for (const auto & neighbor : split.used) {
- new_links.push_back(neighbor.docid);
+ new_links.push_back(neighbor.nodeid);
}
- _graph.set_link_array(docid, level, new_links);
- for (uint32_t removed_docid : split.unused) {
- remove_link_to(removed_docid, docid, level);
+ _graph.set_link_array(nodeid, level, new_links);
+ for (uint32_t removed_nodeid : split.unused) {
+ remove_link_to(removed_nodeid, nodeid, level);
}
}
}
void
-HnswIndex::connect_new_node(uint32_t docid, const LinkArrayRef &neighbors, uint32_t level)
+HnswIndex::connect_new_node(uint32_t nodeid, const LinkArrayRef &neighbors, uint32_t level)
{
- _graph.set_link_array(docid, level, neighbors);
- for (uint32_t neighbor_docid : neighbors) {
- auto old_links = _graph.get_link_array(neighbor_docid, level);
- add_link_to(neighbor_docid, level, old_links, docid);
+ _graph.set_link_array(nodeid, level, neighbors);
+ for (uint32_t neighbor_nodeid : neighbors) {
+ auto old_links = _graph.get_link_array(neighbor_nodeid, level);
+ add_link_to(neighbor_nodeid, level, old_links, nodeid);
}
- for (uint32_t neighbor_docid : neighbors) {
- shrink_if_needed(neighbor_docid, level);
+ for (uint32_t neighbor_nodeid : neighbors) {
+ shrink_if_needed(neighbor_nodeid, level);
}
}
@@ -199,38 +199,38 @@ HnswIndex::remove_link_to(uint32_t remove_from, uint32_t remove_id, uint32_t lev
double
-HnswIndex::calc_distance(uint32_t lhs_docid, uint32_t rhs_docid) const
+HnswIndex::calc_distance(uint32_t lhs_nodeid, uint32_t rhs_nodeid) const
{
- auto lhs = get_vector(lhs_docid);
- return calc_distance(lhs, rhs_docid);
+ auto lhs = get_vector(lhs_nodeid);
+ return calc_distance(lhs, rhs_nodeid);
}
double
-HnswIndex::calc_distance(const TypedCells& lhs, uint32_t rhs_docid) const
+HnswIndex::calc_distance(const TypedCells& lhs, uint32_t rhs_nodeid) const
{
- auto rhs = get_vector(rhs_docid);
+ auto rhs = get_vector(rhs_nodeid);
return _distance_func->calc(lhs, rhs);
}
uint32_t
-HnswIndex::estimate_visited_nodes(uint32_t level, uint32_t doc_id_limit, uint32_t neighbors_to_find, const GlobalFilter* filter) const
+HnswIndex::estimate_visited_nodes(uint32_t level, uint32_t nodeid_limit, uint32_t neighbors_to_find, const GlobalFilter* filter) const
{
uint32_t m_for_level = max_links_for_level(level);
uint64_t base_estimate = uint64_t(m_for_level) * neighbors_to_find + 100;
- if (base_estimate >= doc_id_limit) {
- return doc_id_limit;
+ if (base_estimate >= nodeid_limit) {
+ return nodeid_limit;
}
if (!filter) {
return base_estimate;
}
uint32_t true_bits = filter->count();
if (true_bits == 0) {
- return doc_id_limit;
+ return nodeid_limit;
}
double scaler = double(filter->size()) / true_bits;
double scaled_estimate = scaler * base_estimate;
- if (scaled_estimate >= doc_id_limit) {
- return doc_id_limit;
+ if (scaled_estimate >= nodeid_limit) {
+ return nodeid_limit;
}
return scaled_estimate;
}
@@ -242,13 +242,13 @@ HnswIndex::find_nearest_in_layer(const TypedCells& input, const HnswCandidate& e
bool keep_searching = true;
while (keep_searching) {
keep_searching = false;
- for (uint32_t neighbor_docid : _graph.get_link_array(nearest.node_ref, level)) {
- auto neighbor_ref = _graph.acquire_node_ref(neighbor_docid);
- double dist = calc_distance(input, neighbor_docid);
- if (_graph.still_valid(neighbor_docid, neighbor_ref)
+ for (uint32_t neighbor_nodeid : _graph.get_link_array(nearest.node_ref, level)) {
+ auto neighbor_ref = _graph.acquire_node_ref(neighbor_nodeid);
+ double dist = calc_distance(input, neighbor_nodeid);
+ if (_graph.still_valid(neighbor_nodeid, neighbor_ref)
&& dist < nearest.distance)
{
- nearest = HnswCandidate(neighbor_docid, neighbor_ref, dist);
+ nearest = HnswCandidate(neighbor_nodeid, neighbor_ref, dist);
keep_searching = true;
}
}
@@ -260,17 +260,17 @@ template <class VisitedTracker>
void
HnswIndex::search_layer_helper(const TypedCells& input, uint32_t neighbors_to_find,
FurthestPriQ& best_neighbors, uint32_t level, const GlobalFilter *filter,
- uint32_t doc_id_limit, uint32_t estimated_visited_nodes) const
+ uint32_t nodeid_limit, uint32_t estimated_visited_nodes) const
{
NearestPriQ candidates;
- VisitedTracker visited(*this, doc_id_limit, estimated_visited_nodes);
+ VisitedTracker visited(*this, nodeid_limit, estimated_visited_nodes);
for (const auto &entry : best_neighbors.peek()) {
- if (entry.docid >= doc_id_limit) {
+ if (entry.nodeid >= nodeid_limit) {
continue;
}
candidates.push(entry);
- visited.mark(entry.docid);
- if (filter && !filter->check(entry.docid)) {
+ visited.mark(entry.nodeid);
+ if (filter && !filter->check(entry.nodeid)) {
assert(best_neighbors.size() == 1);
best_neighbors.pop();
}
@@ -283,21 +283,21 @@ HnswIndex::search_layer_helper(const TypedCells& input, uint32_t neighbors_to_fi
break;
}
candidates.pop();
- for (uint32_t neighbor_docid : _graph.get_link_array(cand.node_ref, level)) {
- if (neighbor_docid >= doc_id_limit) {
+ for (uint32_t neighbor_nodeid : _graph.get_link_array(cand.node_ref, level)) {
+ if (neighbor_nodeid >= nodeid_limit) {
continue;
}
- auto neighbor_ref = _graph.acquire_node_ref(neighbor_docid);
+ auto neighbor_ref = _graph.acquire_node_ref(neighbor_nodeid);
if ((! neighbor_ref.valid())
- || ! visited.try_mark(neighbor_docid))
+ || ! visited.try_mark(neighbor_nodeid))
{
continue;
}
- double dist_to_input = calc_distance(input, neighbor_docid);
+ double dist_to_input = calc_distance(input, neighbor_nodeid);
if (dist_to_input < limit_dist) {
- candidates.emplace(neighbor_docid, neighbor_ref, dist_to_input);
- if ((!filter) || filter->check(neighbor_docid)) {
- best_neighbors.emplace(neighbor_docid, neighbor_ref, dist_to_input);
+ candidates.emplace(neighbor_nodeid, neighbor_ref, dist_to_input);
+ if ((!filter) || filter->check(neighbor_nodeid)) {
+ best_neighbors.emplace(neighbor_nodeid, neighbor_ref, dist_to_input);
if (best_neighbors.size() > neighbors_to_find) {
best_neighbors.pop();
limit_dist = best_neighbors.top().distance;
@@ -312,15 +312,15 @@ void
HnswIndex::search_layer(const TypedCells& input, uint32_t neighbors_to_find,
FurthestPriQ& best_neighbors, uint32_t level, const GlobalFilter *filter) const
{
- uint32_t doc_id_limit = _graph.node_refs_size.load(std::memory_order_acquire);
+ uint32_t nodeid_limit = _graph.node_refs_size.load(std::memory_order_acquire);
if (filter) {
- doc_id_limit = std::min(filter->size(), doc_id_limit);
+ nodeid_limit = std::min(filter->size(), nodeid_limit);
}
- uint32_t estimated_visited_nodes = estimate_visited_nodes(level, doc_id_limit, neighbors_to_find, filter);
- if (estimated_visited_nodes >= doc_id_limit / 128) {
- search_layer_helper<BitVectorVisitedTracker>(input, neighbors_to_find, best_neighbors, level, filter, doc_id_limit, estimated_visited_nodes);
+ uint32_t estimated_visited_nodes = estimate_visited_nodes(level, nodeid_limit, neighbors_to_find, filter);
+ if (estimated_visited_nodes >= nodeid_limit / 128) {
+ search_layer_helper<BitVectorVisitedTracker>(input, neighbors_to_find, best_neighbors, level, filter, nodeid_limit, estimated_visited_nodes);
} else {
- search_layer_helper<HashSetVisitedTracker>(input, neighbors_to_find, best_neighbors, level, filter, doc_id_limit, estimated_visited_nodes);
+ search_layer_helper<HashSetVisitedTracker>(input, neighbors_to_find, best_neighbors, level, filter, nodeid_limit, estimated_visited_nodes);
}
}
@@ -342,7 +342,7 @@ void
HnswIndex::add_document(uint32_t docid)
{
vespalib::GenerationHandler::Guard no_guard_needed;
- PreparedAddDoc op = internal_prepare_add(docid, get_vector(docid), no_guard_needed);
+ PreparedAddDoc op = internal_prepare_add(docid, get_vector_by_docid(docid), no_guard_needed);
internal_complete_add(docid, op);
}
@@ -353,14 +353,14 @@ HnswIndex::internal_prepare_add(uint32_t docid, TypedCells input_vector, vespali
int level = _level_generator->max_level();
PreparedAddDoc op(docid, level, std::move(read_guard));
auto entry = _graph.get_entry_node();
- if (entry.docid == 0) {
+ if (entry.nodeid == 0) {
// graph has no entry point
return op;
}
int search_level = entry.level;
- double entry_dist = calc_distance(input_vector, entry.docid);
- // TODO: check if entry docid/node_ref is still valid here
- HnswCandidate entry_point(entry.docid, entry.node_ref, entry_dist);
+ double entry_dist = calc_distance(input_vector, entry.nodeid);
+ // TODO: check if entry nodeid/node_ref is still valid here
+ HnswCandidate entry_point(entry.nodeid, entry.node_ref, entry_dist);
while (search_level > op.max_level) {
entry_point = find_nearest_in_layer(input_vector, entry_point, search_level);
--search_level;
@@ -378,10 +378,10 @@ HnswIndex::internal_prepare_add(uint32_t docid, TypedCells input_vector, vespali
for (const auto & neighbor : neighbors.used) {
auto neighbor_levels = _graph.get_level_array(neighbor.node_ref);
if (size_t(search_level) < neighbor_levels.size()) {
- op.connections[search_level].emplace_back(neighbor.docid, neighbor.node_ref);
+ op.connections[search_level].emplace_back(neighbor.nodeid, neighbor.node_ref);
} else {
LOG(warning, "in prepare_add(%u), selected neighbor %u is missing level %d (has %zu levels)",
- docid, neighbor.docid, search_level, neighbor_levels.size());
+ docid, neighbor.nodeid, search_level, neighbor_levels.size());
}
}
--search_level;
@@ -390,18 +390,18 @@ HnswIndex::internal_prepare_add(uint32_t docid, TypedCells input_vector, vespali
}
HnswIndex::LinkArray
-HnswIndex::filter_valid_docids(uint32_t level, const PreparedAddDoc::Links &neighbors, uint32_t self_docid)
+HnswIndex::filter_valid_nodeids(uint32_t level, const PreparedAddDoc::Links &neighbors, uint32_t self_nodeid)
{
LinkArray valid;
valid.reserve(neighbors.size());
for (const auto & neighbor : neighbors) {
- uint32_t docid = neighbor.first;
+ uint32_t nodeid = neighbor.first;
HnswGraph::NodeRef node_ref = neighbor.second;
- if (_graph.still_valid(docid, node_ref)) {
- assert(docid != self_docid);
+ if (_graph.still_valid(nodeid, node_ref)) {
+ assert(nodeid != self_nodeid);
auto levels = _graph.get_level_array(node_ref);
if (level < levels.size()) {
- valid.push_back(docid);
+ valid.push_back(nodeid);
}
}
}
@@ -411,13 +411,14 @@ HnswIndex::filter_valid_docids(uint32_t level, const PreparedAddDoc::Links &neig
void
HnswIndex::internal_complete_add(uint32_t docid, PreparedAddDoc &op)
{
- auto node_ref = _graph.make_node_for_document(docid, op.max_level + 1);
+ auto nodeid = get_nodeid(docid);
+ auto node_ref = _graph.make_node(nodeid, op.max_level + 1);
for (int level = 0; level <= op.max_level; ++level) {
- auto neighbors = filter_valid_docids(level, op.connections[level], docid);
- connect_new_node(docid, neighbors, level);
+ auto neighbors = filter_valid_nodeids(level, op.connections[level], nodeid);
+ connect_new_node(nodeid, neighbors, level);
}
if (op.max_level > get_entry_level()) {
- _graph.set_entry_node({docid, node_ref, op.max_level});
+ _graph.set_entry_node({nodeid, node_ref, op.max_level});
}
}
@@ -480,19 +481,19 @@ HnswIndex::mutual_reconnect(const LinkArrayRef &cluster, uint32_t level)
}
void
-HnswIndex::remove_document(uint32_t docid)
+HnswIndex::remove_node(uint32_t nodeid)
{
- bool need_new_entrypoint = (docid == get_entry_docid());
- LevelArrayRef node_levels = _graph.get_level_array(docid);
+ bool need_new_entrypoint = (nodeid == get_entry_nodeid());
+ LevelArrayRef node_levels = _graph.get_level_array(nodeid);
for (int level = node_levels.size(); level-- > 0; ) {
- LinkArrayRef my_links = _graph.get_link_array(docid, level);
+ LinkArrayRef my_links = _graph.get_link_array(nodeid, level);
for (uint32_t neighbor_id : my_links) {
if (need_new_entrypoint) {
auto entry_node_ref = _graph.get_node_ref(neighbor_id);
_graph.set_entry_node({neighbor_id, entry_node_ref, level});
need_new_entrypoint = false;
}
- remove_link_to(neighbor_id, docid, level);
+ remove_link_to(neighbor_id, nodeid, level);
}
mutual_reconnect(my_links, level);
}
@@ -500,7 +501,14 @@ HnswIndex::remove_document(uint32_t docid)
HnswGraph::EntryNode entry;
_graph.set_entry_node(entry);
}
- _graph.remove_node_for_document(docid);
+ _graph.remove_node(nodeid);
+}
+
+void
+HnswIndex::remove_document(uint32_t docid)
+{
+ auto nodeid = get_nodeid(docid);
+ remove_node(nodeid);
}
void
@@ -525,8 +533,8 @@ void
HnswIndex::compact_level_arrays(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy)
{
auto context = _graph.nodes.compactWorst(compaction_spec, compaction_strategy);
- uint32_t doc_id_limit = _graph.node_refs.size();
- vespalib::ArrayRef<AtomicEntryRef> refs(&_graph.node_refs[0], doc_id_limit);
+ uint32_t nodeid_limit = _graph.node_refs.size();
+ vespalib::ArrayRef<AtomicEntryRef> refs(&_graph.node_refs[0], nodeid_limit);
context->compact(refs);
}
@@ -534,9 +542,9 @@ void
HnswIndex::compact_link_arrays(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy)
{
auto context = _graph.links.compactWorst(compaction_spec, compaction_strategy);
- uint32_t doc_id_limit = _graph.node_refs.size();
- for (uint32_t doc_id = 1; doc_id < doc_id_limit; ++doc_id) {
- EntryRef level_ref = _graph.get_node_ref(doc_id);
+ uint32_t nodeid_limit = _graph.node_refs.size();
+ for (uint32_t nodeid = 1; nodeid < nodeid_limit; ++nodeid) {
+ EntryRef level_ref = _graph.get_node_ref(nodeid);
if (level_ref.valid()) {
vespalib::ArrayRef<AtomicEntryRef> refs(_graph.nodes.get_writable(level_ref));
context->compact(refs);
@@ -640,7 +648,7 @@ HnswIndex::get_state(const vespalib::slime::Inserter& inserter) const
object.setLong("unreachable_nodes_incomplete_count", unreachable);
}
auto entry_node = _graph.get_entry_node();
- object.setLong("entry_docid", entry_node.docid);
+ object.setLong("entry_nodeid", entry_node.nodeid);
object.setLong("entry_level", entry_node.level);
auto& cfgObj = object.setObject("cfg");
cfgObj.setLong("max_links_at_level_0", _cfg.max_links_at_level_0());
@@ -670,7 +678,7 @@ HnswIndex::make_saver() const
std::unique_ptr<NearestNeighborIndexLoader>
HnswIndex::make_loader(FastOS_FileInterface& file)
{
- assert(get_entry_docid() == 0); // cannot load after index has data
+ assert(get_entry_nodeid() == 0); // cannot load after index has data
using ReaderType = FileReader<uint32_t>;
using LoaderType = HnswIndexLoader<ReaderType>;
return std::make_unique<LoaderType>(_graph, std::make_unique<ReaderType>(&file));
@@ -697,7 +705,7 @@ HnswIndex::top_k_by_docid(uint32_t k, TypedCells vector,
result.reserve(candidates.size());
for (const HnswCandidate & hit : candidates.peek()) {
if (hit.distance > distance_threshold) continue;
- result.emplace_back(hit.docid, hit.distance);
+ result.emplace_back(get_docid(hit.nodeid), hit.distance);
}
std::sort(result.begin(), result.end(), NeighborsByDocId());
return result;
@@ -723,14 +731,14 @@ HnswIndex::top_k_candidates(const TypedCells &vector, uint32_t k, const GlobalFi
{
FurthestPriQ best_neighbors;
auto entry = _graph.get_entry_node();
- if (entry.docid == 0) {
+ if (entry.nodeid == 0) {
// graph has no entry point
return best_neighbors;
}
int search_level = entry.level;
- double entry_dist = calc_distance(vector, entry.docid);
+ double entry_dist = calc_distance(vector, entry.nodeid);
// TODO: check if entry docid/node_ref is still valid here
- HnswCandidate entry_point(entry.docid, entry.node_ref, entry_dist);
+ HnswCandidate entry_point(entry.nodeid, entry.node_ref, entry_dist);
while (search_level > 0) {
entry_point = find_nearest_in_layer(vector, entry_point, search_level);
--search_level;
@@ -741,9 +749,9 @@ HnswIndex::top_k_candidates(const TypedCells &vector, uint32_t k, const GlobalFi
}
HnswNode
-HnswIndex::get_node(uint32_t docid) const
+HnswIndex::get_node(uint32_t nodeid) const
{
- auto node_ref = _graph.acquire_node_ref(docid);
+ auto node_ref = _graph.acquire_node_ref(nodeid);
if (!node_ref.valid()) {
return HnswNode();
}
@@ -759,17 +767,17 @@ HnswIndex::get_node(uint32_t docid) const
}
void
-HnswIndex::set_node(uint32_t docid, const HnswNode &node)
+HnswIndex::set_node(uint32_t nodeid, const HnswNode &node)
{
size_t num_levels = node.size();
assert(num_levels > 0);
- auto node_ref = _graph.make_node_for_document(docid, num_levels);
+ auto node_ref = _graph.make_node(nodeid, num_levels);
for (size_t level = 0; level < num_levels; ++level) {
- connect_new_node(docid, node.level(level), level);
+ connect_new_node(nodeid, node.level(level), level);
}
int max_level = num_levels - 1;
if (get_entry_level() < max_level) {
- _graph.set_entry_node({docid, node_ref, max_level});
+ _graph.set_entry_node({nodeid, node_ref, max_level});
}
}
@@ -777,20 +785,20 @@ bool
HnswIndex::check_link_symmetry() const
{
bool all_sym = true;
- size_t doc_id_limit = _graph.size();
- for (size_t docid = 0; docid < doc_id_limit; ++docid) {
- auto node_ref = _graph.acquire_node_ref(docid);
+ size_t nodeid_limit = _graph.size();
+ for (size_t nodeid = 0; nodeid < nodeid_limit; ++nodeid) {
+ auto node_ref = _graph.acquire_node_ref(nodeid);
if (node_ref.valid()) {
auto levels = _graph.nodes.get(node_ref);
uint32_t level = 0;
for (const auto& links_ref : levels) {
auto links = _graph.links.get(links_ref.load_acquire());
- for (auto neighbor_docid : links) {
- auto neighbor_links = _graph.acquire_link_array(neighbor_docid, level);
- if (! has_link_to(neighbor_links, docid)) {
+ for (auto neighbor_nodeid : links) {
+ auto neighbor_links = _graph.acquire_link_array(neighbor_nodeid, level);
+ if (! has_link_to(neighbor_links, nodeid)) {
all_sym = false;
- LOG(warning, "check_link_symmetry: docid %zu links to %u on level %u, but no backlink",
- docid, neighbor_docid, level);
+ LOG(warning, "check_link_symmetry: nodeid %zu links to %u on level %u, but no backlink",
+ nodeid, neighbor_nodeid, level);
}
}
++level;
@@ -810,9 +818,9 @@ HnswIndex::count_reachable_nodes() const
}
std::vector<bool> visited(_graph.size());
LinkArray found_links;
- if (entry.docid < visited.size()) {
- found_links.push_back(entry.docid);
- visited[entry.docid] = true;
+ if (entry.nodeid < visited.size()) {
+ found_links.push_back(entry.nodeid);
+ visited[entry.nodeid] = true;
}
vespalib::steady_time doom = vespalib::steady_clock::now() + MAX_COUNT_DURATION;
while (search_level >= 0) {
@@ -820,9 +828,9 @@ HnswIndex::count_reachable_nodes() const
if (vespalib::steady_clock::now() > doom) {
return {found_links.size(), false};
}
- uint32_t docid = found_links[idx];
- if (docid < visited.size()) {
- auto neighbors = _graph.acquire_link_array(docid, search_level);
+ uint32_t nodeid = found_links[idx];
+ if (nodeid < visited.size()) {
+ auto neighbors = _graph.acquire_link_array(nodeid, search_level);
for (uint32_t neighbor : neighbors) {
if (neighbor >= visited.size() || visited[neighbor]) {
continue;
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
index 43e3001b496..1833eec3909 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
@@ -80,6 +80,9 @@ public:
CompactionSpec link_arrays() const noexcept { return _link_arrays; }
};
+ // Stubs for mapping between docid and nodeid.
+ static uint32_t get_docid(uint32_t nodeid) { return nodeid; }
+ static uint32_t get_nodeid(uint32_t docid) { return docid; }
protected:
using AtomicEntryRef = HnswGraph::AtomicEntryRef;
using NodeStore = HnswGraph::NodeStore;
@@ -100,10 +103,10 @@ protected:
HnswIndexCompactionSpec _compaction_spec;
uint32_t max_links_for_level(uint32_t level) const;
- void add_link_to(uint32_t docid, uint32_t level, const LinkArrayRef& old_links, uint32_t new_link) {
+ void add_link_to(uint32_t nodeid, uint32_t level, const LinkArrayRef& old_links, uint32_t new_link) {
LinkArray new_links(old_links.begin(), old_links.end());
new_links.push_back(new_link);
- _graph.set_link_array(docid, level, new_links);
+ _graph.set_link_array(nodeid, level, new_links);
}
/**
@@ -122,18 +125,22 @@ protected:
SelectResult select_neighbors_heuristic(const HnswCandidateVector& neighbors, uint32_t max_links) const;
SelectResult select_neighbors_simple(const HnswCandidateVector& neighbors, uint32_t max_links) const;
SelectResult select_neighbors(const HnswCandidateVector& neighbors, uint32_t max_links) const;
- void shrink_if_needed(uint32_t docid, uint32_t level);
- void connect_new_node(uint32_t docid, const LinkArrayRef &neighbors, uint32_t level);
+ void shrink_if_needed(uint32_t nodeid, uint32_t level);
+ void connect_new_node(uint32_t nodeid, const LinkArrayRef &neighbors, uint32_t level);
void mutual_reconnect(const LinkArrayRef &cluster, uint32_t level);
void remove_link_to(uint32_t remove_from, uint32_t remove_id, uint32_t level);
- inline TypedCells get_vector(uint32_t docid) const {
+ inline TypedCells get_vector(uint32_t nodeid) const {
+ uint32_t docid = get_docid(nodeid);
+ return _vectors.get_vector(docid);
+ }
+ inline TypedCells get_vector_by_docid(uint32_t docid) const {
return _vectors.get_vector(docid);
}
- double calc_distance(uint32_t lhs_docid, uint32_t rhs_docid) const;
- double calc_distance(const TypedCells& lhs, uint32_t rhs_docid) const;
- uint32_t estimate_visited_nodes(uint32_t level, uint32_t doc_id_limit, uint32_t neighbors_to_find, const GlobalFilter* filter) const;
+ double calc_distance(uint32_t lhs_nodeid, uint32_t rhs_nodeid) const;
+ double calc_distance(const TypedCells& lhs, uint32_t rhs_nodeid) const;
+ uint32_t estimate_visited_nodes(uint32_t level, uint32_t nodeid_limit, uint32_t neighbors_to_find, const GlobalFilter* filter) const;
/**
* Performs a greedy search in the given layer to find the candidate that is nearest the input vector.
@@ -142,7 +149,7 @@ protected:
template <class VisitedTracker>
void search_layer_helper(const TypedCells& input, uint32_t neighbors_to_find, FurthestPriQ& found_neighbors,
uint32_t level, const GlobalFilter *filter,
- uint32_t doc_id_limit,
+ uint32_t nodeid_limit,
uint32_t estimated_visited_nodes) const;
void search_layer(const TypedCells& input, uint32_t neighbors_to_find, FurthestPriQ& found_neighbors,
uint32_t level, const GlobalFilter *filter = nullptr) const;
@@ -169,7 +176,7 @@ protected:
};
PreparedAddDoc internal_prepare_add(uint32_t docid, TypedCells input_vector,
vespalib::GenerationHandler::Guard read_guard) const;
- LinkArray filter_valid_docids(uint32_t level, const PreparedAddDoc::Links &neighbors, uint32_t me);
+ LinkArray filter_valid_nodeids(uint32_t level, const PreparedAddDoc::Links &neighbors, uint32_t self_nodeid);
void internal_complete_add(uint32_t docid, PreparedAddDoc &op);
public:
HnswIndex(const DocVectorAccess& vectors, DistanceFunction::UP distance_func,
@@ -184,6 +191,7 @@ public:
TypedCells vector,
vespalib::GenerationHandler::Guard read_guard) const override;
void complete_add_document(uint32_t docid, std::unique_ptr<PrepareResult> prepare_result) override;
+ void remove_node(uint32_t nodeid);
void remove_document(uint32_t docid) override;
void assign_generation(generation_t current_gen) override;
void reclaim_memory(generation_t oldest_used_gen) override;
@@ -210,12 +218,12 @@ public:
FurthestPriQ top_k_candidates(const TypedCells &vector, uint32_t k, const GlobalFilter *filter) const;
- uint32_t get_entry_docid() const { return _graph.get_entry_node().docid; }
+ uint32_t get_entry_nodeid() const { return _graph.get_entry_node().nodeid; }
int32_t get_entry_level() const { return _graph.get_entry_node().level; }
// Should only be used by unit tests.
- HnswNode get_node(uint32_t docid) const;
- void set_node(uint32_t docid, const HnswNode &node);
+ HnswNode get_node(uint32_t nodeid) const;
+ void set_node(uint32_t nodeid, const HnswNode &node);
bool check_link_symmetry() const;
std::pair<uint32_t, bool> count_reachable_nodes() const;
HnswGraph& get_graph() { return _graph; }
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.h
index b65ae22ed47..5d40893f215 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.h
@@ -22,10 +22,10 @@ class HnswIndexLoader : public NearestNeighborIndexLoader {
private:
HnswGraph& _graph;
std::unique_ptr<ReaderType> _reader;
- uint32_t _entry_docid;
+ uint32_t _entry_nodeid;
int32_t _entry_level;
uint32_t _num_nodes;
- uint32_t _docid;
+ uint32_t _nodeid;
std::vector<uint32_t> _link_array;
bool _complete;
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.hpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.hpp
index 4fd75bb2fec..1c38e7d7936 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.hpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.hpp
@@ -13,7 +13,7 @@ template <typename ReaderType>
void
HnswIndexLoader<ReaderType>::init()
{
- _entry_docid = next_int();
+ _entry_nodeid = next_int();
_entry_level = next_int();
_num_nodes = next_int();
}
@@ -25,10 +25,10 @@ template <typename ReaderType>
HnswIndexLoader<ReaderType>::HnswIndexLoader(HnswGraph& graph, std::unique_ptr<ReaderType> reader)
: _graph(graph),
_reader(std::move(reader)),
- _entry_docid(0),
+ _entry_nodeid(0),
_entry_level(0),
_num_nodes(0),
- _docid(0),
+ _nodeid(0),
_link_array(),
_complete(false)
{
@@ -40,28 +40,28 @@ bool
HnswIndexLoader<ReaderType>::load_next()
{
assert(!_complete);
- if (_docid < _num_nodes) {
+ if (_nodeid < _num_nodes) {
uint32_t num_levels = next_int();
if (num_levels > 0) {
- _graph.make_node_for_document(_docid, num_levels);
+ _graph.make_node(_nodeid, num_levels);
for (uint32_t level = 0; level < num_levels; ++level) {
uint32_t num_links = next_int();
_link_array.clear();
while (num_links-- > 0) {
_link_array.push_back(next_int());
}
- _graph.set_link_array(_docid, level, _link_array);
+ _graph.set_link_array(_nodeid, level, _link_array);
}
}
}
- if (++_docid < _num_nodes) {
+ if (++_nodeid < _num_nodes) {
return true;
} else {
_graph.node_refs.ensure_size(std::max(_num_nodes, 1u));
_graph.node_refs_size.store(std::max(_num_nodes, 1u), std::memory_order_release);
_graph.trim_node_refs_size();
- auto entry_node_ref = _graph.get_node_ref(_entry_docid);
- _graph.set_entry_node({_entry_docid, entry_node_ref, _entry_level});
+ auto entry_node_ref = _graph.get_node_ref(_entry_nodeid);
+ _graph.set_entry_node({_entry_nodeid, entry_node_ref, _entry_level});
_complete = true;
return false;
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp
index 29218b47f53..2784dd5af28 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp
@@ -26,7 +26,7 @@ count_valid_link_arrays(const HnswGraph & graph) {
}
HnswIndexSaver::MetaData::MetaData()
- : entry_docid(0),
+ : entry_nodeid(0),
entry_level(-1),
refs(),
nodes()
@@ -38,7 +38,7 @@ HnswIndexSaver::HnswIndexSaver(const HnswGraph &graph)
: _graph_links(graph.links), _meta_data()
{
auto entry = graph.get_entry_node();
- _meta_data.entry_docid = entry.docid;
+ _meta_data.entry_nodeid = entry.nodeid;
_meta_data.entry_level = entry.level;
size_t num_nodes = graph.node_refs.get_size(); // Called from writer only
assert (num_nodes <= (std::numeric_limits<uint32_t>::max() - 1));
@@ -62,7 +62,7 @@ HnswIndexSaver::HnswIndexSaver(const HnswGraph &graph)
void
HnswIndexSaver::save(BufferWriter& writer) const
{
- writer.write(&_meta_data.entry_docid, sizeof(uint32_t));
+ writer.write(&_meta_data.entry_nodeid, sizeof(uint32_t));
writer.write(&_meta_data.entry_level, sizeof(int32_t));
uint32_t num_nodes = _meta_data.nodes.size() - 1;
writer.write(&num_nodes, sizeof(uint32_t));
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.h
index e170a4a894f..11a1f993c4e 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.h
@@ -25,7 +25,7 @@ public:
private:
struct MetaData {
using EntryRef = vespalib::datastore::EntryRef;
- uint32_t entry_docid;
+ uint32_t entry_nodeid;
int32_t entry_level;
std::vector<EntryRef, vespalib::allocator_large<EntryRef>> refs;
std::vector<uint32_t, vespalib::allocator_large<uint32_t>> nodes;
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h
index abb7ecc199a..66255c3f797 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h
@@ -13,13 +13,13 @@ namespace search::tensor {
* Represents a candidate node with its distance to another point in space.
*/
struct HnswCandidate {
- uint32_t docid;
+ uint32_t nodeid;
HnswGraph::NodeRef node_ref;
double distance;
- HnswCandidate(uint32_t docid_in, double distance_in) noexcept
- : docid(docid_in), node_ref(), distance(distance_in) {}
- HnswCandidate(uint32_t docid_in, HnswGraph::NodeRef node_ref_in, double distance_in) noexcept
- : docid(docid_in), node_ref(node_ref_in), distance(distance_in) {}
+ HnswCandidate(uint32_t nodeid_in, double distance_in) noexcept
+ : nodeid(nodeid_in), node_ref(), distance(distance_in) {}
+ HnswCandidate(uint32_t nodeid_in, HnswGraph::NodeRef node_ref_in, double distance_in) noexcept
+ : nodeid(nodeid_in), node_ref(node_ref_in), distance(distance_in) {}
};
struct GreaterDistance {