diff options
author | Geir Storli <geirst@yahooinc.com> | 2022-10-26 13:49:08 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-10-26 13:49:08 +0200 |
commit | 5c279f0c7f0f22931be2226b2b9dfd82ce1da4e3 (patch) | |
tree | 40d2e570a57789db06778f892cb9ebc069525aff /searchlib | |
parent | 4bc54df04097788c49f31f1cfe1d0446c26d8c42 (diff) | |
parent | 9e7cd01437f96c8089a8bf242f7dc1aafd575143 (diff) |
Merge pull request #24585 from vespa-engine/toregge/use-nodeid-instead-of-docid-to-identify-an-hnsw-graph-node
Use nodeid instead of docid to identify an HNSW graph node.
Diffstat (limited to 'searchlib')
14 files changed, 242 insertions, 226 deletions
diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp index 9fccad1d2d4..3a50601abbd 100644 --- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp @@ -118,21 +118,21 @@ public: CompactionStrategy compaction_strategy; return index->update_stat(compaction_strategy); } - void expect_entry_point(uint32_t exp_docid, uint32_t exp_level) { - EXPECT_EQ(exp_docid, index->get_entry_docid()); + void expect_entry_point(uint32_t exp_nodeid, uint32_t exp_level) { + EXPECT_EQ(exp_nodeid, index->get_entry_nodeid()); EXPECT_EQ(exp_level, index->get_entry_level()); } - void expect_level_0(uint32_t docid, const HnswNode::LinkArray& exp_links) { - auto node = index->get_node(docid); + void expect_level_0(uint32_t nodeid, const HnswNode::LinkArray& exp_links) { + auto node = index->get_node(nodeid); ASSERT_EQ(1, node.size()); EXPECT_EQ(exp_links, node.level(0)); } - void expect_empty_level_0(uint32_t docid) { - auto node = index->get_node(docid); + void expect_empty_level_0(uint32_t nodeid) { + auto node = index->get_node(nodeid); EXPECT_TRUE(node.empty()); } - void expect_levels(uint32_t docid, const HnswNode::LevelArray& exp_levels) { - auto act_node = index->get_node(docid); + void expect_levels(uint32_t nodeid, const HnswNode::LevelArray& exp_levels) { + auto act_node = index->get_node(nodeid); ASSERT_EQ(exp_levels.size(), act_node.size()); EXPECT_EQ(exp_levels, act_node.levels()); } @@ -144,7 +144,7 @@ public: size_t idx = 0; for (const auto & hit : rv) { if (idx < exp_hits.size()) { - EXPECT_EQ(hit.docid, exp_hits[idx++]); + EXPECT_EQ(index->get_docid(hit.nodeid), exp_hits[idx++]); } } if (exp_hits.size() == k) { @@ -169,7 +169,7 @@ public: ? index->find_top_k_with_filter(k, qv, *global_filter, k, thr) : index->find_top_k(k, qv, k, thr); EXPECT_EQ(got_by_docid.size(), 1); - EXPECT_EQ(got_by_docid[0].docid, rv[0].docid); + EXPECT_EQ(got_by_docid[0].docid, index->get_docid(rv[0].nodeid)); for (const auto & hit : got_by_docid) { LOG(debug, "from docid=%u found docid=%u dist=%g (threshold %g)\n", docid, hit.docid, hit.distance, thr); diff --git a/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp b/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp index add7801e03a..ea22aaabaff 100644 --- a/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp @@ -52,12 +52,12 @@ using V = std::vector<uint32_t>; void populate(HnswGraph &graph) { // no 0 - graph.make_node_for_document(1, 1); - auto er = graph.make_node_for_document(2, 2); + graph.make_node(1, 1); + auto er = graph.make_node(2, 2); // no 3 - graph.make_node_for_document(4, 2); - graph.make_node_for_document(5, 0); - graph.make_node_for_document(6, 1); + graph.make_node(4, 2); + graph.make_node(5, 0); + graph.make_node(6, 1); graph.set_link_array(1, 0, V{2, 4, 6}); graph.set_link_array(2, 0, V{1, 4, 6}); @@ -69,9 +69,9 @@ void populate(HnswGraph &graph) { } void modify(HnswGraph &graph) { - graph.remove_node_for_document(2); - graph.remove_node_for_document(6); - graph.make_node_for_document(7, 2); + graph.remove_node(2); + graph.remove_node(6); + graph.make_node(7, 2); graph.set_link_array(1, 0, V{7, 4}); graph.set_link_array(4, 0, V{7, 2}); @@ -88,24 +88,24 @@ public: HnswGraph original; HnswGraph copy; - void expect_empty_d(uint32_t docid) const { - EXPECT_FALSE(copy.acquire_node_ref(docid).valid()); + void expect_empty_d(uint32_t nodeid) const { + EXPECT_FALSE(copy.acquire_node_ref(nodeid).valid()); } - void expect_level_0(uint32_t docid, const V& exp_links) const { - auto levels = copy.acquire_level_array(docid); + void expect_level_0(uint32_t nodeid, const V& exp_links) const { + auto levels = copy.acquire_level_array(nodeid); EXPECT_GE(levels.size(), 1); - auto links = copy.acquire_link_array(docid, 0); + auto links = copy.acquire_link_array(nodeid, 0); EXPECT_EQ(exp_links.size(), links.size()); for (size_t i = 0; i < exp_links.size() && i < links.size(); ++i) { EXPECT_EQ(exp_links[i], links[i]); } } - void expect_level_1(uint32_t docid, const V& exp_links) const { - auto levels = copy.acquire_level_array(docid); + void expect_level_1(uint32_t nodeid, const V& exp_links) const { + auto levels = copy.acquire_level_array(nodeid); EXPECT_EQ(2, levels.size()); - auto links = copy.acquire_link_array(docid, 1); + auto links = copy.acquire_link_array(nodeid, 1); EXPECT_EQ(exp_links.size(), links.size()); for (size_t i = 0; i < exp_links.size() && i < links.size(); ++i) { EXPECT_EQ(exp_links[i], links[i]); @@ -126,7 +126,7 @@ public: void expect_copy_as_populated() const { EXPECT_EQ(copy.size(), 7); auto entry = copy.get_entry_node(); - EXPECT_EQ(entry.docid, 2); + EXPECT_EQ(entry.nodeid, 2); EXPECT_EQ(entry.level, 1); expect_empty_d(0); diff --git a/searchlib/src/vespa/searchlib/tensor/bitvector_visited_tracker.cpp b/searchlib/src/vespa/searchlib/tensor/bitvector_visited_tracker.cpp index 524cc2b9cd5..e456e87dc96 100644 --- a/searchlib/src/vespa/searchlib/tensor/bitvector_visited_tracker.cpp +++ b/searchlib/src/vespa/searchlib/tensor/bitvector_visited_tracker.cpp @@ -4,8 +4,8 @@ namespace search::tensor { -BitVectorVisitedTracker::BitVectorVisitedTracker(const HnswIndex&, uint32_t doc_id_limit, uint32_t) - : _visited(doc_id_limit) +BitVectorVisitedTracker::BitVectorVisitedTracker(const HnswIndex&, uint32_t nodeid_limit, uint32_t) + : _visited(nodeid_limit) { } diff --git a/searchlib/src/vespa/searchlib/tensor/bitvector_visited_tracker.h b/searchlib/src/vespa/searchlib/tensor/bitvector_visited_tracker.h index bbd49aa0482..ecdd957187f 100644 --- a/searchlib/src/vespa/searchlib/tensor/bitvector_visited_tracker.h +++ b/searchlib/src/vespa/searchlib/tensor/bitvector_visited_tracker.h @@ -16,14 +16,14 @@ class BitVectorVisitedTracker { search::AllocatedBitVector _visited; public: - BitVectorVisitedTracker(const HnswIndex&, uint32_t doc_id_limit, uint32_t); + BitVectorVisitedTracker(const HnswIndex&, uint32_t nodeid_limit, uint32_t); ~BitVectorVisitedTracker(); - void mark(uint32_t doc_id) { _visited.setBit(doc_id); } - bool try_mark(uint32_t doc_id) { - if (_visited.testBit(doc_id)) { + void mark(uint32_t nodeid) { _visited.setBit(nodeid); } + bool try_mark(uint32_t nodeid) { + if (_visited.testBit(nodeid)) { return false; } else { - _visited.setBit(doc_id); + _visited.setBit(nodeid); return true; } } diff --git a/searchlib/src/vespa/searchlib/tensor/hash_set_visited_tracker.h b/searchlib/src/vespa/searchlib/tensor/hash_set_visited_tracker.h index c2a14235f47..30434d1029d 100644 --- a/searchlib/src/vespa/searchlib/tensor/hash_set_visited_tracker.h +++ b/searchlib/src/vespa/searchlib/tensor/hash_set_visited_tracker.h @@ -18,9 +18,9 @@ class HashSetVisitedTracker public: HashSetVisitedTracker(const HnswIndex&, uint32_t, uint32_t estimated_visited_nodes); ~HashSetVisitedTracker(); - void mark(uint32_t doc_id) { _visited.insert(doc_id); } - bool try_mark(uint32_t doc_id) { - return _visited.insert(doc_id).second; + void mark(uint32_t nodeid) { _visited.insert(nodeid); } + bool try_mark(uint32_t nodeid) { + return _visited.insert(nodeid).second; } }; diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp index 56af2ed3b35..6d8b143cff9 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp @@ -11,7 +11,7 @@ HnswGraph::HnswGraph() node_refs_size(1u), nodes(HnswIndex::make_default_node_store_config(), {}), links(HnswIndex::make_default_link_store_config(), {}), - entry_docid_and_level() + entry_nodeid_and_level() { node_refs.ensure_size(1, AtomicEntryRef()); EntryNode entry; @@ -21,36 +21,36 @@ HnswGraph::HnswGraph() HnswGraph::~HnswGraph() = default; HnswGraph::NodeRef -HnswGraph::make_node_for_document(uint32_t docid, uint32_t num_levels) +HnswGraph::make_node(uint32_t nodeid, uint32_t num_levels) { - node_refs.ensure_size(docid + 1, AtomicEntryRef()); + node_refs.ensure_size(nodeid + 1, AtomicEntryRef()); // A document cannot be added twice. - assert(!get_node_ref(docid).valid()); + assert(!get_node_ref(nodeid).valid()); // Note: The level array instance lives as long as the document is present in the index. std::vector<AtomicEntryRef> levels(num_levels, AtomicEntryRef()); auto node_ref = nodes.add(levels); - node_refs[docid].store_release(node_ref); - if (docid >= node_refs_size.load(std::memory_order_relaxed)) { - node_refs_size.store(docid + 1, std::memory_order_release); + node_refs[nodeid].store_release(node_ref); + if (nodeid >= node_refs_size.load(std::memory_order_relaxed)) { + node_refs_size.store(nodeid + 1, std::memory_order_release); } return node_ref; } void -HnswGraph::remove_node_for_document(uint32_t docid) +HnswGraph::remove_node(uint32_t nodeid) { - auto node_ref = get_node_ref(docid); + auto node_ref = get_node_ref(nodeid); assert(node_ref.valid()); auto levels = nodes.get(node_ref); vespalib::datastore::EntryRef invalid; - node_refs[docid].store_release(invalid); + node_refs[nodeid].store_release(invalid); // Ensure data referenced through the old ref can be recycled: nodes.remove(node_ref); for (size_t i = 0; i < levels.size(); ++i) { auto old_links_ref = levels[i].load_relaxed(); links.remove(old_links_ref); } - if (docid + 1 == node_refs_size.load(std::memory_order_relaxed)) { + if (nodeid + 1 == node_refs_size.load(std::memory_order_relaxed)) { trim_node_refs_size(); } } @@ -58,18 +58,18 @@ HnswGraph::remove_node_for_document(uint32_t docid) void HnswGraph::trim_node_refs_size() { - uint32_t check_doc_id = node_refs_size.load(std::memory_order_relaxed) - 1; - while (check_doc_id > 0u && !get_node_ref(check_doc_id).valid()) { - --check_doc_id; + uint32_t check_nodeid = node_refs_size.load(std::memory_order_relaxed) - 1; + while (check_nodeid > 0u && !get_node_ref(check_nodeid).valid()) { + --check_nodeid; } - node_refs_size.store(check_doc_id + 1, std::memory_order_release); + node_refs_size.store(check_nodeid + 1, std::memory_order_release); } void -HnswGraph::set_link_array(uint32_t docid, uint32_t level, const LinkArrayRef& new_links) +HnswGraph::set_link_array(uint32_t nodeid, uint32_t level, const LinkArrayRef& new_links) { auto new_links_ref = links.add(new_links); - auto node_ref = get_node_ref(docid); + auto node_ref = get_node_ref(nodeid); assert(node_ref.valid()); auto levels = nodes.get_writable(node_ref); assert(level < levels.size()); @@ -112,15 +112,15 @@ void HnswGraph::set_entry_node(EntryNode node) { uint64_t value = node.level; value <<= 32; - value |= node.docid; + value |= node.nodeid; if (node.node_ref.valid()) { assert(node.level >= 0); - assert(node.docid > 0); + assert(node.nodeid > 0); } else { assert(node.level == -1); - assert(node.docid == 0); + assert(node.nodeid == 0); } - entry_docid_and_level.store(value, std::memory_order_release); + entry_nodeid_and_level.store(value, std::memory_order_release); } } // namespace diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h index 9e3850bd470..60ba7d55f7f 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h @@ -43,27 +43,27 @@ struct HnswGraph { NodeStore nodes; LinkStore links; - std::atomic<uint64_t> entry_docid_and_level; + std::atomic<uint64_t> entry_nodeid_and_level; HnswGraph(); ~HnswGraph(); - NodeRef make_node_for_document(uint32_t docid, uint32_t num_levels); + NodeRef make_node(uint32_t nodeid, uint32_t num_levels); - void remove_node_for_document(uint32_t docid); + void remove_node(uint32_t nodeid); void trim_node_refs_size(); - NodeRef get_node_ref(uint32_t docid) const { - return node_refs.get_elem_ref(docid).load_relaxed(); // Called from writer only + NodeRef get_node_ref(uint32_t nodeid) const { + return node_refs.get_elem_ref(nodeid).load_relaxed(); // Called from writer only } - NodeRef acquire_node_ref(uint32_t docid) const { - return node_refs.acquire_elem_ref(docid).load_acquire(); + NodeRef acquire_node_ref(uint32_t nodeid) const { + return node_refs.acquire_elem_ref(nodeid).load_acquire(); } - bool still_valid(uint32_t docid, NodeRef node_ref) const { - return node_ref.valid() && (acquire_node_ref(docid) == node_ref); + bool still_valid(uint32_t nodeid, NodeRef node_ref) const { + return node_ref.valid() && (acquire_node_ref(nodeid) == node_ref); } LevelArrayRef get_level_array(NodeRef node_ref) const { @@ -73,13 +73,13 @@ struct HnswGraph { return LevelArrayRef(); } - LevelArrayRef get_level_array(uint32_t docid) const { - auto node_ref = get_node_ref(docid); + LevelArrayRef get_level_array(uint32_t nodeid) const { + auto node_ref = get_node_ref(nodeid); return get_level_array(node_ref); } - LevelArrayRef acquire_level_array(uint32_t docid) const { - auto node_ref = acquire_node_ref(docid); + LevelArrayRef acquire_level_array(uint32_t nodeid) const { + auto node_ref = acquire_node_ref(nodeid); return get_level_array(node_ref); } @@ -93,13 +93,13 @@ struct HnswGraph { return LinkArrayRef(); } - LinkArrayRef get_link_array(uint32_t docid, uint32_t level) const { - auto levels = get_level_array(docid); + LinkArrayRef get_link_array(uint32_t nodeid, uint32_t level) const { + auto levels = get_level_array(nodeid); return get_link_array(levels, level); } - LinkArrayRef acquire_link_array(uint32_t docid, uint32_t level) const { - auto levels = acquire_level_array(docid); + LinkArrayRef acquire_link_array(uint32_t nodeid, uint32_t level) const { + auto levels = acquire_level_array(nodeid); return get_link_array(levels, level); } @@ -108,19 +108,19 @@ struct HnswGraph { return get_link_array(levels, level); } - void set_link_array(uint32_t docid, uint32_t level, const LinkArrayRef& new_links); + void set_link_array(uint32_t nodeid, uint32_t level, const LinkArrayRef& new_links); struct EntryNode { - uint32_t docid; + uint32_t nodeid; NodeRef node_ref; int32_t level; EntryNode() - : docid(0), // Note that docid 0 is reserved and never used + : nodeid(0), // Note that nodeid 0 is reserved and never used node_ref(), level(-1) {} - EntryNode(uint32_t docid_in, NodeRef node_ref_in, int32_t level_in) - : docid(docid_in), + EntryNode(uint32_t nodeid_in, NodeRef node_ref_in, int32_t level_in) + : nodeid(nodeid_in), node_ref(node_ref_in), level(level_in) {} @@ -129,24 +129,24 @@ struct HnswGraph { void set_entry_node(EntryNode node); uint64_t get_entry_atomic() const { - return entry_docid_and_level.load(std::memory_order_acquire); + return entry_nodeid_and_level.load(std::memory_order_acquire); } EntryNode get_entry_node() const { EntryNode entry; while (true) { uint64_t value = get_entry_atomic(); - entry.docid = (uint32_t)value; - entry.node_ref = acquire_node_ref(entry.docid); + entry.nodeid = (uint32_t)value; + entry.node_ref = acquire_node_ref(entry.nodeid); entry.level = (int32_t)(value >> 32); - if ((entry.docid == 0) + if ((entry.nodeid == 0) && (entry.level == -1) && ! entry.node_ref.valid()) { // invalid in every way return entry; } - if ((entry.docid > 0) + if ((entry.nodeid > 0) && (entry.level > -1) && entry.node_ref.valid() && (get_entry_atomic() == value)) diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index a10ff1c48dc..89b4f62146c 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -86,7 +86,7 @@ bool HnswIndex::have_closer_distance(HnswCandidate candidate, const HnswCandidateVector& result) const { for (const auto & neighbor : result) { - double dist = calc_distance(candidate.docid, neighbor.docid); + double dist = calc_distance(candidate.nodeid, neighbor.nodeid); if (dist < candidate.distance) { return true; } @@ -104,7 +104,7 @@ HnswIndex::select_neighbors_simple(const HnswCandidateVector& neighbors, uint32_ if (result.used.size() < max_links) { result.used.push_back(candidate); } else { - result.unused.push_back(candidate.docid); + result.unused.push_back(candidate.nodeid); } } return result; @@ -122,7 +122,7 @@ HnswIndex::select_neighbors_heuristic(const HnswCandidateVector& neighbors, uint auto candidate = nearest.top(); nearest.pop(); if (have_closer_distance(candidate, result.used)) { - result.unused.push_back(candidate.docid); + result.unused.push_back(candidate.nodeid); continue; } result.used.push_back(candidate); @@ -130,7 +130,7 @@ HnswIndex::select_neighbors_heuristic(const HnswCandidateVector& neighbors, uint while (!nearest.empty()) { candidate = nearest.top(); nearest.pop(); - result.unused.push_back(candidate.docid); + result.unused.push_back(candidate.nodeid); } } } @@ -148,40 +148,40 @@ HnswIndex::select_neighbors(const HnswCandidateVector& neighbors, uint32_t max_l } void -HnswIndex::shrink_if_needed(uint32_t docid, uint32_t level) +HnswIndex::shrink_if_needed(uint32_t nodeid, uint32_t level) { - auto old_links = _graph.get_link_array(docid, level); + auto old_links = _graph.get_link_array(nodeid, level); uint32_t max_links = max_links_for_level(level); if (old_links.size() > max_links) { HnswCandidateVector neighbors; neighbors.reserve(old_links.size()); - for (uint32_t neighbor_docid : old_links) { - double dist = calc_distance(docid, neighbor_docid); - neighbors.emplace_back(neighbor_docid, dist); + for (uint32_t neighbor_nodeid : old_links) { + double dist = calc_distance(nodeid, neighbor_nodeid); + neighbors.emplace_back(neighbor_nodeid, dist); } auto split = select_neighbors(neighbors, max_links); LinkArray new_links; new_links.reserve(split.used.size()); for (const auto & neighbor : split.used) { - new_links.push_back(neighbor.docid); + new_links.push_back(neighbor.nodeid); } - _graph.set_link_array(docid, level, new_links); - for (uint32_t removed_docid : split.unused) { - remove_link_to(removed_docid, docid, level); + _graph.set_link_array(nodeid, level, new_links); + for (uint32_t removed_nodeid : split.unused) { + remove_link_to(removed_nodeid, nodeid, level); } } } void -HnswIndex::connect_new_node(uint32_t docid, const LinkArrayRef &neighbors, uint32_t level) +HnswIndex::connect_new_node(uint32_t nodeid, const LinkArrayRef &neighbors, uint32_t level) { - _graph.set_link_array(docid, level, neighbors); - for (uint32_t neighbor_docid : neighbors) { - auto old_links = _graph.get_link_array(neighbor_docid, level); - add_link_to(neighbor_docid, level, old_links, docid); + _graph.set_link_array(nodeid, level, neighbors); + for (uint32_t neighbor_nodeid : neighbors) { + auto old_links = _graph.get_link_array(neighbor_nodeid, level); + add_link_to(neighbor_nodeid, level, old_links, nodeid); } - for (uint32_t neighbor_docid : neighbors) { - shrink_if_needed(neighbor_docid, level); + for (uint32_t neighbor_nodeid : neighbors) { + shrink_if_needed(neighbor_nodeid, level); } } @@ -199,38 +199,38 @@ HnswIndex::remove_link_to(uint32_t remove_from, uint32_t remove_id, uint32_t lev double -HnswIndex::calc_distance(uint32_t lhs_docid, uint32_t rhs_docid) const +HnswIndex::calc_distance(uint32_t lhs_nodeid, uint32_t rhs_nodeid) const { - auto lhs = get_vector(lhs_docid); - return calc_distance(lhs, rhs_docid); + auto lhs = get_vector(lhs_nodeid); + return calc_distance(lhs, rhs_nodeid); } double -HnswIndex::calc_distance(const TypedCells& lhs, uint32_t rhs_docid) const +HnswIndex::calc_distance(const TypedCells& lhs, uint32_t rhs_nodeid) const { - auto rhs = get_vector(rhs_docid); + auto rhs = get_vector(rhs_nodeid); return _distance_func->calc(lhs, rhs); } uint32_t -HnswIndex::estimate_visited_nodes(uint32_t level, uint32_t doc_id_limit, uint32_t neighbors_to_find, const GlobalFilter* filter) const +HnswIndex::estimate_visited_nodes(uint32_t level, uint32_t nodeid_limit, uint32_t neighbors_to_find, const GlobalFilter* filter) const { uint32_t m_for_level = max_links_for_level(level); uint64_t base_estimate = uint64_t(m_for_level) * neighbors_to_find + 100; - if (base_estimate >= doc_id_limit) { - return doc_id_limit; + if (base_estimate >= nodeid_limit) { + return nodeid_limit; } if (!filter) { return base_estimate; } uint32_t true_bits = filter->count(); if (true_bits == 0) { - return doc_id_limit; + return nodeid_limit; } double scaler = double(filter->size()) / true_bits; double scaled_estimate = scaler * base_estimate; - if (scaled_estimate >= doc_id_limit) { - return doc_id_limit; + if (scaled_estimate >= nodeid_limit) { + return nodeid_limit; } return scaled_estimate; } @@ -242,13 +242,13 @@ HnswIndex::find_nearest_in_layer(const TypedCells& input, const HnswCandidate& e bool keep_searching = true; while (keep_searching) { keep_searching = false; - for (uint32_t neighbor_docid : _graph.get_link_array(nearest.node_ref, level)) { - auto neighbor_ref = _graph.acquire_node_ref(neighbor_docid); - double dist = calc_distance(input, neighbor_docid); - if (_graph.still_valid(neighbor_docid, neighbor_ref) + for (uint32_t neighbor_nodeid : _graph.get_link_array(nearest.node_ref, level)) { + auto neighbor_ref = _graph.acquire_node_ref(neighbor_nodeid); + double dist = calc_distance(input, neighbor_nodeid); + if (_graph.still_valid(neighbor_nodeid, neighbor_ref) && dist < nearest.distance) { - nearest = HnswCandidate(neighbor_docid, neighbor_ref, dist); + nearest = HnswCandidate(neighbor_nodeid, neighbor_ref, dist); keep_searching = true; } } @@ -260,17 +260,17 @@ template <class VisitedTracker> void HnswIndex::search_layer_helper(const TypedCells& input, uint32_t neighbors_to_find, FurthestPriQ& best_neighbors, uint32_t level, const GlobalFilter *filter, - uint32_t doc_id_limit, uint32_t estimated_visited_nodes) const + uint32_t nodeid_limit, uint32_t estimated_visited_nodes) const { NearestPriQ candidates; - VisitedTracker visited(*this, doc_id_limit, estimated_visited_nodes); + VisitedTracker visited(*this, nodeid_limit, estimated_visited_nodes); for (const auto &entry : best_neighbors.peek()) { - if (entry.docid >= doc_id_limit) { + if (entry.nodeid >= nodeid_limit) { continue; } candidates.push(entry); - visited.mark(entry.docid); - if (filter && !filter->check(entry.docid)) { + visited.mark(entry.nodeid); + if (filter && !filter->check(entry.nodeid)) { assert(best_neighbors.size() == 1); best_neighbors.pop(); } @@ -283,21 +283,21 @@ HnswIndex::search_layer_helper(const TypedCells& input, uint32_t neighbors_to_fi break; } candidates.pop(); - for (uint32_t neighbor_docid : _graph.get_link_array(cand.node_ref, level)) { - if (neighbor_docid >= doc_id_limit) { + for (uint32_t neighbor_nodeid : _graph.get_link_array(cand.node_ref, level)) { + if (neighbor_nodeid >= nodeid_limit) { continue; } - auto neighbor_ref = _graph.acquire_node_ref(neighbor_docid); + auto neighbor_ref = _graph.acquire_node_ref(neighbor_nodeid); if ((! neighbor_ref.valid()) - || ! visited.try_mark(neighbor_docid)) + || ! visited.try_mark(neighbor_nodeid)) { continue; } - double dist_to_input = calc_distance(input, neighbor_docid); + double dist_to_input = calc_distance(input, neighbor_nodeid); if (dist_to_input < limit_dist) { - candidates.emplace(neighbor_docid, neighbor_ref, dist_to_input); - if ((!filter) || filter->check(neighbor_docid)) { - best_neighbors.emplace(neighbor_docid, neighbor_ref, dist_to_input); + candidates.emplace(neighbor_nodeid, neighbor_ref, dist_to_input); + if ((!filter) || filter->check(neighbor_nodeid)) { + best_neighbors.emplace(neighbor_nodeid, neighbor_ref, dist_to_input); if (best_neighbors.size() > neighbors_to_find) { best_neighbors.pop(); limit_dist = best_neighbors.top().distance; @@ -312,15 +312,15 @@ void HnswIndex::search_layer(const TypedCells& input, uint32_t neighbors_to_find, FurthestPriQ& best_neighbors, uint32_t level, const GlobalFilter *filter) const { - uint32_t doc_id_limit = _graph.node_refs_size.load(std::memory_order_acquire); + uint32_t nodeid_limit = _graph.node_refs_size.load(std::memory_order_acquire); if (filter) { - doc_id_limit = std::min(filter->size(), doc_id_limit); + nodeid_limit = std::min(filter->size(), nodeid_limit); } - uint32_t estimated_visited_nodes = estimate_visited_nodes(level, doc_id_limit, neighbors_to_find, filter); - if (estimated_visited_nodes >= doc_id_limit / 128) { - search_layer_helper<BitVectorVisitedTracker>(input, neighbors_to_find, best_neighbors, level, filter, doc_id_limit, estimated_visited_nodes); + uint32_t estimated_visited_nodes = estimate_visited_nodes(level, nodeid_limit, neighbors_to_find, filter); + if (estimated_visited_nodes >= nodeid_limit / 128) { + search_layer_helper<BitVectorVisitedTracker>(input, neighbors_to_find, best_neighbors, level, filter, nodeid_limit, estimated_visited_nodes); } else { - search_layer_helper<HashSetVisitedTracker>(input, neighbors_to_find, best_neighbors, level, filter, doc_id_limit, estimated_visited_nodes); + search_layer_helper<HashSetVisitedTracker>(input, neighbors_to_find, best_neighbors, level, filter, nodeid_limit, estimated_visited_nodes); } } @@ -342,7 +342,7 @@ void HnswIndex::add_document(uint32_t docid) { vespalib::GenerationHandler::Guard no_guard_needed; - PreparedAddDoc op = internal_prepare_add(docid, get_vector(docid), no_guard_needed); + PreparedAddDoc op = internal_prepare_add(docid, get_vector_by_docid(docid), no_guard_needed); internal_complete_add(docid, op); } @@ -353,14 +353,14 @@ HnswIndex::internal_prepare_add(uint32_t docid, TypedCells input_vector, vespali int level = _level_generator->max_level(); PreparedAddDoc op(docid, level, std::move(read_guard)); auto entry = _graph.get_entry_node(); - if (entry.docid == 0) { + if (entry.nodeid == 0) { // graph has no entry point return op; } int search_level = entry.level; - double entry_dist = calc_distance(input_vector, entry.docid); - // TODO: check if entry docid/node_ref is still valid here - HnswCandidate entry_point(entry.docid, entry.node_ref, entry_dist); + double entry_dist = calc_distance(input_vector, entry.nodeid); + // TODO: check if entry nodeid/node_ref is still valid here + HnswCandidate entry_point(entry.nodeid, entry.node_ref, entry_dist); while (search_level > op.max_level) { entry_point = find_nearest_in_layer(input_vector, entry_point, search_level); --search_level; @@ -378,10 +378,10 @@ HnswIndex::internal_prepare_add(uint32_t docid, TypedCells input_vector, vespali for (const auto & neighbor : neighbors.used) { auto neighbor_levels = _graph.get_level_array(neighbor.node_ref); if (size_t(search_level) < neighbor_levels.size()) { - op.connections[search_level].emplace_back(neighbor.docid, neighbor.node_ref); + op.connections[search_level].emplace_back(neighbor.nodeid, neighbor.node_ref); } else { LOG(warning, "in prepare_add(%u), selected neighbor %u is missing level %d (has %zu levels)", - docid, neighbor.docid, search_level, neighbor_levels.size()); + docid, neighbor.nodeid, search_level, neighbor_levels.size()); } } --search_level; @@ -390,18 +390,18 @@ HnswIndex::internal_prepare_add(uint32_t docid, TypedCells input_vector, vespali } HnswIndex::LinkArray -HnswIndex::filter_valid_docids(uint32_t level, const PreparedAddDoc::Links &neighbors, uint32_t self_docid) +HnswIndex::filter_valid_nodeids(uint32_t level, const PreparedAddDoc::Links &neighbors, uint32_t self_nodeid) { LinkArray valid; valid.reserve(neighbors.size()); for (const auto & neighbor : neighbors) { - uint32_t docid = neighbor.first; + uint32_t nodeid = neighbor.first; HnswGraph::NodeRef node_ref = neighbor.second; - if (_graph.still_valid(docid, node_ref)) { - assert(docid != self_docid); + if (_graph.still_valid(nodeid, node_ref)) { + assert(nodeid != self_nodeid); auto levels = _graph.get_level_array(node_ref); if (level < levels.size()) { - valid.push_back(docid); + valid.push_back(nodeid); } } } @@ -411,13 +411,14 @@ HnswIndex::filter_valid_docids(uint32_t level, const PreparedAddDoc::Links &neig void HnswIndex::internal_complete_add(uint32_t docid, PreparedAddDoc &op) { - auto node_ref = _graph.make_node_for_document(docid, op.max_level + 1); + auto nodeid = get_nodeid(docid); + auto node_ref = _graph.make_node(nodeid, op.max_level + 1); for (int level = 0; level <= op.max_level; ++level) { - auto neighbors = filter_valid_docids(level, op.connections[level], docid); - connect_new_node(docid, neighbors, level); + auto neighbors = filter_valid_nodeids(level, op.connections[level], nodeid); + connect_new_node(nodeid, neighbors, level); } if (op.max_level > get_entry_level()) { - _graph.set_entry_node({docid, node_ref, op.max_level}); + _graph.set_entry_node({nodeid, node_ref, op.max_level}); } } @@ -480,19 +481,19 @@ HnswIndex::mutual_reconnect(const LinkArrayRef &cluster, uint32_t level) } void -HnswIndex::remove_document(uint32_t docid) +HnswIndex::remove_node(uint32_t nodeid) { - bool need_new_entrypoint = (docid == get_entry_docid()); - LevelArrayRef node_levels = _graph.get_level_array(docid); + bool need_new_entrypoint = (nodeid == get_entry_nodeid()); + LevelArrayRef node_levels = _graph.get_level_array(nodeid); for (int level = node_levels.size(); level-- > 0; ) { - LinkArrayRef my_links = _graph.get_link_array(docid, level); + LinkArrayRef my_links = _graph.get_link_array(nodeid, level); for (uint32_t neighbor_id : my_links) { if (need_new_entrypoint) { auto entry_node_ref = _graph.get_node_ref(neighbor_id); _graph.set_entry_node({neighbor_id, entry_node_ref, level}); need_new_entrypoint = false; } - remove_link_to(neighbor_id, docid, level); + remove_link_to(neighbor_id, nodeid, level); } mutual_reconnect(my_links, level); } @@ -500,7 +501,14 @@ HnswIndex::remove_document(uint32_t docid) HnswGraph::EntryNode entry; _graph.set_entry_node(entry); } - _graph.remove_node_for_document(docid); + _graph.remove_node(nodeid); +} + +void +HnswIndex::remove_document(uint32_t docid) +{ + auto nodeid = get_nodeid(docid); + remove_node(nodeid); } void @@ -525,8 +533,8 @@ void HnswIndex::compact_level_arrays(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy) { auto context = _graph.nodes.compactWorst(compaction_spec, compaction_strategy); - uint32_t doc_id_limit = _graph.node_refs.size(); - vespalib::ArrayRef<AtomicEntryRef> refs(&_graph.node_refs[0], doc_id_limit); + uint32_t nodeid_limit = _graph.node_refs.size(); + vespalib::ArrayRef<AtomicEntryRef> refs(&_graph.node_refs[0], nodeid_limit); context->compact(refs); } @@ -534,9 +542,9 @@ void HnswIndex::compact_link_arrays(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy) { auto context = _graph.links.compactWorst(compaction_spec, compaction_strategy); - uint32_t doc_id_limit = _graph.node_refs.size(); - for (uint32_t doc_id = 1; doc_id < doc_id_limit; ++doc_id) { - EntryRef level_ref = _graph.get_node_ref(doc_id); + uint32_t nodeid_limit = _graph.node_refs.size(); + for (uint32_t nodeid = 1; nodeid < nodeid_limit; ++nodeid) { + EntryRef level_ref = _graph.get_node_ref(nodeid); if (level_ref.valid()) { vespalib::ArrayRef<AtomicEntryRef> refs(_graph.nodes.get_writable(level_ref)); context->compact(refs); @@ -640,7 +648,7 @@ HnswIndex::get_state(const vespalib::slime::Inserter& inserter) const object.setLong("unreachable_nodes_incomplete_count", unreachable); } auto entry_node = _graph.get_entry_node(); - object.setLong("entry_docid", entry_node.docid); + object.setLong("entry_nodeid", entry_node.nodeid); object.setLong("entry_level", entry_node.level); auto& cfgObj = object.setObject("cfg"); cfgObj.setLong("max_links_at_level_0", _cfg.max_links_at_level_0()); @@ -670,7 +678,7 @@ HnswIndex::make_saver() const std::unique_ptr<NearestNeighborIndexLoader> HnswIndex::make_loader(FastOS_FileInterface& file) { - assert(get_entry_docid() == 0); // cannot load after index has data + assert(get_entry_nodeid() == 0); // cannot load after index has data using ReaderType = FileReader<uint32_t>; using LoaderType = HnswIndexLoader<ReaderType>; return std::make_unique<LoaderType>(_graph, std::make_unique<ReaderType>(&file)); @@ -697,7 +705,7 @@ HnswIndex::top_k_by_docid(uint32_t k, TypedCells vector, result.reserve(candidates.size()); for (const HnswCandidate & hit : candidates.peek()) { if (hit.distance > distance_threshold) continue; - result.emplace_back(hit.docid, hit.distance); + result.emplace_back(get_docid(hit.nodeid), hit.distance); } std::sort(result.begin(), result.end(), NeighborsByDocId()); return result; @@ -723,14 +731,14 @@ HnswIndex::top_k_candidates(const TypedCells &vector, uint32_t k, const GlobalFi { FurthestPriQ best_neighbors; auto entry = _graph.get_entry_node(); - if (entry.docid == 0) { + if (entry.nodeid == 0) { // graph has no entry point return best_neighbors; } int search_level = entry.level; - double entry_dist = calc_distance(vector, entry.docid); + double entry_dist = calc_distance(vector, entry.nodeid); // TODO: check if entry docid/node_ref is still valid here - HnswCandidate entry_point(entry.docid, entry.node_ref, entry_dist); + HnswCandidate entry_point(entry.nodeid, entry.node_ref, entry_dist); while (search_level > 0) { entry_point = find_nearest_in_layer(vector, entry_point, search_level); --search_level; @@ -741,9 +749,9 @@ HnswIndex::top_k_candidates(const TypedCells &vector, uint32_t k, const GlobalFi } HnswNode -HnswIndex::get_node(uint32_t docid) const +HnswIndex::get_node(uint32_t nodeid) const { - auto node_ref = _graph.acquire_node_ref(docid); + auto node_ref = _graph.acquire_node_ref(nodeid); if (!node_ref.valid()) { return HnswNode(); } @@ -759,17 +767,17 @@ HnswIndex::get_node(uint32_t docid) const } void -HnswIndex::set_node(uint32_t docid, const HnswNode &node) +HnswIndex::set_node(uint32_t nodeid, const HnswNode &node) { size_t num_levels = node.size(); assert(num_levels > 0); - auto node_ref = _graph.make_node_for_document(docid, num_levels); + auto node_ref = _graph.make_node(nodeid, num_levels); for (size_t level = 0; level < num_levels; ++level) { - connect_new_node(docid, node.level(level), level); + connect_new_node(nodeid, node.level(level), level); } int max_level = num_levels - 1; if (get_entry_level() < max_level) { - _graph.set_entry_node({docid, node_ref, max_level}); + _graph.set_entry_node({nodeid, node_ref, max_level}); } } @@ -777,20 +785,20 @@ bool HnswIndex::check_link_symmetry() const { bool all_sym = true; - size_t doc_id_limit = _graph.size(); - for (size_t docid = 0; docid < doc_id_limit; ++docid) { - auto node_ref = _graph.acquire_node_ref(docid); + size_t nodeid_limit = _graph.size(); + for (size_t nodeid = 0; nodeid < nodeid_limit; ++nodeid) { + auto node_ref = _graph.acquire_node_ref(nodeid); if (node_ref.valid()) { auto levels = _graph.nodes.get(node_ref); uint32_t level = 0; for (const auto& links_ref : levels) { auto links = _graph.links.get(links_ref.load_acquire()); - for (auto neighbor_docid : links) { - auto neighbor_links = _graph.acquire_link_array(neighbor_docid, level); - if (! has_link_to(neighbor_links, docid)) { + for (auto neighbor_nodeid : links) { + auto neighbor_links = _graph.acquire_link_array(neighbor_nodeid, level); + if (! has_link_to(neighbor_links, nodeid)) { all_sym = false; - LOG(warning, "check_link_symmetry: docid %zu links to %u on level %u, but no backlink", - docid, neighbor_docid, level); + LOG(warning, "check_link_symmetry: nodeid %zu links to %u on level %u, but no backlink", + nodeid, neighbor_nodeid, level); } } ++level; @@ -810,9 +818,9 @@ HnswIndex::count_reachable_nodes() const } std::vector<bool> visited(_graph.size()); LinkArray found_links; - if (entry.docid < visited.size()) { - found_links.push_back(entry.docid); - visited[entry.docid] = true; + if (entry.nodeid < visited.size()) { + found_links.push_back(entry.nodeid); + visited[entry.nodeid] = true; } vespalib::steady_time doom = vespalib::steady_clock::now() + MAX_COUNT_DURATION; while (search_level >= 0) { @@ -820,9 +828,9 @@ HnswIndex::count_reachable_nodes() const if (vespalib::steady_clock::now() > doom) { return {found_links.size(), false}; } - uint32_t docid = found_links[idx]; - if (docid < visited.size()) { - auto neighbors = _graph.acquire_link_array(docid, search_level); + uint32_t nodeid = found_links[idx]; + if (nodeid < visited.size()) { + auto neighbors = _graph.acquire_link_array(nodeid, search_level); for (uint32_t neighbor : neighbors) { if (neighbor >= visited.size() || visited[neighbor]) { continue; diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h index 43e3001b496..1833eec3909 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h @@ -80,6 +80,9 @@ public: CompactionSpec link_arrays() const noexcept { return _link_arrays; } }; + // Stubs for mapping between docid and nodeid. + static uint32_t get_docid(uint32_t nodeid) { return nodeid; } + static uint32_t get_nodeid(uint32_t docid) { return docid; } protected: using AtomicEntryRef = HnswGraph::AtomicEntryRef; using NodeStore = HnswGraph::NodeStore; @@ -100,10 +103,10 @@ protected: HnswIndexCompactionSpec _compaction_spec; uint32_t max_links_for_level(uint32_t level) const; - void add_link_to(uint32_t docid, uint32_t level, const LinkArrayRef& old_links, uint32_t new_link) { + void add_link_to(uint32_t nodeid, uint32_t level, const LinkArrayRef& old_links, uint32_t new_link) { LinkArray new_links(old_links.begin(), old_links.end()); new_links.push_back(new_link); - _graph.set_link_array(docid, level, new_links); + _graph.set_link_array(nodeid, level, new_links); } /** @@ -122,18 +125,22 @@ protected: SelectResult select_neighbors_heuristic(const HnswCandidateVector& neighbors, uint32_t max_links) const; SelectResult select_neighbors_simple(const HnswCandidateVector& neighbors, uint32_t max_links) const; SelectResult select_neighbors(const HnswCandidateVector& neighbors, uint32_t max_links) const; - void shrink_if_needed(uint32_t docid, uint32_t level); - void connect_new_node(uint32_t docid, const LinkArrayRef &neighbors, uint32_t level); + void shrink_if_needed(uint32_t nodeid, uint32_t level); + void connect_new_node(uint32_t nodeid, const LinkArrayRef &neighbors, uint32_t level); void mutual_reconnect(const LinkArrayRef &cluster, uint32_t level); void remove_link_to(uint32_t remove_from, uint32_t remove_id, uint32_t level); - inline TypedCells get_vector(uint32_t docid) const { + inline TypedCells get_vector(uint32_t nodeid) const { + uint32_t docid = get_docid(nodeid); + return _vectors.get_vector(docid); + } + inline TypedCells get_vector_by_docid(uint32_t docid) const { return _vectors.get_vector(docid); } - double calc_distance(uint32_t lhs_docid, uint32_t rhs_docid) const; - double calc_distance(const TypedCells& lhs, uint32_t rhs_docid) const; - uint32_t estimate_visited_nodes(uint32_t level, uint32_t doc_id_limit, uint32_t neighbors_to_find, const GlobalFilter* filter) const; + double calc_distance(uint32_t lhs_nodeid, uint32_t rhs_nodeid) const; + double calc_distance(const TypedCells& lhs, uint32_t rhs_nodeid) const; + uint32_t estimate_visited_nodes(uint32_t level, uint32_t nodeid_limit, uint32_t neighbors_to_find, const GlobalFilter* filter) const; /** * Performs a greedy search in the given layer to find the candidate that is nearest the input vector. @@ -142,7 +149,7 @@ protected: template <class VisitedTracker> void search_layer_helper(const TypedCells& input, uint32_t neighbors_to_find, FurthestPriQ& found_neighbors, uint32_t level, const GlobalFilter *filter, - uint32_t doc_id_limit, + uint32_t nodeid_limit, uint32_t estimated_visited_nodes) const; void search_layer(const TypedCells& input, uint32_t neighbors_to_find, FurthestPriQ& found_neighbors, uint32_t level, const GlobalFilter *filter = nullptr) const; @@ -169,7 +176,7 @@ protected: }; PreparedAddDoc internal_prepare_add(uint32_t docid, TypedCells input_vector, vespalib::GenerationHandler::Guard read_guard) const; - LinkArray filter_valid_docids(uint32_t level, const PreparedAddDoc::Links &neighbors, uint32_t me); + LinkArray filter_valid_nodeids(uint32_t level, const PreparedAddDoc::Links &neighbors, uint32_t self_nodeid); void internal_complete_add(uint32_t docid, PreparedAddDoc &op); public: HnswIndex(const DocVectorAccess& vectors, DistanceFunction::UP distance_func, @@ -184,6 +191,7 @@ public: TypedCells vector, vespalib::GenerationHandler::Guard read_guard) const override; void complete_add_document(uint32_t docid, std::unique_ptr<PrepareResult> prepare_result) override; + void remove_node(uint32_t nodeid); void remove_document(uint32_t docid) override; void assign_generation(generation_t current_gen) override; void reclaim_memory(generation_t oldest_used_gen) override; @@ -210,12 +218,12 @@ public: FurthestPriQ top_k_candidates(const TypedCells &vector, uint32_t k, const GlobalFilter *filter) const; - uint32_t get_entry_docid() const { return _graph.get_entry_node().docid; } + uint32_t get_entry_nodeid() const { return _graph.get_entry_node().nodeid; } int32_t get_entry_level() const { return _graph.get_entry_node().level; } // Should only be used by unit tests. - HnswNode get_node(uint32_t docid) const; - void set_node(uint32_t docid, const HnswNode &node); + HnswNode get_node(uint32_t nodeid) const; + void set_node(uint32_t nodeid, const HnswNode &node); bool check_link_symmetry() const; std::pair<uint32_t, bool> count_reachable_nodes() const; HnswGraph& get_graph() { return _graph; } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.h index b65ae22ed47..5d40893f215 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.h @@ -22,10 +22,10 @@ class HnswIndexLoader : public NearestNeighborIndexLoader { private: HnswGraph& _graph; std::unique_ptr<ReaderType> _reader; - uint32_t _entry_docid; + uint32_t _entry_nodeid; int32_t _entry_level; uint32_t _num_nodes; - uint32_t _docid; + uint32_t _nodeid; std::vector<uint32_t> _link_array; bool _complete; diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.hpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.hpp index 4fd75bb2fec..1c38e7d7936 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.hpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.hpp @@ -13,7 +13,7 @@ template <typename ReaderType> void HnswIndexLoader<ReaderType>::init() { - _entry_docid = next_int(); + _entry_nodeid = next_int(); _entry_level = next_int(); _num_nodes = next_int(); } @@ -25,10 +25,10 @@ template <typename ReaderType> HnswIndexLoader<ReaderType>::HnswIndexLoader(HnswGraph& graph, std::unique_ptr<ReaderType> reader) : _graph(graph), _reader(std::move(reader)), - _entry_docid(0), + _entry_nodeid(0), _entry_level(0), _num_nodes(0), - _docid(0), + _nodeid(0), _link_array(), _complete(false) { @@ -40,28 +40,28 @@ bool HnswIndexLoader<ReaderType>::load_next() { assert(!_complete); - if (_docid < _num_nodes) { + if (_nodeid < _num_nodes) { uint32_t num_levels = next_int(); if (num_levels > 0) { - _graph.make_node_for_document(_docid, num_levels); + _graph.make_node(_nodeid, num_levels); for (uint32_t level = 0; level < num_levels; ++level) { uint32_t num_links = next_int(); _link_array.clear(); while (num_links-- > 0) { _link_array.push_back(next_int()); } - _graph.set_link_array(_docid, level, _link_array); + _graph.set_link_array(_nodeid, level, _link_array); } } } - if (++_docid < _num_nodes) { + if (++_nodeid < _num_nodes) { return true; } else { _graph.node_refs.ensure_size(std::max(_num_nodes, 1u)); _graph.node_refs_size.store(std::max(_num_nodes, 1u), std::memory_order_release); _graph.trim_node_refs_size(); - auto entry_node_ref = _graph.get_node_ref(_entry_docid); - _graph.set_entry_node({_entry_docid, entry_node_ref, _entry_level}); + auto entry_node_ref = _graph.get_node_ref(_entry_nodeid); + _graph.set_entry_node({_entry_nodeid, entry_node_ref, _entry_level}); _complete = true; return false; } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp index 29218b47f53..2784dd5af28 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp @@ -26,7 +26,7 @@ count_valid_link_arrays(const HnswGraph & graph) { } HnswIndexSaver::MetaData::MetaData() - : entry_docid(0), + : entry_nodeid(0), entry_level(-1), refs(), nodes() @@ -38,7 +38,7 @@ HnswIndexSaver::HnswIndexSaver(const HnswGraph &graph) : _graph_links(graph.links), _meta_data() { auto entry = graph.get_entry_node(); - _meta_data.entry_docid = entry.docid; + _meta_data.entry_nodeid = entry.nodeid; _meta_data.entry_level = entry.level; size_t num_nodes = graph.node_refs.get_size(); // Called from writer only assert (num_nodes <= (std::numeric_limits<uint32_t>::max() - 1)); @@ -62,7 +62,7 @@ HnswIndexSaver::HnswIndexSaver(const HnswGraph &graph) void HnswIndexSaver::save(BufferWriter& writer) const { - writer.write(&_meta_data.entry_docid, sizeof(uint32_t)); + writer.write(&_meta_data.entry_nodeid, sizeof(uint32_t)); writer.write(&_meta_data.entry_level, sizeof(int32_t)); uint32_t num_nodes = _meta_data.nodes.size() - 1; writer.write(&num_nodes, sizeof(uint32_t)); diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.h index e170a4a894f..11a1f993c4e 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.h @@ -25,7 +25,7 @@ public: private: struct MetaData { using EntryRef = vespalib::datastore::EntryRef; - uint32_t entry_docid; + uint32_t entry_nodeid; int32_t entry_level; std::vector<EntryRef, vespalib::allocator_large<EntryRef>> refs; std::vector<uint32_t, vespalib::allocator_large<uint32_t>> nodes; diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h index abb7ecc199a..66255c3f797 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h @@ -13,13 +13,13 @@ namespace search::tensor { * Represents a candidate node with its distance to another point in space. */ struct HnswCandidate { - uint32_t docid; + uint32_t nodeid; HnswGraph::NodeRef node_ref; double distance; - HnswCandidate(uint32_t docid_in, double distance_in) noexcept - : docid(docid_in), node_ref(), distance(distance_in) {} - HnswCandidate(uint32_t docid_in, HnswGraph::NodeRef node_ref_in, double distance_in) noexcept - : docid(docid_in), node_ref(node_ref_in), distance(distance_in) {} + HnswCandidate(uint32_t nodeid_in, double distance_in) noexcept + : nodeid(nodeid_in), node_ref(), distance(distance_in) {} + HnswCandidate(uint32_t nodeid_in, HnswGraph::NodeRef node_ref_in, double distance_in) noexcept + : nodeid(nodeid_in), node_ref(node_ref_in), distance(distance_in) {} }; struct GreaterDistance { |