diff options
author | Arne Juul <arnej@verizonmedia.com> | 2020-06-09 08:09:37 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2020-06-09 08:14:58 +0000 |
commit | f9f0e7a1f555b092739a1b30704bf7550348c129 (patch) | |
tree | fb675999ad47e16815baa59a2338c974ade67a82 /searchlib | |
parent | 66c66aa167c2ba431943ad7287da3f20b11a05ab (diff) |
store entry docid/level in an atomic value
Diffstat (limited to 'searchlib')
7 files changed, 81 insertions, 38 deletions
diff --git a/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp b/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp index 7aa78fbe06b..b0a82abec25 100644 --- a/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp @@ -49,7 +49,10 @@ void populate(HnswGraph &graph) { graph.set_link_array(6, 0, V{1, 2, 4}); graph.set_link_array(2, 1, V{4}); graph.set_link_array(4, 1, V{2}); - graph.set_entry_node(2, 1); + HnswGraph::EntryNode entry; + entry.docid = 2; + entry.level = 1; + graph.set_entry_node(entry); } void modify(HnswGraph &graph) { @@ -63,7 +66,10 @@ void modify(HnswGraph &graph) { graph.set_link_array(4, 1, V{7}); graph.set_link_array(7, 1, V{4}); - graph.set_entry_node(4, 1); + HnswGraph::EntryNode entry; + entry.docid = 4; + entry.level = 1; + graph.set_entry_node(entry); } @@ -110,8 +116,9 @@ public: void expect_copy_as_populated() const { EXPECT_EQ(copy.size(), 7); - EXPECT_EQ(copy.entry_docid, 2); - EXPECT_EQ(copy.entry_level, 1); + auto entry = copy.get_entry_node(); + EXPECT_EQ(entry.docid, 2); + EXPECT_EQ(entry.level, 1); expect_empty_d(0); expect_empty_d(3); diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp index 0f58a72e794..30796b8f2a9 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp @@ -11,9 +11,13 @@ HnswGraph::HnswGraph() : node_refs(), nodes(HnswIndex::make_default_node_store_config()), links(HnswIndex::make_default_link_store_config()), - entry_docid(0), // Note that docid 0 is reserved and never used - entry_level(-1) -{} + entry_docid_and_level() +{ + EntryNode entry; + entry.docid = 0; // Note that docid 0 is reserved and never used + entry.level = -1; + set_entry_node(entry); +} HnswGraph::~HnswGraph() {} diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h index d1d308def99..9567d6e7d03 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h @@ -38,8 +38,8 @@ struct HnswGraph { NodeRefVector node_refs; NodeStore nodes; LinkStore links; - uint32_t entry_docid; - int32_t entry_level; + + std::atomic<uint64_t> entry_docid_and_level; HnswGraph(); @@ -63,9 +63,25 @@ struct HnswGraph { void set_link_array(uint32_t docid, uint32_t level, const LinkArrayRef& new_links); - void set_entry_node(uint32_t docid, int32_t level) { - entry_docid = docid; - entry_level = level; + struct EntryNode { + uint32_t docid; + int32_t level; + EntryNode() : docid(0), level(-1) {} + }; + + void set_entry_node(EntryNode node) { + uint64_t value = node.level; + value <<= 32; + value |= node.docid; + entry_docid_and_level.store(value, std::memory_order_release); + } + + EntryNode get_entry_node() const { + EntryNode entry; + uint64_t value = entry_docid_and_level.load(std::memory_order_acquire); + entry.docid = (uint32_t)value; + entry.level = (int32_t)(value >> 32); + return entry; } size_t size() const { return node_refs.size(); } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index 904754d6d9f..2b15b7b0e5f 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -276,15 +276,17 @@ HnswIndex::add_document(uint32_t docid) // TODO: Add capping on num_levels int level = _level_generator->max_level(); _graph.make_node_for_document(docid, level + 1); - uint32_t entry_docid = get_entry_docid(); - if (entry_docid == 0) { - _graph.set_entry_node(docid, level); + auto entry = _graph.get_entry_node(); + if (entry.docid == 0) { + entry.docid = docid; + entry.level = level; + _graph.set_entry_node(entry); return; } - int search_level = get_entry_level(); - double entry_dist = calc_distance(input, entry_docid); - HnswCandidate entry_point(entry_docid, entry_dist); + int search_level = entry.level; + double entry_dist = calc_distance(input, entry.docid); + HnswCandidate entry_point(entry.docid, entry_dist); while (search_level > level) { entry_point = find_nearest_in_layer(input, entry_point, search_level); --search_level; @@ -303,7 +305,9 @@ HnswIndex::add_document(uint32_t docid) --search_level; } if (level > get_entry_level()) { - _graph.set_entry_node(docid, level); + entry.docid = docid; + entry.level = level; + _graph.set_entry_node(entry); } } @@ -343,7 +347,10 @@ HnswIndex::remove_document(uint32_t docid) LinkArrayRef my_links = _graph.get_link_array(docid, level); for (uint32_t neighbor_id : my_links) { if (need_new_entrypoint) { - _graph.set_entry_node(neighbor_id, level); + HnswGraph::EntryNode entry; + entry.docid = neighbor_id; + entry.level = level; + _graph.set_entry_node(entry); need_new_entrypoint = false; } remove_link_to(neighbor_id, docid, level); @@ -352,7 +359,8 @@ HnswIndex::remove_document(uint32_t docid) _graph.set_link_array(docid, level, empty); } if (need_new_entrypoint) { - _graph.set_entry_node(0, -1); + HnswGraph::EntryNode entry; + _graph.set_entry_node(entry); } _graph.remove_node_for_document(docid); } @@ -407,8 +415,9 @@ HnswIndex::get_state(const vespalib::slime::Inserter& inserter) const uint32_t reachable = count_reachable_nodes(); uint32_t unreachable = valid_nodes - reachable; object.setLong("unreachable_nodes", unreachable); - object.setLong("entry_docid", _graph.entry_docid); - object.setLong("entry_level", _graph.entry_level); + auto entry_node = _graph.get_entry_node(); + object.setLong("entry_docid", entry_node.docid); + object.setLong("entry_level", entry_node.level); auto& cfgObj = object.setObject("cfg"); cfgObj.setLong("max_links_at_level_0", _cfg.max_links_at_level_0()); cfgObj.setLong("max_links_on_inserts", _cfg.max_links_on_inserts()); @@ -472,13 +481,13 @@ FurthestPriQ HnswIndex::top_k_candidates(const TypedCells &vector, uint32_t k, const BitVector *filter) const { FurthestPriQ best_neighbors; - if (get_entry_level() < 0) { + auto entry = _graph.get_entry_node(); + if (entry.level < 0) { return best_neighbors; } - uint32_t entry_docid = get_entry_docid(); - int search_level = get_entry_level(); - double entry_dist = calc_distance(vector, entry_docid); - HnswCandidate entry_point(entry_docid, entry_dist); + int search_level = entry.level; + double entry_dist = calc_distance(vector, entry.docid); + HnswCandidate entry_point(entry.docid, entry_dist); while (search_level > 0) { entry_point = find_nearest_in_layer(vector, entry_point, search_level); --search_level; @@ -517,7 +526,10 @@ HnswIndex::set_node(uint32_t docid, const HnswNode &node) } int max_level = num_levels - 1; if (get_entry_level() < max_level) { - _graph.set_entry_node(docid, max_level); + HnswGraph::EntryNode entry; + entry.docid = docid; + entry.level = max_level; + _graph.set_entry_node(entry); } } @@ -548,15 +560,15 @@ HnswIndex::check_link_symmetry() const uint32_t HnswIndex::count_reachable_nodes() const { - int search_level = get_entry_level(); + auto entry = _graph.get_entry_node(); + int search_level = entry.level; if (search_level < 0) { return 0; } auto visited = _visited_set_pool.get(_graph.size()); - uint32_t entry_id = get_entry_docid(); LinkArray found_links; - found_links.push_back(entry_id); - visited.mark(entry_id); + found_links.push_back(entry.docid); + visited.mark(entry.docid); while (search_level >= 0) { for (uint32_t idx = 0; idx < found_links.size(); ++idx) { uint32_t docid = found_links[idx]; diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h index 6bae8bef4c5..7f2e0067036 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h @@ -145,8 +145,8 @@ public: FurthestPriQ top_k_candidates(const TypedCells &vector, uint32_t k, const BitVector *filter) const; - uint32_t get_entry_docid() const { return _graph.entry_docid; } - int32_t get_entry_level() const { return _graph.entry_level; } + uint32_t get_entry_docid() const { return _graph.get_entry_node().docid; } + int32_t get_entry_level() const { return _graph.get_entry_node().level; } // Should only be used by unit tests. HnswNode get_node(uint32_t docid) const; diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.cpp index f02ead86a8d..8d8b4204063 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.cpp @@ -39,7 +39,10 @@ HnswIndexLoader::load(const fileutil::LoadedBuffer& buf) } if (_failed) return false; _graph.node_refs.ensure_size(num_nodes); - _graph.set_entry_node(entry_docid, entry_level); + HnswGraph::EntryNode entry; + entry.docid = entry_docid; + entry.level = entry_level; + _graph.set_entry_node(entry); return true; } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp index 46a988d575e..6593a60d6b5 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp @@ -11,8 +11,9 @@ HnswIndexSaver::~HnswIndexSaver() {} HnswIndexSaver::HnswIndexSaver(const HnswGraph &graph) : _graph_links(graph.links), _meta_data() { - _meta_data.entry_docid = graph.entry_docid; - _meta_data.entry_level = graph.entry_level; + auto entry = graph.get_entry_node(); + _meta_data.entry_docid = entry.docid; + _meta_data.entry_level = entry.level; size_t num_nodes = graph.node_refs.size(); _meta_data.nodes.reserve(num_nodes); for (size_t i = 0; i < num_nodes; ++i) { |