aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2022-03-23 20:27:18 +0100
committerGitHub <noreply@github.com>2022-03-23 20:27:18 +0100
commitb07fc575ba690710f6524004848e5fc01b3a3152 (patch)
tree4a0139a4a5fdf122afa741af3eaef124cfdf9453
parentb8f4f354f51d4104c2ed0b87158414e22c3014db (diff)
parent2188d3b1b8e15d3625e480fddeb7afd7001e42d4 (diff)
Merge pull request #21766 from vespa-engine/toregge/add-acquire-node-ref-to-hnsw-graph
Add acquire_node_ref() member function to HnswGraph.
-rw-r--r--searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp10
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp16
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_graph.h22
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp37
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp6
5 files changed, 56 insertions, 35 deletions
diff --git a/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp b/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp
index 43ff6f5b83b..add7801e03a 100644
--- a/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp
+++ b/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp
@@ -89,13 +89,13 @@ public:
HnswGraph copy;
void expect_empty_d(uint32_t docid) const {
- EXPECT_FALSE(copy.node_refs[docid].load_acquire().valid());
+ EXPECT_FALSE(copy.acquire_node_ref(docid).valid());
}
void expect_level_0(uint32_t docid, const V& exp_links) const {
- auto levels = copy.get_level_array(docid);
+ auto levels = copy.acquire_level_array(docid);
EXPECT_GE(levels.size(), 1);
- auto links = copy.get_link_array(docid, 0);
+ auto links = copy.acquire_link_array(docid, 0);
EXPECT_EQ(exp_links.size(), links.size());
for (size_t i = 0; i < exp_links.size() && i < links.size(); ++i) {
EXPECT_EQ(exp_links[i], links[i]);
@@ -103,9 +103,9 @@ public:
}
void expect_level_1(uint32_t docid, const V& exp_links) const {
- auto levels = copy.get_level_array(docid);
+ auto levels = copy.acquire_level_array(docid);
EXPECT_EQ(2, levels.size());
- auto links = copy.get_link_array(docid, 1);
+ auto links = copy.acquire_link_array(docid, 1);
EXPECT_EQ(exp_links.size(), links.size());
for (size_t i = 0; i < exp_links.size() && i < links.size(); ++i) {
EXPECT_EQ(exp_links[i], links[i]);
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp
index 8beb111ca59..3049b643709 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp
@@ -26,7 +26,7 @@ HnswGraph::make_node_for_document(uint32_t docid, uint32_t num_levels)
{
node_refs.ensure_size(docid + 1, AtomicEntryRef());
// A document cannot be added twice.
- assert(!node_refs[docid].load_acquire().valid());
+ assert(!get_node_ref(docid).valid());
// Note: The level array instance lives as long as the document is present in the index.
vespalib::Array<AtomicEntryRef> levels(num_levels, AtomicEntryRef());
auto node_ref = nodes.add(levels);
@@ -40,7 +40,7 @@ HnswGraph::make_node_for_document(uint32_t docid, uint32_t num_levels)
void
HnswGraph::remove_node_for_document(uint32_t docid)
{
- auto node_ref = node_refs[docid].load_acquire();
+ auto node_ref = get_node_ref(docid);
assert(node_ref.valid());
auto levels = nodes.get(node_ref);
vespalib::datastore::EntryRef invalid;
@@ -48,7 +48,7 @@ HnswGraph::remove_node_for_document(uint32_t docid)
// Ensure data referenced through the old ref can be recycled:
nodes.remove(node_ref);
for (size_t i = 0; i < levels.size(); ++i) {
- auto old_links_ref = levels[i].load_acquire();
+ auto old_links_ref = levels[i].load_relaxed();
links.remove(old_links_ref);
}
if (docid + 1 == node_refs_size.load(std::memory_order_relaxed)) {
@@ -60,7 +60,7 @@ void
HnswGraph::trim_node_refs_size()
{
uint32_t check_doc_id = node_refs_size.load(std::memory_order_relaxed) - 1;
- while (check_doc_id > 0u && !node_refs[check_doc_id].load_relaxed().valid()) {
+ while (check_doc_id > 0u && !get_node_ref(check_doc_id).valid()) {
--check_doc_id;
}
node_refs_size.store(check_doc_id + 1, std::memory_order_release);
@@ -70,11 +70,11 @@ void
HnswGraph::set_link_array(uint32_t docid, uint32_t level, const LinkArrayRef& new_links)
{
auto new_links_ref = links.add(new_links);
- auto node_ref = node_refs[docid].load_acquire();
+ auto node_ref = get_node_ref(docid);
assert(node_ref.valid());
auto levels = nodes.get_writable(node_ref);
assert(level < levels.size());
- auto old_links_ref = levels[level].load_acquire();
+ auto old_links_ref = levels[level].load_relaxed();
levels[level].store_release(new_links_ref);
links.remove(old_links_ref);
}
@@ -83,9 +83,9 @@ HnswGraph::Histograms
HnswGraph::histograms() const
{
Histograms result;
- size_t num_nodes = node_refs.size();
+ size_t num_nodes = node_refs_size.load(std::memory_order_acquire);
for (size_t i = 0; i < num_nodes; ++i) {
- auto node_ref = node_refs[i].load_acquire();
+ auto node_ref = acquire_node_ref(i);
if (node_ref.valid()) {
uint32_t levels = 0;
uint32_t l0links = 0;
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
index 5d6cff73102..726abb8141d 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
@@ -56,11 +56,15 @@ struct HnswGraph {
void trim_node_refs_size();
NodeRef get_node_ref(uint32_t docid) const {
- return node_refs[docid].load_acquire();
+ return node_refs[docid].load_relaxed();
+ }
+
+ NodeRef acquire_node_ref(uint32_t docid) const {
+ return node_refs.acquire_elem_ref(docid).load_acquire();
}
bool still_valid(uint32_t docid, NodeRef node_ref) const {
- return node_ref.valid() && (get_node_ref(docid) == node_ref);
+ return node_ref.valid() && (acquire_node_ref(docid) == node_ref);
}
LevelArrayRef get_level_array(NodeRef node_ref) const {
@@ -75,6 +79,11 @@ struct HnswGraph {
return get_level_array(node_ref);
}
+ LevelArrayRef acquire_level_array(uint32_t docid) const {
+ auto node_ref = acquire_node_ref(docid);
+ return get_level_array(node_ref);
+ }
+
LinkArrayRef get_link_array(LevelArrayRef levels, uint32_t level) const {
if (level < levels.size()) {
auto links_ref = levels[level].load_acquire();
@@ -90,6 +99,11 @@ struct HnswGraph {
return get_link_array(levels, level);
}
+ LinkArrayRef acquire_link_array(uint32_t docid, uint32_t level) const {
+ auto levels = acquire_level_array(docid);
+ return get_link_array(levels, level);
+ }
+
LinkArrayRef get_link_array(NodeRef node_ref, uint32_t level) const {
auto levels = get_level_array(node_ref);
return get_link_array(levels, level);
@@ -124,7 +138,7 @@ struct HnswGraph {
while (true) {
uint64_t value = get_entry_atomic();
entry.docid = (uint32_t)value;
- entry.node_ref = get_node_ref(entry.docid);
+ entry.node_ref = acquire_node_ref(entry.docid);
entry.level = (int32_t)(value >> 32);
if ((entry.docid == 0)
&& (entry.level == -1)
@@ -144,7 +158,7 @@ struct HnswGraph {
}
}
- size_t size() const { return node_refs.size(); }
+ size_t size() const { return node_refs_size.load(std::memory_order_acquire); }
struct Histograms {
std::vector<uint32_t> level_histogram;
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index c99e059815b..5ae26757b0d 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -245,7 +245,7 @@ HnswIndex::find_nearest_in_layer(const TypedCells& input, const HnswCandidate& e
while (keep_searching) {
keep_searching = false;
for (uint32_t neighbor_docid : _graph.get_link_array(nearest.node_ref, level)) {
- auto neighbor_ref = _graph.get_node_ref(neighbor_docid);
+ auto neighbor_ref = _graph.acquire_node_ref(neighbor_docid);
double dist = calc_distance(input, neighbor_docid);
if (_graph.still_valid(neighbor_docid, neighbor_ref)
&& dist < nearest.distance)
@@ -289,7 +289,7 @@ HnswIndex::search_layer_helper(const TypedCells& input, uint32_t neighbors_to_fi
if (neighbor_docid >= doc_id_limit) {
continue;
}
- auto neighbor_ref = _graph.get_node_ref(neighbor_docid);
+ auto neighbor_ref = _graph.acquire_node_ref(neighbor_docid);
if ((! neighbor_ref.valid())
|| ! visited.try_mark(neighbor_docid))
{
@@ -433,7 +433,7 @@ HnswIndex::prepare_add_document(uint32_t docid,
TypedCells vector,
vespalib::GenerationHandler::Guard read_guard) const
{
- uint32_t max_nodes = _graph.node_refs.size();
+ uint32_t max_nodes = _graph.node_refs_size.load(std::memory_order_acquire);
if (max_nodes < _cfg.min_size_before_two_phase()) {
// the first documents added will do all work in write thread
// to ensure they are linked together:
@@ -543,7 +543,7 @@ HnswIndex::compact_link_arrays(CompactionSpec compaction_spec, const CompactionS
auto context = _graph.links.compactWorst(compaction_spec, compaction_strategy);
uint32_t doc_id_limit = _graph.node_refs.size();
for (uint32_t doc_id = 1; doc_id < doc_id_limit; ++doc_id) {
- EntryRef level_ref = _graph.node_refs[doc_id].load_relaxed();
+ EntryRef level_ref = _graph.get_node_ref(doc_id);
if (level_ref.valid()) {
vespalib::ArrayRef<AtomicEntryRef> refs(_graph.nodes.get_writable(level_ref));
context->compact(refs);
@@ -756,7 +756,7 @@ HnswIndex::top_k_candidates(const TypedCells &vector, uint32_t k, const BitVecto
HnswNode
HnswIndex::get_node(uint32_t docid) const
{
- auto node_ref = _graph.node_refs[docid].load_acquire();
+ auto node_ref = _graph.acquire_node_ref(docid);
if (!node_ref.valid()) {
return HnswNode();
}
@@ -790,15 +790,16 @@ bool
HnswIndex::check_link_symmetry() const
{
bool all_sym = true;
- for (size_t docid = 0; docid < _graph.node_refs.size(); ++docid) {
- auto node_ref = _graph.node_refs[docid].load_acquire();
+ size_t doc_id_limit = _graph.size();
+ for (size_t docid = 0; docid < doc_id_limit; ++docid) {
+ auto node_ref = _graph.acquire_node_ref(docid);
if (node_ref.valid()) {
auto levels = _graph.nodes.get(node_ref);
uint32_t level = 0;
for (const auto& links_ref : levels) {
auto links = _graph.links.get(links_ref.load_acquire());
for (auto neighbor_docid : links) {
- auto neighbor_links = _graph.get_link_array(neighbor_docid, level);
+ auto neighbor_links = _graph.acquire_link_array(neighbor_docid, level);
if (! has_link_to(neighbor_links, docid)) {
all_sym = false;
LOG(warning, "check_link_symmetry: docid %zu links to %u on level %u, but no backlink",
@@ -822,8 +823,10 @@ HnswIndex::count_reachable_nodes() const
}
std::vector<bool> visited(_graph.size());
LinkArray found_links;
- found_links.push_back(entry.docid);
- visited[entry.docid] = true;
+ if (entry.docid < visited.size()) {
+ found_links.push_back(entry.docid);
+ visited[entry.docid] = true;
+ }
vespalib::steady_time doom = vespalib::steady_clock::now() + MAX_COUNT_DURATION;
while (search_level >= 0) {
for (uint32_t idx = 0; idx < found_links.size(); ++idx) {
@@ -831,11 +834,15 @@ HnswIndex::count_reachable_nodes() const
return {found_links.size(), false};
}
uint32_t docid = found_links[idx];
- auto neighbors = _graph.get_link_array(docid, search_level);
- for (uint32_t neighbor : neighbors) {
- if (visited[neighbor]) continue;
- visited[neighbor] = true;
- found_links.push_back(neighbor);
+ if (docid < visited.size()) {
+ auto neighbors = _graph.acquire_link_array(docid, search_level);
+ for (uint32_t neighbor : neighbors) {
+ if (neighbor >= visited.size() || visited[neighbor]) {
+ continue;
+ }
+ visited[neighbor] = true;
+ found_links.push_back(neighbor);
+ }
}
}
--search_level;
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp
index 2eb9f7d4acd..cc2ecd584e1 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp
@@ -14,7 +14,7 @@ count_valid_link_arrays(const HnswGraph & graph) {
size_t count(0);
size_t num_nodes = graph.node_refs.size();
for (size_t i = 0; i < num_nodes; ++i) {
- auto node_ref = graph.node_refs[i].load_acquire();
+ auto node_ref = graph.get_node_ref(i);
if (node_ref.valid()) {
count += graph.nodes.get(node_ref).size();
}
@@ -47,11 +47,11 @@ HnswIndexSaver::HnswIndexSaver(const HnswGraph &graph)
_meta_data.nodes.reserve(num_nodes+1);
for (size_t i = 0; i < num_nodes; ++i) {
_meta_data.nodes.push_back(_meta_data.refs.size());
- auto node_ref = graph.node_refs[i].load_acquire();
+ auto node_ref = graph.get_node_ref(i);
if (node_ref.valid()) {
auto levels = graph.nodes.get(node_ref);
for (const auto& links_ref : levels) {
- _meta_data.refs.push_back(links_ref.load_acquire());
+ _meta_data.refs.push_back(links_ref.load_relaxed());
}
}
}