Add HnswSimpleNode.

author: Tor Egge <Tor.Egge@online.no> 2022-11-09 11:00:44 +0100
committer: Tor Egge <Tor.Egge@online.no> 2022-11-09 11:00:44 +0100
commit: 13be5ec166cf74c9cffb6a26d29aa112f4141a37 (patch)
tree: efea2ca5ac14fc6802f257ac8a351960907b43bb /searchlib
parent: 30a676fb89f28a618f0dc2c0752cde8c29bf320c (diff)
5 files changed, 62 insertions, 15 deletions
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp
index 6d8b143cff9..d5312f4bd4c 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp
@@ -2,6 +2,7 @@
 
 #include "hnsw_graph.h"
 #include "hnsw_index.h"
+#include <vespa/vespalib/util/rcuvector.hpp>
 #include <vespa/vespalib/datastore/array_store.hpp>
 
 namespace search::tensor {
@@ -13,7 +14,7 @@ HnswGraph::HnswGraph()
     links(HnswIndex::make_default_link_store_config(), {}),
     entry_nodeid_and_level()
 {
-    node_refs.ensure_size(1, AtomicEntryRef());
+    node_refs.ensure_size(1, NodeType());
     EntryNode entry;
     set_entry_node(entry);
 }
@@ -23,13 +24,13 @@ HnswGraph::~HnswGraph() = default;
 HnswGraph::NodeRef
 HnswGraph::make_node(uint32_t nodeid, uint32_t num_levels)
 {
-    node_refs.ensure_size(nodeid + 1, AtomicEntryRef());
+    node_refs.ensure_size(nodeid + 1, NodeType());
     // A document cannot be added twice.
     assert(!get_node_ref(nodeid).valid());
     // Note: The level array instance lives as long as the document is present in the index.
     std::vector<AtomicEntryRef> levels(num_levels, AtomicEntryRef());
     auto node_ref = nodes.add(levels);
-    node_refs[nodeid].store_release(node_ref);
+    node_refs[nodeid].ref().store_release(node_ref);
     if (nodeid >= node_refs_size.load(std::memory_order_relaxed)) {
         node_refs_size.store(nodeid + 1, std::memory_order_release);
     }
@@ -43,7 +44,7 @@ HnswGraph::remove_node(uint32_t nodeid)
     assert(node_ref.valid());
     auto levels = nodes.get(node_ref);
     vespalib::datastore::EntryRef invalid;
-    node_refs[nodeid].store_release(invalid);
+    node_refs[nodeid].ref().store_release(invalid);
     // Ensure data referenced through the old ref can be recycled:
     nodes.remove(node_ref);
     for (size_t i = 0; i < levels.size(); ++i) {
@@ -127,6 +128,7 @@ HnswGraph::set_entry_node(EntryNode node) {
 
 namespace vespalib {
 
-template class RcuVectorBase<vespalib::datastore::AtomicEntryRef>;
+template class RcuVectorBase<search::tensor::HnswSimpleNode>;
+template class RcuVector<search::tensor::HnswSimpleNode>;
 
 }
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
index 60ba7d55f7f..34fcbd9d721 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
@@ -2,6 +2,7 @@
 
 #pragma once
 
+#include "hnsw_simple_node.h"
 #include <vespa/vespalib/datastore/array_store.h>
 #include <vespa/vespalib/datastore/atomic_entry_ref.h>
 #include <vespa/vespalib/datastore/entryref.h>
@@ -23,9 +24,11 @@ struct HnswGraph {
     // This uses 12 bits for buffer id -> 4096 buffers.
     using LinkArrayEntryRefType = vespalib::datastore::EntryRefT<20>;
 
+    using NodeType = HnswSimpleNode;
+
     // Provides mapping from document id -> node reference.
     // The reference is used to lookup the node data in NodeStore.
-    using NodeRefVector = vespalib::RcuVector<AtomicEntryRef>;
+    using NodeRefVector = vespalib::RcuVector<NodeType>;
     using NodeRef = vespalib::datastore::EntryRef;
 
     // This stores the level arrays for all nodes.
@@ -55,11 +58,11 @@ struct HnswGraph {
     void trim_node_refs_size();
 
     NodeRef get_node_ref(uint32_t nodeid) const {
-        return node_refs.get_elem_ref(nodeid).load_relaxed(); // Called from writer only
+        return node_refs.get_elem_ref(nodeid).ref().load_relaxed(); // Called from writer only
     }
 
     NodeRef acquire_node_ref(uint32_t nodeid) const {
-        return node_refs.acquire_elem_ref(nodeid).load_acquire();
+        return node_refs.acquire_elem_ref(nodeid).ref().load_acquire();
     }
 
     bool still_valid(uint32_t nodeid, NodeRef node_ref) const {
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index e9ce77cc0d6..d9384249594 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -535,10 +535,18 @@ HnswIndex::reclaim_memory(generation_t oldest_used_gen)
 void
 HnswIndex::compact_level_arrays(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy)
 {
-    auto context = _graph.nodes.compactWorst(compaction_spec, compaction_strategy);
+    auto compacting_buffers = _graph.nodes.start_compact_worst_buffers(compaction_spec, compaction_strategy);
     uint32_t nodeid_limit = _graph.node_refs.size();
-    vespalib::ArrayRef<AtomicEntryRef> refs(&_graph.node_refs[0], nodeid_limit);
-    context->compact(refs);
+    auto filter = compacting_buffers->make_entry_ref_filter();
+    vespalib::ArrayRef<NodeType> refs(&_graph.node_refs[0], nodeid_limit);
+    for (auto& ref : refs) {
+        auto node_ref = ref.ref().load_relaxed();
+        if (node_ref.valid() && filter.has(node_ref)) {
+            EntryRef new_node_ref = _graph.nodes.move_on_compact(node_ref);
+            ref.ref().store_release(new_node_ref);
+        }
+    }
+    compacting_buffers->finish();
 }
 
 void
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
index 2714464073e..ec55c1a50e6 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
@@ -81,10 +81,12 @@ public:
         CompactionSpec link_arrays() const noexcept { return _link_arrays; }
     };
 
-    // Stubs for mapping between docid and nodeid.
-    static uint32_t get_docid(uint32_t nodeid) { return nodeid; }
+    uint32_t get_docid(uint32_t nodeid) const {
+        return _graph.node_refs.acquire_elem_ref(nodeid).acquire_docid(nodeid);
+    }
     static uint32_t get_nodeid(uint32_t docid) { return docid; }
 protected:
+    using NodeType = HnswGraph::NodeType;
     using AtomicEntryRef = HnswGraph::AtomicEntryRef;
     using NodeStore = HnswGraph::NodeStore;
 
@@ -132,8 +134,10 @@ protected:
     void remove_link_to(uint32_t remove_from, uint32_t remove_id, uint32_t level);
 
     inline TypedCells get_vector(uint32_t nodeid) const {
-        uint32_t docid = get_docid(nodeid);
-        return _vectors.get_vector(docid, 0);
+        auto& ref = _graph.node_refs.acquire_elem_ref(nodeid);
+        uint32_t docid = ref.acquire_docid(nodeid);
+        uint32_t subspace = ref.acquire_subspace();
+        return _vectors.get_vector(docid, subspace);
     }
     inline VectorBundle get_vector_by_docid(uint32_t docid) const {
         return _vectors.get_vectors(docid);
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_simple_node.h b/searchlib/src/vespa/searchlib/tensor/hnsw_simple_node.h
new file mode 100644
index 00000000000..2b4f3e0788b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_simple_node.h
@@ -0,0 +1,30 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/datastore/atomic_entry_ref.h>
+
+namespace search::tensor {
+
+/**
+ * Represents a graph node for dense tensors (one node per document).
+ */
+class HnswSimpleNode {
+    using AtomicEntryRef = vespalib::datastore::AtomicEntryRef;
+    using EntryRef = vespalib::datastore::EntryRef;
+
+    AtomicEntryRef _ref;
+
+public:
+    HnswSimpleNode()
+        : _ref()
+    {
+    }
+    AtomicEntryRef& ref() noexcept { return _ref; }
+    const AtomicEntryRef& ref() const noexcept { return _ref; }
+    // Mapping from nodeid to docid and subspace.
+    static uint32_t acquire_docid(uint32_t nodeid) noexcept { return nodeid; }
+    static uint32_t acquire_subspace() noexcept { return 0u; }
+};
+
+}
author	Tor Egge <Tor.Egge@online.no>	2022-11-09 11:00:44 +0100
committer	Tor Egge <Tor.Egge@online.no>	2022-11-09 11:00:44 +0100
commit	13be5ec166cf74c9cffb6a26d29aa112f4141a37 (patch)
tree	efea2ca5ac14fc6802f257ac8a351960907b43bb /searchlib
parent	30a676fb89f28a618f0dc2c0752cde8c29bf320c (diff)