summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@online.no>2022-11-09 11:00:44 +0100
committerTor Egge <Tor.Egge@online.no>2022-11-09 11:00:44 +0100
commit13be5ec166cf74c9cffb6a26d29aa112f4141a37 (patch)
treeefea2ca5ac14fc6802f257ac8a351960907b43bb /searchlib
parent30a676fb89f28a618f0dc2c0752cde8c29bf320c (diff)
Add HnswSimpleNode.
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_graph.h9
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp14
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.h12
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_simple_node.h30
5 files changed, 62 insertions, 15 deletions
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp
index 6d8b143cff9..d5312f4bd4c 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp
@@ -2,6 +2,7 @@
#include "hnsw_graph.h"
#include "hnsw_index.h"
+#include <vespa/vespalib/util/rcuvector.hpp>
#include <vespa/vespalib/datastore/array_store.hpp>
namespace search::tensor {
@@ -13,7 +14,7 @@ HnswGraph::HnswGraph()
links(HnswIndex::make_default_link_store_config(), {}),
entry_nodeid_and_level()
{
- node_refs.ensure_size(1, AtomicEntryRef());
+ node_refs.ensure_size(1, NodeType());
EntryNode entry;
set_entry_node(entry);
}
@@ -23,13 +24,13 @@ HnswGraph::~HnswGraph() = default;
HnswGraph::NodeRef
HnswGraph::make_node(uint32_t nodeid, uint32_t num_levels)
{
- node_refs.ensure_size(nodeid + 1, AtomicEntryRef());
+ node_refs.ensure_size(nodeid + 1, NodeType());
// A document cannot be added twice.
assert(!get_node_ref(nodeid).valid());
// Note: The level array instance lives as long as the document is present in the index.
std::vector<AtomicEntryRef> levels(num_levels, AtomicEntryRef());
auto node_ref = nodes.add(levels);
- node_refs[nodeid].store_release(node_ref);
+ node_refs[nodeid].ref().store_release(node_ref);
if (nodeid >= node_refs_size.load(std::memory_order_relaxed)) {
node_refs_size.store(nodeid + 1, std::memory_order_release);
}
@@ -43,7 +44,7 @@ HnswGraph::remove_node(uint32_t nodeid)
assert(node_ref.valid());
auto levels = nodes.get(node_ref);
vespalib::datastore::EntryRef invalid;
- node_refs[nodeid].store_release(invalid);
+ node_refs[nodeid].ref().store_release(invalid);
// Ensure data referenced through the old ref can be recycled:
nodes.remove(node_ref);
for (size_t i = 0; i < levels.size(); ++i) {
@@ -127,6 +128,7 @@ HnswGraph::set_entry_node(EntryNode node) {
namespace vespalib {
-template class RcuVectorBase<vespalib::datastore::AtomicEntryRef>;
+template class RcuVectorBase<search::tensor::HnswSimpleNode>;
+template class RcuVector<search::tensor::HnswSimpleNode>;
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
index 60ba7d55f7f..34fcbd9d721 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
@@ -2,6 +2,7 @@
#pragma once
+#include "hnsw_simple_node.h"
#include <vespa/vespalib/datastore/array_store.h>
#include <vespa/vespalib/datastore/atomic_entry_ref.h>
#include <vespa/vespalib/datastore/entryref.h>
@@ -23,9 +24,11 @@ struct HnswGraph {
// This uses 12 bits for buffer id -> 4096 buffers.
using LinkArrayEntryRefType = vespalib::datastore::EntryRefT<20>;
+ using NodeType = HnswSimpleNode;
+
// Provides mapping from document id -> node reference.
// The reference is used to lookup the node data in NodeStore.
- using NodeRefVector = vespalib::RcuVector<AtomicEntryRef>;
+ using NodeRefVector = vespalib::RcuVector<NodeType>;
using NodeRef = vespalib::datastore::EntryRef;
// This stores the level arrays for all nodes.
@@ -55,11 +58,11 @@ struct HnswGraph {
void trim_node_refs_size();
NodeRef get_node_ref(uint32_t nodeid) const {
- return node_refs.get_elem_ref(nodeid).load_relaxed(); // Called from writer only
+ return node_refs.get_elem_ref(nodeid).ref().load_relaxed(); // Called from writer only
}
NodeRef acquire_node_ref(uint32_t nodeid) const {
- return node_refs.acquire_elem_ref(nodeid).load_acquire();
+ return node_refs.acquire_elem_ref(nodeid).ref().load_acquire();
}
bool still_valid(uint32_t nodeid, NodeRef node_ref) const {
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index e9ce77cc0d6..d9384249594 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -535,10 +535,18 @@ HnswIndex::reclaim_memory(generation_t oldest_used_gen)
void
HnswIndex::compact_level_arrays(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy)
{
- auto context = _graph.nodes.compactWorst(compaction_spec, compaction_strategy);
+ auto compacting_buffers = _graph.nodes.start_compact_worst_buffers(compaction_spec, compaction_strategy);
uint32_t nodeid_limit = _graph.node_refs.size();
- vespalib::ArrayRef<AtomicEntryRef> refs(&_graph.node_refs[0], nodeid_limit);
- context->compact(refs);
+ auto filter = compacting_buffers->make_entry_ref_filter();
+ vespalib::ArrayRef<NodeType> refs(&_graph.node_refs[0], nodeid_limit);
+ for (auto& ref : refs) {
+ auto node_ref = ref.ref().load_relaxed();
+ if (node_ref.valid() && filter.has(node_ref)) {
+ EntryRef new_node_ref = _graph.nodes.move_on_compact(node_ref);
+ ref.ref().store_release(new_node_ref);
+ }
+ }
+ compacting_buffers->finish();
}
void
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
index 2714464073e..ec55c1a50e6 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
@@ -81,10 +81,12 @@ public:
CompactionSpec link_arrays() const noexcept { return _link_arrays; }
};
- // Stubs for mapping between docid and nodeid.
- static uint32_t get_docid(uint32_t nodeid) { return nodeid; }
+ uint32_t get_docid(uint32_t nodeid) const {
+ return _graph.node_refs.acquire_elem_ref(nodeid).acquire_docid(nodeid);
+ }
static uint32_t get_nodeid(uint32_t docid) { return docid; }
protected:
+ using NodeType = HnswGraph::NodeType;
using AtomicEntryRef = HnswGraph::AtomicEntryRef;
using NodeStore = HnswGraph::NodeStore;
@@ -132,8 +134,10 @@ protected:
void remove_link_to(uint32_t remove_from, uint32_t remove_id, uint32_t level);
inline TypedCells get_vector(uint32_t nodeid) const {
- uint32_t docid = get_docid(nodeid);
- return _vectors.get_vector(docid, 0);
+ auto& ref = _graph.node_refs.acquire_elem_ref(nodeid);
+ uint32_t docid = ref.acquire_docid(nodeid);
+ uint32_t subspace = ref.acquire_subspace();
+ return _vectors.get_vector(docid, subspace);
}
inline VectorBundle get_vector_by_docid(uint32_t docid) const {
return _vectors.get_vectors(docid);
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_simple_node.h b/searchlib/src/vespa/searchlib/tensor/hnsw_simple_node.h
new file mode 100644
index 00000000000..2b4f3e0788b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_simple_node.h
@@ -0,0 +1,30 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/datastore/atomic_entry_ref.h>
+
+namespace search::tensor {
+
+/**
+ * Represents a graph node for dense tensors (one node per document).
+ */
+class HnswSimpleNode {
+ using AtomicEntryRef = vespalib::datastore::AtomicEntryRef;
+ using EntryRef = vespalib::datastore::EntryRef;
+
+ AtomicEntryRef _ref;
+
+public:
+ HnswSimpleNode()
+ : _ref()
+ {
+ }
+ AtomicEntryRef& ref() noexcept { return _ref; }
+ const AtomicEntryRef& ref() const noexcept { return _ref; }
+ // Mapping from nodeid to docid and subspace.
+ static uint32_t acquire_docid(uint32_t nodeid) noexcept { return nodeid; }
+ static uint32_t acquire_subspace() noexcept { return 0u; }
+};
+
+}