diff options
author | Tor Egge <Tor.Egge@online.no> | 2022-11-09 11:00:44 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2022-11-09 11:00:44 +0100 |
commit | 13be5ec166cf74c9cffb6a26d29aa112f4141a37 (patch) | |
tree | efea2ca5ac14fc6802f257ac8a351960907b43bb /searchlib | |
parent | 30a676fb89f28a618f0dc2c0752cde8c29bf320c (diff) |
Add HnswSimpleNode.
Diffstat (limited to 'searchlib')
5 files changed, 62 insertions, 15 deletions
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp index 6d8b143cff9..d5312f4bd4c 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp @@ -2,6 +2,7 @@ #include "hnsw_graph.h" #include "hnsw_index.h" +#include <vespa/vespalib/util/rcuvector.hpp> #include <vespa/vespalib/datastore/array_store.hpp> namespace search::tensor { @@ -13,7 +14,7 @@ HnswGraph::HnswGraph() links(HnswIndex::make_default_link_store_config(), {}), entry_nodeid_and_level() { - node_refs.ensure_size(1, AtomicEntryRef()); + node_refs.ensure_size(1, NodeType()); EntryNode entry; set_entry_node(entry); } @@ -23,13 +24,13 @@ HnswGraph::~HnswGraph() = default; HnswGraph::NodeRef HnswGraph::make_node(uint32_t nodeid, uint32_t num_levels) { - node_refs.ensure_size(nodeid + 1, AtomicEntryRef()); + node_refs.ensure_size(nodeid + 1, NodeType()); // A document cannot be added twice. assert(!get_node_ref(nodeid).valid()); // Note: The level array instance lives as long as the document is present in the index. std::vector<AtomicEntryRef> levels(num_levels, AtomicEntryRef()); auto node_ref = nodes.add(levels); - node_refs[nodeid].store_release(node_ref); + node_refs[nodeid].ref().store_release(node_ref); if (nodeid >= node_refs_size.load(std::memory_order_relaxed)) { node_refs_size.store(nodeid + 1, std::memory_order_release); } @@ -43,7 +44,7 @@ HnswGraph::remove_node(uint32_t nodeid) assert(node_ref.valid()); auto levels = nodes.get(node_ref); vespalib::datastore::EntryRef invalid; - node_refs[nodeid].store_release(invalid); + node_refs[nodeid].ref().store_release(invalid); // Ensure data referenced through the old ref can be recycled: nodes.remove(node_ref); for (size_t i = 0; i < levels.size(); ++i) { @@ -127,6 +128,7 @@ HnswGraph::set_entry_node(EntryNode node) { namespace vespalib { -template class RcuVectorBase<vespalib::datastore::AtomicEntryRef>; +template class RcuVectorBase<search::tensor::HnswSimpleNode>; +template class RcuVector<search::tensor::HnswSimpleNode>; } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h index 60ba7d55f7f..34fcbd9d721 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h @@ -2,6 +2,7 @@ #pragma once +#include "hnsw_simple_node.h" #include <vespa/vespalib/datastore/array_store.h> #include <vespa/vespalib/datastore/atomic_entry_ref.h> #include <vespa/vespalib/datastore/entryref.h> @@ -23,9 +24,11 @@ struct HnswGraph { // This uses 12 bits for buffer id -> 4096 buffers. using LinkArrayEntryRefType = vespalib::datastore::EntryRefT<20>; + using NodeType = HnswSimpleNode; + // Provides mapping from document id -> node reference. // The reference is used to lookup the node data in NodeStore. - using NodeRefVector = vespalib::RcuVector<AtomicEntryRef>; + using NodeRefVector = vespalib::RcuVector<NodeType>; using NodeRef = vespalib::datastore::EntryRef; // This stores the level arrays for all nodes. @@ -55,11 +58,11 @@ struct HnswGraph { void trim_node_refs_size(); NodeRef get_node_ref(uint32_t nodeid) const { - return node_refs.get_elem_ref(nodeid).load_relaxed(); // Called from writer only + return node_refs.get_elem_ref(nodeid).ref().load_relaxed(); // Called from writer only } NodeRef acquire_node_ref(uint32_t nodeid) const { - return node_refs.acquire_elem_ref(nodeid).load_acquire(); + return node_refs.acquire_elem_ref(nodeid).ref().load_acquire(); } bool still_valid(uint32_t nodeid, NodeRef node_ref) const { diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index e9ce77cc0d6..d9384249594 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -535,10 +535,18 @@ HnswIndex::reclaim_memory(generation_t oldest_used_gen) void HnswIndex::compact_level_arrays(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy) { - auto context = _graph.nodes.compactWorst(compaction_spec, compaction_strategy); + auto compacting_buffers = _graph.nodes.start_compact_worst_buffers(compaction_spec, compaction_strategy); uint32_t nodeid_limit = _graph.node_refs.size(); - vespalib::ArrayRef<AtomicEntryRef> refs(&_graph.node_refs[0], nodeid_limit); - context->compact(refs); + auto filter = compacting_buffers->make_entry_ref_filter(); + vespalib::ArrayRef<NodeType> refs(&_graph.node_refs[0], nodeid_limit); + for (auto& ref : refs) { + auto node_ref = ref.ref().load_relaxed(); + if (node_ref.valid() && filter.has(node_ref)) { + EntryRef new_node_ref = _graph.nodes.move_on_compact(node_ref); + ref.ref().store_release(new_node_ref); + } + } + compacting_buffers->finish(); } void diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h index 2714464073e..ec55c1a50e6 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h @@ -81,10 +81,12 @@ public: CompactionSpec link_arrays() const noexcept { return _link_arrays; } }; - // Stubs for mapping between docid and nodeid. - static uint32_t get_docid(uint32_t nodeid) { return nodeid; } + uint32_t get_docid(uint32_t nodeid) const { + return _graph.node_refs.acquire_elem_ref(nodeid).acquire_docid(nodeid); + } static uint32_t get_nodeid(uint32_t docid) { return docid; } protected: + using NodeType = HnswGraph::NodeType; using AtomicEntryRef = HnswGraph::AtomicEntryRef; using NodeStore = HnswGraph::NodeStore; @@ -132,8 +134,10 @@ protected: void remove_link_to(uint32_t remove_from, uint32_t remove_id, uint32_t level); inline TypedCells get_vector(uint32_t nodeid) const { - uint32_t docid = get_docid(nodeid); - return _vectors.get_vector(docid, 0); + auto& ref = _graph.node_refs.acquire_elem_ref(nodeid); + uint32_t docid = ref.acquire_docid(nodeid); + uint32_t subspace = ref.acquire_subspace(); + return _vectors.get_vector(docid, subspace); } inline VectorBundle get_vector_by_docid(uint32_t docid) const { return _vectors.get_vectors(docid); diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_simple_node.h b/searchlib/src/vespa/searchlib/tensor/hnsw_simple_node.h new file mode 100644 index 00000000000..2b4f3e0788b --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_simple_node.h @@ -0,0 +1,30 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/datastore/atomic_entry_ref.h> + +namespace search::tensor { + +/** + * Represents a graph node for dense tensors (one node per document). + */ +class HnswSimpleNode { + using AtomicEntryRef = vespalib::datastore::AtomicEntryRef; + using EntryRef = vespalib::datastore::EntryRef; + + AtomicEntryRef _ref; + +public: + HnswSimpleNode() + : _ref() + { + } + AtomicEntryRef& ref() noexcept { return _ref; } + const AtomicEntryRef& ref() const noexcept { return _ref; } + // Mapping from nodeid to docid and subspace. + static uint32_t acquire_docid(uint32_t nodeid) noexcept { return nodeid; } + static uint32_t acquire_subspace() noexcept { return 0u; } +}; + +} |