diff options
author | Tor Egge <Tor.Egge@online.no> | 2022-11-09 14:43:23 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2022-11-09 14:43:23 +0100 |
commit | bbd8080e32ca0a994e1e7cae7ccd71af995be2ad (patch) | |
tree | b636cf8717ec008a351d713a40354e27fb17974d /searchlib/src | |
parent | 3e22b115a7b8a3f5422063daf35ae9a80ddcd086 (diff) |
Add HnswIdentityMapping.
Diffstat (limited to 'searchlib/src')
8 files changed, 65 insertions, 16 deletions
diff --git a/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp b/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp index ea22aaabaff..a94b09621df 100644 --- a/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp @@ -52,12 +52,12 @@ using V = std::vector<uint32_t>; void populate(HnswGraph &graph) { // no 0 - graph.make_node(1, 1); - auto er = graph.make_node(2, 2); + graph.make_node(1, 1, 0, 1); + auto er = graph.make_node(2, 2, 0, 2); // no 3 - graph.make_node(4, 2); - graph.make_node(5, 0); - graph.make_node(6, 1); + graph.make_node(4, 4, 0, 2); + graph.make_node(5, 5, 0, 0); + graph.make_node(6, 6, 0, 1); graph.set_link_array(1, 0, V{2, 4, 6}); graph.set_link_array(2, 0, V{1, 4, 6}); @@ -71,7 +71,7 @@ void populate(HnswGraph &graph) { void modify(HnswGraph &graph) { graph.remove_node(2); graph.remove_node(6); - graph.make_node(7, 2); + graph.make_node(7, 7, 0, 2); graph.set_link_array(1, 0, V{7, 4}); graph.set_link_array(4, 0, V{7, 2}); diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp index d5312f4bd4c..2cb63af863d 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp @@ -22,7 +22,7 @@ HnswGraph::HnswGraph() HnswGraph::~HnswGraph() = default; HnswGraph::NodeRef -HnswGraph::make_node(uint32_t nodeid, uint32_t num_levels) +HnswGraph::make_node(uint32_t nodeid, uint32_t docid, uint32_t subspace, uint32_t num_levels) { node_refs.ensure_size(nodeid + 1, NodeType()); // A document cannot be added twice. @@ -30,7 +30,10 @@ HnswGraph::make_node(uint32_t nodeid, uint32_t num_levels) // Note: The level array instance lives as long as the document is present in the index. std::vector<AtomicEntryRef> levels(num_levels, AtomicEntryRef()); auto node_ref = nodes.add(levels); - node_refs[nodeid].ref().store_release(node_ref); + auto& node = node_refs[nodeid]; + node.ref().store_release(node_ref); + node.store_docid(docid); + node.store_subspace(subspace); if (nodeid >= node_refs_size.load(std::memory_order_relaxed)) { node_refs_size.store(nodeid + 1, std::memory_order_release); } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h index 34fcbd9d721..9c0945fb8e3 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h @@ -51,7 +51,7 @@ struct HnswGraph { HnswGraph(); ~HnswGraph(); - NodeRef make_node(uint32_t nodeid, uint32_t num_levels); + NodeRef make_node(uint32_t nodeid, uint32_t docid, uint32_t subspace, uint32_t num_levels); void remove_node(uint32_t nodeid); diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h b/searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h new file mode 100644 index 00000000000..00ddf8cd59b --- /dev/null +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h @@ -0,0 +1,34 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/util/arrayref.h> +#include <cstdint> +#include <cassert> + +namespace search::tensor { + +/* + * Class used to maintain mapping from docid to nodeid for dense tensors + * (one node per document). + */ +class HnswIdentityMapping { + uint32_t _nodeid; +public: + HnswIdentityMapping() + : _nodeid(0u) + { + } + vespalib::ConstArrayRef<uint32_t> allocate_ids(uint32_t docid, uint32_t subspaces) { + assert(subspaces == 1u); + _nodeid = docid; + return {&_nodeid, 1}; + } + vespalib::ConstArrayRef<uint32_t> get_ids(uint32_t docid) { + _nodeid = docid; + return {&_nodeid, 1}; + } + void free_ids(uint32_t docid) { (void) docid; } +}; + +} diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index 5f27e141cf8..203e93c3a32 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -331,6 +331,7 @@ HnswIndex::HnswIndex(const DocVectorAccess& vectors, DistanceFunction::UP distan _vectors(vectors), _distance_func(std::move(distance_func)), _level_generator(std::move(level_generator)), + _id_mapping(), _cfg(cfg), _compaction_spec() { @@ -414,8 +415,10 @@ HnswIndex::filter_valid_nodeids(uint32_t level, const PreparedAddDoc::Links &nei void HnswIndex::internal_complete_add(uint32_t docid, PreparedAddDoc &op) { - auto nodeid = get_nodeid(docid); - auto node_ref = _graph.make_node(nodeid, op.max_level + 1); + auto nodeids = _id_mapping.allocate_ids(docid, 1); + assert(nodeids.size() == 1); + auto nodeid = nodeids[0]; + auto node_ref = _graph.make_node(nodeid, docid, 0, op.max_level + 1); for (int level = 0; level <= op.max_level; ++level) { auto neighbors = filter_valid_nodeids(level, op.connections[level], nodeid); connect_new_node(nodeid, neighbors, level); @@ -510,8 +513,12 @@ HnswIndex::remove_node(uint32_t nodeid) void HnswIndex::remove_document(uint32_t docid) { - auto nodeid = get_nodeid(docid); - remove_node(nodeid); + auto nodeids = _id_mapping.get_ids(docid); + assert(nodeids.size() == 1u); + for (auto nodeid : nodeids) { + remove_node(nodeid); + } + _id_mapping.free_ids(docid); } void @@ -782,7 +789,7 @@ HnswIndex::set_node(uint32_t nodeid, const HnswTestNode &node) { size_t num_levels = node.size(); assert(num_levels > 0); - auto node_ref = _graph.make_node(nodeid, num_levels); + auto node_ref = _graph.make_node(nodeid, nodeid, 0, num_levels); for (size_t level = 0; level < num_levels; ++level) { connect_new_node(nodeid, node.level(level), level); } diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h index 442d8eba5dc..91eb0fcfed5 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h @@ -4,6 +4,7 @@ #include "distance_function.h" #include "doc_vector_access.h" +#include "hnsw_identity_mapping.h" #include "hnsw_index_utils.h" #include "hnsw_test_node.h" #include "nearest_neighbor_index.h" @@ -88,7 +89,8 @@ public: return _graph.node_refs.acquire_elem_ref(nodeid).acquire_docid(); } } - static uint32_t get_nodeid(uint32_t docid) { return docid; } + + using IdMapping = HnswIdentityMapping; protected: using NodeType = HnswGraph::NodeType; using AtomicEntryRef = HnswGraph::AtomicEntryRef; @@ -106,6 +108,7 @@ protected: const DocVectorAccess& _vectors; DistanceFunction::UP _distance_func; RandomLevelGenerator::UP _level_generator; + IdMapping _id_mapping; // mapping from docid to nodeid vector Config _cfg; HnswIndexCompactionSpec _compaction_spec; diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.hpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.hpp index 1c38e7d7936..9228695f8be 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.hpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.hpp @@ -43,7 +43,7 @@ HnswIndexLoader<ReaderType>::load_next() if (_nodeid < _num_nodes) { uint32_t num_levels = next_int(); if (num_levels > 0) { - _graph.make_node(_nodeid, num_levels); + _graph.make_node(_nodeid, _nodeid, 0, num_levels); for (uint32_t level = 0; level < num_levels; ++level) { uint32_t num_links = next_int(); _link_array.clear(); diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_simple_node.h b/searchlib/src/vespa/searchlib/tensor/hnsw_simple_node.h index e72f1108a7e..9740cab5c31 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_simple_node.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_simple_node.h @@ -22,6 +22,8 @@ public: } AtomicEntryRef& ref() noexcept { return _ref; } const AtomicEntryRef& ref() const noexcept { return _ref; } + void store_docid(uint32_t docid) noexcept { (void) docid; } + void store_subspace(uint32_t subspace) noexcept { (void) subspace; } // Mapping from nodeid to docid and subspace. static uint32_t acquire_docid() noexcept { return 0u; } static uint32_t acquire_subspace() noexcept { return 0u; } |