summaryrefslogtreecommitdiffstats
path: root/searchlib/src
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@online.no>2022-11-09 14:43:23 +0100
committerTor Egge <Tor.Egge@online.no>2022-11-09 14:43:23 +0100
commitbbd8080e32ca0a994e1e7cae7ccd71af995be2ad (patch)
treeb636cf8717ec008a351d713a40354e27fb17974d /searchlib/src
parent3e22b115a7b8a3f5422063daf35ae9a80ddcd086 (diff)
Add HnswIdentityMapping.
Diffstat (limited to 'searchlib/src')
-rw-r--r--searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp7
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_graph.h2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h34
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp17
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.h5
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.hpp2
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_simple_node.h2
8 files changed, 65 insertions, 16 deletions
diff --git a/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp b/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp
index ea22aaabaff..a94b09621df 100644
--- a/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp
+++ b/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp
@@ -52,12 +52,12 @@ using V = std::vector<uint32_t>;
void populate(HnswGraph &graph) {
// no 0
- graph.make_node(1, 1);
- auto er = graph.make_node(2, 2);
+ graph.make_node(1, 1, 0, 1);
+ auto er = graph.make_node(2, 2, 0, 2);
// no 3
- graph.make_node(4, 2);
- graph.make_node(5, 0);
- graph.make_node(6, 1);
+ graph.make_node(4, 4, 0, 2);
+ graph.make_node(5, 5, 0, 0);
+ graph.make_node(6, 6, 0, 1);
graph.set_link_array(1, 0, V{2, 4, 6});
graph.set_link_array(2, 0, V{1, 4, 6});
@@ -71,7 +71,7 @@ void populate(HnswGraph &graph) {
void modify(HnswGraph &graph) {
graph.remove_node(2);
graph.remove_node(6);
- graph.make_node(7, 2);
+ graph.make_node(7, 7, 0, 2);
graph.set_link_array(1, 0, V{7, 4});
graph.set_link_array(4, 0, V{7, 2});
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp
index d5312f4bd4c..2cb63af863d 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp
@@ -22,7 +22,7 @@ HnswGraph::HnswGraph()
HnswGraph::~HnswGraph() = default;
HnswGraph::NodeRef
-HnswGraph::make_node(uint32_t nodeid, uint32_t num_levels)
+HnswGraph::make_node(uint32_t nodeid, uint32_t docid, uint32_t subspace, uint32_t num_levels)
{
node_refs.ensure_size(nodeid + 1, NodeType());
// A document cannot be added twice.
@@ -30,7 +30,10 @@ HnswGraph::make_node(uint32_t nodeid, uint32_t num_levels)
// Note: The level array instance lives as long as the document is present in the index.
std::vector<AtomicEntryRef> levels(num_levels, AtomicEntryRef());
auto node_ref = nodes.add(levels);
- node_refs[nodeid].ref().store_release(node_ref);
+ auto& node = node_refs[nodeid];
+ node.ref().store_release(node_ref);
+ node.store_docid(docid);
+ node.store_subspace(subspace);
if (nodeid >= node_refs_size.load(std::memory_order_relaxed)) {
node_refs_size.store(nodeid + 1, std::memory_order_release);
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
index 34fcbd9d721..9c0945fb8e3 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
@@ -51,7 +51,7 @@ struct HnswGraph {
HnswGraph();
~HnswGraph();
- NodeRef make_node(uint32_t nodeid, uint32_t num_levels);
+ NodeRef make_node(uint32_t nodeid, uint32_t docid, uint32_t subspace, uint32_t num_levels);
void remove_node(uint32_t nodeid);
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h b/searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h
new file mode 100644
index 00000000000..00ddf8cd59b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_identity_mapping.h
@@ -0,0 +1,34 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/util/arrayref.h>
+#include <cstdint>
+#include <cassert>
+
+namespace search::tensor {
+
+/*
+ * Class used to maintain mapping from docid to nodeid for dense tensors
+ * (one node per document).
+ */
+class HnswIdentityMapping {
+ uint32_t _nodeid;
+public:
+ HnswIdentityMapping()
+ : _nodeid(0u)
+ {
+ }
+ vespalib::ConstArrayRef<uint32_t> allocate_ids(uint32_t docid, uint32_t subspaces) {
+ assert(subspaces == 1u);
+ _nodeid = docid;
+ return {&_nodeid, 1};
+ }
+ vespalib::ConstArrayRef<uint32_t> get_ids(uint32_t docid) {
+ _nodeid = docid;
+ return {&_nodeid, 1};
+ }
+ void free_ids(uint32_t docid) { (void) docid; }
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index 5f27e141cf8..203e93c3a32 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -331,6 +331,7 @@ HnswIndex::HnswIndex(const DocVectorAccess& vectors, DistanceFunction::UP distan
_vectors(vectors),
_distance_func(std::move(distance_func)),
_level_generator(std::move(level_generator)),
+ _id_mapping(),
_cfg(cfg),
_compaction_spec()
{
@@ -414,8 +415,10 @@ HnswIndex::filter_valid_nodeids(uint32_t level, const PreparedAddDoc::Links &nei
void
HnswIndex::internal_complete_add(uint32_t docid, PreparedAddDoc &op)
{
- auto nodeid = get_nodeid(docid);
- auto node_ref = _graph.make_node(nodeid, op.max_level + 1);
+ auto nodeids = _id_mapping.allocate_ids(docid, 1);
+ assert(nodeids.size() == 1);
+ auto nodeid = nodeids[0];
+ auto node_ref = _graph.make_node(nodeid, docid, 0, op.max_level + 1);
for (int level = 0; level <= op.max_level; ++level) {
auto neighbors = filter_valid_nodeids(level, op.connections[level], nodeid);
connect_new_node(nodeid, neighbors, level);
@@ -510,8 +513,12 @@ HnswIndex::remove_node(uint32_t nodeid)
void
HnswIndex::remove_document(uint32_t docid)
{
- auto nodeid = get_nodeid(docid);
- remove_node(nodeid);
+ auto nodeids = _id_mapping.get_ids(docid);
+ assert(nodeids.size() == 1u);
+ for (auto nodeid : nodeids) {
+ remove_node(nodeid);
+ }
+ _id_mapping.free_ids(docid);
}
void
@@ -782,7 +789,7 @@ HnswIndex::set_node(uint32_t nodeid, const HnswTestNode &node)
{
size_t num_levels = node.size();
assert(num_levels > 0);
- auto node_ref = _graph.make_node(nodeid, num_levels);
+ auto node_ref = _graph.make_node(nodeid, nodeid, 0, num_levels);
for (size_t level = 0; level < num_levels; ++level) {
connect_new_node(nodeid, node.level(level), level);
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
index 442d8eba5dc..91eb0fcfed5 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
@@ -4,6 +4,7 @@
#include "distance_function.h"
#include "doc_vector_access.h"
+#include "hnsw_identity_mapping.h"
#include "hnsw_index_utils.h"
#include "hnsw_test_node.h"
#include "nearest_neighbor_index.h"
@@ -88,7 +89,8 @@ public:
return _graph.node_refs.acquire_elem_ref(nodeid).acquire_docid();
}
}
- static uint32_t get_nodeid(uint32_t docid) { return docid; }
+
+ using IdMapping = HnswIdentityMapping;
protected:
using NodeType = HnswGraph::NodeType;
using AtomicEntryRef = HnswGraph::AtomicEntryRef;
@@ -106,6 +108,7 @@ protected:
const DocVectorAccess& _vectors;
DistanceFunction::UP _distance_func;
RandomLevelGenerator::UP _level_generator;
+ IdMapping _id_mapping; // mapping from docid to nodeid vector
Config _cfg;
HnswIndexCompactionSpec _compaction_spec;
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.hpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.hpp
index 1c38e7d7936..9228695f8be 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.hpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.hpp
@@ -43,7 +43,7 @@ HnswIndexLoader<ReaderType>::load_next()
if (_nodeid < _num_nodes) {
uint32_t num_levels = next_int();
if (num_levels > 0) {
- _graph.make_node(_nodeid, num_levels);
+ _graph.make_node(_nodeid, _nodeid, 0, num_levels);
for (uint32_t level = 0; level < num_levels; ++level) {
uint32_t num_links = next_int();
_link_array.clear();
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_simple_node.h b/searchlib/src/vespa/searchlib/tensor/hnsw_simple_node.h
index e72f1108a7e..9740cab5c31 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_simple_node.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_simple_node.h
@@ -22,6 +22,8 @@ public:
}
AtomicEntryRef& ref() noexcept { return _ref; }
const AtomicEntryRef& ref() const noexcept { return _ref; }
+ void store_docid(uint32_t docid) noexcept { (void) docid; }
+ void store_subspace(uint32_t subspace) noexcept { (void) subspace; }
// Mapping from nodeid to docid and subspace.
static uint32_t acquire_docid() noexcept { return 0u; }
static uint32_t acquire_subspace() noexcept { return 0u; }