summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp6
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_graph.h82
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp40
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.cpp3
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h7
6 files changed, 99 insertions, 43 deletions
diff --git a/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp b/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp
index bcc886fccad..2db6437664e 100644
--- a/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp
+++ b/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp
@@ -37,7 +37,7 @@ using V = std::vector<uint32_t>;
void populate(HnswGraph &graph) {
// no 0
graph.make_node_for_document(1, 1);
- graph.make_node_for_document(2, 2);
+ auto er = graph.make_node_for_document(2, 2);
// no 3
graph.make_node_for_document(4, 2);
graph.make_node_for_document(5, 0);
@@ -49,7 +49,7 @@ void populate(HnswGraph &graph) {
graph.set_link_array(6, 0, V{1, 2, 4});
graph.set_link_array(2, 1, V{4});
graph.set_link_array(4, 1, V{2});
- graph.set_entry_node({2, 1});
+ graph.set_entry_node({2, er, 1});
}
void modify(HnswGraph &graph) {
@@ -63,7 +63,7 @@ void modify(HnswGraph &graph) {
graph.set_link_array(4, 1, V{7});
graph.set_link_array(7, 1, V{4});
- graph.set_entry_node({4, 1});
+ graph.set_entry_node({4, graph.get_node_ref(4), 1});
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp
index 37e3ea1adbd..69a1fbf2065 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp
@@ -13,13 +13,14 @@ HnswGraph::HnswGraph()
links(HnswIndex::make_default_link_store_config()),
entry_docid_and_level()
{
+ node_refs.ensure_size(1, AtomicEntryRef());
EntryNode entry;
set_entry_node(entry);
}
HnswGraph::~HnswGraph() {}
-void
+HnswGraph::NodeRef
HnswGraph::make_node_for_document(uint32_t docid, uint32_t num_levels)
{
node_refs.ensure_size(docid + 1, AtomicEntryRef());
@@ -29,6 +30,7 @@ HnswGraph::make_node_for_document(uint32_t docid, uint32_t num_levels)
vespalib::Array<AtomicEntryRef> levels(num_levels, AtomicEntryRef());
auto node_ref = nodes.add(levels);
node_refs[docid].store_release(node_ref);
+ return node_ref;
}
void
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
index bbf9616c38c..e94c3a29181 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
@@ -46,7 +46,7 @@ struct HnswGraph {
~HnswGraph();
- void make_node_for_document(uint32_t docid, uint32_t num_levels);
+ NodeRef make_node_for_document(uint32_t docid, uint32_t num_levels);
void remove_node_for_document(uint32_t docid);
@@ -54,46 +54,56 @@ struct HnswGraph {
return node_refs[docid].load_acquire();
}
- LevelArrayRef get_level_array(uint32_t docid) const {
- auto node_ref = get_node_ref(docid);
- assert(node_ref.valid());
- return nodes.get(node_ref);
+ bool still_valid(uint32_t docid, NodeRef node_ref) const {
+ return node_ref.valid() && (get_node_ref(docid) == node_ref);
}
- LevelArrayRef get_level_array(uint32_t docid, NodeRef cached_ref) const {
- auto node_ref = get_node_ref(docid);
- if (node_ref.valid() && (cached_ref == node_ref)) {
+ LevelArrayRef get_level_array(NodeRef node_ref) const {
+ if (node_ref.valid()) {
return nodes.get(node_ref);
}
return LevelArrayRef();
}
- LinkArrayRef get_link_array(uint32_t docid, uint32_t level) const {
- auto levels = get_level_array(docid);
- assert(level < levels.size());
- return links.get(levels[level].load_acquire());
+ LevelArrayRef get_level_array(uint32_t docid) const {
+ auto node_ref = get_node_ref(docid);
+ return get_level_array(node_ref);
}
- LinkArrayRef get_link_array(uint32_t docid, LevelArrayRef cached_levels, uint32_t level) const {
- auto node_ref = get_node_ref(docid);
- auto levels = get_level_array(docid, node_ref);
- if ((levels == cached_levels) && (level < levels.size())) {
- return links.get(levels[level].load_acquire());
+ LinkArrayRef get_link_array(LevelArrayRef levels, uint32_t level) const {
+ if (level < levels.size()) {
+ auto links_ref = levels[level].load_acquire();
+ if (links_ref.valid()) {
+ return links.get(links_ref);
+ }
}
return LinkArrayRef();
}
+ LinkArrayRef get_link_array(uint32_t docid, uint32_t level) const {
+ auto levels = get_level_array(docid);
+ return get_link_array(levels, level);
+ }
+
+ LinkArrayRef get_link_array(NodeRef node_ref, uint32_t level) const {
+ auto levels = get_level_array(node_ref);
+ return get_link_array(levels, level);
+ }
+
void set_link_array(uint32_t docid, uint32_t level, const LinkArrayRef& new_links);
struct EntryNode {
uint32_t docid;
+ NodeRef node_ref;
int32_t level;
EntryNode()
: docid(0), // Note that docid 0 is reserved and never used
+ node_ref(),
level(-1)
{}
- EntryNode(uint32_t docid_in, int32_t level_in)
+ EntryNode(uint32_t docid_in, NodeRef node_ref_in, int32_t level_in)
: docid(docid_in),
+ node_ref(node_ref_in),
level(level_in)
{}
};
@@ -102,15 +112,43 @@ struct HnswGraph {
uint64_t value = node.level;
value <<= 32;
value |= node.docid;
+ if (node.node_ref.valid()) {
+ assert(node.level >= 0);
+ assert(node.docid > 0);
+ } else {
+ assert(node.level == -1);
+ assert(node.docid == 0);
+ }
entry_docid_and_level.store(value, std::memory_order_release);
}
+ uint64_t get_entry_atomic() const {
+ return entry_docid_and_level.load(std::memory_order_acquire);
+ }
+
EntryNode get_entry_node() const {
EntryNode entry;
- uint64_t value = entry_docid_and_level.load(std::memory_order_acquire);
- entry.docid = (uint32_t)value;
- entry.level = (int32_t)(value >> 32);
- return entry;
+ while (true) {
+ uint64_t value = get_entry_atomic();
+ entry.docid = (uint32_t)value;
+ entry.node_ref = get_node_ref(entry.docid);
+ entry.level = (int32_t)(value >> 32);
+ if ((entry.docid == 0)
+ && (entry.level == -1)
+ && ! entry.node_ref.valid())
+ {
+ // invalid in every way
+ return entry;
+ }
+ if ((entry.docid > 0)
+ && (entry.level > -1)
+ && entry.node_ref.valid()
+ && (get_entry_atomic() == value))
+ {
+ // valid in every way
+ return entry;
+ }
+ }
}
size_t size() const { return node_refs.size(); }
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index a03614a785e..9382fef13f6 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -201,10 +201,13 @@ HnswIndex::find_nearest_in_layer(const TypedCells& input, const HnswCandidate& e
bool keep_searching = true;
while (keep_searching) {
keep_searching = false;
- for (uint32_t neighbor_docid : _graph.get_link_array(nearest.docid, level)) {
+ for (uint32_t neighbor_docid : _graph.get_link_array(nearest.node_ref, level)) {
+ auto neighbor_ref = _graph.get_node_ref(neighbor_docid);
double dist = calc_distance(input, neighbor_docid);
- if (dist < nearest.distance) {
- nearest = HnswCandidate(neighbor_docid, dist);
+ if (_graph.still_valid(neighbor_docid, neighbor_ref)
+ && dist < nearest.distance)
+ {
+ nearest = HnswCandidate(neighbor_docid, neighbor_ref, dist);
keep_searching = true;
}
}
@@ -239,16 +242,20 @@ HnswIndex::search_layer(const TypedCells& input, uint32_t neighbors_to_find,
break;
}
candidates.pop();
- for (uint32_t neighbor_docid : _graph.get_link_array(cand.docid, level)) {
- if ((neighbor_docid >= doc_id_limit) || visited.is_marked(neighbor_docid)) {
+ for (uint32_t neighbor_docid : _graph.get_link_array(cand.node_ref, level)) {
+ auto neighbor_ref = _graph.get_node_ref(neighbor_docid);
+ if ((! neighbor_ref.valid())
+ || (neighbor_docid >= doc_id_limit)
+ || visited.is_marked(neighbor_docid))
+ {
continue;
}
visited.mark(neighbor_docid);
double dist_to_input = calc_distance(input, neighbor_docid);
if (dist_to_input < limit_dist) {
- candidates.emplace(neighbor_docid, dist_to_input);
+ candidates.emplace(neighbor_docid, neighbor_ref, dist_to_input);
if ((!filter) || filter->testBit(neighbor_docid)) {
- best_neighbors.emplace(neighbor_docid, dist_to_input);
+ best_neighbors.emplace(neighbor_docid, neighbor_ref, dist_to_input);
if (best_neighbors.size() > neighbors_to_find) {
best_neighbors.pop();
limit_dist = best_neighbors.top().distance;
@@ -287,11 +294,12 @@ HnswIndex::internal_prepare_add(uint32_t docid, TypedCells input_vector) const
PreparedAddDoc op(docid, level);
auto entry = _graph.get_entry_node();
if (entry.docid == 0) {
+ // graph has no entry point
return op;
}
int search_level = entry.level;
double entry_dist = calc_distance(input_vector, entry.docid);
- HnswCandidate entry_point(entry.docid, entry_dist);
+ HnswCandidate entry_point(entry.docid, entry.node_ref, entry_dist);
while (search_level > op.max_level) {
entry_point = find_nearest_in_layer(input_vector, entry_point, search_level);
--search_level;
@@ -329,13 +337,13 @@ HnswIndex::filter_valid_docids(const LinkArrayRef &docids)
void
HnswIndex::internal_complete_add(uint32_t docid, PreparedAddDoc &op)
{
- _graph.make_node_for_document(docid, op.max_level + 1);
+ auto node_ref = _graph.make_node_for_document(docid, op.max_level + 1);
for (int level = 0; level <= op.max_level; ++level) {
auto neighbors = filter_valid_docids(op.connections[level]);
connect_new_node(docid, neighbors, level);
}
if (op.max_level > get_entry_level()) {
- _graph.set_entry_node({docid, op.max_level});
+ _graph.set_entry_node({docid, node_ref, op.max_level});
}
}
@@ -398,7 +406,8 @@ HnswIndex::remove_document(uint32_t docid)
LinkArrayRef my_links = _graph.get_link_array(docid, level);
for (uint32_t neighbor_id : my_links) {
if (need_new_entrypoint) {
- _graph.set_entry_node({neighbor_id, level});
+ auto entry_node_ref = _graph.get_node_ref(neighbor_id);
+ _graph.set_entry_node({neighbor_id, entry_node_ref, level});
need_new_entrypoint = false;
}
remove_link_to(neighbor_id, docid, level);
@@ -530,12 +539,13 @@ HnswIndex::top_k_candidates(const TypedCells &vector, uint32_t k, const BitVecto
{
FurthestPriQ best_neighbors;
auto entry = _graph.get_entry_node();
- if (entry.level < 0) {
+ if (entry.docid == 0) {
+ // graph has no entry point
return best_neighbors;
}
int search_level = entry.level;
double entry_dist = calc_distance(vector, entry.docid);
- HnswCandidate entry_point(entry.docid, entry_dist);
+ HnswCandidate entry_point(entry.docid, entry.node_ref, entry_dist);
while (search_level > 0) {
entry_point = find_nearest_in_layer(vector, entry_point, search_level);
--search_level;
@@ -568,13 +578,13 @@ HnswIndex::set_node(uint32_t docid, const HnswNode &node)
{
size_t num_levels = node.size();
assert(num_levels > 0);
- _graph.make_node_for_document(docid, num_levels);
+ auto node_ref = _graph.make_node_for_document(docid, num_levels);
for (size_t level = 0; level < num_levels; ++level) {
connect_new_node(docid, node.level(level), level);
}
int max_level = num_levels - 1;
if (get_entry_level() < max_level) {
- _graph.set_entry_node({docid, max_level});
+ _graph.set_entry_node({docid, node_ref, max_level});
}
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.cpp
index 9f49c0647c6..ac98b28d105 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_loader.cpp
@@ -39,7 +39,8 @@ HnswIndexLoader::load(const fileutil::LoadedBuffer& buf)
}
if (_failed) return false;
_graph.node_refs.ensure_size(num_nodes);
- _graph.set_entry_node({entry_docid, entry_level});
+ auto entry_node_ref = _graph.get_node_ref(entry_docid);
+ _graph.set_entry_node({entry_docid, entry_node_ref, entry_level});
return true;
}
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h
index b11d3f36a7a..99266505780 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_utils.h
@@ -5,6 +5,7 @@
#include <cstdint>
#include <queue>
#include <vector>
+#include "hnsw_graph.h"
namespace search::tensor {
@@ -13,8 +14,12 @@ namespace search::tensor {
*/
struct HnswCandidate {
uint32_t docid;
+ HnswGraph::NodeRef node_ref;
double distance;
- HnswCandidate(uint32_t docid_in, double distance_in) : docid(docid_in), distance(distance_in) {}
+ HnswCandidate(uint32_t docid_in, double distance_in)
+ : docid(docid_in), node_ref(), distance(distance_in) {}
+ HnswCandidate(uint32_t docid_in, HnswGraph::NodeRef node_ref_in, double distance_in)
+ : docid(docid_in), node_ref(node_ref_in), distance(distance_in) {}
};
struct GreaterDistance {