summaryrefslogtreecommitdiffstats
path: root/searchlib/src
diff options
context:
space:
mode:
authorArne Juul <arnej@verizonmedia.com>2020-06-17 11:01:25 +0000
committerArne Juul <arnej@verizonmedia.com>2020-06-17 11:01:25 +0000
commit1da494f6f83b0c506170518e5ed6caa090f752c7 (patch)
tree6cabb61e4661ec8d8b1cfee590bba5ab5b75c569 /searchlib/src
parent86fa3c7610c7f35b8b0730a661f6e852a660e412 (diff)
save node_ref in prepare_add_doc and check its validity in complete
Diffstat (limited to 'searchlib/src')
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp52
-rw-r--r--searchlib/src/vespa/searchlib/tensor/hnsw_index.h10
2 files changed, 37 insertions, 25 deletions
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
index f766e614867..d78c7a64c89 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
@@ -72,10 +72,10 @@ HnswIndex::max_links_for_level(uint32_t level) const
}
bool
-HnswIndex::have_closer_distance(HnswCandidate candidate, const LinkArrayRef& result) const
+HnswIndex::have_closer_distance(HnswCandidate candidate, const HnswCandidateVector& result) const
{
- for (uint32_t result_docid : result) {
- double dist = calc_distance(candidate.docid, result_docid);
+ for (const auto & neighbor : result) {
+ double dist = calc_distance(candidate.docid, neighbor.docid);
if (dist < candidate.distance) {
return true;
}
@@ -91,7 +91,7 @@ HnswIndex::select_neighbors_simple(const HnswCandidateVector& neighbors, uint32_
SelectResult result;
for (const auto & candidate : sorted) {
if (result.used.size() < max_links) {
- result.used.push_back(candidate.docid);
+ result.used.push_back(candidate);
} else {
result.unused.push_back(candidate.docid);
}
@@ -114,7 +114,7 @@ HnswIndex::select_neighbors_heuristic(const HnswCandidateVector& neighbors, uint
result.unused.push_back(candidate.docid);
continue;
}
- result.used.push_back(candidate.docid);
+ result.used.push_back(candidate);
if (result.used.size() == max_links) {
while (!nearest.empty()) {
candidate = nearest.top();
@@ -148,7 +148,12 @@ HnswIndex::shrink_if_needed(uint32_t docid, uint32_t level)
neighbors.emplace_back(neighbor_docid, dist);
}
auto split = select_neighbors(neighbors, max_links);
- _graph.set_link_array(docid, level, split.used);
+ LinkArray new_links;
+ new_links.reserve(split.used.size());
+ for (const auto & neighbor : split.used) {
+ new_links.push_back(neighbor.docid);
+ }
+ _graph.set_link_array(docid, level, new_links);
for (uint32_t removed_docid : split.unused) {
remove_link_to(removed_docid, docid, level);
}
@@ -314,29 +319,34 @@ HnswIndex::internal_prepare_add(uint32_t docid, TypedCells input_vector) const
while (search_level >= 0) {
search_layer(input_vector, _cfg.neighbors_to_explore_at_construction(), best_neighbors, search_level);
auto neighbors = select_neighbors(best_neighbors.peek(), _cfg.max_links_on_inserts());
- auto use = neighbors.used;
- op.connections[search_level].assign(use.begin(), use.end());
+ op.connections[search_level].reserve(neighbors.used.size());
+ for (const auto & neighbor : neighbors.used) {
+ auto neighbor_levels = _graph.get_level_array(neighbor.node_ref);
+ if (size_t(search_level) < neighbor_levels.size()) {
+ op.connections[search_level].emplace_back(neighbor.docid, neighbor.node_ref);
+ } else {
+ LOG(warning, "in prepare_add(%u), selected neighbor %u is missing level %d (has %zu levels)",
+ docid, neighbor.docid, search_level, neighbor_levels.size());
+ }
+ }
--search_level;
}
return op;
}
HnswIndex::LinkArray
-HnswIndex::filter_valid_docids(uint32_t level, const LinkArrayRef &docids, uint32_t me)
+HnswIndex::filter_valid_docids(uint32_t level, const PreparedAddDoc::Links &neighbors, uint32_t me)
{
LinkArray valid;
- valid.reserve(docids.size());
- for (uint32_t docid : docids) {
- if (docid == me) {
- continue;
- }
- auto node_ref = _graph.node_refs[docid].load_acquire();
- if (! node_ref.valid()) {
- continue;
- }
- auto levels = _graph.get_level_array(node_ref);
- if (level < levels.size()) {
- valid.push_back(docid);
+ valid.reserve(neighbors.size());
+ for (const auto & neighbor : neighbors) {
+ uint32_t docid = neighbor.first;
+ HnswGraph::NodeRef node_ref = neighbor.second;
+ if (_graph.still_valid(docid, node_ref)) {
+ auto levels = _graph.get_level_array(node_ref);
+ if ((docid != me) && (level < levels.size())) {
+ valid.push_back(docid);
+ }
}
}
return valid;
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
index 6cf8d9f4225..ab3eced8fdc 100644
--- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
+++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h
@@ -91,10 +91,11 @@ protected:
* where the candidate is located.
* Used by select_neighbors_heuristic().
*/
- bool have_closer_distance(HnswCandidate candidate, const LinkArrayRef& curr_result) const;
+ bool have_closer_distance(HnswCandidate candidate, const HnswCandidateVector& curr_result) const;
struct SelectResult {
- LinkArray used;
+ HnswCandidateVector used;
LinkArray unused;
+ ~SelectResult() {}
};
SelectResult select_neighbors_heuristic(const HnswCandidateVector& neighbors, uint32_t max_links) const;
SelectResult select_neighbors_simple(const HnswCandidateVector& neighbors, uint32_t max_links) const;
@@ -123,7 +124,8 @@ protected:
struct PreparedAddDoc : public PrepareResult {
uint32_t docid;
int32_t max_level;
- std::vector<LinkArray> connections;
+ using Links = std::vector<std::pair<uint32_t, HnswGraph::NodeRef>>;
+ std::vector<Links> connections;
PreparedAddDoc(uint32_t docid_in, int32_t max_level_in)
: docid(docid_in), max_level(max_level_in), connections(max_level+1)
{}
@@ -131,7 +133,7 @@ protected:
PreparedAddDoc(PreparedAddDoc&& other) = default;
};
PreparedAddDoc internal_prepare_add(uint32_t docid, TypedCells input_vector) const;
- LinkArray filter_valid_docids(uint32_t level, const LinkArrayRef &docids, uint32_t me);
+ LinkArray filter_valid_docids(uint32_t level, const PreparedAddDoc::Links &neighbors, uint32_t me);
void internal_complete_add(uint32_t docid, PreparedAddDoc &op);
public:
HnswIndex(const DocVectorAccess& vectors, DistanceFunction::UP distance_func,