diff options
author | Arne Juul <arnej@verizonmedia.com> | 2020-06-17 11:01:25 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2020-06-17 11:01:25 +0000 |
commit | 1da494f6f83b0c506170518e5ed6caa090f752c7 (patch) | |
tree | 6cabb61e4661ec8d8b1cfee590bba5ab5b75c569 /searchlib/src | |
parent | 86fa3c7610c7f35b8b0730a661f6e852a660e412 (diff) |
save node_ref in prepare_add_doc and check its validity in complete
Diffstat (limited to 'searchlib/src')
-rw-r--r-- | searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp | 52 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/tensor/hnsw_index.h | 10 |
2 files changed, 37 insertions, 25 deletions
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index f766e614867..d78c7a64c89 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -72,10 +72,10 @@ HnswIndex::max_links_for_level(uint32_t level) const } bool -HnswIndex::have_closer_distance(HnswCandidate candidate, const LinkArrayRef& result) const +HnswIndex::have_closer_distance(HnswCandidate candidate, const HnswCandidateVector& result) const { - for (uint32_t result_docid : result) { - double dist = calc_distance(candidate.docid, result_docid); + for (const auto & neighbor : result) { + double dist = calc_distance(candidate.docid, neighbor.docid); if (dist < candidate.distance) { return true; } @@ -91,7 +91,7 @@ HnswIndex::select_neighbors_simple(const HnswCandidateVector& neighbors, uint32_ SelectResult result; for (const auto & candidate : sorted) { if (result.used.size() < max_links) { - result.used.push_back(candidate.docid); + result.used.push_back(candidate); } else { result.unused.push_back(candidate.docid); } @@ -114,7 +114,7 @@ HnswIndex::select_neighbors_heuristic(const HnswCandidateVector& neighbors, uint result.unused.push_back(candidate.docid); continue; } - result.used.push_back(candidate.docid); + result.used.push_back(candidate); if (result.used.size() == max_links) { while (!nearest.empty()) { candidate = nearest.top(); @@ -148,7 +148,12 @@ HnswIndex::shrink_if_needed(uint32_t docid, uint32_t level) neighbors.emplace_back(neighbor_docid, dist); } auto split = select_neighbors(neighbors, max_links); - _graph.set_link_array(docid, level, split.used); + LinkArray new_links; + new_links.reserve(split.used.size()); + for (const auto & neighbor : split.used) { + new_links.push_back(neighbor.docid); + } + _graph.set_link_array(docid, level, new_links); for (uint32_t removed_docid : split.unused) { remove_link_to(removed_docid, docid, level); } @@ -314,29 +319,34 @@ HnswIndex::internal_prepare_add(uint32_t docid, TypedCells input_vector) const while (search_level >= 0) { search_layer(input_vector, _cfg.neighbors_to_explore_at_construction(), best_neighbors, search_level); auto neighbors = select_neighbors(best_neighbors.peek(), _cfg.max_links_on_inserts()); - auto use = neighbors.used; - op.connections[search_level].assign(use.begin(), use.end()); + op.connections[search_level].reserve(neighbors.used.size()); + for (const auto & neighbor : neighbors.used) { + auto neighbor_levels = _graph.get_level_array(neighbor.node_ref); + if (size_t(search_level) < neighbor_levels.size()) { + op.connections[search_level].emplace_back(neighbor.docid, neighbor.node_ref); + } else { + LOG(warning, "in prepare_add(%u), selected neighbor %u is missing level %d (has %zu levels)", + docid, neighbor.docid, search_level, neighbor_levels.size()); + } + } --search_level; } return op; } HnswIndex::LinkArray -HnswIndex::filter_valid_docids(uint32_t level, const LinkArrayRef &docids, uint32_t me) +HnswIndex::filter_valid_docids(uint32_t level, const PreparedAddDoc::Links &neighbors, uint32_t me) { LinkArray valid; - valid.reserve(docids.size()); - for (uint32_t docid : docids) { - if (docid == me) { - continue; - } - auto node_ref = _graph.node_refs[docid].load_acquire(); - if (! node_ref.valid()) { - continue; - } - auto levels = _graph.get_level_array(node_ref); - if (level < levels.size()) { - valid.push_back(docid); + valid.reserve(neighbors.size()); + for (const auto & neighbor : neighbors) { + uint32_t docid = neighbor.first; + HnswGraph::NodeRef node_ref = neighbor.second; + if (_graph.still_valid(docid, node_ref)) { + auto levels = _graph.get_level_array(node_ref); + if ((docid != me) && (level < levels.size())) { + valid.push_back(docid); + } } } return valid; diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h index 6cf8d9f4225..ab3eced8fdc 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h @@ -91,10 +91,11 @@ protected: * where the candidate is located. * Used by select_neighbors_heuristic(). */ - bool have_closer_distance(HnswCandidate candidate, const LinkArrayRef& curr_result) const; + bool have_closer_distance(HnswCandidate candidate, const HnswCandidateVector& curr_result) const; struct SelectResult { - LinkArray used; + HnswCandidateVector used; LinkArray unused; + ~SelectResult() {} }; SelectResult select_neighbors_heuristic(const HnswCandidateVector& neighbors, uint32_t max_links) const; SelectResult select_neighbors_simple(const HnswCandidateVector& neighbors, uint32_t max_links) const; @@ -123,7 +124,8 @@ protected: struct PreparedAddDoc : public PrepareResult { uint32_t docid; int32_t max_level; - std::vector<LinkArray> connections; + using Links = std::vector<std::pair<uint32_t, HnswGraph::NodeRef>>; + std::vector<Links> connections; PreparedAddDoc(uint32_t docid_in, int32_t max_level_in) : docid(docid_in), max_level(max_level_in), connections(max_level+1) {} @@ -131,7 +133,7 @@ protected: PreparedAddDoc(PreparedAddDoc&& other) = default; }; PreparedAddDoc internal_prepare_add(uint32_t docid, TypedCells input_vector) const; - LinkArray filter_valid_docids(uint32_t level, const LinkArrayRef &docids, uint32_t me); + LinkArray filter_valid_docids(uint32_t level, const PreparedAddDoc::Links &neighbors, uint32_t me); void internal_complete_add(uint32_t docid, PreparedAddDoc &op); public: HnswIndex(const DocVectorAccess& vectors, DistanceFunction::UP distance_func, |