diff options
author | Tor Egge <Tor.Egge@online.no> | 2023-05-23 15:18:45 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2023-05-23 15:18:45 +0200 |
commit | d94f8b8947f97e5cfb9411bef274bc8df8893deb (patch) | |
tree | a627bdb052125b5ff60c56fddfad033093c9eadd /searchlib | |
parent | cdda3f7d16c59c8bd61b8779e15f67cecff3f588 (diff) |
Reuse distance function when calculating multiple distances from a node.
Diffstat (limited to 'searchlib')
-rw-r--r-- | searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp | 23 | ||||
-rw-r--r-- | searchlib/src/vespa/searchlib/tensor/hnsw_index.h | 1 |
2 files changed, 9 insertions, 15 deletions
diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index 3e0cd71be8b..f2f7ad212de 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -176,8 +176,9 @@ template <HnswIndexType type> bool HnswIndex<type>::have_closer_distance(HnswTraversalCandidate candidate, const HnswTraversalCandidateVector& result) const { + auto df = _distance_ff->for_insertion_vector(get_vector(candidate.nodeid)); for (const auto & neighbor : result) { - double dist = calc_distance(candidate.nodeid, neighbor.nodeid); + double dist = calc_distance(*df, neighbor.nodeid); if (dist < candidate.distance) { return true; } @@ -253,8 +254,9 @@ HnswIndex<type>::shrink_if_needed(uint32_t nodeid, uint32_t level) if (old_links.size() > max_links) { HnswTraversalCandidateVector neighbors; neighbors.reserve(old_links.size()); + auto df = _distance_ff->for_insertion_vector(get_vector(nodeid)); for (uint32_t neighbor_nodeid : old_links) { - double dist = calc_distance(nodeid, neighbor_nodeid); + double dist = calc_distance(*df, neighbor_nodeid); neighbors.emplace_back(neighbor_nodeid, dist); } auto split = select_neighbors(neighbors, max_links); @@ -297,17 +299,6 @@ HnswIndex<type>::remove_link_to(uint32_t remove_from, uint32_t remove_id, uint32 _graph.set_link_array(remove_from, level, new_links); } - -template <HnswIndexType type> -double -HnswIndex<type>::calc_distance(uint32_t lhs_nodeid, uint32_t rhs_nodeid) const -{ - auto lhs = get_vector(lhs_nodeid); - auto df = _distance_ff->for_insertion_vector(lhs); - auto rhs = get_vector(rhs_nodeid); - return df->calc(rhs); -} - template <HnswIndexType type> double HnswIndex<type>::calc_distance(const BoundDistanceFunction &df, uint32_t rhs_nodeid) const @@ -639,10 +630,14 @@ HnswIndex<type>::mutual_reconnect(const LinkArrayRef &cluster, uint32_t level) for (uint32_t i = 0; i + 1 < cluster.size(); ++i) { uint32_t n_id_1 = cluster[i]; LinkArrayRef n_list_1 = _graph.get_link_array(n_id_1, level); + std::unique_ptr<BoundDistanceFunction> df; for (uint32_t j = i + 1; j < cluster.size(); ++j) { uint32_t n_id_2 = cluster[j]; if (has_link_to(n_list_1, n_id_2)) continue; - pairs.emplace_back(n_id_1, n_id_2, calc_distance(n_id_1, n_id_2)); + if (!df) { + df = _distance_ff->for_insertion_vector(get_vector(n_id_1)); + } + pairs.emplace_back(n_id_1, n_id_2, calc_distance(*df, n_id_2)); } } std::sort(pairs.begin(), pairs.end()); diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h index 7858cb65bf9..20ab0dbea92 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.h @@ -158,7 +158,6 @@ protected: return _vectors.get_vectors(docid); } - double calc_distance(uint32_t lhs_nodeid, uint32_t rhs_nodeid) const; double calc_distance(const BoundDistanceFunction &df, uint32_t rhs_nodeid) const; double calc_distance(const BoundDistanceFunction &df, uint32_t rhs_docid, uint32_t rhs_subspace) const; uint32_t estimate_visited_nodes(uint32_t level, uint32_t nodeid_limit, uint32_t neighbors_to_find, const GlobalFilter* filter) const; |