aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/tensor/hnsw_multi_best_neighbors.h
blob: 67eff4b33c71f984a95375602ebd042271823a60 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include "hnsw_index_utils.h"
#include "nearest_neighbor_index.h"
#include <vespa/vespalib/stllike/hash_map.h>
#include <cassert>

namespace search::tensor {

/*
 * A priority queue of best neighbors for hnsw index. Used for search
 * when hnsw index has multiple nodes per document.
 */
class HnswMultiBestNeighbors {
    using EntryRef = vespalib::datastore::EntryRef;
    FurthestPriQ _candidates;
    vespalib::hash_map<uint32_t, uint32_t> _docids;

    void add_docid(uint32_t docid) {
        auto insres = _docids.insert(std::make_pair(docid, 1));
        if (!insres.second) {
            ++insres.first->second;
        }
    }

    bool remove_docid(uint32_t docid) {
        auto itr = _docids.find(docid);
        assert(itr != _docids.end());
        if (itr->second > 1) {
            --itr->second;
            return false;
        } else {
            _docids.erase(docid);
            return true;
        }
    }
public:
    HnswMultiBestNeighbors()
        : _candidates(),
          _docids()
    {
    }
    ~HnswMultiBestNeighbors();

    std::vector<NearestNeighborIndex::Neighbor> get_neighbors(uint32_t k, double distance_threshold);

    void push(const HnswCandidate& candidate) {
        add_docid(candidate.docid);
        _candidates.push(candidate);
    }
    void pop() {
        assert(!_candidates.empty());
        uint32_t docid = _candidates.top().docid;
        remove_docid(docid);
        _candidates.pop();
    }
    const HnswCandidateVector& peek() const { return _candidates.peek(); }
    bool empty() const { return _candidates.empty(); }
    const HnswCandidate& top() const { return _candidates.top(); }
    size_t size() const { return _docids.size(); }
    void emplace(uint32_t nodeid, uint32_t docid, EntryRef ref, double distance) {
        add_docid(docid);
        _candidates.emplace(nodeid, docid, ref, distance);
    }
};

}