blob: 67eff4b33c71f984a95375602ebd042271823a60 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
#include "hnsw_index_utils.h"
#include "nearest_neighbor_index.h"
#include <vespa/vespalib/stllike/hash_map.h>
#include <cassert>
namespace search::tensor {
/*
* A priority queue of best neighbors for hnsw index. Used for search
* when hnsw index has multiple nodes per document.
*/
class HnswMultiBestNeighbors {
using EntryRef = vespalib::datastore::EntryRef;
FurthestPriQ _candidates;
vespalib::hash_map<uint32_t, uint32_t> _docids;
void add_docid(uint32_t docid) {
auto insres = _docids.insert(std::make_pair(docid, 1));
if (!insres.second) {
++insres.first->second;
}
}
bool remove_docid(uint32_t docid) {
auto itr = _docids.find(docid);
assert(itr != _docids.end());
if (itr->second > 1) {
--itr->second;
return false;
} else {
_docids.erase(docid);
return true;
}
}
public:
HnswMultiBestNeighbors()
: _candidates(),
_docids()
{
}
~HnswMultiBestNeighbors();
std::vector<NearestNeighborIndex::Neighbor> get_neighbors(uint32_t k, double distance_threshold);
void push(const HnswCandidate& candidate) {
add_docid(candidate.docid);
_candidates.push(candidate);
}
void pop() {
assert(!_candidates.empty());
uint32_t docid = _candidates.top().docid;
remove_docid(docid);
_candidates.pop();
}
const HnswCandidateVector& peek() const { return _candidates.peek(); }
bool empty() const { return _candidates.empty(); }
const HnswCandidate& top() const { return _candidates.top(); }
size_t size() const { return _docids.size(); }
void emplace(uint32_t nodeid, uint32_t docid, EntryRef ref, double distance) {
add_docid(docid);
_candidates.emplace(nodeid, docid, ref, distance);
}
};
}
|