diff options
author | Arne Juul <arnej@verizonmedia.com> | 2020-07-06 07:17:59 +0000 |
---|---|---|
committer | Arne Juul <arnej@verizonmedia.com> | 2020-07-06 07:17:59 +0000 |
commit | 041e31a87a535c0d7bbcd035c9c11082643d39ed (patch) | |
tree | 676a4281f115678bdc2f1176b99d630681181a35 /eval | |
parent | c44eaefc2f3b541c2a435f360709fd4fc7b1de4d (diff) |
use more descriptive variable names
Diffstat (limited to 'eval')
-rw-r--r-- | eval/src/tests/ann/extended-hnsw.cpp | 12 | ||||
-rw-r--r-- | eval/src/tests/ann/hnsw-like.h | 6 | ||||
-rw-r--r-- | eval/src/tests/ann/nns.h | 2 | ||||
-rw-r--r-- | eval/src/tests/ann/sift_benchmark.cpp | 32 | ||||
-rw-r--r-- | eval/src/tests/ann/xp-annoy-nns.cpp | 16 | ||||
-rw-r--r-- | eval/src/tests/ann/xp-hnsw-wrap.cpp | 4 | ||||
-rw-r--r-- | eval/src/tests/ann/xp-hnswlike-nns.cpp | 12 | ||||
-rw-r--r-- | eval/src/tests/ann/xp-lsh-nns.cpp | 4 |
8 files changed, 44 insertions, 44 deletions
diff --git a/eval/src/tests/ann/extended-hnsw.cpp b/eval/src/tests/ann/extended-hnsw.cpp index fbc4bedec05..95cd674a815 100644 --- a/eval/src/tests/ann/extended-hnsw.cpp +++ b/eval/src/tests/ann/extended-hnsw.cpp @@ -319,7 +319,7 @@ HnswLikeNns::distance(Vector v, uint32_t b) const } std::vector<NnsHit> -HnswLikeNns::topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &blacklist) +HnswLikeNns::topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &skipDocIds) { std::vector<NnsHit> result; if (_entryLevel < 0) return result; @@ -343,12 +343,12 @@ HnswLikeNns::topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitV FurthestPriQ w; w.push(entryPoint); #endif - search_layer_with_filter(vector, w, visited, std::max(k, search_k), 0, blacklist); + search_layer_with_filter(vector, w, visited, std::max(k, search_k), 0, skipDocIds); NearestList tmp = w.steal(); std::sort(tmp.begin(), tmp.end(), LesserDist()); result.reserve(std::min((size_t)k, tmp.size())); for (const auto & hit : tmp) { - if (blacklist.isSet(hit.docid)) continue; + if (skipDocIds.isSet(hit.docid)) continue; result.emplace_back(hit.docid, SqDist(hit.dist)); if (result.size() == k) break; } @@ -407,14 +407,14 @@ void HnswLikeNns::search_layer_with_filter(Vector vector, FurthestPriQ &w, VisitedSet &visited, uint32_t ef, uint32_t searchLevel, - const BitVector &blacklist) + const BitVector &skipDocIds) { NearestPriQ candidates; for (const HnswHit & entry : w.peek()) { candidates.push(entry); visited.mark(entry.docid); - if (blacklist.isSet(entry.docid)) ++ef; + if (skipDocIds.isSet(entry.docid)) ++ef; } double limd = std::numeric_limits<double>::max(); while (! candidates.empty()) { @@ -430,7 +430,7 @@ HnswLikeNns::search_layer_with_filter(Vector vector, FurthestPriQ &w, ++distcalls_search_layer; if (e_dist < limd) { candidates.emplace(e_id, SqDist(e_dist)); - if (blacklist.isSet(e_id)) continue; + if (skipDocIds.isSet(e_id)) continue; w.emplace(e_id, SqDist(e_dist)); if (w.size() > ef) { w.pop(); diff --git a/eval/src/tests/ann/hnsw-like.h b/eval/src/tests/ann/hnsw-like.h index 36064c69860..841957c1ccb 100644 --- a/eval/src/tests/ann/hnsw-like.h +++ b/eval/src/tests/ann/hnsw-like.h @@ -166,11 +166,11 @@ public: uint32_t ef, uint32_t searchLevel); void search_layer_with_filter(Vector vector, FurthestPriQ &w, uint32_t ef, uint32_t searchLevel, - const BitVector &blacklist); + const BitVector &skipDocIds); void search_layer_with_filter(Vector vector, FurthestPriQ &w, VisitedSet &visited, uint32_t ef, uint32_t searchLevel, - const BitVector &blacklist); + const BitVector &skipDocIds); bool haveCloserDistance(HnswHit e, const LinkList &r) const; @@ -199,5 +199,5 @@ public: std::vector<NnsHit> topK(uint32_t k, Vector vector, uint32_t search_k) override; - std::vector<NnsHit> topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &blacklist) override; + std::vector<NnsHit> topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &skipDocIds) override; }; diff --git a/eval/src/tests/ann/nns.h b/eval/src/tests/ann/nns.h index ef3e4b5d69c..7a20a132248 100644 --- a/eval/src/tests/ann/nns.h +++ b/eval/src/tests/ann/nns.h @@ -75,7 +75,7 @@ public: using Vector = vespalib::ConstArrayRef<FltType>; virtual std::vector<NnsHit> topK(uint32_t k, Vector vector, uint32_t search_k) = 0; - virtual std::vector<NnsHit> topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &blacklist) = 0; + virtual std::vector<NnsHit> topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &skipDocIds) = 0; virtual ~NNS() {} protected: uint32_t _numDims; diff --git a/eval/src/tests/ann/sift_benchmark.cpp b/eval/src/tests/ann/sift_benchmark.cpp index 4bbe8f61ef1..ea279bf1395 100644 --- a/eval/src/tests/ann/sift_benchmark.cpp +++ b/eval/src/tests/ann/sift_benchmark.cpp @@ -26,11 +26,11 @@ #include "read-vecs.h" #include "bruteforce-nns.h" -TopK bruteforce_nns_filter(const PointVector &query, const BitVector &blacklist) { +TopK bruteforce_nns_filter(const PointVector &query, const BitVector &skipDocIds) { TopK result; BfHitHeap heap(result.K); for (uint32_t docid = 0; docid < NUM_DOCS; ++docid) { - if (blacklist.isSet(docid)) continue; + if (skipDocIds.isSet(docid)) continue; const PointVector &docvector = generatedDocs[docid]; double d = l2distCalc.l2sq_dist(query, docvector); Hit h(docid, d); @@ -46,19 +46,19 @@ TopK bruteforce_nns_filter(const PointVector &query, const BitVector &blacklist) void timing_bf_filter(int percent) { - BitVector blacklist(NUM_DOCS); + BitVector skipDocIds(NUM_DOCS); RndGen rnd; for (uint32_t idx = 0; idx < NUM_DOCS; ++idx) { if (rnd.nextUniform() < 0.01 * percent) { - blacklist.setBit(idx); + skipDocIds.setBit(idx); } else { - blacklist.clearBit(idx); + skipDocIds.clearBit(idx); } } TimePoint bef = std::chrono::steady_clock::now(); for (int cnt = 0; cnt < NUM_Q; ++cnt) { const PointVector &qv = generatedQueries[cnt]; - auto res = bruteforce_nns_filter(qv, blacklist); + auto res = bruteforce_nns_filter(qv, skipDocIds); EXPECT_TRUE(res.hits[res.K - 1].distance > 0.0); } TimePoint aft = std::chrono::steady_clock::now(); @@ -89,11 +89,11 @@ TEST("require that brute force works") { using NNS_API = NNS<float>; size_t search_with_filter(uint32_t sk, NNS_API &nns, uint32_t qid, - const BitVector &blacklist) + const BitVector &skipDocIds) { const PointVector &qv = generatedQueries[qid]; vespalib::ConstArrayRef<float> query(qv.v, NUM_DIMS); - auto rv = nns.topKfilter(100, query, sk, blacklist); + auto rv = nns.topKfilter(100, query, sk, skipDocIds); return rv.size(); } @@ -101,12 +101,12 @@ size_t search_with_filter(uint32_t sk, NNS_API &nns, uint32_t qid, #include "verify-top-k.h" void verify_with_filter(uint32_t sk, NNS_API &nns, uint32_t qid, - const BitVector &blacklist) + const BitVector &skipDocIds) { const PointVector &qv = generatedQueries[qid]; - auto expected = bruteforce_nns_filter(qv, blacklist); + auto expected = bruteforce_nns_filter(qv, skipDocIds); vespalib::ConstArrayRef<float> query(qv.v, NUM_DIMS); - auto rv = nns.topKfilter(expected.K, query, sk, blacklist); + auto rv = nns.topKfilter(expected.K, query, sk, skipDocIds); TopK actual; for (size_t i = 0; i < actual.K; ++i) { actual.hits[i] = Hit(rv[i].docid, rv[i].sq.distance); @@ -117,19 +117,19 @@ void verify_with_filter(uint32_t sk, NNS_API &nns, uint32_t qid, void timing_nns_filter(const char *name, NNS_API &nns, std::vector<uint32_t> sk_list, int percent) { - BitVector blacklist(NUM_DOCS); + BitVector skipDocIds(NUM_DOCS); RndGen rnd; for (uint32_t idx = 0; idx < NUM_DOCS; ++idx) { if (rnd.nextUniform() < 0.01 * percent) { - blacklist.setBit(idx); + skipDocIds.setBit(idx); } else { - blacklist.clearBit(idx); + skipDocIds.clearBit(idx); } } for (uint32_t search_k : sk_list) { TimePoint bef = std::chrono::steady_clock::now(); for (int cnt = 0; cnt < NUM_Q; ++cnt) { - uint32_t nh = search_with_filter(search_k, nns, cnt, blacklist); + uint32_t nh = search_with_filter(search_k, nns, cnt, skipDocIds); EXPECT_EQUAL(nh, 100u); } TimePoint aft = std::chrono::steady_clock::now(); @@ -138,7 +138,7 @@ void timing_nns_filter(const char *name, NNS_API &nns, #if 0 fprintf(stderr, "Quality check for %s filter %d %%:\n", name, percent); for (int cnt = 0; cnt < NUM_Q; ++cnt) { - verify_with_filter(search_k, nns, cnt, blacklist); + verify_with_filter(search_k, nns, cnt, skipDocIds); } #endif } diff --git a/eval/src/tests/ann/xp-annoy-nns.cpp b/eval/src/tests/ann/xp-annoy-nns.cpp index 213e583d95a..3be6414d35a 100644 --- a/eval/src/tests/ann/xp-annoy-nns.cpp +++ b/eval/src/tests/ann/xp-annoy-nns.cpp @@ -27,7 +27,7 @@ struct Node { virtual Node *addDoc(uint32_t docid, V vector, AnnoyLikeNns &meta) = 0; virtual int remove(uint32_t docid, V vector) = 0; virtual void findCandidates(std::set<uint32_t> &cands, V vector, NodeQueue &queue, double minDist) const = 0; - virtual void filterCandidates(std::set<uint32_t> &cands, V vector, NodeQueue &queue, double minDist, const BitVector &blacklist) const = 0; + virtual void filterCandidates(std::set<uint32_t> &cands, V vector, NodeQueue &queue, double minDist, const BitVector &skipDocIds) const = 0; virtual void stats(std::vector<uint32_t> &depths) = 0; }; @@ -39,7 +39,7 @@ struct LeafNode : public Node { Node *addDoc(uint32_t docid, V vector, AnnoyLikeNns &meta) override; int remove(uint32_t docid, V vector) override; void findCandidates(std::set<uint32_t> &cands, V vector, NodeQueue &queue, double minDist) const override; - void filterCandidates(std::set<uint32_t> &cands, V vector, NodeQueue &queue, double minDist, const BitVector &blacklist) const override; + void filterCandidates(std::set<uint32_t> &cands, V vector, NodeQueue &queue, double minDist, const BitVector &skipDocIds) const override; Node *split(AnnoyLikeNns &meta); virtual void stats(std::vector<uint32_t> &depths) override { depths.push_back(1); } @@ -57,7 +57,7 @@ struct SplitNode : public Node { Node *addDoc(uint32_t docid, V vector, AnnoyLikeNns &meta) override; int remove(uint32_t docid, V vector) override; void findCandidates(std::set<uint32_t> &cands, V vector, NodeQueue &queue, double minDist) const override; - void filterCandidates(std::set<uint32_t> &cands, V vector, NodeQueue &queue, double minDist, const BitVector &blacklist) const override; + void filterCandidates(std::set<uint32_t> &cands, V vector, NodeQueue &queue, double minDist, const BitVector &skipDocIds) const override; double planeDistance(V vector) const; virtual void stats(std::vector<uint32_t> &depths) override { @@ -310,10 +310,10 @@ LeafNode::findCandidates(std::set<uint32_t> &cands, V, NodeQueue &, double) cons } void -LeafNode::filterCandidates(std::set<uint32_t> &cands, V, NodeQueue &, double, const BitVector &blacklist) const +LeafNode::filterCandidates(std::set<uint32_t> &cands, V, NodeQueue &, double, const BitVector &skipDocIds) const { for (uint32_t d : docids) { - if (blacklist.isSet(d)) continue; + if (skipDocIds.isSet(d)) continue; cands.insert(d); } } @@ -412,7 +412,7 @@ AnnoyLikeNns::topK(uint32_t k, Vector vector, uint32_t search_k) } std::vector<NnsHit> -AnnoyLikeNns::topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &blacklist) +AnnoyLikeNns::topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &skipDocIds) { ++find_top_k_cnt; std::vector<NnsHit> r; @@ -429,11 +429,11 @@ AnnoyLikeNns::topKfilter(uint32_t k, Vector vector, uint32_t search_k, const Bit // fprintf(stderr, "find candidates: node with min distance %g\n", md); Node *n = top.second; queue.pop(); - n->filterCandidates(candidates, vector, queue, md, blacklist); + n->filterCandidates(candidates, vector, queue, md, skipDocIds); ++find_cand_cnt; } for (uint32_t docid : candidates) { - if (blacklist.isSet(docid)) continue; + if (skipDocIds.isSet(docid)) continue; double dist = l2distCalc.l2sq_dist(vector, _dva.get(docid)); NnsHit hit(docid, SqDist(dist)); r.push_back(hit); diff --git a/eval/src/tests/ann/xp-hnsw-wrap.cpp b/eval/src/tests/ann/xp-hnsw-wrap.cpp index 45c7a974254..179e78f72a1 100644 --- a/eval/src/tests/ann/xp-hnsw-wrap.cpp +++ b/eval/src/tests/ann/xp-hnsw-wrap.cpp @@ -47,7 +47,7 @@ public: return result; } - std::vector<NnsHit> topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &blacklist) override { + std::vector<NnsHit> topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &skipDocIds) override { std::vector<NnsHit> reversed; uint32_t adjusted_k = k+4; uint32_t adjusted_sk = search_k+4; @@ -57,7 +57,7 @@ public: auto priQ = _hnsw.searchKnn(vector.cbegin(), adjusted_k); while (! priQ.empty()) { auto pair = priQ.top(); - if (! blacklist.isSet(pair.second)) { + if (! skipDocIds.isSet(pair.second)) { reversed.emplace_back(pair.second, SqDist(pair.first)); } priQ.pop(); diff --git a/eval/src/tests/ann/xp-hnswlike-nns.cpp b/eval/src/tests/ann/xp-hnswlike-nns.cpp index b7cae9f731c..494734f8ea2 100644 --- a/eval/src/tests/ann/xp-hnswlike-nns.cpp +++ b/eval/src/tests/ann/xp-hnswlike-nns.cpp @@ -253,7 +253,7 @@ HnswLikeNns::distance(Vector v, uint32_t b) const } std::vector<NnsHit> -HnswLikeNns::topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &blacklist) +HnswLikeNns::topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &skipDocIds) { std::vector<NnsHit> result; if (_entryLevel < 0) return result; @@ -267,12 +267,12 @@ HnswLikeNns::topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitV } FurthestPriQ w; w.push(entryPoint); - search_layer_with_filter(vector, w, std::max(k, search_k), 0, blacklist); + search_layer_with_filter(vector, w, std::max(k, search_k), 0, skipDocIds); NearestList tmp = w.steal(); std::sort(tmp.begin(), tmp.end(), LesserDist()); result.reserve(std::min((size_t)k, tmp.size())); for (const auto & hit : tmp) { - if (blacklist.isSet(hit.docid)) continue; + if (skipDocIds.isSet(hit.docid)) continue; result.emplace_back(hit.docid, SqDist(hit.dist)); if (result.size() == k) break; } @@ -330,7 +330,7 @@ HnswLikeNns::search_layer(Vector vector, FurthestPriQ &w, void HnswLikeNns::search_layer_with_filter(Vector vector, FurthestPriQ &w, uint32_t ef, uint32_t searchLevel, - const BitVector &blacklist) + const BitVector &skipDocIds) { NearestPriQ candidates; VisitedSet &visited = _visitedSetPool.get(_nodes.size()); @@ -338,7 +338,7 @@ HnswLikeNns::search_layer_with_filter(Vector vector, FurthestPriQ &w, for (const HnswHit & entry : w.peek()) { candidates.push(entry); visited.mark(entry.docid); - if (blacklist.isSet(entry.docid)) ++ef; + if (skipDocIds.isSet(entry.docid)) ++ef; } double limd = std::numeric_limits<double>::max(); while (! candidates.empty()) { @@ -354,7 +354,7 @@ HnswLikeNns::search_layer_with_filter(Vector vector, FurthestPriQ &w, ++distcalls_search_layer; if (e_dist < limd) { candidates.emplace(e_id, SqDist(e_dist)); - if (blacklist.isSet(e_id)) continue; + if (skipDocIds.isSet(e_id)) continue; w.emplace(e_id, SqDist(e_dist)); if (w.size() > ef) { w.pop(); diff --git a/eval/src/tests/ann/xp-lsh-nns.cpp b/eval/src/tests/ann/xp-lsh-nns.cpp index c028a07a9d7..97877688a2e 100644 --- a/eval/src/tests/ann/xp-lsh-nns.cpp +++ b/eval/src/tests/ann/xp-lsh-nns.cpp @@ -197,7 +197,7 @@ public: }; std::vector<NnsHit> -RpLshNns::topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &blacklist) +RpLshNns::topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &skipDocIds) { std::vector<NnsHit> result; result.reserve(k); @@ -213,7 +213,7 @@ RpLshNns::topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVect int whdcCnt = 0; size_t docidLimit = _generated_doc_hashes.size(); for (uint32_t docid = 0; docid < docidLimit; ++docid) { - if (blacklist.isSet(docid)) continue; + if (skipDocIds.isSet(docid)) continue; int hd = hash_dist(query_hash, _generated_doc_hashes[docid]); if (hd <= limit_hash_dist) { ++fullCnt; |