summary refs log tree commit diff stats
path: root/eval
diff options
context:
space:
mode:
author Arne Juul <arnej@verizonmedia.com> 2020-07-06 07:17:59 +0000
committer Arne Juul <arnej@verizonmedia.com> 2020-07-06 07:17:59 +0000
commit 041e31a87a535c0d7bbcd035c9c11082643d39ed (patch)
tree 676a4281f115678bdc2f1176b99d630681181a35 /eval
parent c44eaefc2f3b541c2a435f360709fd4fc7b1de4d (diff)
use more descriptive variable names
Diffstat (limited to 'eval')
-rw-r--r-- eval/src/tests/ann/extended-hnsw.cpp 12
-rw-r--r-- eval/src/tests/ann/hnsw-like.h 6
-rw-r--r-- eval/src/tests/ann/nns.h 2
-rw-r--r-- eval/src/tests/ann/sift_benchmark.cpp 32
-rw-r--r-- eval/src/tests/ann/xp-annoy-nns.cpp 16
-rw-r--r-- eval/src/tests/ann/xp-hnsw-wrap.cpp 4
-rw-r--r-- eval/src/tests/ann/xp-hnswlike-nns.cpp 12
-rw-r--r-- eval/src/tests/ann/xp-lsh-nns.cpp 4
8 files changed, 44 insertions, 44 deletions
diff --git a/eval/src/tests/ann/extended-hnsw.cpp b/eval/src/tests/ann/extended-hnsw.cpp
index fbc4bedec05..95cd674a815 100644
--- a/eval/src/tests/ann/extended-hnsw.cpp
+++ b/eval/src/tests/ann/extended-hnsw.cpp
@@ -319,7 +319,7 @@ HnswLikeNns::distance(Vector v, uint32_t b) const
}
std::vector<NnsHit>
-HnswLikeNns::topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &blacklist)
+HnswLikeNns::topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &skipDocIds)
{
std::vector<NnsHit> result;
if (_entryLevel < 0) return result;
@@ -343,12 +343,12 @@ HnswLikeNns::topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitV
FurthestPriQ w;
w.push(entryPoint);
#endif
- search_layer_with_filter(vector, w, visited, std::max(k, search_k), 0, blacklist);
+ search_layer_with_filter(vector, w, visited, std::max(k, search_k), 0, skipDocIds);
NearestList tmp = w.steal();
std::sort(tmp.begin(), tmp.end(), LesserDist());
result.reserve(std::min((size_t)k, tmp.size()));
for (const auto & hit : tmp) {
- if (blacklist.isSet(hit.docid)) continue;
+ if (skipDocIds.isSet(hit.docid)) continue;
result.emplace_back(hit.docid, SqDist(hit.dist));
if (result.size() == k) break;
}
@@ -407,14 +407,14 @@ void
HnswLikeNns::search_layer_with_filter(Vector vector, FurthestPriQ &w,
VisitedSet &visited,
uint32_t ef, uint32_t searchLevel,
- const BitVector &blacklist)
+ const BitVector &skipDocIds)
{
NearestPriQ candidates;
for (const HnswHit & entry : w.peek()) {
candidates.push(entry);
visited.mark(entry.docid);
- if (blacklist.isSet(entry.docid)) ++ef;
+ if (skipDocIds.isSet(entry.docid)) ++ef;
}
double limd = std::numeric_limits<double>::max();
while (! candidates.empty()) {
@@ -430,7 +430,7 @@ HnswLikeNns::search_layer_with_filter(Vector vector, FurthestPriQ &w,
++distcalls_search_layer;
if (e_dist < limd) {
candidates.emplace(e_id, SqDist(e_dist));
- if (blacklist.isSet(e_id)) continue;
+ if (skipDocIds.isSet(e_id)) continue;
w.emplace(e_id, SqDist(e_dist));
if (w.size() > ef) {
w.pop();
diff --git a/eval/src/tests/ann/hnsw-like.h b/eval/src/tests/ann/hnsw-like.h
index 36064c69860..841957c1ccb 100644
--- a/eval/src/tests/ann/hnsw-like.h
+++ b/eval/src/tests/ann/hnsw-like.h
@@ -166,11 +166,11 @@ public:
uint32_t ef, uint32_t searchLevel);
void search_layer_with_filter(Vector vector, FurthestPriQ &w,
uint32_t ef, uint32_t searchLevel,
- const BitVector &blacklist);
+ const BitVector &skipDocIds);
void search_layer_with_filter(Vector vector, FurthestPriQ &w,
VisitedSet &visited,
uint32_t ef, uint32_t searchLevel,
- const BitVector &blacklist);
+ const BitVector &skipDocIds);
bool haveCloserDistance(HnswHit e, const LinkList &r) const;
@@ -199,5 +199,5 @@ public:
std::vector<NnsHit> topK(uint32_t k, Vector vector, uint32_t search_k) override;
- std::vector<NnsHit> topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &blacklist) override;
+ std::vector<NnsHit> topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &skipDocIds) override;
};
diff --git a/eval/src/tests/ann/nns.h b/eval/src/tests/ann/nns.h
index ef3e4b5d69c..7a20a132248 100644
--- a/eval/src/tests/ann/nns.h
+++ b/eval/src/tests/ann/nns.h
@@ -75,7 +75,7 @@ public:
using Vector = vespalib::ConstArrayRef<FltType>;
virtual std::vector<NnsHit> topK(uint32_t k, Vector vector, uint32_t search_k) = 0;
- virtual std::vector<NnsHit> topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &blacklist) = 0;
+ virtual std::vector<NnsHit> topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &skipDocIds) = 0;
virtual ~NNS() {}
protected:
uint32_t _numDims;
diff --git a/eval/src/tests/ann/sift_benchmark.cpp b/eval/src/tests/ann/sift_benchmark.cpp
index 4bbe8f61ef1..ea279bf1395 100644
--- a/eval/src/tests/ann/sift_benchmark.cpp
+++ b/eval/src/tests/ann/sift_benchmark.cpp
@@ -26,11 +26,11 @@
#include "read-vecs.h"
#include "bruteforce-nns.h"
-TopK bruteforce_nns_filter(const PointVector &query, const BitVector &blacklist) {
+TopK bruteforce_nns_filter(const PointVector &query, const BitVector &skipDocIds) {
TopK result;
BfHitHeap heap(result.K);
for (uint32_t docid = 0; docid < NUM_DOCS; ++docid) {
- if (blacklist.isSet(docid)) continue;
+ if (skipDocIds.isSet(docid)) continue;
const PointVector &docvector = generatedDocs[docid];
double d = l2distCalc.l2sq_dist(query, docvector);
Hit h(docid, d);
@@ -46,19 +46,19 @@ TopK bruteforce_nns_filter(const PointVector &query, const BitVector &blacklist)
void timing_bf_filter(int percent)
{
- BitVector blacklist(NUM_DOCS);
+ BitVector skipDocIds(NUM_DOCS);
RndGen rnd;
for (uint32_t idx = 0; idx < NUM_DOCS; ++idx) {
if (rnd.nextUniform() < 0.01 * percent) {
- blacklist.setBit(idx);
+ skipDocIds.setBit(idx);
} else {
- blacklist.clearBit(idx);
+ skipDocIds.clearBit(idx);
}
}
TimePoint bef = std::chrono::steady_clock::now();
for (int cnt = 0; cnt < NUM_Q; ++cnt) {
const PointVector &qv = generatedQueries[cnt];
- auto res = bruteforce_nns_filter(qv, blacklist);
+ auto res = bruteforce_nns_filter(qv, skipDocIds);
EXPECT_TRUE(res.hits[res.K - 1].distance > 0.0);
}
TimePoint aft = std::chrono::steady_clock::now();
@@ -89,11 +89,11 @@ TEST("require that brute force works") {
using NNS_API = NNS<float>;
size_t search_with_filter(uint32_t sk, NNS_API &nns, uint32_t qid,
- const BitVector &blacklist)
+ const BitVector &skipDocIds)
{
const PointVector &qv = generatedQueries[qid];
vespalib::ConstArrayRef<float> query(qv.v, NUM_DIMS);
- auto rv = nns.topKfilter(100, query, sk, blacklist);
+ auto rv = nns.topKfilter(100, query, sk, skipDocIds);
return rv.size();
}
@@ -101,12 +101,12 @@ size_t search_with_filter(uint32_t sk, NNS_API &nns, uint32_t qid,
#include "verify-top-k.h"
void verify_with_filter(uint32_t sk, NNS_API &nns, uint32_t qid,
- const BitVector &blacklist)
+ const BitVector &skipDocIds)
{
const PointVector &qv = generatedQueries[qid];
- auto expected = bruteforce_nns_filter(qv, blacklist);
+ auto expected = bruteforce_nns_filter(qv, skipDocIds);
vespalib::ConstArrayRef<float> query(qv.v, NUM_DIMS);
- auto rv = nns.topKfilter(expected.K, query, sk, blacklist);
+ auto rv = nns.topKfilter(expected.K, query, sk, skipDocIds);
TopK actual;
for (size_t i = 0; i < actual.K; ++i) {
actual.hits[i] = Hit(rv[i].docid, rv[i].sq.distance);
@@ -117,19 +117,19 @@ void verify_with_filter(uint32_t sk, NNS_API &nns, uint32_t qid,
void timing_nns_filter(const char *name, NNS_API &nns,
std::vector<uint32_t> sk_list, int percent)
{
- BitVector blacklist(NUM_DOCS);
+ BitVector skipDocIds(NUM_DOCS);
RndGen rnd;
for (uint32_t idx = 0; idx < NUM_DOCS; ++idx) {
if (rnd.nextUniform() < 0.01 * percent) {
- blacklist.setBit(idx);
+ skipDocIds.setBit(idx);
} else {
- blacklist.clearBit(idx);
+ skipDocIds.clearBit(idx);
}
}
for (uint32_t search_k : sk_list) {
TimePoint bef = std::chrono::steady_clock::now();
for (int cnt = 0; cnt < NUM_Q; ++cnt) {
- uint32_t nh = search_with_filter(search_k, nns, cnt, blacklist);
+ uint32_t nh = search_with_filter(search_k, nns, cnt, skipDocIds);
EXPECT_EQUAL(nh, 100u);
}
TimePoint aft = std::chrono::steady_clock::now();
@@ -138,7 +138,7 @@ void timing_nns_filter(const char *name, NNS_API &nns,
#if 0
fprintf(stderr, "Quality check for %s filter %d %%:\n", name, percent);
for (int cnt = 0; cnt < NUM_Q; ++cnt) {
- verify_with_filter(search_k, nns, cnt, blacklist);
+ verify_with_filter(search_k, nns, cnt, skipDocIds);
}
#endif
}
diff --git a/eval/src/tests/ann/xp-annoy-nns.cpp b/eval/src/tests/ann/xp-annoy-nns.cpp
index 213e583d95a..3be6414d35a 100644
--- a/eval/src/tests/ann/xp-annoy-nns.cpp
+++ b/eval/src/tests/ann/xp-annoy-nns.cpp
@@ -27,7 +27,7 @@ struct Node {
virtual Node *addDoc(uint32_t docid, V vector, AnnoyLikeNns &meta) = 0;
virtual int remove(uint32_t docid, V vector) = 0;
virtual void findCandidates(std::set<uint32_t> &cands, V vector, NodeQueue &queue, double minDist) const = 0;
- virtual void filterCandidates(std::set<uint32_t> &cands, V vector, NodeQueue &queue, double minDist, const BitVector &blacklist) const = 0;
+ virtual void filterCandidates(std::set<uint32_t> &cands, V vector, NodeQueue &queue, double minDist, const BitVector &skipDocIds) const = 0;
virtual void stats(std::vector<uint32_t> &depths) = 0;
};
@@ -39,7 +39,7 @@ struct LeafNode : public Node {
Node *addDoc(uint32_t docid, V vector, AnnoyLikeNns &meta) override;
int remove(uint32_t docid, V vector) override;
void findCandidates(std::set<uint32_t> &cands, V vector, NodeQueue &queue, double minDist) const override;
- void filterCandidates(std::set<uint32_t> &cands, V vector, NodeQueue &queue, double minDist, const BitVector &blacklist) const override;
+ void filterCandidates(std::set<uint32_t> &cands, V vector, NodeQueue &queue, double minDist, const BitVector &skipDocIds) const override;
Node *split(AnnoyLikeNns &meta);
virtual void stats(std::vector<uint32_t> &depths) override { depths.push_back(1); }
@@ -57,7 +57,7 @@ struct SplitNode : public Node {
Node *addDoc(uint32_t docid, V vector, AnnoyLikeNns &meta) override;
int remove(uint32_t docid, V vector) override;
void findCandidates(std::set<uint32_t> &cands, V vector, NodeQueue &queue, double minDist) const override;
- void filterCandidates(std::set<uint32_t> &cands, V vector, NodeQueue &queue, double minDist, const BitVector &blacklist) const override;
+ void filterCandidates(std::set<uint32_t> &cands, V vector, NodeQueue &queue, double minDist, const BitVector &skipDocIds) const override;
double planeDistance(V vector) const;
virtual void stats(std::vector<uint32_t> &depths) override {
@@ -310,10 +310,10 @@ LeafNode::findCandidates(std::set<uint32_t> &cands, V, NodeQueue &, double) cons
}
void
-LeafNode::filterCandidates(std::set<uint32_t> &cands, V, NodeQueue &, double, const BitVector &blacklist) const
+LeafNode::filterCandidates(std::set<uint32_t> &cands, V, NodeQueue &, double, const BitVector &skipDocIds) const
{
for (uint32_t d : docids) {
- if (blacklist.isSet(d)) continue;
+ if (skipDocIds.isSet(d)) continue;
cands.insert(d);
}
}
@@ -412,7 +412,7 @@ AnnoyLikeNns::topK(uint32_t k, Vector vector, uint32_t search_k)
}
std::vector<NnsHit>
-AnnoyLikeNns::topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &blacklist)
+AnnoyLikeNns::topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &skipDocIds)
{
++find_top_k_cnt;
std::vector<NnsHit> r;
@@ -429,11 +429,11 @@ AnnoyLikeNns::topKfilter(uint32_t k, Vector vector, uint32_t search_k, const Bit
// fprintf(stderr, "find candidates: node with min distance %g\n", md);
Node *n = top.second;
queue.pop();
- n->filterCandidates(candidates, vector, queue, md, blacklist);
+ n->filterCandidates(candidates, vector, queue, md, skipDocIds);
++find_cand_cnt;
}
for (uint32_t docid : candidates) {
- if (blacklist.isSet(docid)) continue;
+ if (skipDocIds.isSet(docid)) continue;
double dist = l2distCalc.l2sq_dist(vector, _dva.get(docid));
NnsHit hit(docid, SqDist(dist));
r.push_back(hit);
diff --git a/eval/src/tests/ann/xp-hnsw-wrap.cpp b/eval/src/tests/ann/xp-hnsw-wrap.cpp
index 45c7a974254..179e78f72a1 100644
--- a/eval/src/tests/ann/xp-hnsw-wrap.cpp
+++ b/eval/src/tests/ann/xp-hnsw-wrap.cpp
@@ -47,7 +47,7 @@ public:
return result;
}
- std::vector<NnsHit> topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &blacklist) override {
+ std::vector<NnsHit> topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &skipDocIds) override {
std::vector<NnsHit> reversed;
uint32_t adjusted_k = k+4;
uint32_t adjusted_sk = search_k+4;
@@ -57,7 +57,7 @@ public:
auto priQ = _hnsw.searchKnn(vector.cbegin(), adjusted_k);
while (! priQ.empty()) {
auto pair = priQ.top();
- if (! blacklist.isSet(pair.second)) {
+ if (! skipDocIds.isSet(pair.second)) {
reversed.emplace_back(pair.second, SqDist(pair.first));
}
priQ.pop();
diff --git a/eval/src/tests/ann/xp-hnswlike-nns.cpp b/eval/src/tests/ann/xp-hnswlike-nns.cpp
index b7cae9f731c..494734f8ea2 100644
--- a/eval/src/tests/ann/xp-hnswlike-nns.cpp
+++ b/eval/src/tests/ann/xp-hnswlike-nns.cpp
@@ -253,7 +253,7 @@ HnswLikeNns::distance(Vector v, uint32_t b) const
}
std::vector<NnsHit>
-HnswLikeNns::topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &blacklist)
+HnswLikeNns::topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &skipDocIds)
{
std::vector<NnsHit> result;
if (_entryLevel < 0) return result;
@@ -267,12 +267,12 @@ HnswLikeNns::topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitV
}
FurthestPriQ w;
w.push(entryPoint);
- search_layer_with_filter(vector, w, std::max(k, search_k), 0, blacklist);
+ search_layer_with_filter(vector, w, std::max(k, search_k), 0, skipDocIds);
NearestList tmp = w.steal();
std::sort(tmp.begin(), tmp.end(), LesserDist());
result.reserve(std::min((size_t)k, tmp.size()));
for (const auto & hit : tmp) {
- if (blacklist.isSet(hit.docid)) continue;
+ if (skipDocIds.isSet(hit.docid)) continue;
result.emplace_back(hit.docid, SqDist(hit.dist));
if (result.size() == k) break;
}
@@ -330,7 +330,7 @@ HnswLikeNns::search_layer(Vector vector, FurthestPriQ &w,
void
HnswLikeNns::search_layer_with_filter(Vector vector, FurthestPriQ &w,
uint32_t ef, uint32_t searchLevel,
- const BitVector &blacklist)
+ const BitVector &skipDocIds)
{
NearestPriQ candidates;
VisitedSet &visited = _visitedSetPool.get(_nodes.size());
@@ -338,7 +338,7 @@ HnswLikeNns::search_layer_with_filter(Vector vector, FurthestPriQ &w,
for (const HnswHit & entry : w.peek()) {
candidates.push(entry);
visited.mark(entry.docid);
- if (blacklist.isSet(entry.docid)) ++ef;
+ if (skipDocIds.isSet(entry.docid)) ++ef;
}
double limd = std::numeric_limits<double>::max();
while (! candidates.empty()) {
@@ -354,7 +354,7 @@ HnswLikeNns::search_layer_with_filter(Vector vector, FurthestPriQ &w,
++distcalls_search_layer;
if (e_dist < limd) {
candidates.emplace(e_id, SqDist(e_dist));
- if (blacklist.isSet(e_id)) continue;
+ if (skipDocIds.isSet(e_id)) continue;
w.emplace(e_id, SqDist(e_dist));
if (w.size() > ef) {
w.pop();
diff --git a/eval/src/tests/ann/xp-lsh-nns.cpp b/eval/src/tests/ann/xp-lsh-nns.cpp
index c028a07a9d7..97877688a2e 100644
--- a/eval/src/tests/ann/xp-lsh-nns.cpp
+++ b/eval/src/tests/ann/xp-lsh-nns.cpp
@@ -197,7 +197,7 @@ public:
};
std::vector<NnsHit>
-RpLshNns::topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &blacklist)
+RpLshNns::topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVector &skipDocIds)
{
std::vector<NnsHit> result;
result.reserve(k);
@@ -213,7 +213,7 @@ RpLshNns::topKfilter(uint32_t k, Vector vector, uint32_t search_k, const BitVect
int whdcCnt = 0;
size_t docidLimit = _generated_doc_hashes.size();
for (uint32_t docid = 0; docid < docidLimit; ++docid) {
- if (blacklist.isSet(docid)) continue;
+ if (skipDocIds.isSet(docid)) continue;
int hd = hash_dist(query_hash, _generated_doc_hashes[docid]);
if (hd <= limit_hash_dist) {
++fullCnt;