aboutsummaryrefslogtreecommitdiffstats
path: root/eval
diff options
context:
space:
mode:
authorArne Juul <arnej@verizonmedia.com>2019-12-19 11:21:15 +0000
committerArne Juul <arnej@verizonmedia.com>2020-01-16 13:07:44 +0000
commitc5b953d3d2b061dbab8537209c6ddedb1c02cf14 (patch)
treec8a7b8490b5b4d3106833b603beafa6a60671b2b /eval
parente05a5b8d30eead5e98439a50507074b56241ab45 (diff)
add some statistics and refactor
Diffstat (limited to 'eval')
-rw-r--r--eval/src/tests/ann/nns-l2.h37
-rw-r--r--eval/src/tests/ann/nns.h23
-rw-r--r--eval/src/tests/ann/sift_benchmark.cpp84
-rw-r--r--eval/src/tests/ann/xp-annoy-nns.cpp59
-rw-r--r--eval/src/tests/ann/xp-lsh-nns.cpp2
5 files changed, 145 insertions, 60 deletions
diff --git a/eval/src/tests/ann/nns-l2.h b/eval/src/tests/ann/nns-l2.h
index cfa5fed704f..dcad5f1bda6 100644
--- a/eval/src/tests/ann/nns-l2.h
+++ b/eval/src/tests/ann/nns-l2.h
@@ -1,8 +1,36 @@
// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
+#include <string.h>
#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
+template <typename T, size_t VLEN>
+static double hw_l2_sq_dist(const T * af, const T * bf, size_t sz)
+{
+ constexpr const size_t OpsPerV = VLEN/sizeof(T);
+ typedef T V __attribute__ ((vector_size (VLEN), aligned(VLEN)));
+
+ const V * a = reinterpret_cast<const V *>(af);
+ const V * b = reinterpret_cast<const V *>(bf);
+
+ V tmp_diff;
+ V tmp_squa;
+ V tmp_sum;
+ memset(&tmp_sum, 0, sizeof(tmp_sum));
+
+ const size_t numOps = sz/OpsPerV;
+ for (size_t i = 0; i < numOps; ++i) {
+ tmp_diff = a[i] - b[i];
+ tmp_squa = tmp_diff * tmp_diff;
+ tmp_sum += tmp_squa;
+ }
+ double sum = 0;
+ for (size_t i = 0; i < OpsPerV; ++i) {
+ sum += tmp_sum[i];
+ }
+ return sum;
+}
+
template <typename FltType = float>
struct L2DistCalc {
vespalib::hwaccelrated::IAccelrated::UP _hw;
@@ -11,7 +39,10 @@ struct L2DistCalc {
using Arr = vespalib::ArrayRef<FltType>;
using ConstArr = vespalib::ConstArrayRef<FltType>;
-
+
+ double product(const FltType *v1, const FltType *v2, size_t sz) {
+ return _hw->dotProduct(v1, v2, sz);
+ }
double product(ConstArr v1, ConstArr v2) {
const FltType *p1 = v1.begin();
const FltType *p2 = v2.begin();
@@ -28,9 +59,7 @@ struct L2DistCalc {
return l2sq(tmp);
}
double l2sq_dist(ConstArr v1, ConstArr v2) {
- std::vector<FltType> tmp;
- tmp.resize(v1.size());
- return l2sq_dist(v1, v2, Arr(tmp));
+ return hw_l2_sq_dist<FltType, 32>(v1.cbegin(), v2.cbegin(), v1.size());
}
};
diff --git a/eval/src/tests/ann/nns.h b/eval/src/tests/ann/nns.h
index 2e6666309bd..79c1aac4379 100644
--- a/eval/src/tests/ann/nns.h
+++ b/eval/src/tests/ann/nns.h
@@ -6,23 +6,28 @@
#include "nns-l2.h"
#include <memory>
+struct SqDist {
+ double distance;
+ explicit SqDist(double d) : distance(d) {}
+};
+
struct NnsHit {
uint32_t docid;
- double sqDistance;
- NnsHit(uint32_t di, double sqD)
- : docid(di), sqDistance(sqD) {}
+ SqDist sq;
+ NnsHit(uint32_t di, SqDist sqD)
+ : docid(di), sq(sqD) {}
};
struct NnsHitComparatorLessDistance {
bool operator() (const NnsHit &lhs, const NnsHit& rhs) const {
- if (lhs.sqDistance > rhs.sqDistance) return false;
- if (lhs.sqDistance < rhs.sqDistance) return true;
+ if (lhs.sq.distance > rhs.sq.distance) return false;
+ if (lhs.sq.distance < rhs.sq.distance) return true;
return (lhs.docid > rhs.docid);
}
};
struct NnsHitComparatorGreaterDistance {
bool operator() (const NnsHit &lhs, const NnsHit& rhs) const {
- if (lhs.sqDistance < rhs.sqDistance) return false;
- if (lhs.sqDistance > rhs.sqDistance) return true;
+ if (lhs.sq.distance < rhs.sq.distance) return false;
+ if (lhs.sq.distance > rhs.sq.distance) return true;
return (lhs.docid > rhs.docid);
}
};
@@ -58,3 +63,7 @@ make_annoy_nns(uint32_t numDims, const DocVectorAccess<float> &dva);
extern
std::unique_ptr<NNS<float>>
make_rplsh_nns(uint32_t numDims, const DocVectorAccess<float> &dva);
+
+extern
+std::unique_ptr<NNS<float>>
+make_hnsw_nns(uint32_t numDims, const DocVectorAccess<float> &dva);
diff --git a/eval/src/tests/ann/sift_benchmark.cpp b/eval/src/tests/ann/sift_benchmark.cpp
index f64351166c1..451d4e1ba50 100644
--- a/eval/src/tests/ann/sift_benchmark.cpp
+++ b/eval/src/tests/ann/sift_benchmark.cpp
@@ -10,7 +10,7 @@
#include <chrono>
#define NUM_DIMS 128
-#define NUM_DOCS 1000000
+#define NUM_DOCS 250000
#define NUM_Q 1000
#include "doc_vector_access.h"
@@ -19,6 +19,7 @@
#include "for-sift-top-k.h"
std::vector<TopK> bruteforceResults;
+std::vector<float> tmp_v(NUM_DIMS);
struct PointVector {
float v[NUM_DIMS];
@@ -26,11 +27,17 @@ struct PointVector {
operator ConstArr() const { return ConstArr(v, NUM_DIMS); }
};
-static PointVector *generatedQueries =
- (PointVector *) malloc(NUM_Q * sizeof(PointVector));
+static PointVector *aligned_alloc(size_t num) {
+ char *mem = (char *)malloc(num * sizeof(PointVector) + 512);
+ mem += 512;
+ size_t val = (size_t)mem;
+ size_t unalign = val % 512;
+ mem -= unalign;
+ return (PointVector *)mem;
+}
-static PointVector *generatedDocs =
- (PointVector *) malloc(NUM_DOCS * sizeof(PointVector));
+static PointVector *generatedQueries = aligned_alloc(NUM_Q);
+static PointVector *generatedDocs = aligned_alloc(NUM_DOCS);
struct DocVectorAdapter : public DocVectorAccess<float>
{
@@ -42,7 +49,7 @@ struct DocVectorAdapter : public DocVectorAccess<float>
double computeDistance(const PointVector &query, uint32_t docid) {
const PointVector &docvector = generatedDocs[docid];
- return l2distCalc.l2sq_dist(query, docvector);
+ return l2distCalc.l2sq_dist(query, docvector, tmp_v);
}
void read_queries(std::string fn) {
@@ -137,7 +144,6 @@ public:
TopK bruteforce_nns(const PointVector &query) {
TopK result;
BfHitHeap heap(result.K);
- std::vector<float> tmp_v(NUM_DIMS);
for (uint32_t docid = 0; docid < NUM_DOCS; ++docid) {
const PointVector &docvector = generatedDocs[docid];
double d = l2distCalc.l2sq_dist(query, docvector, tmp_v);
@@ -173,6 +179,7 @@ void verifyBF(uint32_t qid) {
TEST("require that brute force works") {
TimePoint bef = std::chrono::steady_clock::now();
+ bruteforceResults.reserve(NUM_Q);
for (uint32_t cnt = 0; cnt < NUM_Q; ++cnt) {
const PointVector &query = generatedQueries[cnt];
bruteforceResults.emplace_back(bruteforce_nns(query));
@@ -193,7 +200,7 @@ TopK find_with_nns(uint32_t sk, NNS_API &nns, uint32_t qid) {
vespalib::ConstArrayRef<float> query(qv.v, NUM_DIMS);
auto rv = nns.topK(result.K, query, sk);
for (size_t i = 0; i < result.K; ++i) {
- result.hits[i] = Hit(rv[i].docid, rv[i].sqDistance);
+ result.hits[i] = Hit(rv[i].docid, rv[i].sq.distance);
}
return result;
}
@@ -207,6 +214,10 @@ void verify_nns_quality(uint32_t sk, NNS_API &nns, uint32_t qid) {
double c_factor = 1.0;
for (size_t i = 0; i < result.K; ++i) {
double factor = (result.hits[i].distance / perfect.hits[i].distance);
+ if (factor < 0.99 || factor > 25) {
+ fprintf(stderr, "hit[%zu] got distance %.3f, expected %.3f\n",
+ i, result.hits[i].distance, perfect.hits[i].distance);
+ }
sum_error += factor;
c_factor = std::max(c_factor, factor);
}
@@ -223,57 +234,48 @@ void verify_nns_quality(uint32_t sk, NNS_API &nns, uint32_t qid) {
}
}
-TEST("require that Locality Sensitive Hashing mostly works") {
+void benchmark_nns(const char *name, NNS_API &nns, std::vector<uint32_t> sk_list) {
+ fprintf(stderr, "trying %s indexing...\n", name);
TimePoint bef = std::chrono::steady_clock::now();
- DocVectorAdapter adapter;
- std::unique_ptr<NNS_API> nns = make_rplsh_nns(NUM_DIMS, adapter);
for (uint32_t i = 0; i < NUM_DOCS; ++i) {
- nns->addDoc(i);
+ nns.addDoc(i);
}
fprintf(stderr, "added %u documents...\n", NUM_DOCS);
+ find_with_nns(1, nns, 0);
TimePoint aft = std::chrono::steady_clock::now();
- fprintf(stderr, "build RPLSH index: %.3f ms\n", to_ms(aft - bef));
+ fprintf(stderr, "build %s index: %.3f ms\n", name, to_ms(aft - bef));
- for (uint32_t search_k : { 200, 1000 }) {
+ for (uint32_t search_k : sk_list) {
bef = std::chrono::steady_clock::now();
for (int cnt = 0; cnt < NUM_Q; ++cnt) {
- find_with_nns(search_k, *nns, cnt);
+ find_with_nns(search_k, nns, cnt);
}
aft = std::chrono::steady_clock::now();
- fprintf(stderr, "timing for RPLSH search_k=%u: %.3f ms = %.3f ms per query\n",
- search_k, to_ms(aft - bef), to_ms(aft - bef)/NUM_Q);
+ fprintf(stderr, "timing for %s search_k=%u: %.3f ms = %.3f ms/q\n",
+ name, search_k, to_ms(aft - bef), to_ms(aft - bef)/NUM_Q);
for (int cnt = 0; cnt < NUM_Q; ++cnt) {
- verify_nns_quality(search_k, *nns, cnt);
+ verify_nns_quality(search_k, nns, cnt);
}
}
}
-TEST("require that Indexing via NNS api mostly works") {
- fprintf(stderr, "trying indexing...\n");
- TimePoint bef = std::chrono::steady_clock::now();
+
+#if 1
+TEST("require that Locality Sensitive Hashing mostly works") {
DocVectorAdapter adapter;
- std::unique_ptr<NNS_API> nns = make_annoy_nns(NUM_DIMS, adapter);
- for (uint32_t i = 0; i < NUM_DOCS; ++i) {
- nns->addDoc(i);
- }
- fprintf(stderr, "added %u documents...\n", NUM_DOCS);
- nns->topK(1, adapter.get(0), 1);
- TimePoint aft = std::chrono::steady_clock::now();
- fprintf(stderr, "build annoy index: %.3f ms\n", to_ms(aft - bef));
+ std::unique_ptr<NNS_API> nns = make_rplsh_nns(NUM_DIMS, adapter);
+ benchmark_nns("RPLSH", *nns, { 200, 1000 });
+}
+#endif
- for (uint32_t search_k : { 10000, 20000 }) {
- bef = std::chrono::steady_clock::now();
- for (int cnt = 0; cnt < NUM_Q; ++cnt) {
- find_with_nns(search_k, *nns, cnt);
- }
- aft = std::chrono::steady_clock::now();
- fprintf(stderr, "timing for index search_k=%u: %.3f ms = %.3f ms per query\n",
- search_k, to_ms(aft - bef), to_ms(aft - bef)/NUM_Q);
- for (int cnt = 0; cnt < NUM_Q; ++cnt) {
- verify_nns_quality(search_k, *nns, cnt);
- }
- }
+#if 1
+TEST("require that Annoy via NNS api mostly works") {
+ DocVectorAdapter adapter;
+ std::unique_ptr<NNS_API> nns = make_annoy_nns(NUM_DIMS, adapter);
+ benchmark_nns("Annoy", *nns, { 8000, 10000 });
}
+#endif
+
int main(int argc, char **argv) {
TEST_MASTER.init(__FILE__);
diff --git a/eval/src/tests/ann/xp-annoy-nns.cpp b/eval/src/tests/ann/xp-annoy-nns.cpp
index e5661c0c044..45392084c80 100644
--- a/eval/src/tests/ann/xp-annoy-nns.cpp
+++ b/eval/src/tests/ann/xp-annoy-nns.cpp
@@ -11,6 +11,12 @@ using V = vespalib::ConstArrayRef<float>;
class AnnoyLikeNns;
struct Node;
+static uint64_t plane_dist_cnt = 0;
+static uint64_t w_cen_dist_cnt = 0;
+static uint64_t leaf_split_cnt = 0;
+static uint64_t find_top_k_cnt = 0;
+static uint64_t find_cand_cnt = 0;
+
using QueueNode = std::pair<double, Node *>;
using NodeQueue = std::priority_queue<QueueNode>;
@@ -20,6 +26,7 @@ struct Node {
virtual Node *addDoc(uint32_t docid, V vector, AnnoyLikeNns &meta) = 0;
virtual int remove(uint32_t docid, V vector) = 0;
virtual void findCandidates(std::set<uint32_t> &cands, V vector, NodeQueue &queue, double minDist) const = 0;
+ virtual void stats(std::vector<uint32_t> &depths) = 0;
};
struct LeafNode : public Node {
@@ -32,6 +39,7 @@ struct LeafNode : public Node {
void findCandidates(std::set<uint32_t> &cands, V vector, NodeQueue &queue, double minDist) const override;
Node *split(AnnoyLikeNns &meta);
+ virtual void stats(std::vector<uint32_t> &depths) override { depths.push_back(1); }
};
struct SplitNode : public Node {
@@ -48,6 +56,12 @@ struct SplitNode : public Node {
void findCandidates(std::set<uint32_t> &cands, V vector, NodeQueue &queue, double minDist) const override;
double planeDistance(V vector) const;
+ virtual void stats(std::vector<uint32_t> &depths) override {
+ size_t i = depths.size();
+ leftChildren->stats(depths);
+ rightChildren->stats(depths);
+ while (i < depths.size()) { ++depths[i++]; }
+ }
};
class AnnoyLikeNns : public NNS<float>
@@ -67,7 +81,10 @@ public:
}
}
+ void dumpStats();
+
~AnnoyLikeNns() {
+ dumpStats();
for (Node *root : _roots) {
delete root;
}
@@ -97,11 +114,9 @@ public:
double
SplitNode::planeDistance(V vector) const
{
+ ++plane_dist_cnt;
assert(vector.size() == hyperPlane.size());
- double dp = 0.0;
- for (size_t i = 0; i < vector.size(); ++i) {
- dp += vector[i] * hyperPlane[i];
- }
+ double dp = l2distCalc.product(&vector[0], &hyperPlane[0], vector.size());
return dp - offsetFromOrigo;
}
@@ -167,6 +182,7 @@ struct WeightedCentroid {
return r;
}
double weightedDistance(V vector) {
+ ++w_cen_dist_cnt;
size_t sz = vector.size();
for (size_t i = 0; i < sz; ++i) {
tmp_vector[i] = vector[i] * cnt;
@@ -179,6 +195,7 @@ struct WeightedCentroid {
Node *
LeafNode::split(AnnoyLikeNns &meta)
{
+ ++leaf_split_cnt;
uint32_t dims = meta.dims();
uint32_t retries = 3;
retry:
@@ -232,6 +249,8 @@ retry:
std::vector<uint32_t> leftDs;
std::vector<uint32_t> rightDs;
+ leftDs.reserve(128);
+ rightDs.reserve(128);
for (uint32_t docid : docids) {
V vector = meta.getVector(docid);
@@ -260,9 +279,9 @@ retry:
#endif
LeafNode *newRightNode = new LeafNode();
- newRightNode->docids = rightDs;
+ newRightNode->docids = std::move(rightDs);
s->rightChildren = newRightNode;
- this->docids = leftDs;
+ this->docids = std::move(leftDs);
s->leftChildren = this;
return s;
}
@@ -327,6 +346,7 @@ SplitNode::findCandidates(std::set<uint32_t> &, V vector, NodeQueue &queue, doub
std::vector<NnsHit>
AnnoyLikeNns::topK(uint32_t k, Vector vector, uint32_t search_k)
{
+ ++find_top_k_cnt;
std::vector<float> tmp;
tmp.resize(_numDims);
vespalib::ArrayRef<float> tmpArr(tmp);
@@ -347,6 +367,7 @@ AnnoyLikeNns::topK(uint32_t k, Vector vector, uint32_t search_k)
Node *n = top.second;
queue.pop();
n->findCandidates(candidates, vector, queue, md);
+ ++find_cand_cnt;
}
#if 0
while (queue.size() > 0) {
@@ -357,7 +378,7 @@ AnnoyLikeNns::topK(uint32_t k, Vector vector, uint32_t search_k)
#endif
for (uint32_t docid : candidates) {
double dist = l2distCalc.l2sq_dist(vector, _dva.get(docid), tmpArr);
- NnsHit hit(docid, dist);
+ NnsHit hit(docid, SqDist(dist));
r.push_back(hit);
}
std::sort(r.begin(), r.end(), NnsHitComparatorLessDistance());
@@ -365,6 +386,30 @@ AnnoyLikeNns::topK(uint32_t k, Vector vector, uint32_t search_k)
return r;
}
+void
+AnnoyLikeNns::dumpStats() {
+ fprintf(stderr, "stats for AnnoyLikeNns:\n");
+ fprintf(stderr, "planeDistance() calls: %zu\n", plane_dist_cnt);
+ fprintf(stderr, "weightedDistance() calls: %zu\n", w_cen_dist_cnt);
+ fprintf(stderr, "leaf split() calls: %zu\n", leaf_split_cnt);
+ fprintf(stderr, "topK() calls: %zu\n", find_top_k_cnt);
+ fprintf(stderr, "findCandidates() calls: %zu\n", find_cand_cnt);
+ std::vector<uint32_t> depths;
+ _roots[0]->stats(depths);
+ std::vector<uint32_t> counts;
+ for (uint32_t deep : depths) {
+ while (counts.size() <= deep) counts.push_back(0);
+ counts[deep]++;
+ }
+ fprintf(stderr, "depths for %zu leaves [\n", depths.size());
+ for (uint32_t deep = 0; deep < counts.size(); ++deep) {
+ if (counts[deep] > 0) {
+ fprintf(stderr, "%u deep count %u\n", deep, counts[deep]);
+ }
+ }
+ fprintf(stderr, "]\n");
+}
+
std::unique_ptr<NNS<float>>
make_annoy_nns(uint32_t numDims, const DocVectorAccess<float> &dva)
{
diff --git a/eval/src/tests/ann/xp-lsh-nns.cpp b/eval/src/tests/ann/xp-lsh-nns.cpp
index 285985167c0..0ea119a9c70 100644
--- a/eval/src/tests/ann/xp-lsh-nns.cpp
+++ b/eval/src/tests/ann/xp-lsh-nns.cpp
@@ -231,7 +231,7 @@ RpLshNns::topK(uint32_t k, Vector vector, uint32_t search_k)
std::vector<LshHit> best = heap.bestLshHits();
size_t numHits = std::min((size_t)k, best.size());
for (size_t i = 0; i < numHits; ++i) {
- result.emplace_back(best[i].docid, best[i].distance);
+ result.emplace_back(best[i].docid, SqDist(best[i].distance));
}
return result;
}