about summary refs log tree commit diff stats
path: root/eval
diff options
context:
space:
mode:
author Geir Storli <geirst@verizonmedia.com> 2020-01-24 13:47:42 +0100
committer GitHub <noreply@github.com> 2020-01-24 13:47:42 +0100
commit e2d7d10f664ec221708d051ec754d68fc6cee5b6 (patch)
tree 9ae5c82109c629332084325adcf3a4ba22d19854 /eval
parent 66d44e61436bc4cc057f38904eb8dfdfb1b1b857 (diff)
parent 533ebe974d72bbdbc0cc6fcf06895d2d869a05e5 (diff)
Merge pull request #11921 from vespa-engine/geirst/simplify-running-gist-data-set
Make it simpler to run the ANN_GIST1M data set.
Diffstat (limited to 'eval')
-rw-r--r-- eval/src/tests/ann/sift_benchmark.cpp 37
1 files changed, 27 insertions, 10 deletions
diff --git a/eval/src/tests/ann/sift_benchmark.cpp b/eval/src/tests/ann/sift_benchmark.cpp
index dcfe1cf9c5c..f37fd36652b 100644
--- a/eval/src/tests/ann/sift_benchmark.cpp
+++ b/eval/src/tests/ann/sift_benchmark.cpp
@@ -92,13 +92,14 @@ double to_ms(Duration elapsed) {
return ms.count();
}
-void read_data(std::string dir) {
+void read_data(const std::string& dir, const std::string& data_set) {
+ fprintf(stderr, "read data set '%s' from directory '%s'\n", data_set.c_str(), dir.c_str());
TimePoint bef = std::chrono::steady_clock::now();
- read_queries(dir + "/sift_query.fvecs");
+ read_queries(dir + "/" + data_set + "_query.fvecs");
TimePoint aft = std::chrono::steady_clock::now();
fprintf(stderr, "read queries: %.3f ms\n", to_ms(aft - bef));
bef = std::chrono::steady_clock::now();
- read_docs(dir + "/sift_base.fvecs");
+ read_docs(dir + "/" + data_set + "_base.fvecs");
aft = std::chrono::steady_clock::now();
fprintf(stderr, "read docs: %.3f ms\n", to_ms(aft - bef));
}
@@ -290,23 +291,39 @@ TEST("require that HNSW via NNS api mostly works") {
* wget ftp://ftp.irisa.fr/local/texmex/corpus/sift.tar.gz
* tar -xf sift.tar.gz
*
+ * To run the program:
+ * ./eval_sift_benchmark_app <data_dir>
+ *
* The benchmark program will load the data set from $HOME/sift if no directory is specified.
*
- * More information about the dataset is found here: http://corpus-texmex.irisa.fr/.
+ *
+ * The ANN_GIST1M data set can also be used (as it has the same file format):
+ * wget ftp://ftp.irisa.fr/local/texmex/corpus/gist.tar.gz
+ * tar -xf gist.tar.gz
+ *
+ * Note that #define NUM_DIMS must be changed to 960 before recompiling and running the program:
+ * ./eval_sift_benchmark_app gist <data_dir>
+ *
+ *
+ * More information about the datasets is found here: http://corpus-texmex.irisa.fr/.
*/
int main(int argc, char **argv) {
TEST_MASTER.init(__FILE__);
- std::string sift_dir = ".";
- if (argc > 1) {
- sift_dir = argv[1];
+ std::string data_set = "sift";
+ std::string data_dir = ".";
+ if (argc > 2) {
+ data_set = argv[1];
+ data_dir = argv[2];
+ } else if (argc > 1) {
+ data_dir = argv[1];
} else {
char *home = getenv("HOME");
if (home) {
- sift_dir = home;
- sift_dir += "/sift";
+ data_dir = home;
+ data_dir += "/" + data_set;
}
}
- read_data(sift_dir);
+ read_data(data_dir, data_set);
TEST_RUN_ALL();
return (TEST_MASTER.fini() ? 0 : 1);
}