diff options
author | Geir Storli <geirst@verizonmedia.com> | 2020-01-24 13:47:42 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-01-24 13:47:42 +0100 |
commit | e2d7d10f664ec221708d051ec754d68fc6cee5b6 (patch) | |
tree | 9ae5c82109c629332084325adcf3a4ba22d19854 /eval | |
parent | 66d44e61436bc4cc057f38904eb8dfdfb1b1b857 (diff) | |
parent | 533ebe974d72bbdbc0cc6fcf06895d2d869a05e5 (diff) |
Merge pull request #11921 from vespa-engine/geirst/simplify-running-gist-data-set
Make it simpler to run the ANN_GIST1M data set.
Diffstat (limited to 'eval')
-rw-r--r-- | eval/src/tests/ann/sift_benchmark.cpp | 37 |
1 file changed, 27 insertions, 10 deletions
```diff
diff --git a/eval/src/tests/ann/sift_benchmark.cpp b/eval/src/tests/ann/sift_benchmark.cpp
index dcfe1cf9c5c..f37fd36652b 100644
--- a/eval/src/tests/ann/sift_benchmark.cpp
+++ b/eval/src/tests/ann/sift_benchmark.cpp
@@ -92,13 +92,14 @@ double to_ms(Duration elapsed) {
     return ms.count();
 }
 
-void read_data(std::string dir) {
+void read_data(const std::string& dir, const std::string& data_set) {
+    fprintf(stderr, "read data set '%s' from directory '%s'\n", data_set.c_str(), dir.c_str());
     TimePoint bef = std::chrono::steady_clock::now();
-    read_queries(dir + "/sift_query.fvecs");
+    read_queries(dir + "/" + data_set + "_query.fvecs");
     TimePoint aft = std::chrono::steady_clock::now();
     fprintf(stderr, "read queries: %.3f ms\n", to_ms(aft - bef));
     bef = std::chrono::steady_clock::now();
-    read_docs(dir + "/sift_base.fvecs");
+    read_docs(dir + "/" + data_set + "_base.fvecs");
     aft = std::chrono::steady_clock::now();
     fprintf(stderr, "read docs: %.3f ms\n", to_ms(aft - bef));
 }
@@ -290,23 +291,39 @@ TEST("require that HNSW via NNS api mostly works") {
  * wget ftp://ftp.irisa.fr/local/texmex/corpus/sift.tar.gz
  * tar -xf sift.tar.gz
  *
+ * To run the program:
+ * ./eval_sift_benchmark_app <data_dir>
+ *
+ * The benchmark program will load the data set from $HOME/sift if no directory is specified.
  *
- * More information about the dataset is found here: http://corpus-texmex.irisa.fr/.
+ *
+ * The ANN_GIST1M data set can also be used (as it has the same file format):
+ * wget ftp://ftp.irisa.fr/local/texmex/corpus/gist.tar.gz
+ * tar -xf gist.tar.gz
+ *
+ * Note that #define NUM_DIMS must be changed to 960 before recompiling and running the program:
+ * ./eval_sift_benchmark_app gist <data_dir>
+ *
+ *
+ * More information about the datasets is found here: http://corpus-texmex.irisa.fr/.
  */
 int main(int argc, char **argv) {
     TEST_MASTER.init(__FILE__);
-    std::string sift_dir = ".";
-    if (argc > 1) {
-        sift_dir = argv[1];
+    std::string data_set = "sift";
+    std::string data_dir = ".";
+    if (argc > 2) {
+        data_set = argv[1];
+        data_dir = argv[2];
+    } else if (argc > 1) {
+        data_dir = argv[1];
     } else {
         char *home = getenv("HOME");
         if (home) {
-            sift_dir = home;
-            sift_dir += "/sift";
+            data_dir = home;
+            data_dir += "/" + data_set;
         }
     }
-    read_data(sift_dir);
+    read_data(data_dir, data_set);
     TEST_RUN_ALL();
     return (TEST_MASTER.fini() ? 0 : 1);
 }
```