diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2024-05-14 07:53:19 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2024-05-14 12:26:38 +0000 |
commit | 692de958dbdc69556562ff8b2aa774a89a810946 (patch) | |
tree | cb1fa68235bc4b70af46a625f6e38676fa20e02f /searchlib/src/tests/tensor | |
parent | b9864c970ae741ad97aacaf487cefed8424c9d8c (diff) |
Add benchmark for distance functions
Diffstat (limited to 'searchlib/src/tests/tensor')
3 files changed, 126 insertions, 4 deletions
diff --git a/searchlib/src/tests/tensor/distance_functions/CMakeLists.txt b/searchlib/src/tests/tensor/distance_functions/CMakeLists.txt index e1a54f7883a..92ad9ae2648 100644 --- a/searchlib/src/tests/tensor/distance_functions/CMakeLists.txt +++ b/searchlib/src/tests/tensor/distance_functions/CMakeLists.txt @@ -7,3 +7,10 @@ vespa_add_executable(searchlib_distance_functions_test_app TEST GTest::GTest ) vespa_add_test(NAME searchlib_distance_functions_test_app COMMAND searchlib_distance_functions_test_app) + +vespa_add_executable(searchlib_distance_functions_benchmark_app TEST + SOURCES + distance_functions_benchmark.cpp + DEPENDS + searchlib +) diff --git a/searchlib/src/tests/tensor/distance_functions/distance_functions_benchmark.cpp b/searchlib/src/tests/tensor/distance_functions/distance_functions_benchmark.cpp new file mode 100644 index 00000000000..f633ea05a5a --- /dev/null +++ b/searchlib/src/tests/tensor/distance_functions/distance_functions_benchmark.cpp @@ -0,0 +1,115 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/eval/eval/typed_cells.h> +#include <vespa/searchlib/common/geo_gcd.h> +#include <vespa/searchlib/tensor/distance_functions.h> +#include <vespa/searchlib/tensor/distance_function_factory.h> +#include <vespa/searchlib/tensor/mips_distance_transform.h> +#include <vespa/vespalib/util/time.h> +#include <vespa/vespalib/util/classname.h> + +using namespace search::tensor; +using vespalib::eval::Int8Float; +using vespalib::BFloat16; +using vespalib::eval::TypedCells; +using search::attribute::DistanceMetric; + +size_t npos = std::string::npos; + +void run_calc(size_t iterations, TypedCells b, const BoundDistanceFunction & df) __attribute_noinline__; +void run_calc_with_limit(size_t iterations, TypedCells b, const BoundDistanceFunction & df) __attribute_noinline__; + +void +run_calc(size_t iterations, TypedCells b, const BoundDistanceFunction & df) { + vespalib::steady_time start = vespalib::steady_clock::now(); + for (size_t i(0); i < iterations; i++) { + df.calc(b); + } + vespalib::duration used = vespalib::steady_clock::now() - start; + printf("%s::calc: Time used = %1.3f\n", vespalib::getClassName(df).c_str(), vespalib::to_s(used)); +} + +void +run_calc_with_limit(size_t iterations, TypedCells b, const BoundDistanceFunction & df) { + vespalib::steady_time start = vespalib::steady_clock::now(); + for (size_t i(0); i < iterations; i++) { + df.calc_with_limit(b, std::numeric_limits<double>::max()); + } + vespalib::duration used = vespalib::steady_clock::now() - start; + printf("%s::calc_with_limit: Time used = %1.3f\n", vespalib::getClassName(df).c_str(), vespalib::to_s(used)); +} + +template<typename T> +void benchmark(size_t iterations, size_t elems) __attribute_noinline__; + +template<typename T> +void benchmark(size_t iterations, size_t elems, const DistanceFunctionFactory & df) { + std::vector<T> av, bv; + srand(7); + av.reserve(elems); + bv.reserve(elems); + for (size_t i(0); i < elems; i++) { + av.push_back(rand()); + bv.push_back(rand()); + } + TypedCells a_cells(av), b_cells(bv); + + run_calc(iterations, b_cells, *df.for_query_vector(a_cells)); + run_calc_with_limit(iterations, b_cells, *df.for_query_vector(a_cells)); +} + +template<typename T> +void benchmark(size_t iterations, size_t elems, const std::string & dist_functions) { + if (dist_functions.find("euclid") != npos) { + benchmark<T>(iterations, elems, EuclideanDistanceFunctionFactory<T>()); + } + if (dist_functions.find("angular") != npos) { + if (std::is_same<T, double>() || std::is_same<T, float>()) { + benchmark<T>(iterations, elems, AngularDistanceFunctionFactory<T>()); + } + } + if (dist_functions.find("prenorm") != npos) { + if (std::is_same<T, double>() || std::is_same<T, float>()) { + benchmark<T>(iterations, elems, PrenormalizedAngularDistanceFunctionFactory<T>()); + } + } + if (dist_functions.find("mips") != npos) { + if (std::is_same<T, double>() || std::is_same<T, float>() || std::is_same<T, Int8Float>()) { + benchmark<T>(iterations, elems, MipsDistanceFunctionFactory<T>()); + } + } +} + +void +benchmark(size_t iterations, size_t elems, const std::string & dist_functions, const std::string & data_types) { + if (data_types.find("double") != npos) { + benchmark<double>(iterations, elems, dist_functions); + } + if (data_types.find("float32") != npos) { + benchmark<float>(iterations, elems, dist_functions); + } + if (data_types.find("bfloat16") != npos) { + benchmark<BFloat16>(iterations, elems, dist_functions); + } + if (data_types.find("float8") != npos) { + benchmark<Int8Float>(iterations, elems, dist_functions); + } +} + +int +main(int argc, char *argv[]) { + size_t num_iterations = 10000000; + size_t num_elems = 1024; + std::string dist_functions = "angular euclid prenorm mips"; + std::string data_types = "double float32 bfloat16 float8"; + if (argc > 1) { num_iterations = atol(argv[1]); } + if (argc > 2) { num_elems = atol(argv[2]); } + if (argc > 3) { dist_functions = argv[3]; } + if (argc > 4) { data_types = argv[4]; } + + printf("Benchmarking %ld iterations with vector length %ld with distance functions '%s' for data types '%s'\n", + num_iterations, num_elems, dist_functions.c_str(), data_types.c_str()); + benchmark(num_iterations, num_elems, dist_functions, data_types); + + return 0; +}
\ No newline at end of file diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp index a1cf86c95cc..97b88bc787a 100644 --- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp @@ -111,7 +111,7 @@ class MyBoundDistanceFunction : public BoundDistanceFunction { std::unique_ptr<BoundDistanceFunction> _real; public: - MyBoundDistanceFunction(std::unique_ptr<BoundDistanceFunction> real) + explicit MyBoundDistanceFunction(std::unique_ptr<BoundDistanceFunction> real) : _real(std::move(real)) { } @@ -147,19 +147,19 @@ class MyDistanceFunctionFactory : public DistanceFunctionFactory { std::unique_ptr<DistanceFunctionFactory> _real; public: - MyDistanceFunctionFactory(std::unique_ptr<DistanceFunctionFactory> real) + explicit MyDistanceFunctionFactory(std::unique_ptr<DistanceFunctionFactory> real) : _real(std::move(real)) { } ~MyDistanceFunctionFactory() override; - std::unique_ptr<BoundDistanceFunction> for_query_vector(TypedCells lhs) override { + std::unique_ptr<BoundDistanceFunction> for_query_vector(TypedCells lhs) const override { EXPECT_FALSE(lhs.non_existing_attribute_value()); return std::make_unique<MyBoundDistanceFunction>(_real->for_query_vector(lhs)); } - std::unique_ptr<BoundDistanceFunction> for_insertion_vector(TypedCells lhs) override { + std::unique_ptr<BoundDistanceFunction> for_insertion_vector(TypedCells lhs) const override { EXPECT_FALSE(lhs.non_existing_attribute_value()); return std::make_unique<MyBoundDistanceFunction>(_real->for_insertion_vector(lhs)); } |