diff options
author | Arne Juul <arnej@yahooinc.com> | 2023-04-28 11:29:13 +0000 |
---|---|---|
committer | Arne Juul <arnej@yahooinc.com> | 2023-04-28 11:29:13 +0000 |
commit | 78e64bf345d40a863c4a31a79a50482bedff04c6 (patch) | |
tree | 91c71818c56b33f6c7d86d70ef547b61fb4125ce /searchlib | |
parent | b0235cd1042315a1cb6ffc97fc0644e269d5931f (diff) |
add unit test
Diffstat (limited to 'searchlib')
3 files changed, 166 insertions, 2 deletions
diff --git a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp index a1b29c90986..9d0b7259912 100644 --- a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp +++ b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp @@ -4,6 +4,7 @@ #include <vespa/searchlib/common/geo_gcd.h> #include <vespa/searchlib/tensor/distance_functions.h> #include <vespa/searchlib/tensor/distance_function_factory.h> +#include <vespa/searchlib/tensor/mips_distance_transform.h> #include <vespa/vespalib/gtest/gtest.h> #include <vector> @@ -508,5 +509,166 @@ TEST(GeoDegreesTest, gives_expected_score) verify_geo_miles(g9_jfk, g9_jfk, 0); } + +double computeTransformedMipsChecked(TypedCells a, TypedCells b, bool check_insert = true) { + MipsDistanceFunctionFactory<float> flt_dff; + MipsDistanceFunctionFactory<double> dbl_dff; + + auto d_n = dbl_dff.for_query_vector(a); + auto d_f = flt_dff.for_query_vector(a); + auto d_r = dbl_dff.for_query_vector(b); + // normal: + double result = d_n->calc(b); + // reverse: + EXPECT_DOUBLE_EQ(d_r->calc(a), result); + // float factory: + EXPECT_FLOAT_EQ(d_f->calc(b), result); + double closeness_n = d_n->to_rawscore(result); + double closeness_f = d_f->to_rawscore(result); + double closeness_r = d_r->to_rawscore(result); + EXPECT_DOUBLE_EQ(closeness_n, closeness_f); + EXPECT_DOUBLE_EQ(closeness_n, closeness_r); + EXPECT_GT(closeness_n, 0.0); + EXPECT_LE(closeness_n, 1.0); + if (check_insert) { + auto d_i = dbl_dff.for_insertion_vector(a); + EXPECT_DOUBLE_EQ(d_i->calc(b), result); + } + return result; +} + +TEST(DistanceFunctionsTest, transformed_mips_basic_scores) +{ + std::vector<double> p1{1.0, 0.0, 0.0}; + std::vector<double> p2{0.0, 1.0, 0.0}; + std::vector<double> p3{0.0, 0.0, 1.0}; + std::vector<double> p4{0.5, 0.5, sq_root_half}; + std::vector<double> p5{0.0,-1.0, 0.0}; + + double i12 = computeTransformedMipsChecked(t(p1), t(p2)); + double i13 = computeTransformedMipsChecked(t(p1), t(p3)); + double i23 = computeTransformedMipsChecked(t(p2), t(p3)); + EXPECT_DOUBLE_EQ(i12, 0.0); + EXPECT_DOUBLE_EQ(i13, 0.0); + EXPECT_DOUBLE_EQ(i23, 0.0); + + double i14 = computeTransformedMipsChecked(t(p1), t(p4)); + double i24 = computeTransformedMipsChecked(t(p2), t(p4)); + EXPECT_DOUBLE_EQ(i14, -0.5); + EXPECT_DOUBLE_EQ(i24, -0.5); + + double i34 = computeTransformedMipsChecked(t(p3), t(p4)); + EXPECT_FLOAT_EQ(i34, -sq_root_half); + + double i25 = computeTransformedMipsChecked(t(p2), t(p5)); + EXPECT_DOUBLE_EQ(i25, 1.0); + + double i44 = computeTransformedMipsChecked(t(p4), t(p4)); + EXPECT_DOUBLE_EQ(i44, -1.0); + + std::vector<double> p6{ 0.0, 4.0, -4.0}; + std::vector<double> p7{-4.0, 0.0, 4.0}; + std::vector<double> p8{ 4.0, -4.0, 0.0}; + + double i66 = computeTransformedMipsChecked(t(p6), t(p6)); + EXPECT_DOUBLE_EQ(i66, -32.0); + + double i67 = computeTransformedMipsChecked(t(p6), t(p7)); + EXPECT_DOUBLE_EQ(i67, 16.0); + + double i68 = computeTransformedMipsChecked(t(p6), t(p8)); + EXPECT_DOUBLE_EQ(i68, 16.0); + + double i78 = computeTransformedMipsChecked(t(p7), t(p8)); + EXPECT_DOUBLE_EQ(i78, 16.0); +} + +TEST(DistanceFunctionsTest, transformed_mips_growing_norm) +{ + std::vector<double> p1{1.0, 0.0, 0.0}; + std::vector<double> p2{0.0, 1.0, 0.0}; + std::vector<double> p3{0.0, 0.0, 1.0}; + std::vector<double> p6{ 0.0, 4.0, -4.0}; + std::vector<double> p7{-4.0, 0.0, 4.0}; + std::vector<double> p8{ 4.0, -4.0, 0.0}; + + MipsDistanceFunctionFactory<double> dff; + auto f = dff.for_insertion_vector(t(p1)); + EXPECT_DOUBLE_EQ(-1.0, f->calc(t(p1))); + EXPECT_DOUBLE_EQ(0.0, f->calc(t(p2))); + EXPECT_DOUBLE_EQ(0.0, f->calc(t(p3))); + EXPECT_DOUBLE_EQ(0.0, f->calc(t(p6))); + EXPECT_DOUBLE_EQ(4.0, f->calc(t(p7))); + EXPECT_DOUBLE_EQ(-4.0, f->calc(t(p8))); + + // closeness + EXPECT_DOUBLE_EQ(0.25, f->to_rawscore(1.0)); + EXPECT_DOUBLE_EQ(0.50, f->to_rawscore(0.0)); + EXPECT_DOUBLE_EQ(0.75, f->to_rawscore(-1.0)); + + // now "insert" a bigger vector + f = dff.for_insertion_vector(t(p6)); + EXPECT_DOUBLE_EQ(0.0, f->calc(t(p1))); + EXPECT_DOUBLE_EQ(-4.0, f->calc(t(p2))); + EXPECT_DOUBLE_EQ(4.0, f->calc(t(p3))); + EXPECT_DOUBLE_EQ(-32.0, f->calc(t(p6))); + EXPECT_DOUBLE_EQ(16.0, f->calc(t(p7))); + EXPECT_DOUBLE_EQ(16.0, f->calc(t(p8))); + + // now max squared norm is 32, so p1 is "closer" to itself + f = dff.for_insertion_vector(t(p1)); + EXPECT_DOUBLE_EQ(-32.0, f->calc(t(p1))); + // closeness (rawscore) is also different: + EXPECT_DOUBLE_EQ(0.25, f->to_rawscore(32.0)); + EXPECT_DOUBLE_EQ(1/3., f->to_rawscore(16.0)); + EXPECT_DOUBLE_EQ(0.50, f->to_rawscore(0.0)); + EXPECT_DOUBLE_EQ(2/3., f->to_rawscore(-16.0)); + EXPECT_DOUBLE_EQ(0.75, f->to_rawscore(-32.0)); + + // also closer to other small vectors + EXPECT_DOUBLE_EQ(-31.0, f->calc(t(p2))); + EXPECT_DOUBLE_EQ(-31.0, f->calc(t(p3))); + std::vector<double> p9a{-5.0, 0.0, 0.0}; + // 32 - (-5)^2 = 32 - 25 = 7 + EXPECT_DOUBLE_EQ(5.0 - std::sqrt(31.0 * 7), f->calc(t(p9a))); + std::vector<double> p9b{-3.0, 4.0, 0.0}; + std::vector<double> p9c{0.0, -3.0, 4.0}; + std::vector<double> p9d{-4.0, 0.0, 3.0}; + EXPECT_DOUBLE_EQ(3.0 - std::sqrt(31.0 * 7), f->calc(t(p9b))); + EXPECT_DOUBLE_EQ(0.0 - std::sqrt(31.0 * 7), f->calc(t(p9c))); + EXPECT_DOUBLE_EQ(4.0 - std::sqrt(31.0 * 7), f->calc(t(p9d))); + + // but only for insert: + f = dff.for_query_vector(t(p1)); + EXPECT_DOUBLE_EQ(-1.0, f->calc(t(p1))); + + std::vector<double> big{-100, 100, -100}; + f = dff.for_insertion_vector(t(big)); + EXPECT_DOUBLE_EQ(100.0, f->calc(t(p1))); + + // much bigger numbers expected: + f = dff.for_insertion_vector(t(p1)); + EXPECT_DOUBLE_EQ(-30000.0, f->calc(t(p1))); + EXPECT_DOUBLE_EQ(-29999.0, f->calc(t(p2))); + EXPECT_DOUBLE_EQ(-29999.0, f->calc(t(p3))); + // all these have larger distance: + EXPECT_LT(-29999.0, f->calc(t(p6))); + EXPECT_LT(-29999.0, f->calc(t(p7))); + EXPECT_LT(-29999.0, f->calc(t(p8))); + EXPECT_LT(-29999.0, f->calc(t(p9a))); + EXPECT_LT(-29999.0, f->calc(t(p9b))); + EXPECT_LT(-29999.0, f->calc(t(p9c))); + EXPECT_LT(-29999.0, f->calc(t(p9d))); + // but not by much: + EXPECT_GT(-29900.0, f->calc(t(p6))); + EXPECT_GT(-29900.0, f->calc(t(p7))); + EXPECT_GT(-29900.0, f->calc(t(p8))); + EXPECT_GT(-29900.0, f->calc(t(p9a))); + EXPECT_GT(-29900.0, f->calc(t(p9b))); + EXPECT_GT(-29900.0, f->calc(t(p9c))); + EXPECT_GT(-29900.0, f->calc(t(p9d))); +} + + GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp index 30e1b7fc903..1e238aaacc7 100644 --- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp +++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp @@ -51,7 +51,9 @@ public: double dp = _computer.dotProduct(cast(a), cast(b), rhs.size); if constexpr (extra_dim) { double rhs_sq_norm = _computer.dotProduct(cast(b), cast(b), rhs.size); - double rhs_extra_dim = std::sqrt(_max_sq_norm - rhs_sq_norm); + // avoid sqrt(negative) for robustness: + double diff = std::max(0.0, _max_sq_norm - rhs_sq_norm); + double rhs_extra_dim = std::sqrt(diff); dp += _lhs_extra_dim * rhs_extra_dim; } return -dp; diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h index 86dbe6e4d1e..929bfdcc8c4 100644 --- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h +++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h @@ -15,7 +15,7 @@ private: std::mutex _lock; double _max_sq_norm; public: - MaximumSquaredNormStore() noexcept : _lock(), _max_sq_norm(0.0) {} + MaximumSquaredNormStore() noexcept : _lock(), _max_sq_norm(1.0) {} double get_max(double value = 0.0) { std::lock_guard<std::mutex> guard(_lock); if (value > _max_sq_norm) [[unlikely]] { |