diff options
author | Geir Storli <geirst@vespa.ai> | 2024-04-22 17:50:16 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-22 17:50:16 +0200 |
commit | bfc337c2d7280c21d411c66d9c446760a718edfa (patch) | |
tree | 4a32b2fc33376a330d34d327d481002296dcf44a /searchlib | |
parent | ff7a82f9a4413055244427946f2265bf89a05f6d (diff) | |
parent | 75c2cbf170ba2fbbff48ead39d806724d6c75815 (diff) |
Merge pull request #30995 from vespa-engine/balder/remove-asserts-and-test-with-invalid-typedcells
Test that distance calculation with invalid typed cells does not trigger asserts MERGEOK
Diffstat (limited to 'searchlib')
9 files changed, 34 insertions, 18 deletions
diff --git a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp index 391e2d91d08..eeae12e1695 100644 --- a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp +++ b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp @@ -20,6 +20,16 @@ using search::attribute::DistanceMetric; template <typename T> TypedCells t(const std::vector<T> &v) { return TypedCells(v); } +template<typename T> +struct EmptyCells { + explicit EmptyCells(size_t elems) : _zero(elems, 0), cells(_zero) { cells.size = 0; } + std::vector<T> _zero; + TypedCells cells; +}; + +template <typename T> +EmptyCells<T> e(size_t elems) { return EmptyCells<T>(elems); } + void verify_geo_miles(const std::vector<double> &p1, const std::vector<double> &p2, double exp_miles) @@ -49,6 +59,15 @@ void verify_geo_miles(const std::vector<double> &p1, } } +template<typename T> +void verifyInvalidQueryVector(DistanceFunctionFactory & dff, double expected_distance_to_origo) { + std::vector<T> origo = {0,0,0}; + EXPECT_FLOAT_EQ(expected_distance_to_origo, dff.for_query_vector(t(origo))->calc(e<double>(origo.size()).cells)); + EXPECT_FLOAT_EQ(expected_distance_to_origo, dff.for_query_vector(t(origo))->calc(e<float>(origo.size()).cells)); + EXPECT_FLOAT_EQ(expected_distance_to_origo, dff.for_query_vector(t(origo))->calc(e<Int8Float>(origo.size()).cells)); + EXPECT_FLOAT_EQ(expected_distance_to_origo, dff.for_query_vector(t(origo))->calc(e<vespalib::BFloat16>(origo.size()).cells)); +} + double computeEuclideanChecked(TypedCells a, TypedCells b) { static EuclideanDistanceFunctionFactory<Int8Float> i8f_dff; static EuclideanDistanceFunctionFactory<float> flt_dff; @@ -92,6 +111,7 @@ TEST(DistanceFunctionsTest, euclidean_gives_expected_score) EXPECT_EQ(d12, 2.0); EuclideanDistanceFunctionFactory<double> dff; + verifyInvalidQueryVector<double>(dff, 0.0); auto euclid = 
dff.for_query_vector(t(p0)); EXPECT_DOUBLE_EQ(euclid->to_rawscore(d12), 1.0/(1.0 + sqrt(2.0))); double threshold = euclid->convert_threshold(8.0); @@ -128,10 +148,7 @@ TEST(DistanceFunctionsTest, euclidean_gives_expected_score) EXPECT_EQ(computeEuclideanChecked(t(p6), t(p6)), 0.0); // smoke test for bfloat16: - std::vector<vespalib::BFloat16> bf16v; - bf16v.emplace_back(1.0); - bf16v.emplace_back(1.0); - bf16v.emplace_back(1.0); + std::vector<vespalib::BFloat16> bf16v{1.0, 1.0, 1.0}; EXPECT_EQ(computeEuclideanChecked(t(bf16v), t(p0)), 3.0); EXPECT_EQ(computeEuclideanChecked(t(bf16v), t(p1)), 2.0); EXPECT_EQ(computeEuclideanChecked(t(bf16v), t(p2)), 2.0); @@ -188,6 +205,7 @@ TEST(DistanceFunctionsTest, angular_gives_expected_score) AngularDistanceFunctionFactory<double> dff; auto angular = dff.for_query_vector(t(p0)); + verifyInvalidQueryVector<double>(dff, 1.0); constexpr double pi = 3.14159265358979323846; double a12 = computeAngularChecked(t(p1), t(p2)); double a13 = computeAngularChecked(t(p1), t(p3)); @@ -315,6 +333,7 @@ TEST(DistanceFunctionsTest, prenormalized_angular_gives_expected_score) std::vector<double> p8{3.0, 0.0, 0.0}; PrenormalizedAngularDistanceFunctionFactory<double> dff; + verifyInvalidQueryVector<double>(dff, 1.0); auto pnad = dff.for_query_vector(t(p0)); double i12 = computePrenormalizedAngularChecked(t(p1), t(p2)); @@ -360,7 +379,8 @@ TEST(DistanceFunctionsTest, prenormalized_angular_gives_expected_score) TEST(DistanceFunctionsTest, hamming_gives_expected_score) { - static HammingDistanceFunctionFactory<double> dff; + HammingDistanceFunctionFactory<double> dff; + verifyInvalidQueryVector<double>(dff, 0.0); std::vector<std::vector<double>> points{{0.0, 0.0, 0.0}, {1.0, 0.0, 0.0}, @@ -376,6 +396,7 @@ TEST(DistanceFunctionsTest, hamming_gives_expected_score) EXPECT_EQ(h0, 0.0); EXPECT_EQ(dist_fun->to_rawscore(h0), 1.0); } + double d12 = dff.for_query_vector(t(points[1]))->calc(t(points[2])); EXPECT_EQ(d12, 3.0); 
EXPECT_DOUBLE_EQ(hamming->to_rawscore(d12), 1.0/(1.0 + 3.0)); @@ -579,6 +600,9 @@ TEST(DistanceFunctionsTest, transformed_mips_basic_scores) std::vector<double> p4{0.5, 0.5, sq_root_half}; std::vector<double> p5{0.0,-1.0, 0.0}; + MipsDistanceFunctionFactory<double> dff; + verifyInvalidQueryVector<double>(dff, 0.0); + double i12 = computeTransformedMipsChecked(t(p1), t(p2)); double i13 = computeTransformedMipsChecked(t(p1), t(p3)); double i23 = computeTransformedMipsChecked(t(p2), t(p3)); diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp index 07e490f4575..af99260979d 100644 --- a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp @@ -31,7 +31,6 @@ public: double calc(TypedCells rhs) const noexcept override { size_t sz = _lhs.size(); vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs); - assert(sz == rhs_vector.size()); auto a = _lhs.data(); auto b = rhs_vector.data(); double b_norm_sq = _computer.dotProduct(b, b, sz); diff --git a/searchlib/src/vespa/searchlib/tensor/distance_calculator.h b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h index 9dbd12650cb..7ff9448c5af 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_calculator.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h @@ -45,7 +45,7 @@ public: if (has_single_subspace) { auto cells = _attr_tensor.get_vector(docid, 0); double min_rawscore = _dist_fun->min_rawscore(); - if (cells.size == 0) [[unlikely]] { + if ( ! cells.valid() ) [[unlikely]] { return min_rawscore; } return std::max(min_rawscore, _dist_fun->to_rawscore(_dist_fun->calc(cells))); @@ -66,7 +66,7 @@ public: double calc_with_limit(uint32_t docid, double limit) const noexcept { if (has_single_subspace) { auto cells = _attr_tensor.get_vector(docid, 0); - if (cells.size == 0) [[unlikely]] { + if ( ! 
cells.valid() ) [[unlikely]] { return std::numeric_limits<double>::max(); } return _dist_fun->calc_with_limit(cells, limit); diff --git a/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp b/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp index d581dbd129e..a7168b5eae6 100644 --- a/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp +++ b/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp @@ -11,7 +11,7 @@ EmptySubspace::EmptySubspace(const SubspaceType& type) { _empty_space.resize(type.mem_size()); // Set size to zero to signal empty/invalid subspace - _cells = vespalib::eval::TypedCells(&_empty_space[0], type.cell_type(), 0); + _cells = vespalib::eval::TypedCells(_empty_space.data(), type.cell_type(), 0); } EmptySubspace::~EmptySubspace() = default; diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp index 6a730132ad1..3ab3a1123eb 100644 --- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp @@ -31,12 +31,10 @@ public: _lhs_vector(_tmpSpace.storeLhs(lhs)) {} double calc(TypedCells rhs) const noexcept override { - size_t sz = _lhs_vector.size(); vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs); - assert(sz == rhs_vector.size()); auto a = _lhs_vector.data(); auto b = rhs_vector.data(); - return _computer.squaredEuclideanDistance(cast(a), cast(b), sz); + return _computer.squaredEuclideanDistance(cast(a), cast(b), _lhs_vector.size()); } double convert_threshold(double threshold) const noexcept override { return threshold*threshold; diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp index 0be920b9c03..7f29a100492 100644 --- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp @@ -28,7 +28,6 @@ public: if constexpr 
(std::is_same<Int8Float, FloatType>::value) { return (double) vespalib::binary_hamming_distance(_lhs_vector.data(), rhs_vector.data(), sz); } else { - assert(sz == rhs_vector.size()); size_t sum = 0; for (size_t i = 0; i < sz; ++i) { sum += (_lhs_vector[i] == rhs_vector[i]) ? 0 : 1; diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp index 267f91bb4e0..4bc90001227 100644 --- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp +++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp @@ -29,12 +29,10 @@ public: } } double calc(TypedCells rhs) const noexcept override { - size_t sz = _lhs.size(); vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs); - assert(sz == rhs_vector.size()); auto a = _lhs.data(); auto b = rhs_vector.data(); - double dot_product = _computer.dotProduct(a, b, sz); + double dot_product = _computer.dotProduct(a, b, _lhs.size()); double distance = _lhs_norm_sq - dot_product; return distance; } diff --git a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp index b1018555212..4753e9d7c87 100644 --- a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp +++ b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp @@ -38,7 +38,6 @@ struct ConvertCellsSelector template <typename FloatType> ConstArrayRef<FloatType> TemporaryVectorStore<FloatType>::internal_convert(TypedCells cells, size_t offset) noexcept { - assert(cells.size * 2 == _tmpSpace.size()); ArrayRef<FloatType> where(_tmpSpace.data() + offset, cells.size); using MyTypify = vespalib::eval::TypifyCellType; using MySelector = ConvertCellsSelector<FloatType>; diff --git a/searchlib/src/vespa/searchlib/tensor/vector_bundle.h b/searchlib/src/vespa/searchlib/tensor/vector_bundle.h index 087c0f43b60..e8b65c5b6b2 100644 --- 
a/searchlib/src/vespa/searchlib/tensor/vector_bundle.h +++ b/searchlib/src/vespa/searchlib/tensor/vector_bundle.h @@ -39,7 +39,6 @@ public: ~VectorBundle() = default; uint32_t subspaces() const noexcept { return _subspaces; } vespalib::eval::TypedCells cells(uint32_t subspace) const noexcept { - assert(subspace < _subspaces); return {static_cast<const char*>(_data) + _subspace_mem_size * subspace, _cell_type, _subspace_size}; } }; |