about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2024-04-22 14:04:56 +0000
committer Henning Baldersheim <balder@yahoo-inc.com> 2024-04-22 14:04:56 +0000
commit 75c2cbf170ba2fbbff48ead39d806724d6c75815 (patch)
tree 23f24c88c5b860d5a6b316ea53e1c0886b0901b4
parent 8708c617ce8f4e837cb924a8c3ab39bb68555a3c (diff)
Test that distance calculation with invalid typed cells does not trigger assert.
-rw-r--r-- searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp 34
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/angular_distance.cpp 1
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/distance_calculator.h 4
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp 2
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp 1
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp 1
-rw-r--r-- searchlib/src/vespa/searchlib/tensor/vector_bundle.h 1
9 files changed, 34 insertions, 18 deletions
diff --git a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
index 391e2d91d08..eeae12e1695 100644
--- a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
+++ b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
@@ -20,6 +20,16 @@ using search::attribute::DistanceMetric;
template <typename T>
TypedCells t(const std::vector<T> &v) { return TypedCells(v); } // Test shorthand: builds a TypedCells from the given vector.
+template<typename T> // Test helper bundling a zero-filled buffer with a TypedCells whose size is forced to 0.
+struct EmptyCells {
+ explicit EmptyCells(size_t elems) : _zero(elems, 0), cells(_zero) { cells.size = 0; } // cells is built from _zero, then its size is overwritten with 0.
+ std::vector<T> _zero; // Backing storage: elems zero-valued cells of type T; declared before cells, so it is initialized first.
+ TypedCells cells; // NOTE(review): presumably a non-owning view into _zero, so a copied EmptyCells would still refer to the source's buffer - confirm.
+};
+
+template <typename T>
+EmptyCells<T> e(size_t elems) { return EmptyCells<T>(elems); } // Test shorthand: returns an EmptyCells<T> constructed with elems.
+
void verify_geo_miles(const std::vector<double> &p1,
const std::vector<double> &p2,
double exp_miles)
@@ -49,6 +59,15 @@ void verify_geo_miles(const std::vector<double> &p1,
}
}
+template<typename T> // T is the cell type of the query vector (origo) built below.
+void verifyInvalidQueryVector(DistanceFunctionFactory & dff, double expected_distance_to_origo) { // Expects calc() on size-0 cells of each cell type to equal expected_distance_to_origo.
+ std::vector<T> origo = {0,0,0}; // Regular three-element zero vector passed to for_query_vector; the size-0 cells are the argument to calc().
+ EXPECT_FLOAT_EQ(expected_distance_to_origo, dff.for_query_vector(t(origo))->calc(e<double>(origo.size()).cells)); // size-0 cells over double storage
+ EXPECT_FLOAT_EQ(expected_distance_to_origo, dff.for_query_vector(t(origo))->calc(e<float>(origo.size()).cells)); // size-0 cells over float storage
+ EXPECT_FLOAT_EQ(expected_distance_to_origo, dff.for_query_vector(t(origo))->calc(e<Int8Float>(origo.size()).cells)); // size-0 cells over Int8Float storage
+ EXPECT_FLOAT_EQ(expected_distance_to_origo, dff.for_query_vector(t(origo))->calc(e<vespalib::BFloat16>(origo.size()).cells)); // size-0 cells over BFloat16 storage
+}
+
double computeEuclideanChecked(TypedCells a, TypedCells b) {
static EuclideanDistanceFunctionFactory<Int8Float> i8f_dff;
static EuclideanDistanceFunctionFactory<float> flt_dff;
@@ -92,6 +111,7 @@ TEST(DistanceFunctionsTest, euclidean_gives_expected_score)
EXPECT_EQ(d12, 2.0);
EuclideanDistanceFunctionFactory<double> dff;
+ verifyInvalidQueryVector<double>(dff, 0.0);
auto euclid = dff.for_query_vector(t(p0));
EXPECT_DOUBLE_EQ(euclid->to_rawscore(d12), 1.0/(1.0 + sqrt(2.0)));
double threshold = euclid->convert_threshold(8.0);
@@ -128,10 +148,7 @@ TEST(DistanceFunctionsTest, euclidean_gives_expected_score)
EXPECT_EQ(computeEuclideanChecked(t(p6), t(p6)), 0.0);
// smoke test for bfloat16:
- std::vector<vespalib::BFloat16> bf16v;
- bf16v.emplace_back(1.0);
- bf16v.emplace_back(1.0);
- bf16v.emplace_back(1.0);
+ std::vector<vespalib::BFloat16> bf16v{1.0, 1.0, 1.0};
EXPECT_EQ(computeEuclideanChecked(t(bf16v), t(p0)), 3.0);
EXPECT_EQ(computeEuclideanChecked(t(bf16v), t(p1)), 2.0);
EXPECT_EQ(computeEuclideanChecked(t(bf16v), t(p2)), 2.0);
@@ -188,6 +205,7 @@ TEST(DistanceFunctionsTest, angular_gives_expected_score)
AngularDistanceFunctionFactory<double> dff;
auto angular = dff.for_query_vector(t(p0));
+ verifyInvalidQueryVector<double>(dff, 1.0);
constexpr double pi = 3.14159265358979323846;
double a12 = computeAngularChecked(t(p1), t(p2));
double a13 = computeAngularChecked(t(p1), t(p3));
@@ -315,6 +333,7 @@ TEST(DistanceFunctionsTest, prenormalized_angular_gives_expected_score)
std::vector<double> p8{3.0, 0.0, 0.0};
PrenormalizedAngularDistanceFunctionFactory<double> dff;
+ verifyInvalidQueryVector<double>(dff, 1.0);
auto pnad = dff.for_query_vector(t(p0));
double i12 = computePrenormalizedAngularChecked(t(p1), t(p2));
@@ -360,7 +379,8 @@ TEST(DistanceFunctionsTest, prenormalized_angular_gives_expected_score)
TEST(DistanceFunctionsTest, hamming_gives_expected_score)
{
- static HammingDistanceFunctionFactory<double> dff;
+ HammingDistanceFunctionFactory<double> dff;
+ verifyInvalidQueryVector<double>(dff, 0.0);
std::vector<std::vector<double>>
points{{0.0, 0.0, 0.0},
{1.0, 0.0, 0.0},
@@ -376,6 +396,7 @@ TEST(DistanceFunctionsTest, hamming_gives_expected_score)
EXPECT_EQ(h0, 0.0);
EXPECT_EQ(dist_fun->to_rawscore(h0), 1.0);
}
+
double d12 = dff.for_query_vector(t(points[1]))->calc(t(points[2]));
EXPECT_EQ(d12, 3.0);
EXPECT_DOUBLE_EQ(hamming->to_rawscore(d12), 1.0/(1.0 + 3.0));
@@ -579,6 +600,9 @@ TEST(DistanceFunctionsTest, transformed_mips_basic_scores)
std::vector<double> p4{0.5, 0.5, sq_root_half};
std::vector<double> p5{0.0,-1.0, 0.0};
+ MipsDistanceFunctionFactory<double> dff;
+ verifyInvalidQueryVector<double>(dff, 0.0);
+
double i12 = computeTransformedMipsChecked(t(p1), t(p2));
double i13 = computeTransformedMipsChecked(t(p1), t(p3));
double i23 = computeTransformedMipsChecked(t(p2), t(p3));
diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
index 07e490f4575..af99260979d 100644
--- a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
@@ -31,7 +31,6 @@ public:
double calc(TypedCells rhs) const noexcept override {
size_t sz = _lhs.size();
vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs);
- assert(sz == rhs_vector.size());
auto a = _lhs.data();
auto b = rhs_vector.data();
double b_norm_sq = _computer.dotProduct(b, b, sz);
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_calculator.h b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h
index 9dbd12650cb..7ff9448c5af 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_calculator.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_calculator.h
@@ -45,7 +45,7 @@ public:
if (has_single_subspace) {
auto cells = _attr_tensor.get_vector(docid, 0);
double min_rawscore = _dist_fun->min_rawscore();
- if (cells.size == 0) [[unlikely]] {
+ if ( ! cells.valid() ) [[unlikely]] {
return min_rawscore;
}
return std::max(min_rawscore, _dist_fun->to_rawscore(_dist_fun->calc(cells)));
@@ -66,7 +66,7 @@ public:
double calc_with_limit(uint32_t docid, double limit) const noexcept {
if (has_single_subspace) {
auto cells = _attr_tensor.get_vector(docid, 0);
- if (cells.size == 0) [[unlikely]] {
+ if ( ! cells.valid() ) [[unlikely]] {
return std::numeric_limits<double>::max();
}
return _dist_fun->calc_with_limit(cells, limit);
diff --git a/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp b/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp
index d581dbd129e..a7168b5eae6 100644
--- a/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/empty_subspace.cpp
@@ -11,7 +11,7 @@ EmptySubspace::EmptySubspace(const SubspaceType& type)
{
_empty_space.resize(type.mem_size());
// Set size to zero to signal empty/invalid subspace
- _cells = vespalib::eval::TypedCells(&_empty_space[0], type.cell_type(), 0);
+ _cells = vespalib::eval::TypedCells(_empty_space.data(), type.cell_type(), 0);
}
EmptySubspace::~EmptySubspace() = default;
diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
index 6a730132ad1..3ab3a1123eb 100644
--- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
@@ -31,12 +31,10 @@ public:
_lhs_vector(_tmpSpace.storeLhs(lhs))
{}
double calc(TypedCells rhs) const noexcept override { // Returns _computer.squaredEuclideanDistance over the stored lhs vector and the converted rhs.
- size_t sz = _lhs_vector.size();
vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs);
- assert(sz == rhs_vector.size());
auto a = _lhs_vector.data();
auto b = rhs_vector.data(); // NOTE(review): read for _lhs_vector.size() elements even if rhs_vector.size() differs - presumably callers supply sufficient backing storage; confirm.
- return _computer.squaredEuclideanDistance(cast(a), cast(b), sz);
+ return _computer.squaredEuclideanDistance(cast(a), cast(b), _lhs_vector.size()); // Length comes from the lhs only; rhs_vector.size() is no longer asserted to match.
}
double convert_threshold(double threshold) const noexcept override {
return threshold*threshold;
diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
index 0be920b9c03..7f29a100492 100644
--- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
@@ -28,7 +28,6 @@ public:
if constexpr (std::is_same<Int8Float, FloatType>::value) {
return (double) vespalib::binary_hamming_distance(_lhs_vector.data(), rhs_vector.data(), sz);
} else {
- assert(sz == rhs_vector.size());
size_t sum = 0;
for (size_t i = 0; i < sz; ++i) {
sum += (_lhs_vector[i] == rhs_vector[i]) ? 0 : 1;
diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp
index 267f91bb4e0..4bc90001227 100644
--- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp
@@ -29,12 +29,10 @@ public:
}
}
double calc(TypedCells rhs) const noexcept override { // Returns _lhs_norm_sq minus the dot product of the stored lhs and the converted rhs.
- size_t sz = _lhs.size();
vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs);
- assert(sz == rhs_vector.size());
auto a = _lhs.data();
auto b = rhs_vector.data(); // NOTE(review): read for _lhs.size() elements even if rhs_vector.size() differs - presumably callers supply sufficient backing storage; confirm.
- double dot_product = _computer.dotProduct(a, b, sz);
+ double dot_product = _computer.dotProduct(a, b, _lhs.size()); // Length comes from _lhs only; rhs_vector.size() is no longer asserted to match.
double distance = _lhs_norm_sq - dot_product;
return distance;
}
diff --git a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp
index b1018555212..4753e9d7c87 100644
--- a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.cpp
@@ -38,7 +38,6 @@ struct ConvertCellsSelector
template <typename FloatType>
ConstArrayRef<FloatType>
TemporaryVectorStore<FloatType>::internal_convert(TypedCells cells, size_t offset) noexcept {
- assert(cells.size * 2 == _tmpSpace.size());
ArrayRef<FloatType> where(_tmpSpace.data() + offset, cells.size);
using MyTypify = vespalib::eval::TypifyCellType;
using MySelector = ConvertCellsSelector<FloatType>;
diff --git a/searchlib/src/vespa/searchlib/tensor/vector_bundle.h b/searchlib/src/vespa/searchlib/tensor/vector_bundle.h
index 087c0f43b60..e8b65c5b6b2 100644
--- a/searchlib/src/vespa/searchlib/tensor/vector_bundle.h
+++ b/searchlib/src/vespa/searchlib/tensor/vector_bundle.h
@@ -39,7 +39,6 @@ public:
~VectorBundle() = default;
uint32_t subspaces() const noexcept { return _subspaces; }
vespalib::eval::TypedCells cells(uint32_t subspace) const noexcept { // Returns TypedCells for the given subspace index within _data.
- assert(subspace < _subspaces);
return {static_cast<const char*>(_data) + _subspace_mem_size * subspace, _cell_type, _subspace_size}; // Byte offset = _subspace_mem_size * subspace. NOTE(review): subspace is not range-checked here once the assert is removed - presumably callers keep it below subspaces(); confirm.
}
};