Speed up insertion into HNSW index by referencing vector memory in more cases.

When inserting vectors into an HnswIndex, we use DistanceFunctionFactory::for_insertion_vector() to create a distance function each time distance calculations are needed for a candidate vector. All these vectors are of the same type, as given by the tensor type of the TensorAttribute. The lifetime of the vectors is also taken care of, as they are either stored in the TensorAttribute or exist in the document being inserted. This PR speeds up insertion for vectors of type int8, float and double. This is done by referencing the vector memory directly in the distance function, instead of copying (and transforming) it into a temporary memory buffer. Vectors of type bfloat16 are still transformed to float before distance calculations.
author: Geir Storli <geirst@vespa.ai> 2024-06-11 13:48:16 +0000
committer: Geir Storli <geirst@vespa.ai> 2024-06-12 12:04:01 +0000
commit: f6c7a36de05296f6cc5ce0eb77fd4a017071303e (patch)
tree: e443300bffbf80021742be39a9f1445b2d906372
parent: 12150d28d136f5e3f2fae5acc551a5af0e7a9af3 (diff)
14 files changed, 251 insertions, 67 deletions
diff --git a/searchlib/src/tests/tensor/distance_functions/distance_functions_benchmark.cpp b/searchlib/src/tests/tensor/distance_functions/distance_functions_benchmark.cpp
index 14a0adac651..0cb75075f5e 100644
--- a/searchlib/src/tests/tensor/distance_functions/distance_functions_benchmark.cpp
+++ b/searchlib/src/tests/tensor/distance_functions/distance_functions_benchmark.cpp
@@ -75,7 +75,11 @@ void benchmark(size_t iterations, size_t elems, const DistanceFunctionFactory &
 template<typename T>
 void benchmark(size_t iterations, size_t elems, const std::string & dist_functions) {
     if (dist_functions.find("euclid") != npos) {
-        benchmark<T>(iterations, elems, EuclideanDistanceFunctionFactory<T>());
+        if constexpr ( ! std::is_same<T, BFloat16>()) {
+            benchmark<T>(iterations, elems, EuclideanDistanceFunctionFactory<T>());
+        } else {
+            benchmark<BFloat16>(iterations, elems, EuclideanDistanceFunctionFactory<float>());
+        }
     }
     if (dist_functions.find("angular") != npos) {
         if constexpr ( ! std::is_same<T, BFloat16>()) {
diff --git a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
index eeae12e1695..c0296548b5a 100644
--- a/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
+++ b/searchlib/src/tests/tensor/distance_functions/distance_functions_test.cpp
@@ -13,9 +13,11 @@
 LOG_SETUP("distance_function_test");
 
 using namespace search::tensor;
+using search::attribute::DistanceMetric;
+using vespalib::BFloat16;
+using vespalib::eval::CellType;
 using vespalib::eval::Int8Float;
 using vespalib::eval::TypedCells;
-using search::attribute::DistanceMetric;
 
 template <typename T>
 TypedCells t(const std::vector<T> &v) { return TypedCells(v); }
@@ -716,6 +718,73 @@ TEST(DistanceFunctionsTest, transformed_mips_growing_norm)
     EXPECT_GT(-29900.0, f->calc(t(p9d)));
 }
 
+template <typename FloatType>
+void
+expect_reference_insertion_vector(FloatType exp_dist, DistanceMetric metric, CellType cell_type)
+{
+    std::vector<FloatType> lhs{0.0, 1.0};
+    std::vector<FloatType> rhs{0.0, 1.0};
+    auto factory = make_distance_function_factory(metric, cell_type);
+    auto func = factory->for_insertion_vector(t(lhs));
+    // Updating the insertion vector should be reflected in the calculation.
+    lhs[0] = 1.0;
+    lhs[1] = 0.0;
+    EXPECT_EQ(exp_dist, func->calc(t(rhs)));
+}
+
+template <typename FloatType>
+void
+expect_not_reference_insertion_vector(FloatType exp_dist, DistanceMetric metric, CellType cell_type)
+{
+    std::vector<FloatType> lhs{1.0, 0.0};
+    std::vector<FloatType> rhs{0.0, 1.0};
+    auto factory = make_distance_function_factory(metric, cell_type);
+    auto func = factory->for_insertion_vector(t(lhs));
+    // Updating the insertion vector should NOT be reflected in the calculation, as a copy has been created.
+    lhs[0] = 0.0;
+    lhs[1] = 1.0;
+    EXPECT_EQ(exp_dist, func->calc(t(rhs)));
+}
+
+TEST(DistanceFunctionsTest, angular_can_reference_insertion_vector)
+{
+    expect_reference_insertion_vector<float>(1.0, DistanceMetric::Angular, CellType::FLOAT);
+    expect_reference_insertion_vector<double>(1.0, DistanceMetric::Angular, CellType::DOUBLE);
+    expect_reference_insertion_vector<Int8Float>(1.0, DistanceMetric::Angular, CellType::INT8);
+    expect_not_reference_insertion_vector<BFloat16>(1.0, DistanceMetric::Angular, CellType::BFLOAT16);
+}
+
+TEST(DistanceFunctionsTest, prenormalized_angular_can_reference_insertion_vector)
+{
+    expect_reference_insertion_vector<float>(1.0, DistanceMetric::PrenormalizedAngular, CellType::FLOAT);
+    expect_reference_insertion_vector<double>(1.0, DistanceMetric::PrenormalizedAngular, CellType::DOUBLE);
+    expect_reference_insertion_vector<Int8Float>(1.0, DistanceMetric::PrenormalizedAngular, CellType::INT8);
+    expect_not_reference_insertion_vector<BFloat16>(1.0, DistanceMetric::PrenormalizedAngular, CellType::BFLOAT16);
+}
+
+TEST(DistanceFunctionsTest, euclidean_can_reference_insertion_vector)
+{
+    expect_reference_insertion_vector<float>(2.0, DistanceMetric::Euclidean, CellType::FLOAT);
+    expect_reference_insertion_vector<double>(2.0, DistanceMetric::Euclidean, CellType::DOUBLE);
+    expect_reference_insertion_vector<Int8Float>(2.0, DistanceMetric::Euclidean, CellType::INT8);
+    expect_not_reference_insertion_vector<BFloat16>(2.0, DistanceMetric::Euclidean, CellType::BFLOAT16);
+}
+
+TEST(DistanceFunctionsTest, dotproduct_can_reference_insertion_vector)
+{
+    expect_reference_insertion_vector<float>(0.0, DistanceMetric::Dotproduct, CellType::FLOAT);
+    expect_reference_insertion_vector<double>(0.0, DistanceMetric::Dotproduct, CellType::DOUBLE);
+    expect_reference_insertion_vector<Int8Float>(0.0, DistanceMetric::Dotproduct, CellType::INT8);
+    expect_not_reference_insertion_vector<BFloat16>(0.0, DistanceMetric::Dotproduct, CellType::BFLOAT16);
+}
+
+TEST(DistanceFunctionsTest, hamming_can_reference_insertion_vector)
+{
+    expect_reference_insertion_vector<float>(2.0, DistanceMetric::Hamming, CellType::FLOAT);
+    expect_reference_insertion_vector<double>(2.0, DistanceMetric::Hamming, CellType::DOUBLE);
+    expect_reference_insertion_vector<Int8Float>(2.0, DistanceMetric::Hamming, CellType::INT8);
+    expect_not_reference_insertion_vector<BFloat16>(2.0, DistanceMetric::Hamming, CellType::BFLOAT16);
+}
 
 GTEST_MAIN_RUN_ALL_TESTS()
 
diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
index d1ebb1f4e4e..2f07fa4cdc7 100644
--- a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
@@ -13,11 +13,12 @@ using vespalib::eval::Int8Float;
 
 namespace search::tensor {
 
-template<typename FloatType>
+template <typename VectorStoreType>
 class BoundAngularDistance final : public BoundDistanceFunction {
 private:
+    using FloatType = VectorStoreType::FloatType;
     const vespalib::hwaccelrated::IAccelrated & _computer;
-    mutable TemporaryVectorStore<FloatType> _tmpSpace;
+    mutable VectorStoreType _tmpSpace;
     const vespalib::ConstArrayRef<FloatType> _lhs;
     double _lhs_norm_sq;
 public:
@@ -66,21 +67,30 @@ public:
     }
 };
 
-template class BoundAngularDistance<float>;
-template class BoundAngularDistance<double>;
+template class BoundAngularDistance<TemporaryVectorStore<float>>;
+template class BoundAngularDistance<TemporaryVectorStore<double>>;
+template class BoundAngularDistance<TemporaryVectorStore<Int8Float>>;
+template class BoundAngularDistance<ReferenceVectorStore<float>>;
+template class BoundAngularDistance<ReferenceVectorStore<double>>;
+template class BoundAngularDistance<ReferenceVectorStore<Int8Float>>;
 
 template <typename FloatType>
 BoundDistanceFunction::UP
 AngularDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const {
-    using DFT = BoundAngularDistance<FloatType>;
+    using DFT = BoundAngularDistance<TemporaryVectorStore<FloatType>>;
     return std::make_unique<DFT>(lhs);
 }
 
 template <typename FloatType>
 BoundDistanceFunction::UP
 AngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const {
-    using DFT = BoundAngularDistance<FloatType>;
-    return std::make_unique<DFT>(lhs);
+    if (_reference_insertion_vector) {
+        using DFT = BoundAngularDistance<ReferenceVectorStore<FloatType>>;
+        return std::make_unique<DFT>(lhs);
+    } else {
+        using DFT = BoundAngularDistance<TemporaryVectorStore<FloatType>>;
+        return std::make_unique<DFT>(lhs);
+    }
 }
 
 template class AngularDistanceFunctionFactory<float>;
diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.h b/searchlib/src/vespa/searchlib/tensor/angular_distance.h
index aa51f58b3cd..7dcc6e80484 100644
--- a/searchlib/src/vespa/searchlib/tensor/angular_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.h
@@ -10,11 +10,19 @@ namespace search::tensor {
  * Calculates angular distance between vectors
  * Will use instruction optimal for the cpu it is running on
  * after converting both vectors to an optimal cell type.
+ *
+ * When reference_insertion_vector == true:
+ *   - Vectors passed to for_insertion_vector() and BoundDistanceFunction::calc() are assumed to have the same type as FloatType.
+ *   - The TypedCells memory is just referenced and used directly in calculations,
+ *     and thus no transformation via a temporary memory buffer occurs.
  */
 template <typename FloatType>
 class AngularDistanceFunctionFactory : public DistanceFunctionFactory {
+private:
+    bool _reference_insertion_vector;
 public:
-    AngularDistanceFunctionFactory() = default;
+    AngularDistanceFunctionFactory() noexcept : AngularDistanceFunctionFactory(false) {}
+    AngularDistanceFunctionFactory(bool reference_insertion_vector) noexcept : _reference_insertion_vector(reference_insertion_vector) {}
     BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override;
     BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override;
 };
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
index f39994dfdcf..b8918e23ce7 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
@@ -16,40 +16,44 @@ make_distance_function_factory(DistanceMetric variant, CellType cell_type)
     switch (variant) {
         case DistanceMetric::Angular:
             switch (cell_type) {
-                case CellType::DOUBLE: return std::make_unique<AngularDistanceFunctionFactory<double>>();
-                case CellType::INT8:   return std::make_unique<AngularDistanceFunctionFactory<Int8Float>>();
+                case CellType::DOUBLE: return std::make_unique<AngularDistanceFunctionFactory<double>>(true);
+                case CellType::INT8:   return std::make_unique<AngularDistanceFunctionFactory<Int8Float>>(true);
+                case CellType::FLOAT:  return std::make_unique<AngularDistanceFunctionFactory<float>>(true);
                 default:               return std::make_unique<AngularDistanceFunctionFactory<float>>();
             }
         case DistanceMetric::Euclidean:
             switch (cell_type) {
-                case CellType::DOUBLE:   return std::make_unique<EuclideanDistanceFunctionFactory<double>>();
-                case CellType::INT8:     return std::make_unique<EuclideanDistanceFunctionFactory<Int8Float>>();
-                case CellType::BFLOAT16: return std::make_unique<EuclideanDistanceFunctionFactory<vespalib::BFloat16>>();
+                case CellType::DOUBLE:   return std::make_unique<EuclideanDistanceFunctionFactory<double>>(true);
+                case CellType::INT8:     return std::make_unique<EuclideanDistanceFunctionFactory<Int8Float>>(true);
+                case CellType::FLOAT:    return std::make_unique<EuclideanDistanceFunctionFactory<float>>(true);
                 default:                 return std::make_unique<EuclideanDistanceFunctionFactory<float>>();
             }
         case DistanceMetric::InnerProduct:
         case DistanceMetric::PrenormalizedAngular:
             switch (cell_type) {
-                case CellType::DOUBLE: return std::make_unique<PrenormalizedAngularDistanceFunctionFactory<double>>();
-                case CellType::INT8:   return std::make_unique<PrenormalizedAngularDistanceFunctionFactory<Int8Float>>();
-                default:               return std::make_unique<PrenormalizedAngularDistanceFunctionFactory<float>>();
+                case CellType::DOUBLE:   return std::make_unique<PrenormalizedAngularDistanceFunctionFactory<double>>(true);
+                case CellType::INT8:     return std::make_unique<PrenormalizedAngularDistanceFunctionFactory<Int8Float>>(true);
+                case CellType::FLOAT:    return std::make_unique<PrenormalizedAngularDistanceFunctionFactory<float>>(true);
+                default:                 return std::make_unique<PrenormalizedAngularDistanceFunctionFactory<float>>();
             }
         case DistanceMetric::Dotproduct:
             switch (cell_type) {
-                case CellType::DOUBLE: return std::make_unique<MipsDistanceFunctionFactory<double>>();
-                case CellType::INT8:   return std::make_unique<MipsDistanceFunctionFactory<Int8Float>>();
+                case CellType::DOUBLE: return std::make_unique<MipsDistanceFunctionFactory<double>>(true);
+                case CellType::INT8:   return std::make_unique<MipsDistanceFunctionFactory<Int8Float>>(true);
+                case CellType::FLOAT:  return std::make_unique<MipsDistanceFunctionFactory<float>>(true);
                 default:               return std::make_unique<MipsDistanceFunctionFactory<float>>();
             }
         case DistanceMetric::GeoDegrees:
             return std::make_unique<GeoDistanceFunctionFactory>();
         case DistanceMetric::Hamming:
             switch (cell_type) {
-                case CellType::DOUBLE: return std::make_unique<HammingDistanceFunctionFactory<double>>();
-                case CellType::INT8:   return std::make_unique<HammingDistanceFunctionFactory<Int8Float>>();
+                case CellType::DOUBLE: return std::make_unique<HammingDistanceFunctionFactory<double>>(true);
+                case CellType::INT8:   return std::make_unique<HammingDistanceFunctionFactory<Int8Float>>(true);
+                case CellType::FLOAT:  return std::make_unique<HammingDistanceFunctionFactory<float>>(true);
                 default:               return std::make_unique<HammingDistanceFunctionFactory<float>>();
             }
     }
-    // not reached:
+    // Not reached:
     return {};
 }
 
diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
index 62b92b43ad9..02da4f496af 100644
--- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
@@ -12,14 +12,13 @@ using vespalib::eval::TypedCells;
 namespace search::tensor {
 
 using vespalib::eval::Int8Float;
-using vespalib::BFloat16;
 
-template<typename AttributeCellType>
+template <typename VectorStoreType>
 class BoundEuclideanDistance final : public BoundDistanceFunction {
-    using FloatType = std::conditional_t<std::is_same<AttributeCellType, BFloat16>::value, float, AttributeCellType>;
 private:
+    using FloatType = VectorStoreType::FloatType;
     const vespalib::hwaccelrated::IAccelrated & _computer;
-    mutable TemporaryVectorStore<FloatType> _tmpSpace;
+    mutable VectorStoreType _tmpSpace;
     const vespalib::ConstArrayRef<FloatType> _lhs_vector;
 public:
     explicit BoundEuclideanDistance(TypedCells lhs)
@@ -46,27 +45,33 @@ public:
     }
 };
 
-template class BoundEuclideanDistance<Int8Float>;
-template class BoundEuclideanDistance<BFloat16>;
-template class BoundEuclideanDistance<float>;
-template class BoundEuclideanDistance<double>;
+template class BoundEuclideanDistance<TemporaryVectorStore<Int8Float>>;
+template class BoundEuclideanDistance<TemporaryVectorStore<float>>;
+template class BoundEuclideanDistance<TemporaryVectorStore<double>>;
+template class BoundEuclideanDistance<ReferenceVectorStore<Int8Float>>;
+template class BoundEuclideanDistance<ReferenceVectorStore<float>>;
+template class BoundEuclideanDistance<ReferenceVectorStore<double>>;
 
 template <typename FloatType>
 BoundDistanceFunction::UP
 EuclideanDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const {
-    using DFT = BoundEuclideanDistance<FloatType>;
+    using DFT = BoundEuclideanDistance<TemporaryVectorStore<FloatType>>;
     return std::make_unique<DFT>(lhs);
 }
 
 template <typename FloatType>
 BoundDistanceFunction::UP
 EuclideanDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const {
-    using DFT = BoundEuclideanDistance<FloatType>;
-    return std::make_unique<DFT>(lhs);
+    if (_reference_insertion_vector) {
+        using DFT = BoundEuclideanDistance<ReferenceVectorStore<FloatType>>;
+        return std::make_unique<DFT>(lhs);
+    } else {
+        using DFT = BoundEuclideanDistance<TemporaryVectorStore<FloatType>>;
+        return std::make_unique<DFT>(lhs);
+    }
 }
 
 template class EuclideanDistanceFunctionFactory<Int8Float>;
-template class EuclideanDistanceFunctionFactory<BFloat16>;
 template class EuclideanDistanceFunctionFactory<float>;
 template class EuclideanDistanceFunctionFactory<double>;
 
diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
index 78460c93307..bd82e48fb0b 100644
--- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h
@@ -10,11 +10,19 @@ namespace search::tensor {
  * Calculates the square of the standard Euclidean distance.
  * Will use instruction optimal for the cpu it is running on
  * after converting both vectors to an optimal cell type.
+ *
+ * When reference_insertion_vector == true:
+ *   - Vectors passed to for_insertion_vector() and BoundDistanceFunction::calc() are assumed to have the same type as FloatType.
+ *   - The TypedCells memory is just referenced and used directly in calculations,
+ *     and thus no transformation via a temporary memory buffer occurs.
  */
 template <typename FloatType>
 class EuclideanDistanceFunctionFactory : public DistanceFunctionFactory {
+private:
+    bool _reference_insertion_vector;
 public:
-    EuclideanDistanceFunctionFactory() noexcept = default;
+    EuclideanDistanceFunctionFactory() noexcept : EuclideanDistanceFunctionFactory(false) {}
+    EuclideanDistanceFunctionFactory(bool reference_insertion_vector) noexcept : _reference_insertion_vector(reference_insertion_vector) {}
     BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override;
     BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override;
 };
diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
index 7ea2e440a51..281a20cef87 100644
--- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.cpp
@@ -5,17 +5,18 @@
 #include <vespa/vespalib/util/binary_hamming_distance.h>
 
 using vespalib::typify_invoke;
-using vespalib::eval::TypifyCellType;
 using vespalib::eval::TypedCells;
+using vespalib::eval::TypifyCellType;
 
 namespace search::tensor {
 
 using vespalib::eval::Int8Float;
 
-template<typename FloatType>
+template <typename VectorStoreType>
 class BoundHammingDistance final : public BoundDistanceFunction {
 private:
-    mutable TemporaryVectorStore<FloatType> _tmpSpace;
+    using FloatType = VectorStoreType::FloatType;
+    mutable VectorStoreType _tmpSpace;
     const vespalib::ConstArrayRef<FloatType> _lhs_vector;
 public:
     explicit BoundHammingDistance(TypedCells lhs)
@@ -47,18 +48,30 @@ public:
     }
 };
 
+template class BoundHammingDistance<TemporaryVectorStore<Int8Float>>;
+template class BoundHammingDistance<TemporaryVectorStore<float>>;
+template class BoundHammingDistance<TemporaryVectorStore<double>>;
+template class BoundHammingDistance<ReferenceVectorStore<Int8Float>>;
+template class BoundHammingDistance<ReferenceVectorStore<float>>;
+template class BoundHammingDistance<ReferenceVectorStore<double>>;
+
 template <typename FloatType>
 BoundDistanceFunction::UP
 HammingDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const {
-    using DFT = BoundHammingDistance<FloatType>;
+    using DFT = BoundHammingDistance<TemporaryVectorStore<FloatType>>;
     return std::make_unique<DFT>(lhs);
 }
 
 template <typename FloatType>
 BoundDistanceFunction::UP
 HammingDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const {
-    using DFT = BoundHammingDistance<FloatType>;
-    return std::make_unique<DFT>(lhs);
+    if (_reference_insertion_vector) {
+        using DFT = BoundHammingDistance<ReferenceVectorStore<FloatType>>;
+        return std::make_unique<DFT>(lhs);
+    } else {
+        using DFT = BoundHammingDistance<TemporaryVectorStore<FloatType>>;
+        return std::make_unique<DFT>(lhs);
+    }
 }
 
 template class HammingDistanceFunctionFactory<Int8Float>;
diff --git a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
index 2e3b75cc61f..768665653c0 100644
--- a/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/hamming_distance.h
@@ -7,15 +7,22 @@
 namespace search::tensor {
 
 /**
- * Calculates the Hamming distance defined as
- * "number of cells where the values are different"
- * or (for int8 cells, aka binary data only)
- * "number of bits that are different"
+ * Calculates the Hamming distance defined as "number of cells where the values are different"
+ * or (for int8 cells, aka binary data only) "number of bits that are different".
+ *
+ * When reference_insertion_vector == true:
+ *   - Vectors passed to for_insertion_vector() and BoundDistanceFunction::calc() are assumed to have the same type as FloatType.
+ *   - The TypedCells memory is referenced and used directly in calculations,
+ *     and thus no transformation via a temporary memory buffer occurs.
  */
 template <typename FloatType>
 class HammingDistanceFunctionFactory : public DistanceFunctionFactory {
+private:
+    bool _reference_insertion_vector;
 public:
-    HammingDistanceFunctionFactory() = default;
+    HammingDistanceFunctionFactory() noexcept : HammingDistanceFunctionFactory(false) {}
+    HammingDistanceFunctionFactory(bool reference_insertion_vector) noexcept : _reference_insertion_vector(reference_insertion_vector) {}
+
     BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override;
     BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override;
 };
diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp
index 5bc727ebd97..f3a60668141 100644
--- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp
@@ -10,9 +10,11 @@ using vespalib::eval::Int8Float;
 
 namespace search::tensor {
 
-template<typename FloatType, bool extra_dim>
+template <typename VectorStoreType, bool extra_dim>
 class BoundMipsDistanceFunction final : public BoundDistanceFunction {
-    mutable TemporaryVectorStore<FloatType> _tmpSpace;
+private:
+    using FloatType = VectorStoreType::FloatType;
+    mutable VectorStoreType _tmpSpace;
     const vespalib::ConstArrayRef<FloatType> _lhs_vector;
     const vespalib::hwaccelrated::IAccelrated & _computer;
     double _max_sq_norm;
@@ -47,7 +49,7 @@ public:
         double dp = _computer.dotProduct(cast(a), cast(b), rhs.size);
         if constexpr (extra_dim) {
             double rhs_sq_norm = _computer.dotProduct(cast(b), cast(b), rhs.size);
-	    // avoid sqrt(negative) for robustness:
+            // avoid sqrt(negative) for robustness:
             double diff = std::max(0.0, _max_sq_norm - rhs_sq_norm);
             double rhs_extra_dim = std::sqrt(diff);
             dp += _lhs_extra_dim * rhs_extra_dim;
@@ -74,13 +76,17 @@ public:
 template<typename FloatType>
 BoundDistanceFunction::UP
 MipsDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const {
-    return std::make_unique<BoundMipsDistanceFunction<FloatType, false>>(lhs, *_sq_norm_store);
+    return std::make_unique<BoundMipsDistanceFunction<TemporaryVectorStore<FloatType>, false>>(lhs, *_sq_norm_store);
 }
 
 template<typename FloatType>
 BoundDistanceFunction::UP
 MipsDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const {
-    return std::make_unique<BoundMipsDistanceFunction<FloatType, true>>(lhs, *_sq_norm_store);
+    if (_reference_insertion_vector) {
+        return std::make_unique<BoundMipsDistanceFunction<ReferenceVectorStore<FloatType>, true>>(lhs, *_sq_norm_store);
+    } else {
+        return std::make_unique<BoundMipsDistanceFunction<TemporaryVectorStore<FloatType>, true>>(lhs, *_sq_norm_store);
+    }
 };
 
 template class MipsDistanceFunctionFactory<Int8Float>;
diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h
index 336511ab78f..7b82661179f 100644
--- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h
+++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.h
@@ -55,11 +55,19 @@ public:
  * problem.  When inserting vectors, an extra dimension is
  * added ensuring behavior "as if" all vectors had length equal
  * to the longest vector inserted so far, or at least length 1.
+ *
+ * When reference_insertion_vector == true:
+ *   - Vectors passed to for_insertion_vector() and BoundDistanceFunction::calc() are assumed to have the same type as FloatType.
+ *   - The TypedCells memory is just referenced and used directly in calculations,
+ *     and thus no transformation via a temporary memory buffer occurs.
  */
-template<typename FloatType>
+template <typename FloatType>
 class MipsDistanceFunctionFactory : public MipsDistanceFunctionFactoryBase {
+private:
+    bool _reference_insertion_vector;
 public:
-    MipsDistanceFunctionFactory() noexcept = default;
+    MipsDistanceFunctionFactory() noexcept : MipsDistanceFunctionFactory(false) {}
+    MipsDistanceFunctionFactory(bool reference_insertion_vector) noexcept : _reference_insertion_vector(reference_insertion_vector) {}
     ~MipsDistanceFunctionFactory() override = default;
 
     BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override;
diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp
index 6f0966e7fb3..7c7a660f7ec 100644
--- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp
@@ -4,17 +4,18 @@
 #include "temporary_vector_store.h"
 #include <vespa/vespalib/hwaccelrated/iaccelrated.h>
 
-using vespalib::typify_invoke;
-using vespalib::eval::TypifyCellType;
 using vespalib::eval::Int8Float;
+using vespalib::eval::TypifyCellType;
+using vespalib::typify_invoke;
 
 namespace search::tensor {
 
-template<typename FloatType>
+template <typename VectorStoreType>
 class BoundPrenormalizedAngularDistance final : public BoundDistanceFunction {
 private:
+    using FloatType = VectorStoreType::FloatType;
     const vespalib::hwaccelrated::IAccelrated & _computer;
-    mutable TemporaryVectorStore<FloatType> _tmpSpace;
+    mutable VectorStoreType _tmpSpace;
     const vespalib::ConstArrayRef<FloatType> _lhs;
     double _lhs_norm_sq;
 public:
@@ -46,7 +47,7 @@ public:
     double to_rawscore(double distance) const noexcept override {
         double dot_product = _lhs_norm_sq - distance;
         double cosine_similarity = dot_product / _lhs_norm_sq;
-        // should be in in range [-1,1] but roundoff may cause problems:
+        // should be in range [-1,1] but roundoff may cause problems:
         cosine_similarity = std::min(1.0, cosine_similarity);
         cosine_similarity = std::max(-1.0, cosine_similarity);
         double cosine_distance = 1.0 - cosine_similarity; // in range [0,2]
@@ -58,21 +59,30 @@ public:
     }
 };
 
-template class BoundPrenormalizedAngularDistance<float>;
-template class BoundPrenormalizedAngularDistance<double>;
+template class BoundPrenormalizedAngularDistance<TemporaryVectorStore<float>>;
+template class BoundPrenormalizedAngularDistance<TemporaryVectorStore<double>>;
+template class BoundPrenormalizedAngularDistance<TemporaryVectorStore<Int8Float>>;
+template class BoundPrenormalizedAngularDistance<ReferenceVectorStore<float>>;
+template class BoundPrenormalizedAngularDistance<ReferenceVectorStore<double>>;
+template class BoundPrenormalizedAngularDistance<ReferenceVectorStore<Int8Float>>;
 
 template <typename FloatType>
 BoundDistanceFunction::UP
 PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_query_vector(TypedCells lhs) const {
-    using DFT = BoundPrenormalizedAngularDistance<FloatType>;
+    using DFT = BoundPrenormalizedAngularDistance<TemporaryVectorStore<FloatType>>;
     return std::make_unique<DFT>(lhs);
 }
 
 template <typename FloatType>
 BoundDistanceFunction::UP
 PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs) const {
-    using DFT = BoundPrenormalizedAngularDistance<FloatType>;
-    return std::make_unique<DFT>(lhs);
+    if (_reference_insertion_vector) {
+        using DFT = BoundPrenormalizedAngularDistance<ReferenceVectorStore<FloatType>>;
+        return std::make_unique<DFT>(lhs);
+    } else {
+        using DFT = BoundPrenormalizedAngularDistance<TemporaryVectorStore<FloatType>>;
+        return std::make_unique<DFT>(lhs);
+    }
 }
 
 template class PrenormalizedAngularDistanceFunctionFactory<float>;
diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h
index 6a791e0b6ec..639138df574 100644
--- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h
+++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.h
@@ -9,11 +9,19 @@ namespace search::tensor {
 /**
  * Calculates inner-product "distance" between vectors assuming a common norm.
  * Should give same ordering as Angular distance, but is less expensive.
+ *
+ * When reference_insertion_vector == true:
+ *   - Vectors passed to for_insertion_vector() and BoundDistanceFunction::calc() are assumed to have the same type as FloatType.
+ *   - The TypedCells memory is just referenced and used directly in calculations,
+ *     and thus no transformation via a temporary memory buffer occurs.
  */
 template <typename FloatType>
 class PrenormalizedAngularDistanceFunctionFactory : public DistanceFunctionFactory {
+private:
+    bool _reference_insertion_vector;
 public:
-    PrenormalizedAngularDistanceFunctionFactory() = default;
+    PrenormalizedAngularDistanceFunctionFactory() noexcept : PrenormalizedAngularDistanceFunctionFactory(false) {}
+    PrenormalizedAngularDistanceFunctionFactory(bool reference_insertion_vector) noexcept : _reference_insertion_vector(reference_insertion_vector) {}
     BoundDistanceFunction::UP for_query_vector(TypedCells lhs) const override;
     BoundDistanceFunction::UP for_insertion_vector(TypedCells lhs) const override;
 };
diff --git a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h
index 3dc237c85a4..d6702d8278a 100644
--- a/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h
+++ b/searchlib/src/vespa/searchlib/tensor/temporary_vector_store.h
@@ -6,9 +6,13 @@
 
 namespace search::tensor {
 
-/** helper class - temporary storage of possibly-converted vector cells */
-template <typename FloatType>
+/**
+ * Helper class containing temporary memory storage for possibly converted vector cells.
+ */
+template <typename FloatTypeT>
 class TemporaryVectorStore {
+public:
+    using FloatType = FloatTypeT;
 private:
     using TypedCells = vespalib::eval::TypedCells;
     std::vector<FloatType> _tmpSpace;
@@ -27,4 +31,24 @@ public:
     }
 };
 
+/**
+ * Helper class used when TypedCells vector memory is just referenced,
+ * and used directly in calculations without any transforms.
+ */
+template <typename FloatTypeT>
+class ReferenceVectorStore {
+public:
+    using FloatType = FloatTypeT;
+private:
+    using TypedCells = vespalib::eval::TypedCells;
+public:
+    explicit ReferenceVectorStore(size_t vector_size) noexcept { (void) vector_size; }
+    vespalib::ConstArrayRef<FloatType> storeLhs(TypedCells cells) noexcept {
+        return cells.unsafe_typify<FloatType>();
+    }
+    vespalib::ConstArrayRef<FloatType> convertRhs(TypedCells cells) noexcept {
+        return cells.unsafe_typify<FloatType>();
+    }
+};
+
 }
author	Geir Storli <geirst@vespa.ai>	2024-06-11 13:48:16 +0000
committer	Geir Storli <geirst@vespa.ai>	2024-06-12 12:04:01 +0000
commit	f6c7a36de05296f6cc5ce0eb77fd4a017071303e (patch)
tree	e443300bffbf80021742be39a9f1445b2d906372
parent	12150d28d136f5e3f2fae5acc551a5af0e7a9af3 (diff)