Merge pull request #31210 from vespa-engine/balder/faster-int8float

Speed up dotproduct for int8.
author: Henning Baldersheim <balder@yahoo-inc.com> 2024-05-16 11:00:37 +0200
committer: GitHub <noreply@github.com> 2024-05-16 11:00:37 +0200
commit: ea3e8e1c9187315fe9cc33bf936ff2af01220d68 (patch)
tree: 6d22501424ad485ba35010ee42271b499a9a17d9 /searchlib
parent: dac725c451f14ae6377de9c560490ef16135266c (diff)
parent: 329a72cc2673bf770f0b63a6ab90986ec76459ca (diff)
7 files changed, 29 insertions, 23 deletions
diff --git a/searchlib/src/tests/tensor/distance_functions/distance_functions_benchmark.cpp b/searchlib/src/tests/tensor/distance_functions/distance_functions_benchmark.cpp
index 15d6040a11a..04a2fa1cf2f 100644
--- a/searchlib/src/tests/tensor/distance_functions/distance_functions_benchmark.cpp
+++ b/searchlib/src/tests/tensor/distance_functions/distance_functions_benchmark.cpp
@@ -58,12 +58,12 @@ void benchmark(size_t iterations, size_t elems) __attribute_noinline__;
 template<typename T>
 void benchmark(size_t iterations, size_t elems, const DistanceFunctionFactory & df) {
     std::vector<T> av, bv;
-    srand(7);
+    srandom(7);
     av.reserve(elems);
     bv.reserve(elems);
     for (size_t i(0); i < elems; i++) {
-        av.push_back(rand());
-        bv.push_back(rand());
+        av.push_back(random()%128);
+        bv.push_back(random()%128);
     }
     TypedCells a_cells(av), b_cells(bv);
 
@@ -78,17 +78,17 @@ void benchmark(size_t iterations, size_t elems, const std::string & dist_functio
         benchmark<T>(iterations, elems, EuclideanDistanceFunctionFactory<T>());
     }
     if (dist_functions.find("angular") != npos) {
-        if (std::is_same<T, double>() || std::is_same<T, float>()) {
+        if ( ! std::is_same<T, BFloat16>()) {
             benchmark<T>(iterations, elems, AngularDistanceFunctionFactory<T>());
         }
     }
     if (dist_functions.find("prenorm") != npos) {
-        if (std::is_same<T, double>() || std::is_same<T, float>()) {
+        if ( ! std::is_same<T, BFloat16>()) {
             benchmark<T>(iterations, elems, PrenormalizedAngularDistanceFunctionFactory<T>());
         }
     }
     if (dist_functions.find("mips") != npos) {
-        if (std::is_same<T, double>() || std::is_same<T, float>() || std::is_same<T, Int8Float>()) {
+        if ( !std::is_same<T, BFloat16>()) {
             benchmark<T>(iterations, elems, MipsDistanceFunctionFactory<T>());
         }
     }
diff --git a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
index ef40381c807..d1ebb1f4e4e 100644
--- a/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/angular_distance.cpp
@@ -9,6 +9,7 @@
 using vespalib::typify_invoke;
 using vespalib::eval::TypifyCellType;
 using vespalib::eval::TypedCells;
+using vespalib::eval::Int8Float;
 
 namespace search::tensor {
 
@@ -26,16 +27,16 @@ public:
           _lhs(_tmpSpace.storeLhs(lhs))
     {
         auto a = _lhs.data();
-        _lhs_norm_sq = _computer.dotProduct(a, a, lhs.size);
+        _lhs_norm_sq = _computer.dotProduct(cast(a), cast(a), lhs.size);
     }
     double calc(TypedCells rhs) const noexcept override {
         size_t sz = _lhs.size();
         vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs);
         auto a = _lhs.data();
         auto b = rhs_vector.data();
-        double b_norm_sq = _computer.dotProduct(b, b, sz);
+        double b_norm_sq = _computer.dotProduct(cast(b), cast(b), sz);
         double squared_norms = _lhs_norm_sq * b_norm_sq;
-        double dot_product = _computer.dotProduct(a, b, sz);
+        double dot_product = _computer.dotProduct(cast(a), cast(b), sz);
         double div = (squared_norms > 0) ? sqrt(squared_norms) : 1.0;
         double cosine_similarity = dot_product / div;
         double distance = 1.0 - cosine_similarity; // in range [0,2]
@@ -84,5 +85,6 @@ AngularDistanceFunctionFactory<FloatType>::for_insertion_vector(TypedCells lhs)
 
 template class AngularDistanceFunctionFactory<float>;
 template class AngularDistanceFunctionFactory<double>;
+template class AngularDistanceFunctionFactory<Int8Float>;
 
 }
diff --git a/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h b/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h
index 85089196a7a..318271835ad 100644
--- a/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h
+++ b/searchlib/src/vespa/searchlib/tensor/bound_distance_function.h
@@ -19,6 +19,7 @@ class BoundDistanceFunction : public DistanceConverter {
 public:
     using UP = std::unique_ptr<BoundDistanceFunction>;
     using TypedCells = vespalib::eval::TypedCells;
+    using Int8Float = vespalib::eval::Int8Float;
 
     BoundDistanceFunction() noexcept = default;
 
@@ -29,6 +30,10 @@ public:
 
     // calculate internal distance, early return allowed if > limit
     virtual double calc_with_limit(TypedCells rhs, double limit) const noexcept = 0;
+protected:
+    static const double *cast(const double * p) { return p; }
+    static const float *cast(const float * p) { return p; }
+    static const int8_t *cast(const Int8Float * p) { return reinterpret_cast<const int8_t *>(p); }
 };
 
 }
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
index ed08df5866e..f39994dfdcf 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/distance_function_factory.cpp
@@ -6,6 +6,7 @@
 
 using search::attribute::DistanceMetric;
 using vespalib::eval::CellType;
+using vespalib::eval::Int8Float;
 
 namespace search::tensor {
 
@@ -16,25 +17,27 @@ make_distance_function_factory(DistanceMetric variant, CellType cell_type)
         case DistanceMetric::Angular:
             switch (cell_type) {
                 case CellType::DOUBLE: return std::make_unique<AngularDistanceFunctionFactory<double>>();
+                case CellType::INT8:   return std::make_unique<AngularDistanceFunctionFactory<Int8Float>>();
                 default:               return std::make_unique<AngularDistanceFunctionFactory<float>>();
             }
         case DistanceMetric::Euclidean:
             switch (cell_type) {
-                case CellType::DOUBLE: return std::make_unique<EuclideanDistanceFunctionFactory<double>>();
-                case CellType::INT8:   return std::make_unique<EuclideanDistanceFunctionFactory<vespalib::eval::Int8Float>>();
+                case CellType::DOUBLE:   return std::make_unique<EuclideanDistanceFunctionFactory<double>>();
+                case CellType::INT8:     return std::make_unique<EuclideanDistanceFunctionFactory<Int8Float>>();
                 case CellType::BFLOAT16: return std::make_unique<EuclideanDistanceFunctionFactory<vespalib::BFloat16>>();
-                default:               return std::make_unique<EuclideanDistanceFunctionFactory<float>>();
+                default:                 return std::make_unique<EuclideanDistanceFunctionFactory<float>>();
             }
         case DistanceMetric::InnerProduct:
         case DistanceMetric::PrenormalizedAngular:
             switch (cell_type) {
                 case CellType::DOUBLE: return std::make_unique<PrenormalizedAngularDistanceFunctionFactory<double>>();
+                case CellType::INT8:   return std::make_unique<PrenormalizedAngularDistanceFunctionFactory<Int8Float>>();
                 default:               return std::make_unique<PrenormalizedAngularDistanceFunctionFactory<float>>();
             }
         case DistanceMetric::Dotproduct:
             switch (cell_type) {
                 case CellType::DOUBLE: return std::make_unique<MipsDistanceFunctionFactory<double>>();
-                case CellType::INT8:   return std::make_unique<MipsDistanceFunctionFactory<vespalib::eval::Int8Float>>();
+                case CellType::INT8:   return std::make_unique<MipsDistanceFunctionFactory<Int8Float>>();
                 default:               return std::make_unique<MipsDistanceFunctionFactory<float>>();
             }
         case DistanceMetric::GeoDegrees:
@@ -42,7 +45,7 @@ make_distance_function_factory(DistanceMetric variant, CellType cell_type)
         case DistanceMetric::Hamming:
             switch (cell_type) {
                 case CellType::DOUBLE: return std::make_unique<HammingDistanceFunctionFactory<double>>();
-                case CellType::INT8:   return std::make_unique<HammingDistanceFunctionFactory<vespalib::eval::Int8Float>>();
+                case CellType::INT8:   return std::make_unique<HammingDistanceFunctionFactory<Int8Float>>();
                 default:               return std::make_unique<HammingDistanceFunctionFactory<float>>();
             }
     }
diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
index 355110b2f90..62b92b43ad9 100644
--- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.cpp
@@ -16,14 +16,11 @@ using vespalib::BFloat16;
 
 template<typename AttributeCellType>
 class BoundEuclideanDistance final : public BoundDistanceFunction {
-    using FloatType = std::conditional_t<std::is_same<AttributeCellType,BFloat16>::value,float,AttributeCellType>;
+    using FloatType = std::conditional_t<std::is_same<AttributeCellType, BFloat16>::value, float, AttributeCellType>;
 private:
     const vespalib::hwaccelrated::IAccelrated & _computer;
     mutable TemporaryVectorStore<FloatType> _tmpSpace;
     const vespalib::ConstArrayRef<FloatType> _lhs_vector;
-    static const double *cast(const double * p) { return p; }
-    static const float *cast(const float * p) { return p; }
-    static const int8_t *cast(const Int8Float * p) { return reinterpret_cast<const int8_t *>(p); }
 public:
     explicit BoundEuclideanDistance(TypedCells lhs)
         : _computer(vespalib::hwaccelrated::IAccelrated::getAccelerator()),
diff --git a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp
index fa47187fec9..5bc727ebd97 100644
--- a/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/mips_distance_transform.cpp
@@ -19,9 +19,6 @@ class BoundMipsDistanceFunction final : public BoundDistanceFunction {
     using ExtraDimT = std::conditional_t<extra_dim,double,std::monostate>;
     [[no_unique_address]] ExtraDimT _lhs_extra_dim;
 
-    static const double *cast(const double * p) { return p; }
-    static const float *cast(const float * p) { return p; }
-    static const int8_t *cast(const Int8Float * p) { return reinterpret_cast<const int8_t *>(p); }
 public:
     BoundMipsDistanceFunction(TypedCells lhs, MaximumSquaredNormStore& sq_norm_store)
         : BoundDistanceFunction(),
diff --git a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp
index 58e92cbe2d4..6f0966e7fb3 100644
--- a/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/prenormalized_angular_distance.cpp
@@ -6,6 +6,7 @@
 
 using vespalib::typify_invoke;
 using vespalib::eval::TypifyCellType;
+using vespalib::eval::Int8Float;
 
 namespace search::tensor {
 
@@ -23,7 +24,7 @@ public:
           _lhs(_tmpSpace.storeLhs(lhs))
     {
         auto a = _lhs.data();
-        _lhs_norm_sq = _computer.dotProduct(a, a, lhs.size);
+        _lhs_norm_sq = _computer.dotProduct(cast(a), cast(a), lhs.size);
         if (_lhs_norm_sq <= 0.0) {
             _lhs_norm_sq = 1.0;
         }
@@ -32,7 +33,7 @@ public:
         vespalib::ConstArrayRef<FloatType> rhs_vector = _tmpSpace.convertRhs(rhs);
         auto a = _lhs.data();
         auto b = rhs_vector.data();
-        double dot_product = _computer.dotProduct(a, b, _lhs.size());
+        double dot_product = _computer.dotProduct(cast(a), cast(b), _lhs.size());
         double distance = _lhs_norm_sq - dot_product;
         return distance;
     }
@@ -76,5 +77,6 @@ PrenormalizedAngularDistanceFunctionFactory<FloatType>::for_insertion_vector(Typ
 
 template class PrenormalizedAngularDistanceFunctionFactory<float>;
 template class PrenormalizedAngularDistanceFunctionFactory<double>;
+template class PrenormalizedAngularDistanceFunctionFactory<Int8Float>;
 
 }
author	Henning Baldersheim <balder@yahoo-inc.com>	2024-05-16 11:00:37 +0200
committer	GitHub <noreply@github.com>	2024-05-16 11:00:37 +0200
commit	ea3e8e1c9187315fe9cc33bf936ff2af01220d68 (patch)
tree	6d22501424ad485ba35010ee42271b499a9a17d9 /searchlib
parent	dac725c451f14ae6377de9c560490ef16135266c (diff)
parent	329a72cc2673bf770f0b63a6ab90986ec76459ca (diff)