diff options
author | Geir Storli <geirst@verizonmedia.com> | 2020-03-06 17:24:48 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-03-06 17:24:48 +0100 |
commit | 318e9fa758f7bef5ad171edd2b1e9da4b5328b16 (patch) | |
tree | c82f2c106865e387128f04b79d5c20086f849285 /searchlib | |
parent | afbef0a2c9960ed651e3532c8b235f189084dcf5 (diff) | |
parent | 6ea5dfbf96ffd57307024387538e16f5a54548c3 (diff) |
Merge pull request #12488 from vespa-engine/geirst/hw-accel-euclidean-distance-function
First version of hardware accelerated calculation of the square of th…
Diffstat (limited to 'searchlib')
6 files changed, 69 insertions, 8 deletions
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp index 3ef0ab20cdd..fef063fb1cf 100644 --- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -148,8 +148,10 @@ public: class MockNearestNeighborIndexFactory : public NearestNeighborIndexFactory { std::unique_ptr<NearestNeighborIndex> make(const DocVectorAccess& vectors, + size_t vector_size, ValueType::CellType cell_type, const search::attribute::HnswIndexParams& params) const override { + (void) vector_size; (void) params; assert(cell_type == ValueType::CellType::DOUBLE); return std::make_unique<MockNearestNeighborIndex>(vectors); diff --git a/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp b/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp index 8b0c5e931a4..6b03b6a1c46 100644 --- a/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp +++ b/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp @@ -18,12 +18,21 @@ class LevelZeroGenerator : public RandomLevelGenerator { }; DistanceFunction::UP -make_distance_function(ValueType::CellType cell_type) +make_distance_function(size_t vector_size, ValueType::CellType cell_type) { - if (cell_type == ValueType::CellType::FLOAT) { - return std::make_unique<SquaredEuclideanDistance<float>>(); + bool hw_accelerated = (vector_size % 32) == 0; + if (hw_accelerated) { + if (cell_type == ValueType::CellType::FLOAT) { + return std::make_unique<HwAccelSquaredEuclideanDistance<float, 32>>(); + } else { + return std::make_unique<HwAccelSquaredEuclideanDistance<double, 32>>(); + } } else { - return std::make_unique<SquaredEuclideanDistance<double>>(); + if (cell_type == ValueType::CellType::FLOAT) { + return std::make_unique<SquaredEuclideanDistance<float>>(); + } else { + return std::make_unique<SquaredEuclideanDistance<double>>(); + } } } @@ -37,6 +46,7 @@ make_random_level_generator(uint32_t m) std::unique_ptr<NearestNeighborIndex> DefaultNearestNeighborIndexFactory::make(const DocVectorAccess& vectors, + size_t vector_size, vespalib::eval::ValueType::CellType cell_type, const search::attribute::HnswIndexParams& params) const { @@ -45,7 +55,7 @@ DefaultNearestNeighborIndexFactory::make(const DocVectorAccess& vectors, m, params.neighbors_to_explore_at_insert(), true); - return std::make_unique<HnswIndex>(vectors, make_distance_function(cell_type), make_random_level_generator(m), cfg); + return std::make_unique<HnswIndex>(vectors, make_distance_function(vector_size, cell_type), make_random_level_generator(m), cfg); } } diff --git a/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.h b/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.h index ea784efdb51..6a9ded92b60 100644 --- a/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.h +++ b/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.h @@ -12,6 +12,7 @@ namespace search::tensor { class DefaultNearestNeighborIndexFactory : public NearestNeighborIndexFactory { public: std::unique_ptr<NearestNeighborIndex> make(const DocVectorAccess& vectors, + size_t vector_size, vespalib::eval::ValueType::CellType cell_type, const search::attribute::HnswIndexParams& params) const override; }; diff --git a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp index 1ee54256a03..54a5d31ea06 100644 --- a/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp +++ b/searchlib/src/vespa/searchlib/tensor/dense_tensor_attribute.cpp @@ -71,7 +71,11 @@ DenseTensorAttribute::DenseTensorAttribute(vespalib::stringref baseFileName, con _index() { if (cfg.hnsw_index_params().has_value()) { - _index = index_factory.make(*this, cfg.tensorType().cell_type(), cfg.hnsw_index_params().value()); + auto tensor_type = cfg.tensorType(); + assert(tensor_type.dimensions().size() == 1); + assert(tensor_type.is_dense()); + size_t vector_size = tensor_type.dimensions()[0].size; + _index = index_factory.make(*this, vector_size, tensor_type.cell_type(), cfg.hnsw_index_params().value()); } } diff --git a/searchlib/src/vespa/searchlib/tensor/distance_functions.h b/searchlib/src/vespa/searchlib/tensor/distance_functions.h index 494d1a859b6..31f7b8fb1cb 100644 --- a/searchlib/src/vespa/searchlib/tensor/distance_functions.h +++ b/searchlib/src/vespa/searchlib/tensor/distance_functions.h @@ -7,8 +7,6 @@ namespace search::tensor { -// TODO: Make distance functions hardware optimized. - /** * Calculates the square of the standard Euclidean distance. */ @@ -32,4 +30,49 @@ public: template class SquaredEuclideanDistance<float>; template class SquaredEuclideanDistance<double>; +/** + * Hardware accelerated calculation of the square of the standard Euclidean distance. + * + * Uses vector instructions to perform calculation on smaller vectors of size VectorSizeBytes. + * Caller must ensure that the size of the tensor vector is a multiple of VectorSizeBytes. + */ +template <typename FloatType, size_t VectorSizeBytes> +class HwAccelSquaredEuclideanDistance : public DistanceFunction { +public: + double calc(const vespalib::tensor::TypedCells& lhs, const vespalib::tensor::TypedCells& rhs) const override { + constexpr const size_t elems_per_vector = VectorSizeBytes / sizeof(FloatType); + typedef FloatType VectorType __attribute__ ((vector_size (VectorSizeBytes), aligned(VectorSizeBytes))); + + auto lhs_vector = lhs.typify<FloatType>(); + auto rhs_vector = rhs.typify<FloatType>(); + size_t sz = lhs_vector.size(); + assert(sz == rhs_vector.size()); + // TODO: Handle remainder when tensor vector size is not a multiple of VectorSizeBytes. + assert((sz % VectorSizeBytes) == 0); + + const auto* a = reinterpret_cast<const VectorType*>(lhs_vector.begin()); + const auto* b = reinterpret_cast<const VectorType*>(rhs_vector.begin()); + + VectorType tmp_diff; + VectorType tmp_squa; + VectorType tmp_sum; + memset(&tmp_sum, 0, sizeof(tmp_sum)); + + const size_t num_ops = sz / elems_per_vector; + for (size_t i = 0; i < num_ops; ++i) { + tmp_diff = a[i] - b[i]; + tmp_squa = tmp_diff * tmp_diff; + tmp_sum += tmp_squa; + } + double sum = 0; + for (size_t i = 0; i < elems_per_vector; ++i) { + sum += tmp_sum[i]; + } + return sum; + } +}; + +template class HwAccelSquaredEuclideanDistance<float, 32>; +template class HwAccelSquaredEuclideanDistance<double, 32>; + } diff --git a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index_factory.h b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index_factory.h index c09403df5e0..089119944a7 100644 --- a/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index_factory.h +++ b/searchlib/src/vespa/searchlib/tensor/nearest_neighbor_index_factory.h @@ -19,6 +19,7 @@ class NearestNeighborIndexFactory { public: virtual ~NearestNeighborIndexFactory() {} virtual std::unique_ptr<NearestNeighborIndex> make(const DocVectorAccess& vectors, + size_t vector_size, vespalib::eval::ValueType::CellType cell_type, const search::attribute::HnswIndexParams& params) const = 0; }; |