summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp20
-rw-r--r--searchlib/src/vespa/searchlib/tensor/distance_functions.h58
-rw-r--r--vespalib/CMakeLists.txt1
-rw-r--r--vespalib/src/tests/hwaccelrated/CMakeLists.txt8
-rw-r--r--vespalib/src/tests/hwaccelrated/hwaccelrated_test.cpp40
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt2
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp13
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/avx.h18
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp10
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/avx2.h6
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp10
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/avx512.h2
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/avxprivate.hpp49
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp34
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/generic.h2
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp97
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h2
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp3
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp13
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/sse2.h18
20 files changed, 232 insertions, 174 deletions
diff --git a/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp b/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp
index 6b03b6a1c46..f50d2838d9a 100644
--- a/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp
+++ b/searchlib/src/vespa/searchlib/tensor/default_nearest_neighbor_index_factory.cpp
@@ -18,21 +18,12 @@ class LevelZeroGenerator : public RandomLevelGenerator {
};
DistanceFunction::UP
-make_distance_function(size_t vector_size, ValueType::CellType cell_type)
+make_distance_function(ValueType::CellType cell_type)
{
- bool hw_accelerated = (vector_size % 32) == 0;
- if (hw_accelerated) {
- if (cell_type == ValueType::CellType::FLOAT) {
- return std::make_unique<HwAccelSquaredEuclideanDistance<float, 32>>();
- } else {
- return std::make_unique<HwAccelSquaredEuclideanDistance<double, 32>>();
- }
+ if (cell_type == ValueType::CellType::FLOAT) {
+ return std::make_unique<SquaredEuclideanDistance<float>>();
} else {
- if (cell_type == ValueType::CellType::FLOAT) {
- return std::make_unique<SquaredEuclideanDistance<float>>();
- } else {
- return std::make_unique<SquaredEuclideanDistance<double>>();
- }
+ return std::make_unique<SquaredEuclideanDistance<double>>();
}
}
@@ -50,12 +41,13 @@ DefaultNearestNeighborIndexFactory::make(const DocVectorAccess& vectors,
vespalib::eval::ValueType::CellType cell_type,
const search::attribute::HnswIndexParams& params) const
{
+ (void) vector_size;
uint32_t m = params.max_links_per_node();
HnswIndex::Config cfg(m * 2,
m,
params.neighbors_to_explore_at_insert(),
true);
- return std::make_unique<HnswIndex>(vectors, make_distance_function(vector_size, cell_type), make_random_level_generator(m), cfg);
+ return std::make_unique<HnswIndex>(vectors, make_distance_function(cell_type), make_random_level_generator(m), cfg);
}
}
diff --git a/searchlib/src/vespa/searchlib/tensor/distance_functions.h b/searchlib/src/vespa/searchlib/tensor/distance_functions.h
index 31f7b8fb1cb..09f538269d8 100644
--- a/searchlib/src/vespa/searchlib/tensor/distance_functions.h
+++ b/searchlib/src/vespa/searchlib/tensor/distance_functions.h
@@ -4,75 +4,31 @@
#include "distance_function.h"
#include <vespa/eval/tensor/dense/typed_cells.h>
+#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
namespace search::tensor {
/**
* Calculates the square of the standard Euclidean distance.
+ * Will use instruction optimal for the cpu it is running on.
*/
template <typename FloatType>
class SquaredEuclideanDistance : public DistanceFunction {
public:
+ SquaredEuclideanDistance()
+ : _computer(vespalib::hwaccelrated::IAccelrated::getAccelrator())
+ {}
double calc(const vespalib::tensor::TypedCells& lhs, const vespalib::tensor::TypedCells& rhs) const override {
auto lhs_vector = lhs.typify<FloatType>();
auto rhs_vector = rhs.typify<FloatType>();
- double result = 0.0;
size_t sz = lhs_vector.size();
assert(sz == rhs_vector.size());
- for (size_t i = 0; i < sz; ++i) {
- double diff = lhs_vector[i] - rhs_vector[i];
- result += diff * diff;
- }
- return result;
+ return _computer->squaredEuclideanDistance(&lhs_vector[0], &rhs_vector[0], sz);
}
+ vespalib::hwaccelrated::IAccelrated::UP _computer;
};
template class SquaredEuclideanDistance<float>;
template class SquaredEuclideanDistance<double>;
-/**
- * Hardware accelerated calculation of the square of the standard Euclidean distance.
- *
- * Uses vector instructions to perform calculation on smaller vectors of size VectorSizeBytes.
- * Caller must ensure that the size of the tensor vector is a multiple of VectorSizeBytes.
- */
-template <typename FloatType, size_t VectorSizeBytes>
-class HwAccelSquaredEuclideanDistance : public DistanceFunction {
-public:
- double calc(const vespalib::tensor::TypedCells& lhs, const vespalib::tensor::TypedCells& rhs) const override {
- constexpr const size_t elems_per_vector = VectorSizeBytes / sizeof(FloatType);
- typedef FloatType VectorType __attribute__ ((vector_size (VectorSizeBytes), aligned(VectorSizeBytes)));
-
- auto lhs_vector = lhs.typify<FloatType>();
- auto rhs_vector = rhs.typify<FloatType>();
- size_t sz = lhs_vector.size();
- assert(sz == rhs_vector.size());
- // TODO: Handle remainder when tensor vector size is not a multiple of VectorSizeBytes.
- assert((sz % VectorSizeBytes) == 0);
-
- const auto* a = reinterpret_cast<const VectorType*>(lhs_vector.begin());
- const auto* b = reinterpret_cast<const VectorType*>(rhs_vector.begin());
-
- VectorType tmp_diff;
- VectorType tmp_squa;
- VectorType tmp_sum;
- memset(&tmp_sum, 0, sizeof(tmp_sum));
-
- const size_t num_ops = sz / elems_per_vector;
- for (size_t i = 0; i < num_ops; ++i) {
- tmp_diff = a[i] - b[i];
- tmp_squa = tmp_diff * tmp_diff;
- tmp_sum += tmp_squa;
- }
- double sum = 0;
- for (size_t i = 0; i < elems_per_vector; ++i) {
- sum += tmp_sum[i];
- }
- return sum;
- }
-};
-
-template class HwAccelSquaredEuclideanDistance<float, 32>;
-template class HwAccelSquaredEuclideanDistance<double, 32>;
-
}
diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt
index 98d50bcefba..2675bc16bf2 100644
--- a/vespalib/CMakeLists.txt
+++ b/vespalib/CMakeLists.txt
@@ -58,6 +58,7 @@ vespa_define_module(
src/tests/gencnt
src/tests/guard
src/tests/host_name
+ src/tests/hwaccelrated
src/tests/io/fileutil
src/tests/io/mapped_file_input
src/tests/latch
diff --git a/vespalib/src/tests/hwaccelrated/CMakeLists.txt b/vespalib/src/tests/hwaccelrated/CMakeLists.txt
new file mode 100644
index 00000000000..49501341098
--- /dev/null
+++ b/vespalib/src/tests/hwaccelrated/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(vespalib_hwaccelrated_test_app TEST
+ SOURCES
+ hwaccelrated_test.cpp
+ DEPENDS
+ vespalib
+)
+vespa_add_test(NAME vespalib_hwaccelrated_test_app COMMAND vespalib_hwaccelrated_test_app)
diff --git a/vespalib/src/tests/hwaccelrated/hwaccelrated_test.cpp b/vespalib/src/tests/hwaccelrated/hwaccelrated_test.cpp
new file mode 100644
index 00000000000..421f3f6f520
--- /dev/null
+++ b/vespalib/src/tests/hwaccelrated/hwaccelrated_test.cpp
@@ -0,0 +1,40 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
+#include <vespa/vespalib/hwaccelrated/generic.h>
+
+using namespace vespalib;
+
+template<typename T>
+std::vector<T> createAndFill(size_t sz) {
+ std::vector<T> v(sz);
+ for (size_t i(0); i < sz; i++) {
+ v[i] = rand()%500;
+ }
+ return v;
+}
+
+template<typename T>
+void verifyEuclideanDistance(const hwaccelrated::IAccelrated & accel) {
+ const size_t testLength(255);
+ srand(1);
+ std::vector<T> a = createAndFill<T>(testLength);
+ std::vector<T> b = createAndFill<T>(testLength);
+ for (size_t j(0); j < 0x20; j++) {
+ T sum(0);
+ for (size_t i(j); i < testLength; i++) {
+ sum += (a[i] - b[i]) * (a[i] - b[i]);
+ }
+ T hwComputedSum(accel.squaredEuclideanDistance(&a[j], &b[j], testLength - j));
+ EXPECT_EQUAL(sum, hwComputedSum);
+ }
+}
+
+TEST("test euclidean distance") {
+ hwaccelrated::GenericAccelrator genericAccelrator;
+ verifyEuclideanDistance<float>(genericAccelrator);
+ verifyEuclideanDistance<double >(genericAccelrator);
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt b/vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt
index f6b38028471..ac9d8d76074 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt
@@ -3,8 +3,6 @@ vespa_add_library(vespalib_vespalib_hwaccelrated OBJECT
SOURCES
iaccelrated.cpp
generic.cpp
- sse2.cpp
- avx.cpp
avx2.cpp
avx512.cpp
DEPENDS
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp
deleted file mode 100644
index ec6dc164323..00000000000
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "avx.h"
-#include "avxprivate.hpp"
-
-namespace vespalib::hwaccelrated {
-
-size_t
-AvxAccelrator::populationCount(const uint64_t *a, size_t sz) const {
- return helper::populationCount(a, sz);
-}
-
-}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx.h b/vespalib/src/vespa/vespalib/hwaccelrated/avx.h
deleted file mode 100644
index e7f090b4695..00000000000
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx.h
+++ /dev/null
@@ -1,18 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include "sse2.h"
-
-namespace vespalib::hwaccelrated {
-
-/**
- * Avx-256 implementation.
- */
-class AvxAccelrator : public Sse2Accelrator
-{
-public:
- size_t populationCount(const uint64_t *a, size_t sz) const override;
-};
-
-}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
index f0d03a995e4..7ff393c87f8 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
@@ -10,4 +10,14 @@ Avx2Accelrator::populationCount(const uint64_t *a, size_t sz) const {
return helper::populationCount(a, sz);
}
+double
+Avx2Accelrator::squaredEuclideanDistance(const float * a, const float * b, size_t sz) const {
+ return avx::euclideanDistanceSelectAlignment<float, 32>(a, b, sz);
+}
+
+double
+Avx2Accelrator::squaredEuclideanDistance(const double * a, const double * b, size_t sz) const {
+ return avx::euclideanDistanceSelectAlignment<double, 32>(a, b, sz);
+}
+
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h
index 7e1784698f1..3e0dbb28110 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h
@@ -2,17 +2,19 @@
#pragma once
-#include "avx.h"
+#include "generic.h"
namespace vespalib::hwaccelrated {
/**
* Avx-512 implementation.
*/
-class Avx2Accelrator : public AvxAccelrator
+class Avx2Accelrator : public GenericAccelrator
{
public:
size_t populationCount(const uint64_t *a, size_t sz) const override;
+ double squaredEuclideanDistance(const float * a, const float * b, size_t sz) const override;
+ double squaredEuclideanDistance(const double * a, const double * b, size_t sz) const override;
};
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp
index 1abf6b270cf..0941e6d6ad8 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp
@@ -22,4 +22,14 @@ Avx512Accelrator::populationCount(const uint64_t *a, size_t sz) const {
return helper::populationCount(a, sz);
}
+double
+Avx512Accelrator::squaredEuclideanDistance(const float * a, const float * b, size_t sz) const {
+ return avx::euclideanDistanceSelectAlignment<float, 64>(a, b, sz);
+}
+
+double
+Avx512Accelrator::squaredEuclideanDistance(const double * a, const double * b, size_t sz) const {
+ return avx::euclideanDistanceSelectAlignment<double, 64>(a, b, sz);
+}
+
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h
index eac8c96832b..209ec06c857 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h
@@ -15,6 +15,8 @@ public:
float dotProduct(const float * a, const float * b, size_t sz) const override;
double dotProduct(const double * a, const double * b, size_t sz) const override;
size_t populationCount(const uint64_t *a, size_t sz) const override;
+ double squaredEuclideanDistance(const float * a, const float * b, size_t sz) const override;
+ double squaredEuclideanDistance(const double * a, const double * b, size_t sz) const override;
};
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avxprivate.hpp b/vespalib/src/vespa/vespalib/hwaccelrated/avxprivate.hpp
index 9e6a6d8817f..087729a23b2 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avxprivate.hpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avxprivate.hpp
@@ -4,7 +4,6 @@
#include "private_helpers.hpp"
#include <vespa/fastos/dynamiclibrary.h>
-#include <cstring>
namespace vespalib::hwaccelrated::avx {
@@ -87,4 +86,52 @@ T dotProductSelectAlignment(const T * af, const T * bf, size_t sz)
}
}
+template <typename T, unsigned VLEN, unsigned AlignA, unsigned AlignB>
+double
+euclideanDistanceT(const T * af, const T * bf, size_t sz)
+{
+ constexpr unsigned VectorsPerChunk = 4;
+ constexpr unsigned ChunkSize = VLEN*VectorsPerChunk/sizeof(T);
+ typedef T V __attribute__ ((vector_size (VLEN)));
+ typedef T A __attribute__ ((vector_size (VLEN), aligned(AlignA)));
+ typedef T B __attribute__ ((vector_size (VLEN), aligned(AlignB)));
+ V partial[VectorsPerChunk];
+ memset(partial, 0, sizeof(partial));
+ const A * a = reinterpret_cast<const A *>(af);
+ const B * b = reinterpret_cast<const B *>(bf);
+
+ const size_t numChunks(sz/ChunkSize);
+ for (size_t i(0); i < numChunks; i++) {
+ for (size_t j(0); j < VectorsPerChunk; j++) {
+ partial[j] += (a[VectorsPerChunk*i+j] - b[VectorsPerChunk*i+j]) * (a[VectorsPerChunk*i+j] - b[VectorsPerChunk*i+j]);
+ }
+ }
+ double sum(0);
+ for (size_t i(numChunks*ChunkSize); i < sz; i++) {
+ sum += (af[i] - bf[i]) * (af[i] - bf[i]);
+ }
+ partial[0] = sumR<V, VectorsPerChunk>(partial);
+
+ return sum + sumT<T, V>(partial[0]);
+}
+
+template <typename T, unsigned VLEN>
+double euclideanDistanceSelectAlignment(const T * af, const T * bf, size_t sz)
+{
+ constexpr unsigned ALIGN = 32;
+ if (validAlignment(af, ALIGN)) {
+ if (validAlignment(bf, ALIGN)) {
+ return euclideanDistanceT<T, VLEN, ALIGN, ALIGN>(af, bf, sz);
+ } else {
+ return euclideanDistanceT<T, ALIGN, ALIGN, 1>(af, bf, sz);
+ }
+ } else {
+ if (validAlignment(bf, ALIGN)) {
+ return euclideanDistanceT<T, VLEN, 1, ALIGN>(af, bf, sz);
+ } else {
+ return euclideanDistanceT<T, VLEN, 1, 1>(af, bf, sz);
+ }
+ }
+}
+
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
index 4e02ca371f0..f9684e88c63 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
@@ -32,6 +32,30 @@ multiplyAdd(const T * a, const T * b, size_t sz)
return sum;
}
+template <typename T, size_t UNROLL>
+double
+euclideanDistanceT(const T * a, const T * b, size_t sz)
+{
+ T partial[UNROLL];
+ for (size_t i(0); i < UNROLL; i++) {
+ partial[i] = 0;
+ }
+ size_t i(0);
+ for (; i + UNROLL <= sz; i += UNROLL) {
+ for (size_t j(0); j < UNROLL; j++) {
+ partial[j] += (a[i+j] - b[i+j]) * (a[i+j] - b[i+j]);
+ }
+ }
+ for (;i < sz; i++) {
+ partial[i%UNROLL] += (a[i] - b[i]) * (a[i] - b[i]);
+ }
+ double sum(0);
+ for (size_t j(0); j < UNROLL; j++) {
+ sum += partial[j];
+ }
+ return sum;
+}
+
template<size_t UNROLL, typename Operation>
void
bitOperation(Operation operation, void * aOrg, const void * bOrg, size_t bytes) {
@@ -131,4 +155,14 @@ GenericAccelrator::populationCount(const uint64_t *a, size_t sz) const {
return helper::populationCount(a, sz);
}
+double
+GenericAccelrator::squaredEuclideanDistance(const float * a, const float * b, size_t sz) const {
+ return euclideanDistanceT<float, 8>(a, b, sz);
+}
+
+double
+GenericAccelrator::squaredEuclideanDistance(const double * a, const double * b, size_t sz) const {
+ return euclideanDistanceT<double, 4>(a, b, sz);
+}
+
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/generic.h b/vespalib/src/vespa/vespalib/hwaccelrated/generic.h
index d76d0728bdd..50a3d59d49d 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/generic.h
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/generic.h
@@ -23,6 +23,8 @@ public:
void andNotBit(void * a, const void * b, size_t bytes) const override;
void notBit(void * a, size_t bytes) const override;
size_t populationCount(const uint64_t *a, size_t sz) const override;
+ double squaredEuclideanDistance(const float * a, const float * b, size_t sz) const override;
+ double squaredEuclideanDistance(const double * a, const double * b, size_t sz) const override;
};
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp
index 061963f95a4..86bd4d2c7b2 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp
@@ -2,12 +2,11 @@
#include "iaccelrated.h"
#include "generic.h"
-#include "sse2.h"
-#include "avx.h"
#include "avx2.h"
#include "avx512.h"
#include <vespa/vespalib/util/memory.h>
#include <cstdio>
+#include <vector>
#include <vespa/log/log.h>
LOG_SETUP(".vespalib.hwaccelrated");
@@ -18,7 +17,7 @@ namespace {
class Factory {
public:
- virtual ~Factory() { }
+ virtual ~Factory() = default;
virtual IAccelrated::UP create() const = 0;
};
@@ -27,16 +26,6 @@ public:
IAccelrated::UP create() const override { return std::make_unique<GenericAccelrator>(); }
};
-class Sse2Factory :public Factory{
-public:
- IAccelrated::UP create() const override { return std::make_unique<Sse2Accelrator>(); }
-};
-
-class AvxFactory :public Factory{
-public:
- IAccelrated::UP create() const override { return std::make_unique<AvxAccelrator>(); }
-};
-
class Avx2Factory :public Factory{
public:
IAccelrated::UP create() const override { return std::make_unique<Avx2Accelrator>(); }
@@ -48,16 +37,25 @@ public:
};
template<typename T>
-void verifyAccelrator(const IAccelrated & accel)
+std::vector<T> createAndFill(size_t sz) {
+ std::vector<T> v(sz);
+ for (size_t i(0); i < sz; i++) {
+ v[i] = rand()%100;
+ }
+ return v;
+}
+
+template<typename T>
+void verifyDotproduct(const IAccelrated & accel)
{
- const size_t testLength(127);
- T * a = new T[testLength];
- T * b = new T[testLength];
+ const size_t testLength(255);
+ srand(1);
+ std::vector<T> a = createAndFill<T>(testLength);
+ std::vector<T> b = createAndFill<T>(testLength);
for (size_t j(0); j < 0x20; j++) {
T sum(0);
for (size_t i(j); i < testLength; i++) {
- a[i] = b[i] = i;
- sum += i*i;
+ sum += a[i]*b[i];
}
T hwComputedSum(accel.dotProduct(&a[j], &b[j], testLength - j));
if (sum != hwComputedSum) {
@@ -65,8 +63,25 @@ void verifyAccelrator(const IAccelrated & accel)
LOG_ABORT("should not be reached");
}
}
- delete [] a;
- delete [] b;
+}
+
+template<typename T>
+void verifyEuclidianDistance(const IAccelrated & accel) {
+ const size_t testLength(255);
+ srand(1);
+ std::vector<T> a = createAndFill<T>(testLength);
+ std::vector<T> b = createAndFill<T>(testLength);
+ for (size_t j(0); j < 0x20; j++) {
+ T sum(0);
+ for (size_t i(j); i < testLength; i++) {
+ sum += (a[i] - b[i]) * (a[i] - b[i]);
+ }
+ T hwComputedSum(accel.squaredEuclideanDistance(&a[j], &b[j], testLength - j));
+ if (sum != hwComputedSum) {
+ fprintf(stderr, "Accelrator is not computing euclidian distance correctly.\n");
+ LOG_ABORT("should not be reached");
+ }
+ }
}
void verifyPopulationCount(const IAccelrated & accel)
@@ -90,23 +105,25 @@ class RuntimeVerificator
{
public:
RuntimeVerificator();
+private:
+ void verify(IAccelrated & accelrated) {
+ verifyDotproduct<float>(accelrated);
+ verifyDotproduct<double>(accelrated);
+ verifyDotproduct<int32_t>(accelrated);
+ verifyDotproduct<int64_t>(accelrated);
+ verifyEuclidianDistance<float>(accelrated);
+ verifyEuclidianDistance<double>(accelrated);
+ verifyPopulationCount(accelrated);
+ }
};
RuntimeVerificator::RuntimeVerificator()
{
- GenericAccelrator generic;
- verifyAccelrator<float>(generic);
- verifyAccelrator<double>(generic);
- verifyAccelrator<int32_t>(generic);
- verifyAccelrator<int64_t>(generic);
- verifyPopulationCount(generic);
-
- IAccelrated::UP thisCpu(IAccelrated::getAccelrator());
- verifyAccelrator<float>(*thisCpu);
- verifyAccelrator<double>(*thisCpu);
- verifyAccelrator<int32_t>(*thisCpu);
- verifyAccelrator<int64_t>(*thisCpu);
-
+ GenericAccelrator generic;
+ verify(generic);
+
+ IAccelrated::UP thisCpu(IAccelrated::getAccelrator());
+ verify(*thisCpu);
}
class Selector
@@ -119,17 +136,15 @@ private:
};
Selector::Selector() :
- _factory(new GenericFactory())
+ _factory()
{
__builtin_cpu_init ();
if (__builtin_cpu_supports("avx512f")) {
- _factory.reset(new Avx512Factory());
+ _factory = std::make_unique<Avx512Factory>();
} else if (__builtin_cpu_supports("avx2")) {
- _factory.reset(new Avx2Factory());
- } else if (__builtin_cpu_supports("avx")) {
- _factory.reset(new AvxFactory());
- } else if (__builtin_cpu_supports("sse2")) {
- _factory.reset(new Sse2Factory());
+ _factory = std::make_unique<Avx2Factory>();
+ } else {
+ _factory = std::make_unique<GenericFactory>();
}
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h
index 4031169c44d..ea7e268bc6f 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h
@@ -27,6 +27,8 @@ public:
virtual void andNotBit(void * a, const void * b, size_t bytes) const = 0;
virtual void notBit(void * a, size_t bytes) const = 0;
virtual size_t populationCount(const uint64_t *a, size_t sz) const = 0;
+ virtual double squaredEuclideanDistance(const float * a, const float * b, size_t sz) const = 0;
+ virtual double squaredEuclideanDistance(const double * a, const double * b, size_t sz) const = 0;
static IAccelrated::UP getAccelrator() __attribute__((noinline));
};
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp b/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp
index 8eba313d5f1..f5daf2b9081 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp
@@ -3,6 +3,7 @@
#pragma once
#include <vespa/vespalib/util/optimized.h>
+#include <cstring>
namespace vespalib::hwaccelrated::helper {
namespace {
@@ -24,4 +25,4 @@ populationCount(const uint64_t *a, size_t sz) {
}
}
-} \ No newline at end of file
+}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp
deleted file mode 100644
index 64a26d49f2b..00000000000
--- a/vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "sse2.h"
-#include "private_helpers.hpp"
-
-namespace vespalib::hwaccelrated {
-
-size_t
-Sse2Accelrator::populationCount(const uint64_t *a, size_t sz) const {
- return helper::populationCount(a, sz);
-}
-
-}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/sse2.h b/vespalib/src/vespa/vespalib/hwaccelrated/sse2.h
deleted file mode 100644
index 0b2462423b1..00000000000
--- a/vespalib/src/vespa/vespalib/hwaccelrated/sse2.h
+++ /dev/null
@@ -1,18 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include "generic.h"
-
-namespace vespalib::hwaccelrated {
-
-/**
- * Generic cpu agnostic implementation.
- */
-class Sse2Accelrator : public GenericAccelrator
-{
-public:
- size_t populationCount(const uint64_t *a, size_t sz) const override;
-};
-
-}