summaryrefslogtreecommitdiffstats
path: root/vespalib
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2020-03-10 10:30:47 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2020-03-10 10:30:47 +0000
commit73b16bba68712e47d4f820a67e2c0e7d74d9eb1b (patch)
tree311eca76557cf3243011747a913e1a81f9c1789b /vespalib
parent4ed4ccb035ed13d83c976437314a1f99715afadd (diff)
Add a euclidian distance that is optimal for avx, avx2 and avx512.
Diffstat (limited to 'vespalib')
-rw-r--r--vespalib/CMakeLists.txt1
-rw-r--r--vespalib/src/tests/hwaccelrated/CMakeLists.txt8
-rw-r--r--vespalib/src/tests/hwaccelrated/hwaccelrated_test.cpp39
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt2
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp13
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/avx.h18
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp10
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/avx2.h6
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp10
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/avx512.h2
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp10
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/generic.h2
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp93
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h2
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp26
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp13
-rw-r--r--vespalib/src/vespa/vespalib/hwaccelrated/sse2.h18
17 files changed, 166 insertions, 107 deletions
diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt
index 3530fb816df..2129649f81c 100644
--- a/vespalib/CMakeLists.txt
+++ b/vespalib/CMakeLists.txt
@@ -58,6 +58,7 @@ vespa_define_module(
src/tests/gencnt
src/tests/guard
src/tests/host_name
+ src/tests/hwaccelrated
src/tests/io/fileutil
src/tests/io/mapped_file_input
src/tests/latch
diff --git a/vespalib/src/tests/hwaccelrated/CMakeLists.txt b/vespalib/src/tests/hwaccelrated/CMakeLists.txt
new file mode 100644
index 00000000000..9dda949d088
--- /dev/null
+++ b/vespalib/src/tests/hwaccelrated/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(vespalib_hwaccelrated_test_app TEST
+ SOURCES
+ hwaccelrated_test.cpp
+ DEPENDS
+ vespalib
+)
+vespa_add_test(NAME vespalib_hwaccelrated_test_app COMMAND vespalib_hwaccelrated_test_app)
diff --git a/vespalib/src/tests/hwaccelrated/hwaccelrated_test.cpp b/vespalib/src/tests/hwaccelrated/hwaccelrated_test.cpp
new file mode 100644
index 00000000000..757626ead94
--- /dev/null
+++ b/vespalib/src/tests/hwaccelrated/hwaccelrated_test.cpp
@@ -0,0 +1,39 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
+#include <vespa/vespalib/hwaccelrated/generic.h>
+
+using namespace vespalib;
+
+template<typename T>
+std::vector<T> createAndFill(size_t sz) {
+ std::vector<T> v(sz);
+ for (size_t i(0); i < sz; i++) {
+ v[i] = i;
+ }
+ return v;
+}
+
+template<typename T>
+void verifyEuclidianDistance(const hwaccelrated::IAccelrated & accel) {
+ const size_t testLength(255);
+ std::vector<T> a = createAndFill<T>(testLength);
+ std::vector<T> b = createAndFill<T>(testLength);
+ for (size_t j(0); j < 0x20; j++) {
+ T sum(0);
+ for (size_t i(j); i < testLength; i++) {
+ sum += (a[i] - b[i]) * (a[i] - b[i]);
+ }
+ T hwComputedSum(accel.squaredEuclidianDistance(&a[j], &b[j], testLength - j));
+ EXPECT_EQUAL (sum, hwComputedSum);
+ }
+}
+
+TEST("test euclidian distance") {
+ hwaccelrated::GenericAccelrator genericAccelrator;
+ verifyEuclidianDistance<float>(genericAccelrator);
+ verifyEuclidianDistance<double >(genericAccelrator);
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt b/vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt
index f6b38028471..ac9d8d76074 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/CMakeLists.txt
@@ -3,8 +3,6 @@ vespa_add_library(vespalib_vespalib_hwaccelrated OBJECT
SOURCES
iaccelrated.cpp
generic.cpp
- sse2.cpp
- avx.cpp
avx2.cpp
avx512.cpp
DEPENDS
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp
deleted file mode 100644
index ec6dc164323..00000000000
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx.cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "avx.h"
-#include "avxprivate.hpp"
-
-namespace vespalib::hwaccelrated {
-
-size_t
-AvxAccelrator::populationCount(const uint64_t *a, size_t sz) const {
- return helper::populationCount(a, sz);
-}
-
-}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx.h b/vespalib/src/vespa/vespalib/hwaccelrated/avx.h
deleted file mode 100644
index e7f090b4695..00000000000
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx.h
+++ /dev/null
@@ -1,18 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include "sse2.h"
-
-namespace vespalib::hwaccelrated {
-
-/**
- * Avx-256 implementation.
- */
-class AvxAccelrator : public Sse2Accelrator
-{
-public:
- size_t populationCount(const uint64_t *a, size_t sz) const override;
-};
-
-}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
index f0d03a995e4..cd80261fc76 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
@@ -10,4 +10,14 @@ Avx2Accelrator::populationCount(const uint64_t *a, size_t sz) const {
return helper::populationCount(a, sz);
}
+double
+Avx2Accelrator::squaredEuclidianDistance(const float * a, const float * b, size_t sz) const {
+ return helper::euclidianDistanceT<float, 32>(a, b, sz);
+}
+
+double
+Avx2Accelrator::squaredEuclidianDistance(const double * a, const double * b, size_t sz) const {
+ return helper::euclidianDistanceT<double, 8>(a, b, sz);
+}
+
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h
index 7e1784698f1..68c0bb5695c 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h
@@ -2,17 +2,19 @@
#pragma once
-#include "avx.h"
+#include "generic.h"
namespace vespalib::hwaccelrated {
/**
* Avx-512 implementation.
*/
-class Avx2Accelrator : public AvxAccelrator
+class Avx2Accelrator : public GenericAccelrator
{
public:
size_t populationCount(const uint64_t *a, size_t sz) const override;
+ double squaredEuclidianDistance(const float * a, const float * b, size_t sz) const override;
+ double squaredEuclidianDistance(const double * a, const double * b, size_t sz) const override;
};
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp
index 1abf6b270cf..747119d7a8e 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp
@@ -22,4 +22,14 @@ Avx512Accelrator::populationCount(const uint64_t *a, size_t sz) const {
return helper::populationCount(a, sz);
}
+double
+Avx512Accelrator::squaredEuclidianDistance(const float * a, const float * b, size_t sz) const {
+ return helper::euclidianDistanceT<float, 64>(a, b, sz);
+}
+
+double
+Avx512Accelrator::squaredEuclidianDistance(const double * a, const double * b, size_t sz) const {
+ return helper::euclidianDistanceT<double, 32>(a, b, sz);
+}
+
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h
index eac8c96832b..78712811b8b 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h
@@ -15,6 +15,8 @@ public:
float dotProduct(const float * a, const float * b, size_t sz) const override;
double dotProduct(const double * a, const double * b, size_t sz) const override;
size_t populationCount(const uint64_t *a, size_t sz) const override;
+ double squaredEuclidianDistance(const float * a, const float * b, size_t sz) const override;
+ double squaredEuclidianDistance(const double * a, const double * b, size_t sz) const override;
};
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
index 4e02ca371f0..7018f37d49a 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
@@ -131,4 +131,14 @@ GenericAccelrator::populationCount(const uint64_t *a, size_t sz) const {
return helper::populationCount(a, sz);
}
+double
+GenericAccelrator::squaredEuclidianDistance(const float * a, const float * b, size_t sz) const {
+ return helper::euclidianDistanceT<float, 8>(a, b, sz);
+}
+
+double
+GenericAccelrator::squaredEuclidianDistance(const double * a, const double * b, size_t sz) const {
+ return helper::euclidianDistanceT<double, 4>(a, b, sz);
+}
+
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/generic.h b/vespalib/src/vespa/vespalib/hwaccelrated/generic.h
index d76d0728bdd..224394a9595 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/generic.h
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/generic.h
@@ -23,6 +23,8 @@ public:
void andNotBit(void * a, const void * b, size_t bytes) const override;
void notBit(void * a, size_t bytes) const override;
size_t populationCount(const uint64_t *a, size_t sz) const override;
+ double squaredEuclidianDistance(const float * a, const float * b, size_t sz) const override;
+ double squaredEuclidianDistance(const double * a, const double * b, size_t sz) const override;
};
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp
index 061963f95a4..b298ffdc4af 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp
@@ -2,12 +2,11 @@
#include "iaccelrated.h"
#include "generic.h"
-#include "sse2.h"
-#include "avx.h"
#include "avx2.h"
#include "avx512.h"
#include <vespa/vespalib/util/memory.h>
#include <cstdio>
+#include <vector>
#include <vespa/log/log.h>
LOG_SETUP(".vespalib.hwaccelrated");
@@ -18,7 +17,7 @@ namespace {
class Factory {
public:
- virtual ~Factory() { }
+ virtual ~Factory() = default;
virtual IAccelrated::UP create() const = 0;
};
@@ -27,16 +26,6 @@ public:
IAccelrated::UP create() const override { return std::make_unique<GenericAccelrator>(); }
};
-class Sse2Factory :public Factory{
-public:
- IAccelrated::UP create() const override { return std::make_unique<Sse2Accelrator>(); }
-};
-
-class AvxFactory :public Factory{
-public:
- IAccelrated::UP create() const override { return std::make_unique<AvxAccelrator>(); }
-};
-
class Avx2Factory :public Factory{
public:
IAccelrated::UP create() const override { return std::make_unique<Avx2Accelrator>(); }
@@ -48,15 +37,23 @@ public:
};
template<typename T>
-void verifyAccelrator(const IAccelrated & accel)
+std::vector<T> createAndFill(size_t sz) {
+ std::vector<T> v(sz);
+ for (size_t i(0); i < sz; i++) {
+ v[i] = i;
+ }
+ return v;
+}
+
+template<typename T>
+void verifyDotproduct(const IAccelrated & accel)
{
- const size_t testLength(127);
- T * a = new T[testLength];
- T * b = new T[testLength];
+ const size_t testLength(255);
+ std::vector<T> a = createAndFill<T>(testLength);
+ std::vector<T> b = createAndFill<T>(testLength);
for (size_t j(0); j < 0x20; j++) {
T sum(0);
for (size_t i(j); i < testLength; i++) {
- a[i] = b[i] = i;
sum += i*i;
}
T hwComputedSum(accel.dotProduct(&a[j], &b[j], testLength - j));
@@ -65,8 +62,24 @@ void verifyAccelrator(const IAccelrated & accel)
LOG_ABORT("should not be reached");
}
}
- delete [] a;
- delete [] b;
+}
+
+template<typename T>
+void verifyEuclidianDistance(const IAccelrated & accel) {
+ const size_t testLength(255);
+ std::vector<T> a = createAndFill<T>(testLength);
+ std::vector<T> b = createAndFill<T>(testLength);
+ for (size_t j(0); j < 0x20; j++) {
+ T sum(0);
+ for (size_t i(j); i < testLength; i++) {
+ sum += (a[i] - b[i]) * (a[i] - b[i]);
+ }
+ T hwComputedSum(accel.squaredEuclidianDistance(&a[j], &b[j], testLength - j));
+ if (sum != hwComputedSum) {
+ fprintf(stderr, "Accelrator is not computing euclidian distance correctly.\n");
+ LOG_ABORT("should not be reached");
+ }
+ }
}
void verifyPopulationCount(const IAccelrated & accel)
@@ -90,23 +103,25 @@ class RuntimeVerificator
{
public:
RuntimeVerificator();
+private:
+ void verify(IAccelrated & accelrated) {
+ verifyDotproduct<float>(accelrated);
+ verifyDotproduct<double>(accelrated);
+ verifyDotproduct<int32_t>(accelrated);
+ verifyDotproduct<int64_t>(accelrated);
+ verifyEuclidianDistance<float>(accelrated);
+ verifyEuclidianDistance<double>(accelrated);
+ verifyPopulationCount(accelrated);
+ }
};
RuntimeVerificator::RuntimeVerificator()
{
- GenericAccelrator generic;
- verifyAccelrator<float>(generic);
- verifyAccelrator<double>(generic);
- verifyAccelrator<int32_t>(generic);
- verifyAccelrator<int64_t>(generic);
- verifyPopulationCount(generic);
-
- IAccelrated::UP thisCpu(IAccelrated::getAccelrator());
- verifyAccelrator<float>(*thisCpu);
- verifyAccelrator<double>(*thisCpu);
- verifyAccelrator<int32_t>(*thisCpu);
- verifyAccelrator<int64_t>(*thisCpu);
-
+ GenericAccelrator generic;
+ verify(generic);
+
+ IAccelrated::UP thisCpu(IAccelrated::getAccelrator());
+ verify(*thisCpu);
}
class Selector
@@ -119,17 +134,15 @@ private:
};
Selector::Selector() :
- _factory(new GenericFactory())
+ _factory()
{
__builtin_cpu_init ();
if (__builtin_cpu_supports("avx512f")) {
- _factory.reset(new Avx512Factory());
+ _factory = std::make_unique<Avx512Factory>();
} else if (__builtin_cpu_supports("avx2")) {
- _factory.reset(new Avx2Factory());
- } else if (__builtin_cpu_supports("avx")) {
- _factory.reset(new AvxFactory());
- } else if (__builtin_cpu_supports("sse2")) {
- _factory.reset(new Sse2Factory());
+ _factory = std::make_unique<Avx2Factory>();
+ } else {
+ _factory = std::make_unique<GenericFactory>();
}
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h
index 4031169c44d..61739e1837a 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h
@@ -27,6 +27,8 @@ public:
virtual void andNotBit(void * a, const void * b, size_t bytes) const = 0;
virtual void notBit(void * a, size_t bytes) const = 0;
virtual size_t populationCount(const uint64_t *a, size_t sz) const = 0;
+ virtual double squaredEuclidianDistance(const float * a, const float * b, size_t sz) const = 0;
+ virtual double squaredEuclidianDistance(const double * a, const double * b, size_t sz) const = 0;
static IAccelrated::UP getAccelrator() __attribute__((noinline));
};
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp b/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp
index 8eba313d5f1..6a70e14c290 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp
@@ -23,5 +23,29 @@ populationCount(const uint64_t *a, size_t sz) {
return count;
}
+template <typename T, size_t UNROLL>
+double
+euclidianDistanceT(const T * a, const T * b, size_t sz)
+{
+ T partial[UNROLL];
+ for (size_t i(0); i < UNROLL; i++) {
+ partial[i] = 0;
+ }
+ size_t i(0);
+ for (; i + UNROLL <= sz; i += UNROLL) {
+ for (size_t j(0); j < UNROLL; j++) {
+ partial[j] += (a[i+j] - b[i+j]) * (a[i+j] - b[i+j]);
+ }
+ }
+ for (;i < sz; i++) {
+ partial[i%UNROLL] += (a[i] - b[i]) * (a[i] - b[i]);
+ }
+ double sum(0);
+ for (size_t j(0); j < UNROLL; j++) {
+ sum += partial[j];
+ }
+ return sum;
+}
+
+}
}
-} \ No newline at end of file
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp
deleted file mode 100644
index 64a26d49f2b..00000000000
--- a/vespalib/src/vespa/vespalib/hwaccelrated/sse2.cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include "sse2.h"
-#include "private_helpers.hpp"
-
-namespace vespalib::hwaccelrated {
-
-size_t
-Sse2Accelrator::populationCount(const uint64_t *a, size_t sz) const {
- return helper::populationCount(a, sz);
-}
-
-}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/sse2.h b/vespalib/src/vespa/vespalib/hwaccelrated/sse2.h
deleted file mode 100644
index 0b2462423b1..00000000000
--- a/vespalib/src/vespa/vespalib/hwaccelrated/sse2.h
+++ /dev/null
@@ -1,18 +0,0 @@
-// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include "generic.h"
-
-namespace vespalib::hwaccelrated {
-
-/**
- * Generic cpu agnostic implementation.
- */
-class Sse2Accelrator : public GenericAccelrator
-{
-public:
- size_t populationCount(const uint64_t *a, size_t sz) const override;
-};
-
-}