summary | refs | log | tree | commit | diff | stats
path: root/vespalib
diff options
context:
space:
mode:
author: Henning Baldersheim <balder@yahoo-inc.com> 2021-11-26 12:11:06 +0000
committer: Henning Baldersheim <balder@yahoo-inc.com> 2021-11-26 13:14:33 +0000
commit: ef0b1301517fd689f617e0cafb4afe283b6483fc (patch)
tree: cc7d586b29136bb18ab6c9d39bac500557e4835e /vespalib
parent: b54f9353181518054a1aaafc294df03ee15d58de (diff)
- Extend test to cover iterations in outer loop.
- Rename for clarity.
- Fix logic bug checking for fallback.
Diffstat (limited to 'vespalib')
-rw-r--r-- vespalib/src/tests/hwaccelrated/hwaccelrated_test.cpp | 15
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp | 2
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp | 2
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp | 14
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp | 10
5 files changed, 23 insertions, 20 deletions
diff --git a/vespalib/src/tests/hwaccelrated/hwaccelrated_test.cpp b/vespalib/src/tests/hwaccelrated/hwaccelrated_test.cpp
index 45d4f09e720..bbe0ff6663a 100644
--- a/vespalib/src/tests/hwaccelrated/hwaccelrated_test.cpp
+++ b/vespalib/src/tests/hwaccelrated/hwaccelrated_test.cpp
@@ -18,7 +18,7 @@ std::vector<T> createAndFill(size_t sz) {
}
template<typename T, typename P>
-void verifyEuclideanDistance(const hwaccelrated::IAccelrated & accel, size_t testLength) {
+void verifyEuclideanDistance(const hwaccelrated::IAccelrated & accel, size_t testLength, double approxFactor) {
srand(1);
std::vector<T> a = createAndFill<T>(testLength);
std::vector<T> b = createAndFill<T>(testLength);
@@ -29,21 +29,22 @@ void verifyEuclideanDistance(const hwaccelrated::IAccelrated & accel, size_t tes
sum += d * d;
}
P hwComputedSum(accel.squaredEuclideanDistance(&a[j], &b[j], testLength - j));
- EXPECT_EQUAL(sum, hwComputedSum);
+ EXPECT_APPROX(sum, hwComputedSum, sum*approxFactor);
}
}
void
verifyEuclideanDistance(const hwaccelrated::IAccelrated & accelrator, size_t testLength) {
- verifyEuclideanDistance<int8_t, float>(accelrator, testLength);
- verifyEuclideanDistance<float, float>(accelrator, testLength);
- verifyEuclideanDistance<double, float>(accelrator, testLength);
+ verifyEuclideanDistance<int8_t, double>(accelrator, testLength, 0.0);
+ verifyEuclideanDistance<float, double>(accelrator, testLength, 0.0001); // Small deviation requiring EXPECT_APPROX
+ verifyEuclideanDistance<double, double>(accelrator, testLength, 0.0);
}
TEST("test euclidean distance") {
hwaccelrated::GenericAccelrator genericAccelrator;
- verifyEuclideanDistance(hwaccelrated::GenericAccelrator(),255);
- verifyEuclideanDistance(hwaccelrated::IAccelrated::getAccelerator(),255);
+ constexpr size_t TEST_LENGTH = 140000; // must be longer than 64k
+ TEST_DO(verifyEuclideanDistance(hwaccelrated::GenericAccelrator(), TEST_LENGTH));
+ TEST_DO(verifyEuclideanDistance(hwaccelrated::IAccelrated::getAccelerator(), TEST_LENGTH));
}
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
index 07ef897ca11..590223ed13a 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
@@ -12,7 +12,7 @@ Avx2Accelrator::populationCount(const uint64_t *a, size_t sz) const {
double
Avx2Accelrator::squaredEuclideanDistance(const int8_t * a, const int8_t * b, size_t sz) const {
- return helper::euclideanDistance(a, b, sz);
+ return helper::squaredEuclideanDistance(a, b, sz);
}
double
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp
index 491fe04752d..5878165bb6d 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp
@@ -24,7 +24,7 @@ Avx512Accelrator::populationCount(const uint64_t *a, size_t sz) const {
double
Avx512Accelrator::squaredEuclideanDistance(const int8_t * a, const int8_t * b, size_t sz) const {
- return helper::euclideanDistance(a, b, sz);
+ return helper::squaredEuclideanDistance(a, b, sz);
}
double
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
index a715d1b5758..13946fa3398 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
@@ -34,7 +34,7 @@ multiplyAdd(const T * a, const T * b, size_t sz)
template <typename T, size_t UNROLL>
double
-euclideanDistanceT(const T * a, const T * b, size_t sz)
+squaredEuclideanDistanceT(const T * a, const T * b, size_t sz)
{
T partial[UNROLL];
for (size_t i(0); i < UNROLL; i++) {
@@ -43,11 +43,13 @@ euclideanDistanceT(const T * a, const T * b, size_t sz)
size_t i(0);
for (; i + UNROLL <= sz; i += UNROLL) {
for (size_t j(0); j < UNROLL; j++) {
- partial[j] += (a[i+j] - b[i+j]) * (a[i+j] - b[i+j]);
+ T d = a[i+j] - b[i+j];
+ partial[j] += d * d;
}
}
for (;i < sz; i++) {
- partial[i%UNROLL] += (a[i] - b[i]) * (a[i] - b[i]);
+ T d = a[i] - b[i];
+ partial[i%UNROLL] += d * d;
}
double sum(0);
for (size_t j(0); j < UNROLL; j++) {
@@ -157,17 +159,17 @@ GenericAccelrator::populationCount(const uint64_t *a, size_t sz) const {
double
GenericAccelrator::squaredEuclideanDistance(const int8_t * a, const int8_t * b, size_t sz) const {
- return helper::euclideanDistance(a, b, sz);
+ return helper::squaredEuclideanDistance(a, b, sz);
}
double
GenericAccelrator::squaredEuclideanDistance(const float * a, const float * b, size_t sz) const {
- return euclideanDistanceT<float, 2>(a, b, sz);
+ return squaredEuclideanDistanceT<float, 2>(a, b, sz);
}
double
GenericAccelrator::squaredEuclideanDistance(const double * a, const double * b, size_t sz) const {
- return euclideanDistanceT<double, 2>(a, b, sz);
+ return squaredEuclideanDistanceT<double, 2>(a, b, sz);
}
void
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp b/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp
index ffa1c2e5c49..3b063ce6805 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp
@@ -75,9 +75,9 @@ orChunks(size_t offset, const std::vector<std::pair<const void *, bool>> & src,
}
template<typename TemporaryT=int32_t>
-double euclideanDistanceT(const int8_t * a, const int8_t * b, size_t sz) __attribute__((noinline));
+double squaredEuclideanDistanceT(const int8_t * a, const int8_t * b, size_t sz) __attribute__((noinline));
template<typename TemporaryT>
-double euclideanDistanceT(const int8_t * a, const int8_t * b, size_t sz)
+double squaredEuclideanDistanceT(const int8_t * a, const int8_t * b, size_t sz)
{
//Note that this is 3 times faster with int32_t than with int64_t and 16x faster than float
TemporaryT sum = 0;
@@ -89,14 +89,14 @@ double euclideanDistanceT(const int8_t * a, const int8_t * b, size_t sz)
}
inline double
-euclideanDistance(const int8_t * a, const int8_t * b, size_t sz) {
+squaredEuclideanDistance(const int8_t * a, const int8_t * b, size_t sz) {
constexpr size_t LOOP_COUNT = 0x10000;
double sum(0);
size_t i=0;
for (; i + LOOP_COUNT <= sz; i += LOOP_COUNT) {
- sum += euclideanDistanceT<int32_t>(a+i, b+i, LOOP_COUNT);
+ sum += squaredEuclideanDistanceT<int32_t>(a + i, b + i, LOOP_COUNT);
}
- sum += euclideanDistanceT<int32_t>(a+i, b+i, sz-i);
+ sum += squaredEuclideanDistanceT<int32_t>(a + i, b + i, sz - i);
return sum;
}