diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-11-26 12:11:06 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2021-11-26 13:14:33 +0000 |
commit | ef0b1301517fd689f617e0cafb4afe283b6483fc (patch) | |
tree | cc7d586b29136bb18ab6c9d39bac500557e4835e | |
parent | b54f9353181518054a1aaafc294df03ee15d58de (diff) |
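- Extend the Euclidean distance test to use vectors longer than 64k elements, so that the outer (chunked) loop of the int8 implementation is iterated as well.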
- Rename the euclideanDistance helpers to squaredEuclideanDistance for clarity.
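- Fix logic bug in the fallback check: fall back to the generic calculation when either operand's cell type differs from the expected one (the rhs comparison used == instead of !=).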
6 files changed, 24 insertions, 21 deletions
diff --git a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h index df6fe4a6df4..25fac9c41c4 100644 --- a/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h +++ b/searchlib/src/vespa/searchlib/tensor/euclidean_distance.h @@ -49,7 +49,7 @@ public: static const int8_t *cast(const vespalib::eval::Int8Float * p) { return reinterpret_cast<const int8_t *>(p); } double calc(const vespalib::eval::TypedCells& lhs, const vespalib::eval::TypedCells& rhs) const override { constexpr vespalib::eval::CellType expected = vespalib::eval::get_cell_type<FloatType>(); - if ((lhs.type != expected) || (rhs.type == expected)) { + if ((lhs.type != expected) || (rhs.type != expected)) { return SquaredEuclideanDistance::calc(lhs, rhs); } auto lhs_vector = lhs.typify<FloatType>(); diff --git a/vespalib/src/tests/hwaccelrated/hwaccelrated_test.cpp b/vespalib/src/tests/hwaccelrated/hwaccelrated_test.cpp index 45d4f09e720..bbe0ff6663a 100644 --- a/vespalib/src/tests/hwaccelrated/hwaccelrated_test.cpp +++ b/vespalib/src/tests/hwaccelrated/hwaccelrated_test.cpp @@ -18,7 +18,7 @@ std::vector<T> createAndFill(size_t sz) { } template<typename T, typename P> -void verifyEuclideanDistance(const hwaccelrated::IAccelrated & accel, size_t testLength) { +void verifyEuclideanDistance(const hwaccelrated::IAccelrated & accel, size_t testLength, double approxFactor) { srand(1); std::vector<T> a = createAndFill<T>(testLength); std::vector<T> b = createAndFill<T>(testLength); @@ -29,21 +29,22 @@ void verifyEuclideanDistance(const hwaccelrated::IAccelrated & accel, size_t tes sum += d * d; } P hwComputedSum(accel.squaredEuclideanDistance(&a[j], &b[j], testLength - j)); - EXPECT_EQUAL(sum, hwComputedSum); + EXPECT_APPROX(sum, hwComputedSum, sum*approxFactor); } } void verifyEuclideanDistance(const hwaccelrated::IAccelrated & accelrator, size_t testLength) { - verifyEuclideanDistance<int8_t, float>(accelrator, testLength); - 
verifyEuclideanDistance<float, float>(accelrator, testLength); - verifyEuclideanDistance<double, float>(accelrator, testLength); + verifyEuclideanDistance<int8_t, double>(accelrator, testLength, 0.0); + verifyEuclideanDistance<float, double>(accelrator, testLength, 0.0001); // Small deviation requiring EXPECT_APPROX + verifyEuclideanDistance<double, double>(accelrator, testLength, 0.0); } TEST("test euclidean distance") { hwaccelrated::GenericAccelrator genericAccelrator; - verifyEuclideanDistance(hwaccelrated::GenericAccelrator(),255); - verifyEuclideanDistance(hwaccelrated::IAccelrated::getAccelerator(),255); + constexpr size_t TEST_LENGTH = 140000; // must be longer than 64k + TEST_DO(verifyEuclideanDistance(hwaccelrated::GenericAccelrator(), TEST_LENGTH)); + TEST_DO(verifyEuclideanDistance(hwaccelrated::IAccelrated::getAccelerator(), TEST_LENGTH)); } TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp index 07ef897ca11..590223ed13a 100644 --- a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp +++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp @@ -12,7 +12,7 @@ Avx2Accelrator::populationCount(const uint64_t *a, size_t sz) const { double Avx2Accelrator::squaredEuclideanDistance(const int8_t * a, const int8_t * b, size_t sz) const { - return helper::euclideanDistance(a, b, sz); + return helper::squaredEuclideanDistance(a, b, sz); } double diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp index 491fe04752d..5878165bb6d 100644 --- a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp +++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp @@ -24,7 +24,7 @@ Avx512Accelrator::populationCount(const uint64_t *a, size_t sz) const { double Avx512Accelrator::squaredEuclideanDistance(const int8_t * a, const int8_t * b, size_t sz) const { - return helper::euclideanDistance(a, b, sz); + return 
helper::squaredEuclideanDistance(a, b, sz); } double diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp index a715d1b5758..13946fa3398 100644 --- a/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp +++ b/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp @@ -34,7 +34,7 @@ multiplyAdd(const T * a, const T * b, size_t sz) template <typename T, size_t UNROLL> double -euclideanDistanceT(const T * a, const T * b, size_t sz) +squaredEuclideanDistanceT(const T * a, const T * b, size_t sz) { T partial[UNROLL]; for (size_t i(0); i < UNROLL; i++) { @@ -43,11 +43,13 @@ euclideanDistanceT(const T * a, const T * b, size_t sz) size_t i(0); for (; i + UNROLL <= sz; i += UNROLL) { for (size_t j(0); j < UNROLL; j++) { - partial[j] += (a[i+j] - b[i+j]) * (a[i+j] - b[i+j]); + T d = a[i+j] - b[i+j]; + partial[j] += d * d; } } for (;i < sz; i++) { - partial[i%UNROLL] += (a[i] - b[i]) * (a[i] - b[i]); + T d = a[i] - b[i]; + partial[i%UNROLL] += d * d; } double sum(0); for (size_t j(0); j < UNROLL; j++) { @@ -157,17 +159,17 @@ GenericAccelrator::populationCount(const uint64_t *a, size_t sz) const { double GenericAccelrator::squaredEuclideanDistance(const int8_t * a, const int8_t * b, size_t sz) const { - return helper::euclideanDistance(a, b, sz); + return helper::squaredEuclideanDistance(a, b, sz); } double GenericAccelrator::squaredEuclideanDistance(const float * a, const float * b, size_t sz) const { - return euclideanDistanceT<float, 2>(a, b, sz); + return squaredEuclideanDistanceT<float, 2>(a, b, sz); } double GenericAccelrator::squaredEuclideanDistance(const double * a, const double * b, size_t sz) const { - return euclideanDistanceT<double, 2>(a, b, sz); + return squaredEuclideanDistanceT<double, 2>(a, b, sz); } void diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp b/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp index ffa1c2e5c49..3b063ce6805 100644 --- 
a/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp +++ b/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp @@ -75,9 +75,9 @@ orChunks(size_t offset, const std::vector<std::pair<const void *, bool>> & src, } template<typename TemporaryT=int32_t> -double euclideanDistanceT(const int8_t * a, const int8_t * b, size_t sz) __attribute__((noinline)); +double squaredEuclideanDistanceT(const int8_t * a, const int8_t * b, size_t sz) __attribute__((noinline)); template<typename TemporaryT> -double euclideanDistanceT(const int8_t * a, const int8_t * b, size_t sz) +double squaredEuclideanDistanceT(const int8_t * a, const int8_t * b, size_t sz) { //Note that this is 3 times faster with int32_t than with int64_t and 16x faster than float TemporaryT sum = 0; @@ -89,14 +89,14 @@ double euclideanDistanceT(const int8_t * a, const int8_t * b, size_t sz) } inline double -euclideanDistance(const int8_t * a, const int8_t * b, size_t sz) { +squaredEuclideanDistance(const int8_t * a, const int8_t * b, size_t sz) { constexpr size_t LOOP_COUNT = 0x10000; double sum(0); size_t i=0; for (; i + LOOP_COUNT <= sz; i += LOOP_COUNT) { - sum += euclideanDistanceT<int32_t>(a+i, b+i, LOOP_COUNT); + sum += squaredEuclideanDistanceT<int32_t>(a + i, b + i, LOOP_COUNT); } - sum += euclideanDistanceT<int32_t>(a+i, b+i, sz-i); + sum += squaredEuclideanDistanceT<int32_t>(a + i, b + i, sz - i); return sum; } |