diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-12-15 14:54:17 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-15 14:54:17 +0100 |
commit | b8ba7d82cd08462a9a48b5acbd03c6869be9a9a3 (patch) | |
tree | 6a2eb867fc201e1505beac128befac70d61707ff /vespalib/src | |
parent | 1bbf3077f25dc7c20e3d2d5c1e47f035e1ec94b4 (diff) | |
parent | a5ed125d867cd13237191c20f7b84f24749e912e (diff) |
Merge pull request #29663 from vespa-engine/balder/separate-hot-cold-path-tomake-fast-path-faster
Balder/separate hot cold path to make fast path faster
Diffstat (limited to 'vespalib/src')
9 files changed, 32 insertions, 32 deletions
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp index bbba4109fc2..c6a9cc7ae9e 100644 --- a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp +++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp @@ -26,13 +26,13 @@ Avx2Accelrator::squaredEuclideanDistance(const double * a, const double * b, siz } void -Avx2Accelrator::and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept { - helper::andChunks<32u, 2u>(offset, src, dest); +Avx2Accelrator::and256(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept { + helper::andChunks<32u, 8u>(offset, src, dest); } void -Avx2Accelrator::or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept { - helper::orChunks<32u, 2u>(offset, src, dest); +Avx2Accelrator::or256(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept { + helper::orChunks<32u, 8u>(offset, src, dest); } } diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h index 934d815d67b..61ca1573601 100644 --- a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h +++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h @@ -16,8 +16,8 @@ public: double squaredEuclideanDistance(const int8_t * a, const int8_t * b, size_t sz) const noexcept override; double squaredEuclideanDistance(const float * a, const float * b, size_t sz) const noexcept override; double squaredEuclideanDistance(const double * a, const double * b, size_t sz) const noexcept override; - void and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept override; - void or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept override; + void and256(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const 
noexcept override; + void or256(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept override; }; } diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp index 035f33cb25e..5b3a7deb564 100644 --- a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp +++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp @@ -36,13 +36,13 @@ Avx512Accelrator::squaredEuclideanDistance(const double * a, const double * b, s } void -Avx512Accelrator::and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept { - helper::andChunks<64, 1>(offset, src, dest); +Avx512Accelrator::and256(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept { + helper::andChunks<64, 4>(offset, src, dest); } void -Avx512Accelrator::or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept { - helper::orChunks<64, 1>(offset, src, dest); +Avx512Accelrator::or256(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept { + helper::orChunks<64, 4>(offset, src, dest); } } diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h index 38eab0a2549..fbfdd021619 100644 --- a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h +++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h @@ -18,8 +18,8 @@ public: double squaredEuclideanDistance(const int8_t * a, const int8_t * b, size_t sz) const noexcept override; double squaredEuclideanDistance(const float * a, const float * b, size_t sz) const noexcept override; double squaredEuclideanDistance(const double * a, const double * b, size_t sz) const noexcept override; - void and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept override; - void or64(size_t offset, const std::vector<std::pair<const 
void *, bool>> &src, void *dest) const noexcept override; + void and256(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept override; + void or256(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept override; }; } diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp index a8e5535cc21..b6b8436a389 100644 --- a/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp +++ b/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp @@ -173,13 +173,13 @@ GenericAccelrator::squaredEuclideanDistance(const double * a, const double * b, } void -GenericAccelrator::and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept { - helper::andChunks<16, 4>(offset, src, dest); +GenericAccelrator::and256(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept { + helper::andChunks<16, 16>(offset, src, dest); } void -GenericAccelrator::or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept { - helper::orChunks<16,4>(offset, src, dest); +GenericAccelrator::or256(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept { + helper::orChunks<16, 16>(offset, src, dest); } } diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/generic.h b/vespalib/src/vespa/vespalib/hwaccelrated/generic.h index 16c8bab71da..5cbabc3de53 100644 --- a/vespalib/src/vespa/vespalib/hwaccelrated/generic.h +++ b/vespalib/src/vespa/vespalib/hwaccelrated/generic.h @@ -26,8 +26,8 @@ public: double squaredEuclideanDistance(const int8_t * a, const int8_t * b, size_t sz) const noexcept override; double squaredEuclideanDistance(const float * a, const float * b, size_t sz) const noexcept override; double squaredEuclideanDistance(const double * a, const double * b, size_t sz) const noexcept override; - void 
and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept override; - void or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept override; + void and256(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept override; + void or256(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept override; }; } diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp index d707553b504..77d168a2c5d 100644 --- a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp +++ b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp @@ -153,8 +153,8 @@ verifyOr64(const IAccelrated & accel, const std::vector<std::vector<uint64_t>> & simpleOrWith(expected, optionallyInvert(vRefs[j].second, vectors[j])); } - uint64_t dest[8] __attribute((aligned(64))); - accel.or64(offset*sizeof(uint64_t), vRefs, dest); + uint64_t dest[32] __attribute((aligned(64))); + accel.or256(offset * sizeof(uint64_t), vRefs, dest); int diff = memcmp(&expected[offset], dest, sizeof(dest)); if (diff != 0) { LOG_ABORT("Accelerator fails to compute correct 64 bytes OR"); @@ -174,8 +174,8 @@ verifyAnd64(const IAccelrated & accel, const std::vector<std::vector<uint64_t>> simpleAndWith(expected, optionallyInvert(vRefs[j].second, vectors[j])); } - uint64_t dest[8] __attribute((aligned(64))); - accel.and64(offset*sizeof(uint64_t), vRefs, dest); + uint64_t dest[32] __attribute((aligned(64))); + accel.and256(offset * sizeof(uint64_t), vRefs, dest); int diff = memcmp(&expected[offset], dest, sizeof(dest)); if (diff != 0) { LOG_ABORT("Accelerator fails to compute correct 64 bytes AND"); @@ -186,9 +186,9 @@ void verifyOr64(const IAccelrated & accel) { std::vector<std::vector<uint64_t>> vectors(3) ; for (auto & v : vectors) { - fill(v, 16); + fill(v, 64); } - for (size_t offset = 0; 
offset < 8; offset++) { + for (size_t offset = 0; offset < 32; offset++) { for (size_t i = 1; i < vectors.size(); i++) { verifyOr64(accel, vectors, offset, i, false); verifyOr64(accel, vectors, offset, i, true); @@ -200,9 +200,9 @@ void verifyAnd64(const IAccelrated & accel) { std::vector<std::vector<uint64_t>> vectors(3); for (auto & v : vectors) { - fill(v, 16); + fill(v, 64); } - for (size_t offset = 0; offset < 8; offset++) { + for (size_t offset = 0; offset < 32; offset++) { for (size_t i = 1; i < vectors.size(); i++) { verifyAnd64(accel, vectors, offset, i, false); verifyAnd64(accel, vectors, offset, i, true); diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h index 806e77caced..e6bd86957db 100644 --- a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h +++ b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h @@ -31,10 +31,10 @@ public: virtual double squaredEuclideanDistance(const int8_t * a, const int8_t * b, size_t sz) const noexcept = 0; virtual double squaredEuclideanDistance(const float * a, const float * b, size_t sz) const noexcept = 0; virtual double squaredEuclideanDistance(const double * a, const double * b, size_t sz) const noexcept = 0; - // AND 64 bytes from multiple, optionally inverted sources - virtual void and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept = 0; - // OR 64 bytes from multiple, optionally inverted sources - virtual void or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept = 0; + // AND 256 bytes from multiple, optionally inverted sources + virtual void and256(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept = 0; + // OR 256 bytes from multiple, optionally inverted sources + virtual void or256(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept = 0; static 
const IAccelrated & getAccelerator() __attribute__((noinline)); }; diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp b/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp index c884f0d7bb9..3185d6e77cd 100644 --- a/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp +++ b/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp @@ -43,7 +43,7 @@ void andChunks(size_t offset, const std::vector<std::pair<const void *, bool>> & src, void * dest) { typedef uint64_t Chunk __attribute__ ((vector_size (ChunkSize))); static_assert(sizeof(Chunk) == ChunkSize, "sizeof(Chunk) == ChunkSize"); - static_assert(ChunkSize*Chunks == 64, "ChunkSize*Chunks == 64"); + static_assert(ChunkSize*Chunks == 256, "ChunkSize*Chunks == 256"); Chunk * chunk = static_cast<Chunk *>(dest); const Chunk * tmp = cast<Chunk, ChunkSize>(src[0].first, offset); for (size_t n=0; n < Chunks; n++) { @@ -62,7 +62,7 @@ void orChunks(size_t offset, const std::vector<std::pair<const void *, bool>> & src, void * dest) { typedef uint64_t Chunk __attribute__ ((vector_size (ChunkSize))); static_assert(sizeof(Chunk) == ChunkSize, "sizeof(Chunk) == ChunkSize"); - static_assert(ChunkSize*Chunks == 64, "ChunkSize*Chunks == 64"); + static_assert(ChunkSize*Chunks == 256, "ChunkSize*Chunks == 256"); Chunk * chunk = static_cast<Chunk *>(dest); const Chunk * tmp = cast<Chunk, ChunkSize>(src[0].first, offset); for (size_t n=0; n < Chunks; n++) { |