summary | refs | log | tree | commit | diff | stats
path: root/vespalib
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2023-12-15 16:46:06 +0100
committer Henning Baldersheim <balder@yahoo-inc.com> 2023-12-19 14:52:20 +0000
commit 10a8162be6cae6a30be24ac7c8ca2e5adc62f4f7 (patch)
tree e6226915ae8ec752283034c9443ca53475ad4ef0 /vespalib
parent bfc58b6dfd268d9abd1bd635a9ec3c4cc0009035 (diff)
Precompute 1024 bits, 128 bytes, 2 cachelines for intel, and 1 for arm64.
Diffstat (limited to 'vespalib')
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp 8
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/avx2.h 4
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp 8
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/avx512.h 4
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp 8
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/generic.h 4
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp 20
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h 8
-rw-r--r-- vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp 4
9 files changed, 34 insertions, 34 deletions
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
index bbba4109fc2..66441b3c08b 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.cpp
@@ -26,13 +26,13 @@ Avx2Accelrator::squaredEuclideanDistance(const double * a, const double * b, siz
}
void
-Avx2Accelrator::and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept {
- helper::andChunks<32u, 2u>(offset, src, dest);
+Avx2Accelrator::and128(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept {
+ helper::andChunks<32u, 4u>(offset, src, dest);
}
void
-Avx2Accelrator::or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept {
- helper::orChunks<32u, 2u>(offset, src, dest);
+Avx2Accelrator::or128(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept {
+ helper::orChunks<32u, 4u>(offset, src, dest);
}
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h
index 934d815d67b..af46035666c 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx2.h
@@ -16,8 +16,8 @@ public:
double squaredEuclideanDistance(const int8_t * a, const int8_t * b, size_t sz) const noexcept override;
double squaredEuclideanDistance(const float * a, const float * b, size_t sz) const noexcept override;
double squaredEuclideanDistance(const double * a, const double * b, size_t sz) const noexcept override;
- void and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept override;
- void or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept override;
+ void and128(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept override;
+ void or128(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept override;
};
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp
index 035f33cb25e..5f408c05fef 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.cpp
@@ -36,13 +36,13 @@ Avx512Accelrator::squaredEuclideanDistance(const double * a, const double * b, s
}
void
-Avx512Accelrator::and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept {
- helper::andChunks<64, 1>(offset, src, dest);
+Avx512Accelrator::and128(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept {
+ helper::andChunks<64, 2>(offset, src, dest);
}
void
-Avx512Accelrator::or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept {
- helper::orChunks<64, 1>(offset, src, dest);
+Avx512Accelrator::or128(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept {
+ helper::orChunks<64, 2>(offset, src, dest);
}
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h
index 38eab0a2549..a86a2787d5a 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/avx512.h
@@ -18,8 +18,8 @@ public:
double squaredEuclideanDistance(const int8_t * a, const int8_t * b, size_t sz) const noexcept override;
double squaredEuclideanDistance(const float * a, const float * b, size_t sz) const noexcept override;
double squaredEuclideanDistance(const double * a, const double * b, size_t sz) const noexcept override;
- void and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept override;
- void or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept override;
+ void and128(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept override;
+ void or128(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept override;
};
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
index a8e5535cc21..f0112aaddf7 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
@@ -173,13 +173,13 @@ GenericAccelrator::squaredEuclideanDistance(const double * a, const double * b,
}
void
-GenericAccelrator::and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept {
- helper::andChunks<16, 4>(offset, src, dest);
+GenericAccelrator::and128(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept {
+ helper::andChunks<16, 8>(offset, src, dest);
}
void
-GenericAccelrator::or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept {
- helper::orChunks<16,4>(offset, src, dest);
+GenericAccelrator::or128(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept {
+ helper::orChunks<16, 8>(offset, src, dest);
}
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/generic.h b/vespalib/src/vespa/vespalib/hwaccelrated/generic.h
index 16c8bab71da..ba986656635 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/generic.h
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/generic.h
@@ -26,8 +26,8 @@ public:
double squaredEuclideanDistance(const int8_t * a, const int8_t * b, size_t sz) const noexcept override;
double squaredEuclideanDistance(const float * a, const float * b, size_t sz) const noexcept override;
double squaredEuclideanDistance(const double * a, const double * b, size_t sz) const noexcept override;
- void and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept override;
- void or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept override;
+ void and128(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept override;
+ void or128(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept override;
};
}
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp
index d707553b504..a02e9545765 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.cpp
@@ -153,11 +153,11 @@ verifyOr64(const IAccelrated & accel, const std::vector<std::vector<uint64_t>> &
simpleOrWith(expected, optionallyInvert(vRefs[j].second, vectors[j]));
}
- uint64_t dest[8] __attribute((aligned(64)));
- accel.or64(offset*sizeof(uint64_t), vRefs, dest);
+ uint64_t dest[16] __attribute((aligned(64)));
+ accel.or128(offset * sizeof(uint64_t), vRefs, dest);
int diff = memcmp(&expected[offset], dest, sizeof(dest));
if (diff != 0) {
- LOG_ABORT("Accelerator fails to compute correct 64 bytes OR");
+ LOG_ABORT("Accelerator fails to compute correct 128 bytes OR");
}
}
@@ -174,11 +174,11 @@ verifyAnd64(const IAccelrated & accel, const std::vector<std::vector<uint64_t>>
simpleAndWith(expected, optionallyInvert(vRefs[j].second, vectors[j]));
}
- uint64_t dest[8] __attribute((aligned(64)));
- accel.and64(offset*sizeof(uint64_t), vRefs, dest);
+ uint64_t dest[16] __attribute((aligned(64)));
+ accel.and128(offset * sizeof(uint64_t), vRefs, dest);
int diff = memcmp(&expected[offset], dest, sizeof(dest));
if (diff != 0) {
- LOG_ABORT("Accelerator fails to compute correct 64 bytes AND");
+ LOG_ABORT("Accelerator fails to compute correct 128 bytes AND");
}
}
@@ -186,9 +186,9 @@ void
verifyOr64(const IAccelrated & accel) {
std::vector<std::vector<uint64_t>> vectors(3) ;
for (auto & v : vectors) {
- fill(v, 16);
+ fill(v, 32);
}
- for (size_t offset = 0; offset < 8; offset++) {
+ for (size_t offset = 0; offset < 16; offset++) {
for (size_t i = 1; i < vectors.size(); i++) {
verifyOr64(accel, vectors, offset, i, false);
verifyOr64(accel, vectors, offset, i, true);
@@ -200,9 +200,9 @@ void
verifyAnd64(const IAccelrated & accel) {
std::vector<std::vector<uint64_t>> vectors(3);
for (auto & v : vectors) {
- fill(v, 16);
+ fill(v, 32);
}
- for (size_t offset = 0; offset < 8; offset++) {
+ for (size_t offset = 0; offset < 16; offset++) {
for (size_t i = 1; i < vectors.size(); i++) {
verifyAnd64(accel, vectors, offset, i, false);
verifyAnd64(accel, vectors, offset, i, true);
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h
index 806e77caced..f070f206b7e 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/iaccelrated.h
@@ -31,10 +31,10 @@ public:
virtual double squaredEuclideanDistance(const int8_t * a, const int8_t * b, size_t sz) const noexcept = 0;
virtual double squaredEuclideanDistance(const float * a, const float * b, size_t sz) const noexcept = 0;
virtual double squaredEuclideanDistance(const double * a, const double * b, size_t sz) const noexcept = 0;
- // AND 64 bytes from multiple, optionally inverted sources
- virtual void and64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept = 0;
- // OR 64 bytes from multiple, optionally inverted sources
- virtual void or64(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept = 0;
+ // AND 128 bytes from multiple, optionally inverted sources
+ virtual void and128(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept = 0;
+ // OR 128 bytes from multiple, optionally inverted sources
+ virtual void or128(size_t offset, const std::vector<std::pair<const void *, bool>> &src, void *dest) const noexcept = 0;
static const IAccelrated & getAccelerator() __attribute__((noinline));
};
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp b/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp
index c884f0d7bb9..6731b449462 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/private_helpers.hpp
@@ -43,7 +43,7 @@ void
andChunks(size_t offset, const std::vector<std::pair<const void *, bool>> & src, void * dest) {
typedef uint64_t Chunk __attribute__ ((vector_size (ChunkSize)));
static_assert(sizeof(Chunk) == ChunkSize, "sizeof(Chunk) == ChunkSize");
- static_assert(ChunkSize*Chunks == 64, "ChunkSize*Chunks == 64");
+ static_assert(ChunkSize*Chunks == 128, "ChunkSize*Chunks == 128");
Chunk * chunk = static_cast<Chunk *>(dest);
const Chunk * tmp = cast<Chunk, ChunkSize>(src[0].first, offset);
for (size_t n=0; n < Chunks; n++) {
@@ -62,7 +62,7 @@ void
orChunks(size_t offset, const std::vector<std::pair<const void *, bool>> & src, void * dest) {
typedef uint64_t Chunk __attribute__ ((vector_size (ChunkSize)));
static_assert(sizeof(Chunk) == ChunkSize, "sizeof(Chunk) == ChunkSize");
- static_assert(ChunkSize*Chunks == 64, "ChunkSize*Chunks == 64");
+ static_assert(ChunkSize*Chunks == 128, "ChunkSize*Chunks == 128");
Chunk * chunk = static_cast<Chunk *>(dest);
const Chunk * tmp = cast<Chunk, ChunkSize>(src[0].first, offset);
for (size_t n=0; n < Chunks; n++) {