diff options
author | Håvard Pettersen <havardpe@gmail.com> | 2017-02-23 12:44:41 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-02-23 12:44:41 +0100 |
commit | c12d2c78f24f03b97c3b0b8c401356af1fd3c89c (patch) | |
tree | 4b49055d617e790cfdbbbf8a8820947d978d8d73 /vespalib | |
parent | 2a00e8ecc8f740345a9c765c95eaaccfd8cc39a7 (diff) | |
parent | 11fa9899e06c613e4f0f2415690437cf6060c52c (diff) |
Merge pull request #1846 from yahoo/balder/unroll-bitwise-operations
Unroll 8 elements
Diffstat (limited to 'vespalib')
-rw-r--r-- | vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp | 62 |
1 files changed, 29 insertions, 33 deletions
diff --git a/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp b/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
index a624c057943..127553ffc91 100644
--- a/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
+++ b/vespalib/src/vespa/vespalib/hwaccelrated/generic.cpp
@@ -32,6 +32,32 @@ multiplyAdd(const T * a, const T * b, size_t sz)
     return sum;
 }
 
+template<size_t UNROLL, typename Operation>
+void
+bitOperation(Operation operation, void * aOrg, const void * bOrg, size_t bytes) {
+
+    const size_t sz(bytes/sizeof(uint64_t));
+    {
+        uint64_t *a(static_cast<uint64_t *>(aOrg));
+        const uint64_t *b(static_cast<const uint64_t *>(bOrg));
+        size_t i(0);
+        for (; i + UNROLL <= sz; i += UNROLL) {
+            for (size_t j(0); j < UNROLL; j++) {
+                a[i + j] = operation(a[i + j], b[i + j]);
+            }
+        }
+        for (; i < sz; i++) {
+            a[i] = operation(a[i], b[i]);
+        }
+    }
+
+    uint8_t *a(static_cast<uint8_t *>(aOrg));
+    const uint8_t *b(static_cast<const uint8_t *>(bOrg));
+    for (size_t i(sz*sizeof(uint64_t)); i < bytes; i++) {
+        a[i] = operation(a[i], b[i]);
+    }
+}
+
 }
 
 float
@@ -61,48 +87,18 @@ GenericAccelrator::dotProduct(const int64_t * a, const int64_t * b, size_t sz) c
 void
 GenericAccelrator::orBit(void * aOrg, const void * bOrg, size_t bytes) const
 {
-    uint64_t *a(static_cast<uint64_t *>(aOrg));
-    const uint64_t *b(static_cast<const uint64_t *>(bOrg));
-    const size_t sz(bytes/sizeof(uint64_t));
-    for (size_t i(0); i < sz; i++) {
-        a[i] |= b[i];
-    }
-    uint8_t *ac(static_cast<uint8_t *>(aOrg));
-    const uint8_t *bc(static_cast<const uint8_t *>(bOrg));
-    for (size_t i(sz*sizeof(uint64_t)); i < bytes; i++) {
-        ac[i] |= bc[i];
-    }
+    bitOperation<8>([](uint64_t a, uint64_t b) { return a | b; }, aOrg, bOrg, bytes);
 }
 
 void
 GenericAccelrator::andBit(void * aOrg, const void * bOrg, size_t bytes) const
 {
-    uint64_t *a(static_cast<uint64_t *>(aOrg));
-    const uint64_t *b(static_cast<const uint64_t *>(bOrg));
-    const size_t sz(bytes/sizeof(uint64_t));
-    for (size_t i(0); i < sz; i++) {
-        a[i] &= b[i];
-    }
-    uint8_t *ac(static_cast<uint8_t *>(aOrg));
-    const uint8_t *bc(static_cast<const uint8_t *>(bOrg));
-    for (size_t i(sz*sizeof(uint64_t)); i < bytes; i++) {
-        ac[i] &= bc[i];
-    }
+    bitOperation<8>([](uint64_t a, uint64_t b) { return a & b; }, aOrg, bOrg, bytes);
 }
 
 void
 GenericAccelrator::andNotBit(void * aOrg, const void * bOrg, size_t bytes) const
 {
-    uint64_t *a(static_cast<uint64_t *>(aOrg));
-    const uint64_t *b(static_cast<const uint64_t *>(bOrg));
-    const size_t sz(bytes/sizeof(uint64_t));
-    for (size_t i(0); i < sz; i++) {
-        a[i] &= ~b[i];
-    }
-    uint8_t *ac(static_cast<uint8_t *>(aOrg));
-    const uint8_t *bc(static_cast<const uint8_t *>(bOrg));
-    for (size_t i(sz*sizeof(uint64_t)); i < bytes; i++) {
-        ac[i] &= ~bc[i];
-    }
+    bitOperation<8>([](uint64_t a, uint64_t b) { return a & ~b; }, aOrg, bOrg, bytes);
 }
 
 void