Merge pull request #29712 from vespa-engine/balder/increase-chunk-size-512-to-1024-bits

Precompute 1024 bits (128 bytes) per batch: 2 cache lines on Intel, and 1 for ar… [subject line truncated in the original]
author: Henning Baldersheim <balder@yahoo-inc.com> 2023-12-19 17:01:17 +0100
committer: GitHub <noreply@github.com> 2023-12-19 17:01:17 +0100
commit: 094d0dbd07127b6094ca8fbcc63d1ac313cafd30 (patch)
tree: 24401c20432e7bf0584b6f45cc89ea91e0006d3e /searchlib/src
parent: ba31b27efd446eef7222e22d844af7b2225c797b (diff)
parent: 10a8162be6cae6a30be24ac7c8ca2e5adc62f4f7 (diff)
3 files changed, 6 insertions, 7 deletions
diff --git a/searchlib/src/vespa/searchlib/common/bitvector.cpp b/searchlib/src/vespa/searchlib/common/bitvector.cpp
index b79703a8e5c..a75066a67a9 100644
--- a/searchlib/src/vespa/searchlib/common/bitvector.cpp
+++ b/searchlib/src/vespa/searchlib/common/bitvector.cpp
@@ -39,7 +39,7 @@ BitVector::allocatePaddedAndAligned(Index start, Index end, Index capacity, cons
 {
     assert(capacity >= end);
     uint32_t words = numActiveWords(start, capacity);
-    words += (-words & 15); // Pad to 64 byte alignment
+    words += (-words & 15); // Pad to 128 byte alignment
     const size_t sz(words * sizeof(Word));
     Alloc alloc = (init_alloc != nullptr) ? init_alloc->create(sz) : Alloc::alloc(sz, MMAP_LIMIT);
     assert(alloc.size()/sizeof(Word) >= words);
diff --git a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp
index 66f505581c7..e90156868fb 100644
--- a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp
@@ -4,7 +4,6 @@
 #include "andsearch.h"
 #include "andnotsearch.h"
 #include "sourceblendersearch.h"
-#include <vespa/searchlib/common/bitvectoriterator.h>
 #include <vespa/vespalib/hwaccelrated/iaccelrated.h>
 
 namespace search::queryeval {
@@ -18,7 +17,7 @@ namespace {
 struct And {
     using Word = BitWord::Word;
     void operator () (const IAccelrated & accel, size_t offset, const std::vector<Meta> & src, void *dest) noexcept {
-        accel.and64(offset, src, dest);
+        accel.and128(offset, src, dest);
     }
     static constexpr bool isAnd() noexcept { return true; }
 };
@@ -26,7 +25,7 @@ struct And {
 struct Or {
     using Word = BitWord::Word;
     void operator () (const IAccelrated & accel, size_t offset, const std::vector<Meta> & src, void *dest) noexcept {
-        accel.or64(offset, src, dest);
+        accel.or128(offset, src, dest);
     }
     static constexpr bool isAnd() noexcept { return false; }
 };
@@ -56,8 +55,8 @@ MultiBitVector<Update>::MultiBitVector(size_t reserved)
       _accel(IAccelrated::getAccelerator()),
       _lastWords()
 {
-    static_assert(sizeof(_lastWords) == 64, "Lastwords should have 64 byte size");
-    static_assert(NumWordsInBatch == 8, "Batch size should be 8 words.");
+    static_assert(sizeof(_lastWords) == 128, "Lastwords should have 128 byte size");
+    static_assert(NumWordsInBatch == 16, "Batch size should be 16 words.");
     memset(_lastWords, 0, sizeof(_lastWords));
 }
 
diff --git a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h
index 0d9e2c4f25f..0ecf9d85b92 100644
--- a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h
+++ b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h
@@ -50,7 +50,7 @@ private:
 
     Update              _update;
     const IAccelrated & _accel;
-    alignas(64) Word    _lastWords[8];
+    alignas(64) Word    _lastWords[16];
     static constexpr size_t NumWordsInBatch = sizeof(_lastWords) / sizeof(Word);
 };
author	Henning Baldersheim <balder@yahoo-inc.com>	2023-12-19 17:01:17 +0100
committer	GitHub <noreply@github.com>	2023-12-19 17:01:17 +0100
commit	094d0dbd07127b6094ca8fbcc63d1ac313cafd30 (patch)
tree	24401c20432e7bf0584b6f45cc89ea91e0006d3e /searchlib/src
parent	ba31b27efd446eef7222e22d844af7b2225c797b (diff)
parent	10a8162be6cae6a30be24ac7c8ca2e5adc62f4f7 (diff)