From 0019d49940f96eb6410b809f9f6d383636cfcf50 Mon Sep 17 00:00:00 2001
From: Håvard Pettersen <havardpe@yahooinc.com>
Date: Fri, 13 May 2022 12:36:49 +0000
Subject: use more atomic read/write for bitvectors

---
 searchlib/src/vespa/searchlib/common/bitvector.cpp | 40 ++++++++++++----------
 searchlib/src/vespa/searchlib/common/bitvector.h   | 24 ++++++-------
 searchlib/src/vespa/searchlib/common/bitword.h     |  1 +
 3 files changed, 34 insertions(+), 31 deletions(-)

(limited to 'searchlib/src')

diff --git a/searchlib/src/vespa/searchlib/common/bitvector.cpp b/searchlib/src/vespa/searchlib/common/bitvector.cpp
index 2d7fda32037..03fe97cabbb 100644
--- a/searchlib/src/vespa/searchlib/common/bitvector.cpp
+++ b/searchlib/src/vespa/searchlib/common/bitvector.cpp
@@ -102,13 +102,13 @@ BitVector::clearIntervalNoInvalidation(Range range_in)
     Index endw = wordNum(last);
 
     if (endw > startw) {
-        store_word(startw, _words[startw] & startBits(range.start()));
+        store(_words[startw], _words[startw] & startBits(range.start()));
         for (Index i = startw + 1; i < endw; ++i) {
-            store_word(i, 0);
+            store(_words[i], 0);
         }
-        store_word(endw, _words[endw] & endBits(last));
+        store(_words[endw], _words[endw] & endBits(last));
     } else {
-        store_word(startw, _words[startw] & (startBits(range.start()) | endBits(last)));
+        store(_words[startw], _words[startw] & (startBits(range.start()) | endBits(last)));
     }
 }
 
@@ -123,11 +123,13 @@ BitVector::setInterval(Index start_in, Index end_in)
     Index endw = wordNum(last);
 
     if (endw > startw) {
-        _words[startw++] |= checkTab(range.start());
-        memset(_words + startw, 0xff, sizeof(*_words)*(endw-startw));
-        _words[endw] |= ~endBits(last);
+        store(_words[startw], _words[startw] | checkTab(range.start()));
+        for (Index i = startw + 1; i < endw; ++i) {
+            store(_words[i], allBits());
+        }
+        store(_words[endw], _words[endw] | ~endBits(last));
     } else {
-        _words[startw] |= ~(startBits(range.start()) | endBits(last));
+        store(_words[startw], _words[startw] | ~(startBits(range.start()) | endBits(last)));
     }
 
     invalidateCachedCount();
@@ -152,17 +154,17 @@ BitVector::countInterval(Range range_in) const
     Word *bitValues = _words;
 
     if (startw == endw) {
-        return Optimized::popCount(bitValues[startw] & ~(startBits(range.start()) | endBits(last)));
+        return Optimized::popCount(load(bitValues[startw]) & ~(startBits(range.start()) | endBits(last)));
     }
     Index res = 0;
     // Limit to full words
     if ((range.start() & (WordLen - 1)) != 0) {
-        res += Optimized::popCount(bitValues[startw] & ~startBits(range.start()));
+        res += Optimized::popCount(load(bitValues[startw]) & ~startBits(range.start()));
         ++startw;
     }
     // Align start to 16 bytes
     while (startw < endw && (startw & 3) != 0) {
-        res += Optimized::popCount(bitValues[startw]);
+        res += Optimized::popCount(load(bitValues[startw]));
         ++startw;
     }
     bool partialEnd = (last & (WordLen - 1)) != (WordLen - 1);
@@ -173,7 +175,7 @@ BitVector::countInterval(Range range_in) const
         res += IAccelrated::getAccelerator().populationCount(bitValues + startw, endw - startw);
     }
     if (partialEnd) {
-        res += Optimized::popCount(bitValues[endw] & ~endBits(last));
+        res += Optimized::popCount(load(bitValues[endw]) & ~endBits(last));
     }
 
     return res;
@@ -191,7 +193,7 @@ BitVector::orWith(const BitVector & right)
                 IAccelrated::getAccelerator().orBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes);
             }
             Index last(right.size() - 1);
-            getWordIndex(last)[0] |= (right.getWordIndex(last)[0] & ~endBits(last));
+            store(getWordIndex(last)[0], getWordIndex(last)[0] | (load(right.getWordIndex(last)[0]) & ~endBits(last)));
         }
     } else {
         IAccelrated::getAccelerator().orBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes());
@@ -206,8 +208,8 @@ BitVector::repairEnds()
     if (size() != 0) {
         Index start(getStartIndex());
         Index last(size() - 1);
-        getWordIndex(start)[0] &= ~startBits(start);
-        getWordIndex(last)[0] &= ~endBits(last);
+        store(getWordIndex(start)[0], getWordIndex(start)[0] & ~startBits(start));
+        store(getWordIndex(last)[0], getWordIndex(last)[0] & ~endBits(last));
     }
     setGuardBit();
 }
@@ -241,7 +243,7 @@ BitVector::andNotWith(const BitVector& right)
                 IAccelrated::getAccelerator().andNotBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes);
             }
             Index last(right.size() - 1);
-            getWordIndex(last)[0] &= ~(right.getWordIndex(last)[0] & ~endBits(last));
+            store(getWordIndex(last)[0], getWordIndex(last)[0] & ~(load(right.getWordIndex(last)[0]) & ~endBits(last)));
         }
     } else {
         IAccelrated::getAccelerator().andNotBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes());
@@ -269,7 +271,7 @@ BitVector::operator==(const BitVector &rhs) const
     const Word *words = getActiveStart();
     const Word *oWords = rhs.getActiveStart();
     for (Index i = 0; i < bitVectorSize; i++) {
-        if (words[i] != oWords[i]) {
+        if (load(words[i]) != load(oWords[i])) {
             return false;
         }
     }
@@ -282,13 +284,13 @@ BitVector::hasTrueBitsInternal() const
     Index bitVectorSizeL1(numActiveWords() - 1);
     const Word *words(getActiveStart());
     for (Index i = 0; i < bitVectorSizeL1; i++) {
-        if (words[i] != 0) {
+        if (load(words[i]) != 0) {
             return true;
         }
     }
 
     // Ignore guard bit.
-    if ((words[bitVectorSizeL1] & ~mask(size())) != 0)
+    if ((load(words[bitVectorSizeL1]) & ~mask(size())) != 0)
         return true;
 
     return false;
diff --git a/searchlib/src/vespa/searchlib/common/bitvector.h b/searchlib/src/vespa/searchlib/common/bitvector.h
index 3f11e2a257e..912c4a47c39 100644
--- a/searchlib/src/vespa/searchlib/common/bitvector.h
+++ b/searchlib/src/vespa/searchlib/common/bitvector.h
@@ -42,10 +42,8 @@ public:
     void * getStart() { return _words; }
     Index size() const { return vespalib::atomic::load_ref_relaxed(_sz); }
     Index sizeBytes() const { return numBytes(getActiveSize()); }
-    Word load_word(Index widx) const { return vespalib::atomic::load_ref_relaxed(_words[widx]); }
-    void store_word(Index widx, Word word) { return vespalib::atomic::store_ref_relaxed(_words[widx], word); }
     bool testBit(Index idx) const {
-        return ((load_word(wordNum(idx)) & mask(idx)) != 0);
+        return ((load(_words[wordNum(idx)]) & mask(idx)) != 0);
     }
     Index getSizeAcquire() const {
         return vespalib::atomic::load_ref_acquire(_sz);
@@ -123,10 +121,10 @@ public:
     Index getPrevTrueBit(Index start) const {
         Index index(wordNum(start));
         const Word *words(_words);
-        Word t(words[index] & ~endBits(start));
+        Word t(load(words[index]) & ~endBits(start));
 
         while(t == 0 && index > getStartWordNum()) {
-           t = words[--index];
+            t = load(words[--index]);
         }
 
         return (t != 0)
@@ -144,13 +142,13 @@ public:
         vespalib::atomic::store_ref_release(_sz, sz);
     }
     void setBit(Index idx) {
-        store_word(wordNum(idx), _words[wordNum(idx)] | mask(idx));
+        store(_words[wordNum(idx)], _words[wordNum(idx)] | mask(idx));
     }
     void clearBit(Index idx) {
-        store_word(wordNum(idx), _words[wordNum(idx)] & ~ mask(idx));
+        store(_words[wordNum(idx)], _words[wordNum(idx)] & ~ mask(idx));
     }
     void flipBit(Index idx) {
-        _words[wordNum(idx)] ^= mask(idx);
+        store(_words[wordNum(idx)], _words[wordNum(idx)] ^ mask(idx));
     }
 
     void andWith(const BitVector &right);
@@ -277,6 +275,8 @@ protected:
     static Alloc allocatePaddedAndAligned(Index start, Index end, Index capacity, const Alloc* init_alloc = nullptr);
 
 private:
+    Word load(const Word &word) const { return vespalib::atomic::load_ref_relaxed(word); }
+    void store(Word &word, Word value) { return vespalib::atomic::store_ref_relaxed(word, value); }
     friend PartialBitVector;
     const Word * getWordIndex(Index index) const { return static_cast<const Word *>(getStart()) + wordNum(index); }
     Word * getWordIndex(Index index) { return static_cast<Word *>(getStart()) + wordNum(index); }
@@ -319,8 +319,8 @@ private:
 
         Index index(wordNum(start));
         Index lastIndex(wordNum(last));
-        Word word(conv(_words[index]) & checkTab(start));
-        for ( ; index < lastIndex; word = conv(_words[++index])) {
+        Word word(conv(load(_words[index])) & checkTab(start));
+        for ( ; index < lastIndex; word = conv(load(_words[++index]))) {
             foreach_bit(func, word, index << numWordBits());
         }
         foreach_bit(func, word & ~endBits(last), lastIndex << numWordBits());
@@ -329,14 +329,14 @@ private:
     Index getNextBit(WordConverter conv, Index start) const {
         Index index(wordNum(start));
         const Word *words(_words);
-        Word t(conv(words[index]) & checkTab(start));
+        Word t(conv(load(words[index])) & checkTab(start));
 
         // In order to avoid a test an extra guard bit is added
         // after the bitvector as a termination.
         // Also bitvector will normally at least 1 bit set per 32 bits.
         // So that is what we should expect.
         while (__builtin_expect(t == 0, false)) {
-            t = conv(words[++index]);
+            t = conv(load(words[++index]));
         }
 
         return (index << numWordBits()) + vespalib::Optimized::lsbIdx(t);
diff --git a/searchlib/src/vespa/searchlib/common/bitword.h b/searchlib/src/vespa/searchlib/common/bitword.h
index 96665d47c93..d90ec9c93d3 100644
--- a/searchlib/src/vespa/searchlib/common/bitword.h
+++ b/searchlib/src/vespa/searchlib/common/bitword.h
@@ -17,6 +17,7 @@ public:
     static constexpr size_t WordLen = sizeof(Word)*8;
     static uint8_t bitNum(Index idx) { return (idx % WordLen); }
     static Word endBits(Index index) { return (std::numeric_limits<Word>::max() - 1) << bitNum(index); }
+    static Word allBits() { return std::numeric_limits<Word>::max(); }
     static Index wordNum(Index idx) { return idx >> numWordBits(); }
     static Word mask(Index idx) { return Word(1) << bitNum(idx); }
     static constexpr uint8_t size_bits(uint8_t n) { return (n > 1) ? (1 + size_bits(n >> 1)) : 0; }
-- 
cgit v1.2.3