From 0019d49940f96eb6410b809f9f6d383636cfcf50 Mon Sep 17 00:00:00 2001 From: HÃ¥vard Pettersen Date: Fri, 13 May 2022 12:36:49 +0000 Subject: use more atomic read/write for bitvectors --- searchlib/src/vespa/searchlib/common/bitvector.cpp | 40 ++++++++++++---------- searchlib/src/vespa/searchlib/common/bitvector.h | 24 ++++++------- searchlib/src/vespa/searchlib/common/bitword.h | 1 + 3 files changed, 34 insertions(+), 31 deletions(-) (limited to 'searchlib/src') diff --git a/searchlib/src/vespa/searchlib/common/bitvector.cpp b/searchlib/src/vespa/searchlib/common/bitvector.cpp index 2d7fda32037..03fe97cabbb 100644 --- a/searchlib/src/vespa/searchlib/common/bitvector.cpp +++ b/searchlib/src/vespa/searchlib/common/bitvector.cpp @@ -102,13 +102,13 @@ BitVector::clearIntervalNoInvalidation(Range range_in) Index endw = wordNum(last); if (endw > startw) { - store_word(startw, _words[startw] & startBits(range.start())); + store(_words[startw], _words[startw] & startBits(range.start())); for (Index i = startw + 1; i < endw; ++i) { - store_word(i, 0); + store(_words[i], 0); } - store_word(endw, _words[endw] & endBits(last)); + store(_words[endw], _words[endw] & endBits(last)); } else { - store_word(startw, _words[startw] & (startBits(range.start()) | endBits(last))); + store(_words[startw], _words[startw] & (startBits(range.start()) | endBits(last))); } } @@ -123,11 +123,13 @@ BitVector::setInterval(Index start_in, Index end_in) Index endw = wordNum(last); if (endw > startw) { - _words[startw++] |= checkTab(range.start()); - memset(_words + startw, 0xff, sizeof(*_words)*(endw-startw)); - _words[endw] |= ~endBits(last); + store(_words[startw], _words[startw] | checkTab(range.start())); + for (Index i = startw + 1; i < endw; ++i) { + store(_words[i], allBits()); + } + store(_words[endw], _words[endw] | ~endBits(last)); } else { - _words[startw] |= ~(startBits(range.start()) | endBits(last)); + store(_words[startw], _words[startw] | ~(startBits(range.start()) | endBits(last))); } invalidateCachedCount(); @@ -152,17 +154,17 @@ BitVector::countInterval(Range range_in) const Word *bitValues = _words; if (startw == endw) { - return Optimized::popCount(bitValues[startw] & ~(startBits(range.start()) | endBits(last))); + return Optimized::popCount(load(bitValues[startw]) & ~(startBits(range.start()) | endBits(last))); } Index res = 0; // Limit to full words if ((range.start() & (WordLen - 1)) != 0) { - res += Optimized::popCount(bitValues[startw] & ~startBits(range.start())); + res += Optimized::popCount(load(bitValues[startw]) & ~startBits(range.start())); ++startw; } // Align start to 16 bytes while (startw < endw && (startw & 3) != 0) { - res += Optimized::popCount(bitValues[startw]); + res += Optimized::popCount(load(bitValues[startw])); ++startw; } bool partialEnd = (last & (WordLen - 1)) != (WordLen - 1); @@ -173,7 +175,7 @@ BitVector::countInterval(Range range_in) const res += IAccelrated::getAccelerator().populationCount(bitValues + startw, endw - startw); } if (partialEnd) { - res += Optimized::popCount(bitValues[endw] & ~endBits(last)); + res += Optimized::popCount(load(bitValues[endw]) & ~endBits(last)); } return res; @@ -191,7 +193,7 @@ BitVector::orWith(const BitVector & right) IAccelrated::getAccelerator().orBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes); } Index last(right.size() - 1); - getWordIndex(last)[0] |= (right.getWordIndex(last)[0] & ~endBits(last)); + store(getWordIndex(last)[0], getWordIndex(last)[0] | (load(right.getWordIndex(last)[0]) & ~endBits(last))); } } else { IAccelrated::getAccelerator().orBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes()); @@ -206,8 +208,8 @@ BitVector::repairEnds() if (size() != 0) { Index start(getStartIndex()); Index last(size() - 1); - getWordIndex(start)[0] &= ~startBits(start); - getWordIndex(last)[0] &= ~endBits(last); + store(getWordIndex(start)[0], getWordIndex(start)[0] & ~startBits(start)); + store(getWordIndex(last)[0], getWordIndex(last)[0] & ~endBits(last)); } setGuardBit(); } @@ -241,7 +243,7 @@ BitVector::andNotWith(const BitVector& right) IAccelrated::getAccelerator().andNotBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes); } Index last(right.size() - 1); - getWordIndex(last)[0] &= ~(right.getWordIndex(last)[0] & ~endBits(last)); + store(getWordIndex(last)[0], getWordIndex(last)[0] & ~(load(right.getWordIndex(last)[0]) & ~endBits(last))); } } else { IAccelrated::getAccelerator().andNotBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes()); @@ -269,7 +271,7 @@ BitVector::operator==(const BitVector &rhs) const const Word *words = getActiveStart(); const Word *oWords = rhs.getActiveStart(); for (Index i = 0; i < bitVectorSize; i++) { - if (words[i] != oWords[i]) { + if (load(words[i]) != load(oWords[i])) { return false; } } @@ -282,13 +284,13 @@ BitVector::hasTrueBitsInternal() const Index bitVectorSizeL1(numActiveWords() - 1); const Word *words(getActiveStart()); for (Index i = 0; i < bitVectorSizeL1; i++) { - if (words[i] != 0) { + if (load(words[i]) != 0) { return true; } } // Ignore guard bit. - if ((words[bitVectorSizeL1] & ~mask(size())) != 0) + if ((load(words[bitVectorSizeL1]) & ~mask(size())) != 0) return true; return false; diff --git a/searchlib/src/vespa/searchlib/common/bitvector.h b/searchlib/src/vespa/searchlib/common/bitvector.h index 3f11e2a257e..912c4a47c39 100644 --- a/searchlib/src/vespa/searchlib/common/bitvector.h +++ b/searchlib/src/vespa/searchlib/common/bitvector.h @@ -42,10 +42,8 @@ public: void * getStart() { return _words; } Index size() const { return vespalib::atomic::load_ref_relaxed(_sz); } Index sizeBytes() const { return numBytes(getActiveSize()); } - Word load_word(Index widx) const { return vespalib::atomic::load_ref_relaxed(_words[widx]); } - void store_word(Index widx, Word word) { return vespalib::atomic::store_ref_relaxed(_words[widx], word); } bool testBit(Index idx) const { - return ((load_word(wordNum(idx)) & mask(idx)) != 0); + return ((load(_words[wordNum(idx)]) & mask(idx)) != 0); } Index getSizeAcquire() const { return vespalib::atomic::load_ref_acquire(_sz); @@ -123,10 +121,10 @@ public: Index getPrevTrueBit(Index start) const { Index index(wordNum(start)); const Word *words(_words); - Word t(words[index] & ~endBits(start)); + Word t(load(words[index]) & ~endBits(start)); while(t == 0 && index > getStartWordNum()) { - t = words[--index]; + t = load(words[--index]); } return (t != 0) @@ -144,13 +142,13 @@ public: vespalib::atomic::store_ref_release(_sz, sz); } void setBit(Index idx) { - store_word(wordNum(idx), _words[wordNum(idx)] | mask(idx)); + store(_words[wordNum(idx)], _words[wordNum(idx)] | mask(idx)); } void clearBit(Index idx) { - store_word(wordNum(idx), _words[wordNum(idx)] & ~ mask(idx)); + store(_words[wordNum(idx)], _words[wordNum(idx)] & ~ mask(idx)); } void flipBit(Index idx) { - _words[wordNum(idx)] ^= mask(idx); + store(_words[wordNum(idx)], _words[wordNum(idx)] ^ mask(idx)); } void andWith(const BitVector &right); @@ -277,6 +275,8 @@ protected: static Alloc allocatePaddedAndAligned(Index start, Index end, Index capacity, const Alloc* init_alloc = nullptr); private: + Word load(const Word &word) const { return vespalib::atomic::load_ref_relaxed(word); } + void store(Word &word, Word value) { return vespalib::atomic::store_ref_relaxed(word, value); } friend PartialBitVector; const Word * getWordIndex(Index index) const { return static_cast(getStart()) + wordNum(index); } Word * getWordIndex(Index index) { return static_cast(getStart()) + wordNum(index); } @@ -319,8 +319,8 @@ private: Index index(wordNum(start)); Index lastIndex(wordNum(last)); - Word word(conv(_words[index]) & checkTab(start)); - for ( ; index < lastIndex; word = conv(_words[++index])) { + Word word(conv(load(_words[index])) & checkTab(start)); + for ( ; index < lastIndex; word = conv(load(_words[++index]))) { foreach_bit(func, word, index << numWordBits()); } foreach_bit(func, word & ~endBits(last), lastIndex << numWordBits()); @@ -329,14 +329,14 @@ private: Index getNextBit(WordConverter conv, Index start) const { Index index(wordNum(start)); const Word *words(_words); - Word t(conv(words[index]) & checkTab(start)); + Word t(conv(load(words[index])) & checkTab(start)); // In order to avoid a test an extra guard bit is added // after the bitvector as a termination. // Also bitvector will normally at least 1 bit set per 32 bits. // So that is what we should expect. while (__builtin_expect(t == 0, false)) { - t = conv(words[++index]); + t = conv(load(words[++index])); } return (index << numWordBits()) + vespalib::Optimized::lsbIdx(t); diff --git a/searchlib/src/vespa/searchlib/common/bitword.h b/searchlib/src/vespa/searchlib/common/bitword.h index 96665d47c93..d90ec9c93d3 100644 --- a/searchlib/src/vespa/searchlib/common/bitword.h +++ b/searchlib/src/vespa/searchlib/common/bitword.h @@ -17,6 +17,7 @@ public: static constexpr size_t WordLen = sizeof(Word)*8; static uint8_t bitNum(Index idx) { return (idx % WordLen); } static Word endBits(Index index) { return (std::numeric_limits::max() - 1) << bitNum(index); } + static Word allBits() { return std::numeric_limits::max(); } static Index wordNum(Index idx) { return idx >> numWordBits(); } static Word mask(Index idx) { return Word(1) << bitNum(idx); } static constexpr uint8_t size_bits(uint8_t n) { return (n > 1) ? (1 + size_bits(n >> 1)) : 0; } -- cgit v1.2.3