From 6d55ba7e9bdfd6248d5a83786c9dd1e5ee63e9b9 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Mon, 23 Jan 2023 16:55:35 +0000 Subject: Add test that non-overlapping OR does not write outside source bitvector. --- .../src/tests/common/bitvector/bitvector_test.cpp | 35 ++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/searchlib/src/tests/common/bitvector/bitvector_test.cpp b/searchlib/src/tests/common/bitvector/bitvector_test.cpp index 7a26202682b..9547e162c72 100644 --- a/searchlib/src/tests/common/bitvector/bitvector_test.cpp +++ b/searchlib/src/tests/common/bitvector/bitvector_test.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include using namespace search; @@ -135,7 +136,7 @@ assertBV(const std::string & exp, const BitVector & act) bool res1 = EXPECT_EQUAL(exp, toString(act)); search::fef::TermFieldMatchData f; queryeval::SearchIterator::UP it(BitVectorIterator::create(&act, f, true)); - BitVectorIterator & b(dynamic_cast(*it)); + auto & b(dynamic_cast(*it)); bool res2 = EXPECT_EQUAL(exp, toString(b)); return res1 && res2; } @@ -295,6 +296,18 @@ TEST("requireThatSequentialOperationsOnPartialWorks") EXPECT_EQUAL(5u, p2.countTrueBits()); p2.orWith(full); EXPECT_EQUAL(202u, p2.countTrueBits()); + + AllocatedBitVector before(100); + before.setInterval(0, 100); + p2.orWith(before); + EXPECT_EQUAL(202u, p2.countTrueBits()); + + PartialBitVector after(1000, 1100); + after.setInterval(1000, 1100); + EXPECT_EXCEPTION(p2.orWith(after), vespalib::IllegalArgumentException, + "IllegalArgumentException: [717, 919] starts before which is not allowed currently [1000, 1100] at " + "verifyInclusiveStart in /home/balder/git/vespa/searchlib/src/vespa/searchlib/common/bitvector.cpp:33"); + EXPECT_EQUAL(202u, p2.countTrueBits()); } TEST("requireThatInitRangeStaysWithinBounds") { @@ -342,7 +355,22 @@ verifyThatLongerWithShorterWorksAsZeroPadded(uint32_t offset, uint32_t sz1, uint func(*aLarger2, *bSmall); func(*aLarger3, *bEmpty); EXPECT_TRUE(*aLarger == *aLarger2); - //EXPECT_TRUE(*aLarger == *aLarger3); +} + +template +void +verifyNonOverlappingWorksAsZeroPadded(bool clear, Func func) { + constexpr size_t CNT = 34; + BitVector::UP left = createEveryNthBitSet(3, 1000, 100); + BitVector::UP right = createEveryNthBitSet(3, 2000, 100); + EXPECT_EQUAL(CNT, left->countTrueBits()); + EXPECT_EQUAL(CNT, right->countTrueBits()); + EXPECT_EXCEPTION(func(*left, *right), vespalib::IllegalArgumentException, + "IllegalArgumentException: [1000, 1100] starts before which is not allowed currently [2000, 2100] at " + "verifyInclusiveStart in /home/balder/git/vespa/searchlib/src/vespa/searchlib/common/bitvector.cpp:33"); + func(*right, *left); + EXPECT_EQUAL(CNT, left->countTrueBits()); + EXPECT_EQUAL(clear ? 0 : CNT, right->countTrueBits()); } TEST("requireThatAndWorks") { @@ -351,6 +379,7 @@ TEST("requireThatAndWorks") { verifyThatLongerWithShorterWorksAsZeroPadded(offset, offset+256, offset+256 + offset + 3, [](BitVector & a, const BitVector & b) { a.andWith(b); }); } + verifyNonOverlappingWorksAsZeroPadded(true, [](BitVector & a, const BitVector & b) { a.andWith(b); }); } TEST("requireThatOrWorks") { @@ -359,6 +388,7 @@ TEST("requireThatOrWorks") { verifyThatLongerWithShorterWorksAsZeroPadded(offset, offset+256, offset+256 + offset + 3, [](BitVector & a, const BitVector & b) { a.orWith(b); }); } + verifyNonOverlappingWorksAsZeroPadded(false, [](BitVector & a, const BitVector & b) { a.orWith(b); }); } @@ -368,6 +398,7 @@ TEST("requireThatAndNotWorks") { verifyThatLongerWithShorterWorksAsZeroPadded(offset, offset+256, offset+256 + offset + 3, [](BitVector & a, const BitVector & b) { a.andNotWith(b); }); } + verifyNonOverlappingWorksAsZeroPadded(false, [](BitVector & a, const BitVector & b) { a.andNotWith(b); }); } TEST("test that empty bitvectors does not crash") { -- cgit v1.2.3 From 78e994df4130ab8a3eb07135f17e306695947207 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Mon, 23 Jan 2023 17:54:23 +0000 Subject: Add rangecheck to BitVector::store(Word & word). Handle nonoverlapping vectors in orWith, andWith and andNotWith. --- searchlib/src/vespa/searchlib/common/bitvector.cpp | 15 ++++++++---- searchlib/src/vespa/searchlib/common/bitvector.h | 27 ++++++++++++---------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/searchlib/src/vespa/searchlib/common/bitvector.cpp b/searchlib/src/vespa/searchlib/common/bitvector.cpp index 47ca590c9bc..1e403702fad 100644 --- a/searchlib/src/vespa/searchlib/common/bitvector.cpp +++ b/searchlib/src/vespa/searchlib/common/bitvector.cpp @@ -90,10 +90,15 @@ void BitVector::clearInterval(Index start, Index end) { clearIntervalNoInvalidation(Range(start, end)); - invalidateCachedCount(); } +void +BitVector::store(Word &word, Word value) { + assert(!_enable_range_check && (&word >= getActiveStart())); + return store_unchecked(word, value); +} + void BitVector::clearIntervalNoInvalidation(Range range_in) { @@ -107,7 +112,7 @@ BitVector::clearIntervalNoInvalidation(Range range_in) if (endw > startw) { store(_words[startw], _words[startw] & startBits(range.start())); for (Index i = startw + 1; i < endw; ++i) { - store(_words[i], 0); + store_unchecked(_words[i], 0); } store(_words[endw], _words[endw] & endBits(last)); } else { @@ -128,7 +133,7 @@ BitVector::setInterval(Index start_in, Index end_in) if (endw > startw) { store(_words[startw], _words[startw] | checkTab(range.start())); for (Index i = startw + 1; i < endw; ++i) { - store(_words[i], allBits()); + store_unchecked(_words[i], allBits()); } store(_words[endw], _words[endw] | ~endBits(last)); } else { @@ -190,7 +195,7 @@ BitVector::orWith(const BitVector & right) verifyInclusiveStart(*this, right); if (right.size() < size()) { - if (right.size() > 0) { + if (right.size() > getStartIndex()) { ssize_t commonBytes = numActiveBytes(getStartIndex(), right.size()) - sizeof(Word); if (commonBytes > 0) { IAccelrated::getAccelerator().orBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes); @@ -240,7 +245,7 @@ BitVector::andNotWith(const BitVector& right) verifyInclusiveStart(*this, right); if (right.size() < size()) { - if (right.size() > 0) { + if (right.size() > getStartIndex()) { ssize_t commonBytes = numActiveBytes(getStartIndex(), right.size()) - sizeof(Word); if (commonBytes > 0) { IAccelrated::getAccelerator().andNotBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes); diff --git a/searchlib/src/vespa/searchlib/common/bitvector.h b/searchlib/src/vespa/searchlib/common/bitvector.h index 1d289435f6d..973d1a8704b 100644 --- a/searchlib/src/vespa/searchlib/common/bitvector.h +++ b/searchlib/src/vespa/searchlib/common/bitvector.h @@ -28,10 +28,10 @@ public: using UP = std::unique_ptr; class Range { public: - Range(Index start_in, Index end_in) : _start(start_in), _end(end_in) {} - Index start() const { return _start; } - Index end() const { return _end; } - bool validNonZero() const { return _end > _start; } + Range(Index start_in, Index end_in) noexcept : _start(start_in), _end(end_in) {} + [[nodiscard]] Index start() const noexcept { return _start; } + [[nodiscard]] Index end() const noexcept { return _end; } + [[nodiscard]] bool validNonZero() const noexcept { return _end > _start; } private: Index _start; Index _end; @@ -144,15 +144,15 @@ public: vespalib::atomic::store_ref_release(_sz, sz); } void set_bit_no_range_check(Index idx) { - store(_words[wordNum(idx)], _words[wordNum(idx)] | mask(idx)); + store_unchecked(_words[wordNum(idx)], _words[wordNum(idx)] | mask(idx)); } void clear_bit_no_range_check(Index idx) { - store(_words[wordNum(idx)], _words[wordNum(idx)] & ~ mask(idx)); + store_unchecked(_words[wordNum(idx)], _words[wordNum(idx)] & ~ mask(idx)); } void flip_bit_no_range_check(Index idx) { - store(_words[wordNum(idx)], _words[wordNum(idx)] ^ mask(idx)); + store_unchecked(_words[wordNum(idx)], _words[wordNum(idx)] ^ mask(idx)); } - void range_check(Index idx) { + void range_check(Index idx) const { assert(!_enable_range_check || (idx >= _startOffset && idx < _sz)); } void setBit(Index idx) { @@ -301,8 +301,11 @@ protected: static Alloc allocatePaddedAndAligned(Index start, Index end, Index capacity, const Alloc* init_alloc = nullptr); private: - Word load(const Word &word) const { return vespalib::atomic::load_ref_relaxed(word); } - void store(Word &word, Word value) { return vespalib::atomic::store_ref_relaxed(word, value); } + static Word load(const Word &word) { return vespalib::atomic::load_ref_relaxed(word); } + VESPA_DLL_LOCAL void store(Word &word, Word value); + static void store_unchecked(Word &word, Word value) { + return vespalib::atomic::store_ref_relaxed(word, value); + } friend PartialBitVector; const Word * getWordIndex(Index index) const { return static_cast(getStart()) + wordNum(index); } Word * getWordIndex(Index index) { return static_cast(getStart()) + wordNum(index); } @@ -330,8 +333,8 @@ private: } VESPA_DLL_LOCAL void repairEnds(); Range sanitize(Range range) const { - return Range(std::max(range.start(), getStartIndex()), - std::min(range.end(), size())); + return {std::max(range.start(), getStartIndex()), + std::min(range.end(), size())}; } Index count() const; bool hasTrueBitsInternal() const; -- cgit v1.2.3 From f36fd2153ce2e4668f4e3df1568deddebf25a348 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Mon, 23 Jan 2023 20:05:51 +0000 Subject: Change assert to also check end. --- searchlib/src/vespa/searchlib/common/bitvector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/searchlib/src/vespa/searchlib/common/bitvector.cpp b/searchlib/src/vespa/searchlib/common/bitvector.cpp index 1e403702fad..7ae21b5718a 100644 --- a/searchlib/src/vespa/searchlib/common/bitvector.cpp +++ b/searchlib/src/vespa/searchlib/common/bitvector.cpp @@ -95,7 +95,7 @@ BitVector::clearInterval(Index start, Index end) void BitVector::store(Word &word, Word value) { - assert(!_enable_range_check && (&word >= getActiveStart())); + assert(!_enable_range_check || ((&word >= getActiveStart()) && (&word < (getActiveStart() + numActiveWords())))); return store_unchecked(word, value); } -- cgit v1.2.3 From a69b9c38acebdbfd8240024601a9e61cc624605a Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Mon, 23 Jan 2023 20:47:16 +0000 Subject: - Use santitized range to align orWith / andNotWith with similar code. --- searchlib/src/vespa/searchlib/common/bitvector.cpp | 32 +++++++++++----------- searchlib/src/vespa/searchlib/common/bitvector.h | 1 + 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/searchlib/src/vespa/searchlib/common/bitvector.cpp b/searchlib/src/vespa/searchlib/common/bitvector.cpp index 7ae21b5718a..19d0e194432 100644 --- a/searchlib/src/vespa/searchlib/common/bitvector.cpp +++ b/searchlib/src/vespa/searchlib/common/bitvector.cpp @@ -193,18 +193,18 @@ void BitVector::orWith(const BitVector & right) { verifyInclusiveStart(*this, right); + Range range = sanitize(right.range()); + if ( ! range.validNonZero()) return; if (right.size() < size()) { - if (right.size() > getStartIndex()) { - ssize_t commonBytes = numActiveBytes(getStartIndex(), right.size()) - sizeof(Word); - if (commonBytes > 0) { - IAccelrated::getAccelerator().orBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes); - } - Index last(right.size() - 1); - store(getWordIndex(last)[0], getWordIndex(last)[0] | (load(right.getWordIndex(last)[0]) & ~endBits(last))); + ssize_t commonBytes = numActiveBytes(range.start(), range.end()) - sizeof(Word); + if (commonBytes > 0) { + IAccelrated::getAccelerator().orBit(getWordIndex(range.start()), right.getWordIndex(range.start()), commonBytes); } + Index last(range.end() - 1); + store(getWordIndex(last)[0], getWordIndex(last)[0] | (load(right.getWordIndex(last)[0]) & ~endBits(last))); } else { - IAccelrated::getAccelerator().orBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes()); + IAccelrated::getAccelerator().orBit(getWordIndex(range.start()), right.getWordIndex(range.start()), getActiveBytes()); } repairEnds(); invalidateCachedCount(); @@ -243,18 +243,18 @@ void BitVector::andNotWith(const BitVector& right) { verifyInclusiveStart(*this, right); + Range range = sanitize(right.range()); + if ( ! range.validNonZero()) return; if (right.size() < size()) { - if (right.size() > getStartIndex()) { - ssize_t commonBytes = numActiveBytes(getStartIndex(), right.size()) - sizeof(Word); - if (commonBytes > 0) { - IAccelrated::getAccelerator().andNotBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes); - } - Index last(right.size() - 1); - store(getWordIndex(last)[0], getWordIndex(last)[0] & ~(load(right.getWordIndex(last)[0]) & ~endBits(last))); + ssize_t commonBytes = numActiveBytes(range.start(), range.end()) - sizeof(Word); + if (commonBytes > 0) { + IAccelrated::getAccelerator().andNotBit(getWordIndex(range.start()), right.getWordIndex(range.start()), commonBytes); } + Index last(range.end() - 1); + store(getWordIndex(last)[0], getWordIndex(last)[0] & ~(load(right.getWordIndex(last)[0]) & ~endBits(last))); } else { - IAccelrated::getAccelerator().andNotBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes()); + IAccelrated::getAccelerator().andNotBit(getWordIndex(range.start()), right.getWordIndex(range.start()), getActiveBytes()); } repairEnds(); diff --git a/searchlib/src/vespa/searchlib/common/bitvector.h b/searchlib/src/vespa/searchlib/common/bitvector.h index 973d1a8704b..67f3e2ad502 100644 --- a/searchlib/src/vespa/searchlib/common/bitvector.h +++ b/searchlib/src/vespa/searchlib/common/bitvector.h @@ -42,6 +42,7 @@ public: bool operator == (const BitVector &right) const; const void * getStart() const { return _words; } void * getStart() { return _words; } + Range range() const noexcept { return {getStartIndex(), size()}; } Index size() const { return vespalib::atomic::load_ref_relaxed(_sz); } Index sizeBytes() const { return numBytes(getActiveSize()); } bool testBit(Index idx) const { -- cgit v1.2.3 From 4b6f692bed109678f01b591f1a737f6d74775058 Mon Sep 17 00:00:00 2001 From: Arne Juul Date: Mon, 23 Jan 2023 21:56:51 +0000 Subject: * use the sanitize range and check in "andWith" also * verifyInclusiveStart is not needed now that we do range checks * the unit tests could only work in "/home/balder" --- .../src/tests/common/bitvector/bitvector_test.cpp | 13 +++++++------ searchlib/src/vespa/searchlib/common/bitvector.cpp | 19 +++++-------------- .../src/vespa/searchlib/common/partialbitvector.h | 2 +- 3 files changed, 13 insertions(+), 21 deletions(-) diff --git a/searchlib/src/tests/common/bitvector/bitvector_test.cpp b/searchlib/src/tests/common/bitvector/bitvector_test.cpp index 9547e162c72..9afc4601801 100644 --- a/searchlib/src/tests/common/bitvector/bitvector_test.cpp +++ b/searchlib/src/tests/common/bitvector/bitvector_test.cpp @@ -304,9 +304,6 @@ TEST("requireThatSequentialOperationsOnPartialWorks") PartialBitVector after(1000, 1100); after.setInterval(1000, 1100); - EXPECT_EXCEPTION(p2.orWith(after), vespalib::IllegalArgumentException, - "IllegalArgumentException: [717, 919] starts before which is not allowed currently [1000, 1100] at " - "verifyInclusiveStart in /home/balder/git/vespa/searchlib/src/vespa/searchlib/common/bitvector.cpp:33"); EXPECT_EQUAL(202u, p2.countTrueBits()); } @@ -365,9 +362,13 @@ verifyNonOverlappingWorksAsZeroPadded(bool clear, Func func) { BitVector::UP right = createEveryNthBitSet(3, 2000, 100); EXPECT_EQUAL(CNT, left->countTrueBits()); EXPECT_EQUAL(CNT, right->countTrueBits()); - EXPECT_EXCEPTION(func(*left, *right), vespalib::IllegalArgumentException, - "IllegalArgumentException: [1000, 1100] starts before which is not allowed currently [2000, 2100] at " - "verifyInclusiveStart in /home/balder/git/vespa/searchlib/src/vespa/searchlib/common/bitvector.cpp:33"); + func(*left, *right); + EXPECT_EQUAL(clear ? 0 : CNT, left->countTrueBits()); + EXPECT_EQUAL(CNT, right->countTrueBits()); + left = createEveryNthBitSet(3, 1000, 100); + right = createEveryNthBitSet(3, 2000, 100); + EXPECT_EQUAL(CNT, left->countTrueBits()); + EXPECT_EQUAL(CNT, right->countTrueBits()); func(*right, *left); EXPECT_EQUAL(CNT, left->countTrueBits()); EXPECT_EQUAL(clear ? 0 : CNT, right->countTrueBits()); diff --git a/searchlib/src/vespa/searchlib/common/bitvector.cpp b/searchlib/src/vespa/searchlib/common/bitvector.cpp index 19d0e194432..cba3ef4f842 100644 --- a/searchlib/src/vespa/searchlib/common/bitvector.cpp +++ b/searchlib/src/vespa/searchlib/common/bitvector.cpp @@ -23,17 +23,6 @@ using vespalib::alloc::Alloc; namespace { -void verifyInclusiveStart(const search::BitVector & a, const search::BitVector & b) __attribute__((noinline)); - -void verifyInclusiveStart(const search::BitVector & a, const search::BitVector & b) -{ - if (a.getStartIndex() < b.getStartIndex()) { - throw IllegalArgumentException(make_string("[%d, %d] starts before which is not allowed currently [%d, %d]", - a.getStartIndex(), a.size(), b.getStartIndex(), b.size()), - VESPA_STRLOC); - } -} - constexpr size_t MMAP_LIMIT = 256_Mi; } @@ -192,7 +181,6 @@ BitVector::countInterval(Range range_in) const void BitVector::orWith(const BitVector & right) { - verifyInclusiveStart(*this, right); Range range = sanitize(right.range()); if ( ! range.validNonZero()) return; @@ -226,7 +214,11 @@ BitVector::repairEnds() void BitVector::andWith(const BitVector & right) { - verifyInclusiveStart(*this, right); + Range range = sanitize(right.range()); + if ( ! range.validNonZero()) { + clear(); + return; + } uint32_t commonBytes = std::min(getActiveBytes(), numActiveBytes(getStartIndex(), right.size())); IAccelrated::getAccelerator().andBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes); @@ -242,7 +234,6 @@ BitVector::andWith(const BitVector & right) void BitVector::andNotWith(const BitVector& right) { - verifyInclusiveStart(*this, right); Range range = sanitize(right.range()); if ( ! range.validNonZero()) return; diff --git a/searchlib/src/vespa/searchlib/common/partialbitvector.h b/searchlib/src/vespa/searchlib/common/partialbitvector.h index d1baa22f9a1..4fb5c3c0d18 100644 --- a/searchlib/src/vespa/searchlib/common/partialbitvector.h +++ b/searchlib/src/vespa/searchlib/common/partialbitvector.h @@ -8,7 +8,7 @@ namespace search { /** * search::PartialBitVector is a bitvector that is only represents 1 part - * of the full space. All operations concerning the whole vector while only + * of the full space. All operations concerning the whole vector will only * be conducted on this smaller area. */ class PartialBitVector : public BitVector -- cgit v1.2.3