aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArne H Juul <arnej27959@users.noreply.github.com>2023-01-24 00:06:05 +0100
committerGitHub <noreply@github.com>2023-01-24 00:06:05 +0100
commit0c6c77b803e233d14667741baf10b6ae0deb532b (patch)
tree6ea59d4ca175490d9c1217bf03e11fe7163dce1b
parent0137c3fdae9171b370eae22c67e3ee4784b71f60 (diff)
parent4b6f692bed109678f01b591f1a737f6d74775058 (diff)
Merge pull request #25688 from vespa-engine/balder/also-handle-non-overlapping-bitvectorsv8.115.24
Balder/also handle non overlapping bitvectors
-rw-r--r--searchlib/src/tests/common/bitvector/bitvector_test.cpp36
-rw-r--r--searchlib/src/vespa/searchlib/common/bitvector.cpp62
-rw-r--r--searchlib/src/vespa/searchlib/common/bitvector.h28
-rw-r--r--searchlib/src/vespa/searchlib/common/partialbitvector.h2
4 files changed, 80 insertions, 48 deletions
diff --git a/searchlib/src/tests/common/bitvector/bitvector_test.cpp b/searchlib/src/tests/common/bitvector/bitvector_test.cpp
index 7a26202682b..9afc4601801 100644
--- a/searchlib/src/tests/common/bitvector/bitvector_test.cpp
+++ b/searchlib/src/tests/common/bitvector/bitvector_test.cpp
@@ -10,6 +10,7 @@
#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
#include <vespa/vespalib/test/memory_allocator_observer.h>
#include <vespa/vespalib/util/rand48.h>
+#include <vespa/vespalib/util/exceptions.h>
#include <algorithm>
using namespace search;
@@ -135,7 +136,7 @@ assertBV(const std::string & exp, const BitVector & act)
bool res1 = EXPECT_EQUAL(exp, toString(act));
search::fef::TermFieldMatchData f;
queryeval::SearchIterator::UP it(BitVectorIterator::create(&act, f, true));
- BitVectorIterator & b(dynamic_cast<BitVectorIterator &>(*it));
+ auto & b(dynamic_cast<BitVectorIterator &>(*it));
bool res2 = EXPECT_EQUAL(exp, toString(b));
return res1 && res2;
}
@@ -295,6 +296,15 @@ TEST("requireThatSequentialOperationsOnPartialWorks")
EXPECT_EQUAL(5u, p2.countTrueBits());
p2.orWith(full);
EXPECT_EQUAL(202u, p2.countTrueBits());
+
+ AllocatedBitVector before(100);
+ before.setInterval(0, 100);
+ p2.orWith(before);
+ EXPECT_EQUAL(202u, p2.countTrueBits());
+
+ PartialBitVector after(1000, 1100);
+ after.setInterval(1000, 1100);
+ EXPECT_EQUAL(202u, p2.countTrueBits());
}
TEST("requireThatInitRangeStaysWithinBounds") {
@@ -342,7 +352,26 @@ verifyThatLongerWithShorterWorksAsZeroPadded(uint32_t offset, uint32_t sz1, uint
func(*aLarger2, *bSmall);
func(*aLarger3, *bEmpty);
EXPECT_TRUE(*aLarger == *aLarger2);
- //EXPECT_TRUE(*aLarger == *aLarger3);
+}
+
+template<typename Func>
+void
+verifyNonOverlappingWorksAsZeroPadded(bool clear, Func func) {
+ constexpr size_t CNT = 34;
+ BitVector::UP left = createEveryNthBitSet(3, 1000, 100);
+ BitVector::UP right = createEveryNthBitSet(3, 2000, 100);
+ EXPECT_EQUAL(CNT, left->countTrueBits());
+ EXPECT_EQUAL(CNT, right->countTrueBits());
+ func(*left, *right);
+ EXPECT_EQUAL(clear ? 0 : CNT, left->countTrueBits());
+ EXPECT_EQUAL(CNT, right->countTrueBits());
+ left = createEveryNthBitSet(3, 1000, 100);
+ right = createEveryNthBitSet(3, 2000, 100);
+ EXPECT_EQUAL(CNT, left->countTrueBits());
+ EXPECT_EQUAL(CNT, right->countTrueBits());
+ func(*right, *left);
+ EXPECT_EQUAL(CNT, left->countTrueBits());
+ EXPECT_EQUAL(clear ? 0 : CNT, right->countTrueBits());
}
TEST("requireThatAndWorks") {
@@ -351,6 +380,7 @@ TEST("requireThatAndWorks") {
verifyThatLongerWithShorterWorksAsZeroPadded(offset, offset+256, offset+256 + offset + 3,
[](BitVector & a, const BitVector & b) { a.andWith(b); });
}
+ verifyNonOverlappingWorksAsZeroPadded(true, [](BitVector & a, const BitVector & b) { a.andWith(b); });
}
TEST("requireThatOrWorks") {
@@ -359,6 +389,7 @@ TEST("requireThatOrWorks") {
verifyThatLongerWithShorterWorksAsZeroPadded(offset, offset+256, offset+256 + offset + 3,
[](BitVector & a, const BitVector & b) { a.orWith(b); });
}
+ verifyNonOverlappingWorksAsZeroPadded(false, [](BitVector & a, const BitVector & b) { a.orWith(b); });
}
@@ -368,6 +399,7 @@ TEST("requireThatAndNotWorks") {
verifyThatLongerWithShorterWorksAsZeroPadded(offset, offset+256, offset+256 + offset + 3,
[](BitVector & a, const BitVector & b) { a.andNotWith(b); });
}
+ verifyNonOverlappingWorksAsZeroPadded(false, [](BitVector & a, const BitVector & b) { a.andNotWith(b); });
}
TEST("test that empty bitvectors does not crash") {
diff --git a/searchlib/src/vespa/searchlib/common/bitvector.cpp b/searchlib/src/vespa/searchlib/common/bitvector.cpp
index 47ca590c9bc..cba3ef4f842 100644
--- a/searchlib/src/vespa/searchlib/common/bitvector.cpp
+++ b/searchlib/src/vespa/searchlib/common/bitvector.cpp
@@ -23,17 +23,6 @@ using vespalib::alloc::Alloc;
namespace {
-void verifyInclusiveStart(const search::BitVector & a, const search::BitVector & b) __attribute__((noinline));
-
-void verifyInclusiveStart(const search::BitVector & a, const search::BitVector & b)
-{
- if (a.getStartIndex() < b.getStartIndex()) {
- throw IllegalArgumentException(make_string("[%d, %d] starts before which is not allowed currently [%d, %d]",
- a.getStartIndex(), a.size(), b.getStartIndex(), b.size()),
- VESPA_STRLOC);
- }
-}
-
constexpr size_t MMAP_LIMIT = 256_Mi;
}
@@ -90,11 +79,16 @@ void
BitVector::clearInterval(Index start, Index end)
{
clearIntervalNoInvalidation(Range(start, end));
-
invalidateCachedCount();
}
void
+BitVector::store(Word &word, Word value) {
+ assert(!_enable_range_check || ((&word >= getActiveStart()) && (&word < (getActiveStart() + numActiveWords()))));
+ return store_unchecked(word, value);
+}
+
+void
BitVector::clearIntervalNoInvalidation(Range range_in)
{
Range range = sanitize(range_in);
@@ -107,7 +101,7 @@ BitVector::clearIntervalNoInvalidation(Range range_in)
if (endw > startw) {
store(_words[startw], _words[startw] & startBits(range.start()));
for (Index i = startw + 1; i < endw; ++i) {
- store(_words[i], 0);
+ store_unchecked(_words[i], 0);
}
store(_words[endw], _words[endw] & endBits(last));
} else {
@@ -128,7 +122,7 @@ BitVector::setInterval(Index start_in, Index end_in)
if (endw > startw) {
store(_words[startw], _words[startw] | checkTab(range.start()));
for (Index i = startw + 1; i < endw; ++i) {
- store(_words[i], allBits());
+ store_unchecked(_words[i], allBits());
}
store(_words[endw], _words[endw] | ~endBits(last));
} else {
@@ -187,19 +181,18 @@ BitVector::countInterval(Range range_in) const
void
BitVector::orWith(const BitVector & right)
{
- verifyInclusiveStart(*this, right);
+ Range range = sanitize(right.range());
+ if ( ! range.validNonZero()) return;
if (right.size() < size()) {
- if (right.size() > 0) {
- ssize_t commonBytes = numActiveBytes(getStartIndex(), right.size()) - sizeof(Word);
- if (commonBytes > 0) {
- IAccelrated::getAccelerator().orBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes);
- }
- Index last(right.size() - 1);
- store(getWordIndex(last)[0], getWordIndex(last)[0] | (load(right.getWordIndex(last)[0]) & ~endBits(last)));
+ ssize_t commonBytes = numActiveBytes(range.start(), range.end()) - sizeof(Word);
+ if (commonBytes > 0) {
+ IAccelrated::getAccelerator().orBit(getWordIndex(range.start()), right.getWordIndex(range.start()), commonBytes);
}
+ Index last(range.end() - 1);
+ store(getWordIndex(last)[0], getWordIndex(last)[0] | (load(right.getWordIndex(last)[0]) & ~endBits(last)));
} else {
- IAccelrated::getAccelerator().orBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes());
+ IAccelrated::getAccelerator().orBit(getWordIndex(range.start()), right.getWordIndex(range.start()), getActiveBytes());
}
repairEnds();
invalidateCachedCount();
@@ -221,7 +214,11 @@ BitVector::repairEnds()
void
BitVector::andWith(const BitVector & right)
{
- verifyInclusiveStart(*this, right);
+ Range range = sanitize(right.range());
+ if ( ! range.validNonZero()) {
+ clear();
+ return;
+ }
uint32_t commonBytes = std::min(getActiveBytes(), numActiveBytes(getStartIndex(), right.size()));
IAccelrated::getAccelerator().andBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes);
@@ -237,19 +234,18 @@ BitVector::andWith(const BitVector & right)
void
BitVector::andNotWith(const BitVector& right)
{
- verifyInclusiveStart(*this, right);
+ Range range = sanitize(right.range());
+ if ( ! range.validNonZero()) return;
if (right.size() < size()) {
- if (right.size() > 0) {
- ssize_t commonBytes = numActiveBytes(getStartIndex(), right.size()) - sizeof(Word);
- if (commonBytes > 0) {
- IAccelrated::getAccelerator().andNotBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes);
- }
- Index last(right.size() - 1);
- store(getWordIndex(last)[0], getWordIndex(last)[0] & ~(load(right.getWordIndex(last)[0]) & ~endBits(last)));
+ ssize_t commonBytes = numActiveBytes(range.start(), range.end()) - sizeof(Word);
+ if (commonBytes > 0) {
+ IAccelrated::getAccelerator().andNotBit(getWordIndex(range.start()), right.getWordIndex(range.start()), commonBytes);
}
+ Index last(range.end() - 1);
+ store(getWordIndex(last)[0], getWordIndex(last)[0] & ~(load(right.getWordIndex(last)[0]) & ~endBits(last)));
} else {
- IAccelrated::getAccelerator().andNotBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes());
+ IAccelrated::getAccelerator().andNotBit(getWordIndex(range.start()), right.getWordIndex(range.start()), getActiveBytes());
}
repairEnds();
diff --git a/searchlib/src/vespa/searchlib/common/bitvector.h b/searchlib/src/vespa/searchlib/common/bitvector.h
index 1d289435f6d..67f3e2ad502 100644
--- a/searchlib/src/vespa/searchlib/common/bitvector.h
+++ b/searchlib/src/vespa/searchlib/common/bitvector.h
@@ -28,10 +28,10 @@ public:
using UP = std::unique_ptr<BitVector>;
class Range {
public:
- Range(Index start_in, Index end_in) : _start(start_in), _end(end_in) {}
- Index start() const { return _start; }
- Index end() const { return _end; }
- bool validNonZero() const { return _end > _start; }
+ Range(Index start_in, Index end_in) noexcept : _start(start_in), _end(end_in) {}
+ [[nodiscard]] Index start() const noexcept { return _start; }
+ [[nodiscard]] Index end() const noexcept { return _end; }
+ [[nodiscard]] bool validNonZero() const noexcept { return _end > _start; }
private:
Index _start;
Index _end;
@@ -42,6 +42,7 @@ public:
bool operator == (const BitVector &right) const;
const void * getStart() const { return _words; }
void * getStart() { return _words; }
+ Range range() const noexcept { return {getStartIndex(), size()}; }
Index size() const { return vespalib::atomic::load_ref_relaxed(_sz); }
Index sizeBytes() const { return numBytes(getActiveSize()); }
bool testBit(Index idx) const {
@@ -144,15 +145,15 @@ public:
vespalib::atomic::store_ref_release(_sz, sz);
}
void set_bit_no_range_check(Index idx) {
- store(_words[wordNum(idx)], _words[wordNum(idx)] | mask(idx));
+ store_unchecked(_words[wordNum(idx)], _words[wordNum(idx)] | mask(idx));
}
void clear_bit_no_range_check(Index idx) {
- store(_words[wordNum(idx)], _words[wordNum(idx)] & ~ mask(idx));
+ store_unchecked(_words[wordNum(idx)], _words[wordNum(idx)] & ~ mask(idx));
}
void flip_bit_no_range_check(Index idx) {
- store(_words[wordNum(idx)], _words[wordNum(idx)] ^ mask(idx));
+ store_unchecked(_words[wordNum(idx)], _words[wordNum(idx)] ^ mask(idx));
}
- void range_check(Index idx) {
+ void range_check(Index idx) const {
assert(!_enable_range_check || (idx >= _startOffset && idx < _sz));
}
void setBit(Index idx) {
@@ -301,8 +302,11 @@ protected:
static Alloc allocatePaddedAndAligned(Index start, Index end, Index capacity, const Alloc* init_alloc = nullptr);
private:
- Word load(const Word &word) const { return vespalib::atomic::load_ref_relaxed(word); }
- void store(Word &word, Word value) { return vespalib::atomic::store_ref_relaxed(word, value); }
+ static Word load(const Word &word) { return vespalib::atomic::load_ref_relaxed(word); }
+ VESPA_DLL_LOCAL void store(Word &word, Word value);
+ static void store_unchecked(Word &word, Word value) {
+ return vespalib::atomic::store_ref_relaxed(word, value);
+ }
friend PartialBitVector;
const Word * getWordIndex(Index index) const { return static_cast<const Word *>(getStart()) + wordNum(index); }
Word * getWordIndex(Index index) { return static_cast<Word *>(getStart()) + wordNum(index); }
@@ -330,8 +334,8 @@ private:
}
VESPA_DLL_LOCAL void repairEnds();
Range sanitize(Range range) const {
- return Range(std::max(range.start(), getStartIndex()),
- std::min(range.end(), size()));
+ return {std::max(range.start(), getStartIndex()),
+ std::min(range.end(), size())};
}
Index count() const;
bool hasTrueBitsInternal() const;
diff --git a/searchlib/src/vespa/searchlib/common/partialbitvector.h b/searchlib/src/vespa/searchlib/common/partialbitvector.h
index d1baa22f9a1..4fb5c3c0d18 100644
--- a/searchlib/src/vespa/searchlib/common/partialbitvector.h
+++ b/searchlib/src/vespa/searchlib/common/partialbitvector.h
@@ -8,7 +8,7 @@ namespace search {
/**
* search::PartialBitVector is a bitvector that is only represents 1 part
- * of the full space. All operations concerning the whole vector while only
+ * of the full space. All operations concerning the whole vector will only
* be conducted on this smaller area.
*/
class PartialBitVector : public BitVector