aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2020-01-22 06:12:19 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2020-01-22 06:12:19 +0000
commit73aa08d82d68b47f46cff025f2ae668980d649f6 (patch)
tree99d5dd713be02af25cad50def01d3624625379ad /searchlib
parentf936993576920fa1e729ef79b17bdc7d917e73df (diff)
Maintain the cached bitCount to avoid cost query time.
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/bitvector/bitvectorbenchmark.cpp4
-rw-r--r--searchlib/src/tests/common/bitvector/bitvector_test.cpp22
-rw-r--r--searchlib/src/tests/docstore/document_store_visitor/document_store_visitor_test.cpp10
-rw-r--r--searchlib/src/vespa/searchlib/attribute/flagattribute.cpp28
-rw-r--r--searchlib/src/vespa/searchlib/attribute/flagattribute.h4
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postingstore.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp23
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singleboolattribute.h4
-rw-r--r--searchlib/src/vespa/searchlib/common/bitvector.h40
9 files changed, 72 insertions, 75 deletions
diff --git a/searchlib/src/tests/bitvector/bitvectorbenchmark.cpp b/searchlib/src/tests/bitvector/bitvectorbenchmark.cpp
index ed681d9021b..a9df188d417 100644
--- a/searchlib/src/tests/bitvector/bitvectorbenchmark.cpp
+++ b/searchlib/src/tests/bitvector/bitvectorbenchmark.cpp
@@ -51,10 +51,10 @@ void BitVectorBenchmark::init(size_t n)
BitVector *b(BitVector::create(n).release());
srand(1);
for(size_t i(0), j(0); i < n; i += rand()%10, j++) {
- a->flip(i);
+ a->flipBit(i);
}
for(size_t i(0), j(0); i < n; i += rand()%10, j++) {
- b->flip(i);
+ b->flipBit(i);
}
a->invalidateCachedCount();
b->invalidateCachedCount();
diff --git a/searchlib/src/tests/common/bitvector/bitvector_test.cpp b/searchlib/src/tests/common/bitvector/bitvector_test.cpp
index 4cbe96c74b5..d720b105671 100644
--- a/searchlib/src/tests/common/bitvector/bitvector_test.cpp
+++ b/searchlib/src/tests/common/bitvector/bitvector_test.cpp
@@ -268,21 +268,21 @@ TEST("requireThatSequentialOperationsOnPartialWorks")
p1.invalidateCachedCount();
EXPECT_TRUE(p1.hasTrueBits());
EXPECT_EQUAL(1u, p1.countTrueBits());
- p1.slowSetBit(718);
- p1.slowSetBit(739);
- p1.slowSetBit(871);
- p1.slowSetBit(903);
+ p1.setBitAndMaintainCount(718);
+ p1.setBitAndMaintainCount(739);
+ p1.setBitAndMaintainCount(871);
+ p1.setBitAndMaintainCount(903);
EXPECT_EQUAL(5u, p1.countTrueBits());
EXPECT_TRUE(assertBV("[718,719,739,871,903]", p1));
PartialBitVector p2(717,919);
EXPECT_FALSE(p1 == p2);
- p2.slowSetBit(719);
- p2.slowSetBit(718);
- p2.slowSetBit(739);
- p2.slowSetBit(871);
+ p2.setBitAndMaintainCount(719);
+ p2.setBitAndMaintainCount(718);
+ p2.setBitAndMaintainCount(739);
+ p2.setBitAndMaintainCount(871);
EXPECT_FALSE(p1 == p2);
- p2.slowSetBit(903);
+ p2.setBitAndMaintainCount(903);
EXPECT_TRUE(p1 == p2);
AllocatedBitVector full(1000);
@@ -422,10 +422,10 @@ TEST("requireThatSetWorks")
EXPECT_EQUAL(4u, v1.countTrueBits());
EXPECT_TRUE(assertBV("[7,39,80,103]", v1));
- v1.slowSetBit(39);
+ v1.setBitAndMaintainCount(39);
EXPECT_EQUAL(4u, v1.countTrueBits());
EXPECT_TRUE(assertBV("[7,39,80,103]", v1));
- v1.slowSetBit(57);
+ v1.setBitAndMaintainCount(57);
EXPECT_EQUAL(5u, v1.countTrueBits());
EXPECT_TRUE(assertBV("[7,39,57,80,103]", v1));
}
diff --git a/searchlib/src/tests/docstore/document_store_visitor/document_store_visitor_test.cpp b/searchlib/src/tests/docstore/document_store_visitor/document_store_visitor_test.cpp
index 0a3c3788c98..cc31fcec4d4 100644
--- a/searchlib/src/tests/docstore/document_store_visitor/document_store_visitor_test.cpp
+++ b/searchlib/src/tests/docstore/document_store_visitor/document_store_visitor_test.cpp
@@ -127,7 +127,7 @@ MyVisitor::visit(uint32_t lid, const std::shared_ptr<Document> &doc)
assert(lid < _docIdLimit);
Document::UP expDoc(makeDoc(_repo, lid, _before));
EXPECT_TRUE(*expDoc == *doc);
- _valid->slowSetBit(lid);
+ _valid->setBitAndMaintainCount(lid);
}
@@ -136,7 +136,7 @@ MyVisitor::visit(uint32_t lid)
{
++_visitRmCount;
assert(lid < _docIdLimit);
- _valid->slowClearBit(lid);
+ _valid->clearBitAndMaintainCount(lid);
}
@@ -158,7 +158,7 @@ MyRewriteVisitor::visit(uint32_t lid, const std::shared_ptr<Document> &doc)
assert(lid < _docIdLimit);
Document::UP expDoc(makeDoc(_repo, lid, _before));
EXPECT_TRUE(*expDoc == *doc);
- _valid->slowSetBit(lid);
+ _valid->setBitAndMaintainCount(lid);
doc->set("extra", "foo");
}
@@ -297,7 +297,7 @@ Fixture::put(const Document &doc, uint32_t lid)
++_syncToken;
assert(lid < _docIdLimit);
_store->write(_syncToken, lid, doc);
- _valid->slowSetBit(lid);
+ _valid->setBitAndMaintainCount(lid);
}
@@ -307,7 +307,7 @@ Fixture::remove(uint32_t lid)
++_syncToken;
assert(lid < _docIdLimit);
_store->remove(_syncToken, lid);
- _valid->slowClearBit(lid);
+ _valid->clearBitAndMaintainCount(lid);
}
diff --git a/searchlib/src/vespa/searchlib/attribute/flagattribute.cpp b/searchlib/src/vespa/searchlib/attribute/flagattribute.cpp
index a38cbe60e56..1e4bba95b4b 100644
--- a/searchlib/src/vespa/searchlib/attribute/flagattribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/flagattribute.cpp
@@ -24,8 +24,7 @@ class SaveBits
FA &_fa;
public:
- SaveBits(vespalib::ConstArrayRef<T> map,
- FA &fa)
+ SaveBits(vespalib::ConstArrayRef<T> map, FA &fa)
: _map(map),
_fa(fa)
{
@@ -55,11 +54,9 @@ FlagAttributeT<B>::FlagAttributeT(const vespalib::string & baseFileName, const A
template <typename B>
AttributeVector::SearchContext::UP
-FlagAttributeT<B>::getSearch(QueryTermSimple::UP qTerm,
- const attribute::SearchContextParams & params) const
+FlagAttributeT<B>::getSearch(QueryTermSimple::UP qTerm, const attribute::SearchContextParams &) const
{
- (void) params;
- return AttributeVector::SearchContext::UP (new SearchContext(std::move(qTerm), *this));
+ return std::make_unique<SearchContext>(std::move(qTerm), *this);
}
template <typename B>
@@ -69,7 +66,7 @@ void FlagAttributeT<B>::clearOldValues(DocId doc)
for (uint32_t i(0), m(this->get(doc, values)); i < m; i++) {
BitVector * bv = _bitVectors[getOffset(values[i].value())];
if (bv != nullptr) {
- bv->clearBit(doc);
+ bv->clearBitAndMaintainCount(doc);
}
}
}
@@ -130,10 +127,9 @@ void FlagAttributeT<B>::setNewValues(DocId doc, const std::vector<typename B::WT
_bitVectorStore[offset] = BitVector::create(_bitVectorSize);
_bitVectors[offset] = _bitVectorStore[offset].get();
bv = _bitVectors[offset];
- bv->invalidateCachedCount();
ensureGuardBit(*bv);
}
- bv->setBit(doc);
+ bv->setBitAndMaintainCount(doc);
}
}
@@ -145,13 +141,12 @@ FlagAttributeT<B>::setNewBVValue(DocId doc, typename B::WType::ValueType value)
BitVector * bv = _bitVectors[offset];
if (bv == nullptr) {
assert(_bitVectorSize >= this->getNumDocs());
- _bitVectorStore[offset] = BitVector::create(_bitVectorSize);
+ _bitVectorStore[offset] = BitVector::create(_bitVectorSize);
_bitVectors[offset] = _bitVectorStore[offset].get();
bv = _bitVectors[offset];
- bv->invalidateCachedCount();
ensureGuardBit(*bv);
}
- bv->setBit(doc);
+ bv->setBitAndMaintainCount(doc);
}
@@ -193,8 +188,7 @@ template <typename B>
void
FlagAttributeT<B>::ensureGuardBit()
{
- for (uint32_t i = 0; i < _bitVectors.size(); ++i) {
- BitVector * bv = _bitVectors[i];
+ for (BitVector * bv : _bitVectors) {
if (bv != nullptr) {
ensureGuardBit(*bv);
}
@@ -205,8 +199,7 @@ template <typename B>
void
FlagAttributeT<B>::clearGuardBit(DocId doc)
{
- for (uint32_t i = 0; i < _bitVectors.size(); ++i) {
- BitVector * bv = _bitVectors[i];
+ for (BitVector * bv : _bitVectors) {
if (bv != nullptr) {
bv->clearBit(doc); // clear guard bit and start using this doc id
}
@@ -219,8 +212,7 @@ FlagAttributeT<B>::resizeBitVectors(uint32_t neededSize)
{
const GrowStrategy & gs = this->getConfig().getGrowStrategy();
uint32_t newSize = neededSize + (neededSize * gs.getDocsGrowFactor()) + gs.getDocsGrowDelta();
- for (uint32_t i = 0; i < _bitVectors.size(); ++i) {
- BitVector * bv = _bitVectors[i];
+ for (BitVector * bv : _bitVectors) {
if (bv != nullptr) {
vespalib::GenerationHeldBase::UP hold(bv->grow(newSize));
ensureGuardBit(*bv);
diff --git a/searchlib/src/vespa/searchlib/attribute/flagattribute.h b/searchlib/src/vespa/searchlib/attribute/flagattribute.h
index 742d0cd8592..d3ce5f9af46 100644
--- a/searchlib/src/vespa/searchlib/attribute/flagattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/flagattribute.h
@@ -52,10 +52,10 @@ private:
return _bitVectors[value + 128];
}
- vespalib::GenerationHolder _bitVectorHolder;
+ vespalib::GenerationHolder _bitVectorHolder;
std::vector<std::shared_ptr<BitVector> > _bitVectorStore;
std::vector<BitVector *> _bitVectors;
- uint32_t _bitVectorSize;
+ uint32_t _bitVectorSize;
template <class SC> friend class FlagAttributeIteratorT;
template <class SC> friend class FlagAttributeIteratorStrict;
};
diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
index 9c736a16069..2badb199934 100644
--- a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp
@@ -246,9 +246,8 @@ PostingStore<DataT>::makeBitVector(EntryRef &ref)
uint32_t typeId = getTypeId(iRef);
assert(isBTree(typeId));
(void) typeId;
- std::shared_ptr<GrowableBitVector> bvsp;
vespalib::GenerationHolder &genHolder = _store.getGenerationHolder();
- bvsp.reset(new GrowableBitVector(_bvSize, _bvCapacity, genHolder));
+ auto bvsp = std::make_shared<GrowableBitVector>(_bvSize, _bvCapacity, genHolder);
AllocatedBitVector &bv = *bvsp.get();
uint32_t docIdLimit = _bvSize;
(void) docIdLimit;
@@ -288,9 +287,8 @@ PostingStore<DataT>::applyNewBitVector(EntryRef &ref,
{
assert(!ref.valid());
RefType iRef(ref);
- std::shared_ptr<GrowableBitVector> bvsp;
vespalib::GenerationHolder &genHolder = _store.getGenerationHolder();
- bvsp.reset(new GrowableBitVector(_bvSize, _bvCapacity, genHolder));
+ auto bvsp = std::make_shared<GrowableBitVector>(_bvSize, _bvCapacity, genHolder);
AllocatedBitVector &bv = *bvsp.get();
uint32_t docIdLimit = _bvSize;
(void) docIdLimit;
@@ -329,17 +327,17 @@ PostingStore<DataT>::apply(BitVector &bv,
if (r != re && (a == ae || *r < a->_key)) {
// remove
assert(*r < bv.size());
- bv.slowClearBit(*r);
+ bv.clearBitAndMaintainCount(*r);
++r;
} else {
if (r != re && !(a->_key < *r)) {
// update or add
assert(a->_key < bv.size());
- bv.slowSetBit(a->_key);
+ bv.setBitAndMaintainCount(a->_key);
++r;
} else {
assert(a->_key < bv.size());
- bv.slowSetBit(a->_key);
+ bv.setBitAndMaintainCount(a->_key);
}
++a;
}
diff --git a/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp
index 250182f924b..d350b479c66 100644
--- a/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp
@@ -60,25 +60,16 @@ SingleBoolAttribute::onCommit() {
for (const auto & change : _changes) {
if (change._type == ChangeBase::UPDATE) {
std::atomic_thread_fence(std::memory_order_release);
- if (change._data == 0) {
- _bv.clearBit(change._doc);
- } else {
- _bv.setBit(change._doc);
- }
+ setBit(change._doc, change._data != 0);
} else if ((change._type >= ChangeBase::ADD) && (change._type <= ChangeBase::DIV)) {
std::atomic_thread_fence(std::memory_order_release);
int8_t val = applyArithmetic(getFast(change._doc), change);
- if (val == 0) {
- _bv.clearBit(change._doc);
- } else {
- _bv.setBit(change._doc);
- }
+ setBit(change._doc, val != 0);
} else if (change._type == ChangeBase::CLEARDOC) {
std::atomic_thread_fence(std::memory_order_release);
- _bv.clearBit(change._doc);
+ _bv.clearBitAndMaintainCount(change._doc);
}
}
- _bv.invalidateCachedCount();
}
std::atomic_thread_fence(std::memory_order_release);
@@ -160,8 +151,7 @@ BitVectorSearchContext::createFilterIterator(fef::TermFieldMatchData * matchData
}
void
-BitVectorSearchContext::fetchPostings(const queryeval::ExecuteInfo &execInfo) {
- (void) execInfo;
+BitVectorSearchContext::fetchPostings(const queryeval::ExecuteInfo &) {
}
std::unique_ptr<queryeval::SearchIterator>
@@ -172,7 +162,9 @@ BitVectorSearchContext::createPostingIterator(fef::TermFieldMatchData *matchData
unsigned int
BitVectorSearchContext::approximateHits() const {
return valid()
- ? (_invert) ? (_bv.size() - _bv.countTrueBits()) : _bv.countTrueBits()
+ ? (_invert)
+ ? (_bv.size() - _bv.countTrueBits())
+ : _bv.countTrueBits()
: 0;
}
@@ -195,6 +187,7 @@ SingleBoolAttribute::onLoad()
uint32_t numDocs = attrReader.getNextData();
_bv.extend(numDocs);
ssize_t bytesRead = attrReader.getReader().read(_bv.getStart(), _bv.sizeBytes());
+ _bv.invalidateCachedCount();
assert(bytesRead == _bv.sizeBytes());
setNumDocs(numDocs);
setCommittedDocIdLimit(numDocs);
diff --git a/searchlib/src/vespa/searchlib/attribute/singleboolattribute.h b/searchlib/src/vespa/searchlib/attribute/singleboolattribute.h
index 20ec0b6d077..78c297d55c3 100644
--- a/searchlib/src/vespa/searchlib/attribute/singleboolattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/singleboolattribute.h
@@ -91,9 +91,9 @@ public:
const BitVector & getBitVector() const { return _bv; }
void setBit(DocId doc, bool value) {
if (value) {
- _bv.setBit(doc);
+ _bv.setBitAndMaintainCount(doc);
} else {
- _bv.clearBit(doc);
+ _bv.clearBitAndMaintainCount(doc);
}
}
protected:
diff --git a/searchlib/src/vespa/searchlib/common/bitvector.h b/searchlib/src/vespa/searchlib/common/bitvector.h
index 7405688f4f7..ee6364235e3 100644
--- a/searchlib/src/vespa/searchlib/common/bitvector.h
+++ b/searchlib/src/vespa/searchlib/common/bitvector.h
@@ -134,17 +134,9 @@ public:
void clearBit(Index idx) {
_words[wordNum(idx)] &= ~ mask(idx);
}
- void flip(Index idx) {
+ void flipBit(Index idx) {
_words[wordNum(idx)] ^= mask(idx);
}
- void slowSetBit(Index idx) {
- if ( ! testBit(idx) ) {
- setBit(idx);
- if ( isValidCount() ) {
- _numTrueBits++;
- }
- }
- }
void andWith(const BitVector &right);
void orWith(const BitVector &right);
@@ -171,12 +163,24 @@ public:
*/
void setInterval(Index start, Index end);
- void slowClearBit(Index idx) {
+ /**
+ * Sets a bit and maintains count of number of bits set.
+ * @param idx
+ */
+ void setBitAndMaintainCount(Index idx) {
+ if ( ! testBit(idx) ) {
+ setBit(idx);
+ incNumBits();
+ }
+ }
+ /**
+ * Clears a bit and maintains count of number of bits set.
+ * @param idx
+ */
+ void clearBitAndMaintainCount(Index idx) {
if (testBit(idx)) {
clearBit(idx);
- if ( isValidCount() ) {
- _numTrueBits--;
- }
+ decNumBits();
}
}
@@ -279,6 +283,16 @@ private:
static size_t numActiveWords(Index start, Index end) { return (numWords(end) - wordNum(start)); }
static Index invalidCount() { return std::numeric_limits<Index>::max(); }
void setGuardBit() { setBit(size()); }
+ void incNumBits() {
+ if ( isValidCount() ) {
+ _numTrueBits++;
+ }
+ }
+ void decNumBits() {
+ if ( isValidCount() ) {
+ _numTrueBits--;
+ }
+ }
VESPA_DLL_LOCAL void repairEnds();
VESPA_DLL_LOCAL static Index internalCount(const Word *tarr, size_t sz);
Index count() const;