diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2020-01-22 06:12:19 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2020-01-22 06:12:19 +0000 |
commit | 73aa08d82d68b47f46cff025f2ae668980d649f6 (patch) | |
tree | 99d5dd713be02af25cad50def01d3624625379ad /searchlib | |
parent | f936993576920fa1e729ef79b17bdc7d917e73df (diff) |
Maintain the cached bitCount to avoid cost query time.
Diffstat (limited to 'searchlib')
9 files changed, 72 insertions, 75 deletions
diff --git a/searchlib/src/tests/bitvector/bitvectorbenchmark.cpp b/searchlib/src/tests/bitvector/bitvectorbenchmark.cpp index ed681d9021b..a9df188d417 100644 --- a/searchlib/src/tests/bitvector/bitvectorbenchmark.cpp +++ b/searchlib/src/tests/bitvector/bitvectorbenchmark.cpp @@ -51,10 +51,10 @@ void BitVectorBenchmark::init(size_t n) BitVector *b(BitVector::create(n).release()); srand(1); for(size_t i(0), j(0); i < n; i += rand()%10, j++) { - a->flip(i); + a->flipBit(i); } for(size_t i(0), j(0); i < n; i += rand()%10, j++) { - b->flip(i); + b->flipBit(i); } a->invalidateCachedCount(); b->invalidateCachedCount(); diff --git a/searchlib/src/tests/common/bitvector/bitvector_test.cpp b/searchlib/src/tests/common/bitvector/bitvector_test.cpp index 4cbe96c74b5..d720b105671 100644 --- a/searchlib/src/tests/common/bitvector/bitvector_test.cpp +++ b/searchlib/src/tests/common/bitvector/bitvector_test.cpp @@ -268,21 +268,21 @@ TEST("requireThatSequentialOperationsOnPartialWorks") p1.invalidateCachedCount(); EXPECT_TRUE(p1.hasTrueBits()); EXPECT_EQUAL(1u, p1.countTrueBits()); - p1.slowSetBit(718); - p1.slowSetBit(739); - p1.slowSetBit(871); - p1.slowSetBit(903); + p1.setBitAndMaintainCount(718); + p1.setBitAndMaintainCount(739); + p1.setBitAndMaintainCount(871); + p1.setBitAndMaintainCount(903); EXPECT_EQUAL(5u, p1.countTrueBits()); EXPECT_TRUE(assertBV("[718,719,739,871,903]", p1)); PartialBitVector p2(717,919); EXPECT_FALSE(p1 == p2); - p2.slowSetBit(719); - p2.slowSetBit(718); - p2.slowSetBit(739); - p2.slowSetBit(871); + p2.setBitAndMaintainCount(719); + p2.setBitAndMaintainCount(718); + p2.setBitAndMaintainCount(739); + p2.setBitAndMaintainCount(871); EXPECT_FALSE(p1 == p2); - p2.slowSetBit(903); + p2.setBitAndMaintainCount(903); EXPECT_TRUE(p1 == p2); AllocatedBitVector full(1000); @@ -422,10 +422,10 @@ TEST("requireThatSetWorks") EXPECT_EQUAL(4u, v1.countTrueBits()); EXPECT_TRUE(assertBV("[7,39,80,103]", v1)); - v1.slowSetBit(39); + v1.setBitAndMaintainCount(39); EXPECT_EQUAL(4u, v1.countTrueBits()); EXPECT_TRUE(assertBV("[7,39,80,103]", v1)); - v1.slowSetBit(57); + v1.setBitAndMaintainCount(57); EXPECT_EQUAL(5u, v1.countTrueBits()); EXPECT_TRUE(assertBV("[7,39,57,80,103]", v1)); } diff --git a/searchlib/src/tests/docstore/document_store_visitor/document_store_visitor_test.cpp b/searchlib/src/tests/docstore/document_store_visitor/document_store_visitor_test.cpp index 0a3c3788c98..cc31fcec4d4 100644 --- a/searchlib/src/tests/docstore/document_store_visitor/document_store_visitor_test.cpp +++ b/searchlib/src/tests/docstore/document_store_visitor/document_store_visitor_test.cpp @@ -127,7 +127,7 @@ MyVisitor::visit(uint32_t lid, const std::shared_ptr<Document> &doc) assert(lid < _docIdLimit); Document::UP expDoc(makeDoc(_repo, lid, _before)); EXPECT_TRUE(*expDoc == *doc); - _valid->slowSetBit(lid); + _valid->setBitAndMaintainCount(lid); } @@ -136,7 +136,7 @@ MyVisitor::visit(uint32_t lid) { ++_visitRmCount; assert(lid < _docIdLimit); - _valid->slowClearBit(lid); + _valid->clearBitAndMaintainCount(lid); } @@ -158,7 +158,7 @@ MyRewriteVisitor::visit(uint32_t lid, const std::shared_ptr<Document> &doc) assert(lid < _docIdLimit); Document::UP expDoc(makeDoc(_repo, lid, _before)); EXPECT_TRUE(*expDoc == *doc); - _valid->slowSetBit(lid); + _valid->setBitAndMaintainCount(lid); doc->set("extra", "foo"); } @@ -297,7 +297,7 @@ Fixture::put(const Document &doc, uint32_t lid) ++_syncToken; assert(lid < _docIdLimit); _store->write(_syncToken, lid, doc); - _valid->slowSetBit(lid); + _valid->setBitAndMaintainCount(lid); } @@ -307,7 +307,7 @@ Fixture::remove(uint32_t lid) ++_syncToken; assert(lid < _docIdLimit); _store->remove(_syncToken, lid); - _valid->slowClearBit(lid); + _valid->clearBitAndMaintainCount(lid); } diff --git a/searchlib/src/vespa/searchlib/attribute/flagattribute.cpp b/searchlib/src/vespa/searchlib/attribute/flagattribute.cpp index a38cbe60e56..1e4bba95b4b 100644 --- a/searchlib/src/vespa/searchlib/attribute/flagattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/flagattribute.cpp @@ -24,8 +24,7 @@ class SaveBits FA &_fa; public: - SaveBits(vespalib::ConstArrayRef<T> map, - FA &fa) + SaveBits(vespalib::ConstArrayRef<T> map, FA &fa) : _map(map), _fa(fa) { @@ -55,11 +54,9 @@ FlagAttributeT<B>::FlagAttributeT(const vespalib::string & baseFileName, const A template <typename B> AttributeVector::SearchContext::UP -FlagAttributeT<B>::getSearch(QueryTermSimple::UP qTerm, - const attribute::SearchContextParams & params) const +FlagAttributeT<B>::getSearch(QueryTermSimple::UP qTerm, const attribute::SearchContextParams &) const { - (void) params; - return AttributeVector::SearchContext::UP (new SearchContext(std::move(qTerm), *this)); + return std::make_unique<SearchContext>(std::move(qTerm), *this); } template <typename B> @@ -69,7 +66,7 @@ void FlagAttributeT<B>::clearOldValues(DocId doc) for (uint32_t i(0), m(this->get(doc, values)); i < m; i++) { BitVector * bv = _bitVectors[getOffset(values[i].value())]; if (bv != nullptr) { - bv->clearBit(doc); + bv->clearBitAndMaintainCount(doc); } } } @@ -130,10 +127,9 @@ void FlagAttributeT<B>::setNewValues(DocId doc, const std::vector<typename B::WT _bitVectorStore[offset] = BitVector::create(_bitVectorSize); _bitVectors[offset] = _bitVectorStore[offset].get(); bv = _bitVectors[offset]; - bv->invalidateCachedCount(); ensureGuardBit(*bv); } - bv->setBit(doc); + bv->setBitAndMaintainCount(doc); } } @@ -145,13 +141,12 @@ FlagAttributeT<B>::setNewBVValue(DocId doc, typename B::WType::ValueType value) BitVector * bv = _bitVectors[offset]; if (bv == nullptr) { assert(_bitVectorSize >= this->getNumDocs()); - _bitVectorStore[offset] = BitVector::create(_bitVectorSize); + _bitVectorStore[offset] = BitVector::create(_bitVectorSize); _bitVectors[offset] = _bitVectorStore[offset].get(); bv = _bitVectors[offset]; - bv->invalidateCachedCount(); ensureGuardBit(*bv); } - bv->setBit(doc); + bv->setBitAndMaintainCount(doc); } @@ -193,8 +188,7 @@ template <typename B> void FlagAttributeT<B>::ensureGuardBit() { - for (uint32_t i = 0; i < _bitVectors.size(); ++i) { - BitVector * bv = _bitVectors[i]; + for (BitVector * bv : _bitVectors) { if (bv != nullptr) { ensureGuardBit(*bv); } @@ -205,8 +199,7 @@ template <typename B> void FlagAttributeT<B>::clearGuardBit(DocId doc) { - for (uint32_t i = 0; i < _bitVectors.size(); ++i) { - BitVector * bv = _bitVectors[i]; + for (BitVector * bv : _bitVectors) { if (bv != nullptr) { bv->clearBit(doc); // clear guard bit and start using this doc id } @@ -219,8 +212,7 @@ FlagAttributeT<B>::resizeBitVectors(uint32_t neededSize) { const GrowStrategy & gs = this->getConfig().getGrowStrategy(); uint32_t newSize = neededSize + (neededSize * gs.getDocsGrowFactor()) + gs.getDocsGrowDelta(); - for (uint32_t i = 0; i < _bitVectors.size(); ++i) { - BitVector * bv = _bitVectors[i]; + for (BitVector * bv : _bitVectors) { if (bv != nullptr) { vespalib::GenerationHeldBase::UP hold(bv->grow(newSize)); ensureGuardBit(*bv); diff --git a/searchlib/src/vespa/searchlib/attribute/flagattribute.h b/searchlib/src/vespa/searchlib/attribute/flagattribute.h index 742d0cd8592..d3ce5f9af46 100644 --- a/searchlib/src/vespa/searchlib/attribute/flagattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/flagattribute.h @@ -52,10 +52,10 @@ private: return _bitVectors[value + 128]; } - vespalib::GenerationHolder _bitVectorHolder; + vespalib::GenerationHolder _bitVectorHolder; std::vector<std::shared_ptr<BitVector> > _bitVectorStore; std::vector<BitVector *> _bitVectors; - uint32_t _bitVectorSize; + uint32_t _bitVectorSize; template <class SC> friend class FlagAttributeIteratorT; template <class SC> friend class FlagAttributeIteratorStrict; }; diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp index 9c736a16069..2badb199934 100644 --- a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp +++ b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp @@ -246,9 +246,8 @@ PostingStore<DataT>::makeBitVector(EntryRef &ref) uint32_t typeId = getTypeId(iRef); assert(isBTree(typeId)); (void) typeId; - std::shared_ptr<GrowableBitVector> bvsp; vespalib::GenerationHolder &genHolder = _store.getGenerationHolder(); - bvsp.reset(new GrowableBitVector(_bvSize, _bvCapacity, genHolder)); + auto bvsp = std::make_shared<GrowableBitVector>(_bvSize, _bvCapacity, genHolder); AllocatedBitVector &bv = *bvsp.get(); uint32_t docIdLimit = _bvSize; (void) docIdLimit; @@ -288,9 +287,8 @@ PostingStore<DataT>::applyNewBitVector(EntryRef &ref, { assert(!ref.valid()); RefType iRef(ref); - std::shared_ptr<GrowableBitVector> bvsp; vespalib::GenerationHolder &genHolder = _store.getGenerationHolder(); - bvsp.reset(new GrowableBitVector(_bvSize, _bvCapacity, genHolder)); + auto bvsp = std::make_shared<GrowableBitVector>(_bvSize, _bvCapacity, genHolder); AllocatedBitVector &bv = *bvsp.get(); uint32_t docIdLimit = _bvSize; (void) docIdLimit; @@ -329,17 +327,17 @@ PostingStore<DataT>::apply(BitVector &bv, if (r != re && (a == ae || *r < a->_key)) { // remove assert(*r < bv.size()); - bv.slowClearBit(*r); + bv.clearBitAndMaintainCount(*r); ++r; } else { if (r != re && !(a->_key < *r)) { // update or add assert(a->_key < bv.size()); - bv.slowSetBit(a->_key); + bv.setBitAndMaintainCount(a->_key); ++r; } else { assert(a->_key < bv.size()); - bv.slowSetBit(a->_key); + bv.setBitAndMaintainCount(a->_key); } ++a; } diff --git a/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp index 250182f924b..d350b479c66 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/singleboolattribute.cpp @@ -60,25 +60,16 @@ SingleBoolAttribute::onCommit() { for (const auto & change : _changes) { if (change._type == ChangeBase::UPDATE) { std::atomic_thread_fence(std::memory_order_release); - if (change._data == 0) { - _bv.clearBit(change._doc); - } else { - _bv.setBit(change._doc); - } + setBit(change._doc, change._data != 0); } else if ((change._type >= ChangeBase::ADD) && (change._type <= ChangeBase::DIV)) { std::atomic_thread_fence(std::memory_order_release); int8_t val = applyArithmetic(getFast(change._doc), change); - if (val == 0) { - _bv.clearBit(change._doc); - } else { - _bv.setBit(change._doc); - } + setBit(change._doc, val != 0); } else if (change._type == ChangeBase::CLEARDOC) { std::atomic_thread_fence(std::memory_order_release); - _bv.clearBit(change._doc); + _bv.clearBitAndMaintainCount(change._doc); } } - _bv.invalidateCachedCount(); } std::atomic_thread_fence(std::memory_order_release); @@ -160,8 +151,7 @@ BitVectorSearchContext::createFilterIterator(fef::TermFieldMatchData * matchData } void -BitVectorSearchContext::fetchPostings(const queryeval::ExecuteInfo &execInfo) { - (void) execInfo; +BitVectorSearchContext::fetchPostings(const queryeval::ExecuteInfo &) { } std::unique_ptr<queryeval::SearchIterator> @@ -172,7 +162,9 @@ BitVectorSearchContext::createPostingIterator(fef::TermFieldMatchData *matchData unsigned int BitVectorSearchContext::approximateHits() const { return valid() - ? (_invert) ? (_bv.size() - _bv.countTrueBits()) : _bv.countTrueBits() + ? (_invert) + ? (_bv.size() - _bv.countTrueBits()) + : _bv.countTrueBits() : 0; } @@ -195,6 +187,7 @@ SingleBoolAttribute::onLoad() uint32_t numDocs = attrReader.getNextData(); _bv.extend(numDocs); ssize_t bytesRead = attrReader.getReader().read(_bv.getStart(), _bv.sizeBytes()); + _bv.invalidateCachedCount(); assert(bytesRead == _bv.sizeBytes()); setNumDocs(numDocs); setCommittedDocIdLimit(numDocs); diff --git a/searchlib/src/vespa/searchlib/attribute/singleboolattribute.h b/searchlib/src/vespa/searchlib/attribute/singleboolattribute.h index 20ec0b6d077..78c297d55c3 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleboolattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/singleboolattribute.h @@ -91,9 +91,9 @@ public: const BitVector & getBitVector() const { return _bv; } void setBit(DocId doc, bool value) { if (value) { - _bv.setBit(doc); + _bv.setBitAndMaintainCount(doc); } else { - _bv.clearBit(doc); + _bv.clearBitAndMaintainCount(doc); } } protected: diff --git a/searchlib/src/vespa/searchlib/common/bitvector.h b/searchlib/src/vespa/searchlib/common/bitvector.h index 7405688f4f7..ee6364235e3 100644 --- a/searchlib/src/vespa/searchlib/common/bitvector.h +++ b/searchlib/src/vespa/searchlib/common/bitvector.h @@ -134,17 +134,9 @@ public: void clearBit(Index idx) { _words[wordNum(idx)] &= ~ mask(idx); } - void flip(Index idx) { + void flipBit(Index idx) { _words[wordNum(idx)] ^= mask(idx); } - void slowSetBit(Index idx) { - if ( ! testBit(idx) ) { - setBit(idx); - if ( isValidCount() ) { - _numTrueBits++; - } - } - } void andWith(const BitVector &right); void orWith(const BitVector &right); @@ -171,12 +163,24 @@ public: */ void setInterval(Index start, Index end); - void slowClearBit(Index idx) { + /** + * Sets a bit and maintains count of number of bits set. + * @param idx + */ + void setBitAndMaintainCount(Index idx) { + if ( ! testBit(idx) ) { + setBit(idx); + incNumBits(); + } + } + /** + * Clears a bit and maintains count of number of bits set. + * @param idx + */ + void clearBitAndMaintainCount(Index idx) { if (testBit(idx)) { clearBit(idx); - if ( isValidCount() ) { - _numTrueBits--; - } + decNumBits(); } } @@ -279,6 +283,16 @@ private: static size_t numActiveWords(Index start, Index end) { return (numWords(end) - wordNum(start)); } static Index invalidCount() { return std::numeric_limits<Index>::max(); } void setGuardBit() { setBit(size()); } + void incNumBits() { + if ( isValidCount() ) { + _numTrueBits++; + } + } + void decNumBits() { + if ( isValidCount() ) { + _numTrueBits--; + } + } VESPA_DLL_LOCAL void repairEnds(); VESPA_DLL_LOCAL static Index internalCount(const Word *tarr, size_t sz); Index count() const; |