diff options
author | Tor Egge <Tor.Egge@broadpark.no> | 2019-05-14 14:15:01 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@broadpark.no> | 2019-05-14 14:15:01 +0200 |
commit | a14263cecfae32738910216c92c48125a5032c92 (patch) | |
tree | 4d1546cff2411a039f519d547d35b0c864aca238 /searchlib | |
parent | 81cda304c2c58e381f334a90817dd1ea63a56fe0 (diff) |
Consolidate disk index iterators.
Diffstat (limited to 'searchlib')
7 files changed, 106 insertions, 243 deletions
diff --git a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp index f4aae4e2d73..fa49e1ba3af 100644 --- a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp +++ b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp @@ -275,7 +275,7 @@ Test::requireThatBlueprintCanCreateSearchIterators() b = _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0), makeTerm("w1")); b->fetchPostings(true); s = (dynamic_cast<LeafBlueprint *>(b.get()))->createLeafSearch(mda, true); - ASSERT_TRUE(dynamic_cast<Zc4RareWordPosOccIterator<true> *>(s.get()) != NULL); + ASSERT_TRUE((dynamic_cast<ZcRareWordPosOccIterator<true, false> *>(s.get()) != NULL)); } } diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index 10bf5ab7dc2..eaab732d970 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -1219,8 +1219,8 @@ TEST("require that children does not optimize when parents refuse them to") { { const MultiSearch & e = dynamic_cast<const MultiSearch &>(*search); EXPECT_EQUAL("search::BitVectorIteratorStrictT<false>", e.getChildren()[0]->getClassName()); - EXPECT_EQUAL("search::diskindex::Zc4RareWordPosOccIterator<true>", e.getChildren()[1]->getClassName()); - EXPECT_EQUAL("search::diskindex::Zc4RareWordPosOccIterator<true>", e.getChildren()[2]->getClassName()); + EXPECT_EQUAL("search::diskindex::ZcRareWordPosOccIterator<true, false>", e.getChildren()[1]->getClassName()); + EXPECT_EQUAL("search::diskindex::ZcRareWordPosOccIterator<true, false>", e.getChildren()[2]->getClassName()); } md->resolveTermField(12)->tagAsNotNeeded(); @@ -1229,8 +1229,8 @@ TEST("require that children does not optimize when parents refuse them to") { { const MultiSearch & e = dynamic_cast<const MultiSearch &>(*search); EXPECT_EQUAL("search::BitVectorIteratorStrictT<false>", e.getChildren()[0]->getClassName()); - EXPECT_EQUAL("search::diskindex::Zc4RareWordPosOccIterator<true>", e.getChildren()[1]->getClassName()); - EXPECT_EQUAL("search::diskindex::Zc4RareWordPosOccIterator<true>", e.getChildren()[2]->getClassName()); + EXPECT_EQUAL("search::diskindex::ZcRareWordPosOccIterator<true, false>", e.getChildren()[1]->getClassName()); + EXPECT_EQUAL("search::diskindex::ZcRareWordPosOccIterator<true, false>", e.getChildren()[2]->getClassName()); } } diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposocciterators.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposocciterators.cpp index bea92e5c009..3e0a36bcccd 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcposocciterators.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zcposocciterators.cpp @@ -12,53 +12,25 @@ using search::index::PostingListCounts; #define DEBUG_ZCFILTEROCC_PRINTF 0 #define DEBUG_ZCFILTEROCC_ASSERT 0 -template <bool bigEndian> -Zc4RareWordPosOccIterator<bigEndian>:: -Zc4RareWordPosOccIterator(Position start, uint64_t bitLength, uint32_t docIdLimit, bool decode_cheap_features, +template <bool bigEndian, bool dynamic_k> +ZcRareWordPosOccIterator<bigEndian, dynamic_k>:: +ZcRareWordPosOccIterator(Position start, uint64_t bitLength, uint32_t docIdLimit, bool decode_cheap_features, const PosOccFieldsParams *fieldsParams, const TermFieldMatchDataArray &matchData) - : Zc4RareWordPostingIterator<bigEndian>(matchData, start, docIdLimit, decode_cheap_features), - _decodeContextReal(start.getOccurences(), start.getBitOffset(), bitLength, fieldsParams) -{ - assert(!matchData.valid() || (fieldsParams->getNumFields() == matchData.size())); - _decodeContext = &_decodeContextReal; -} - - -template <bool bigEndian> -Zc4PosOccIterator<bigEndian>:: -Zc4PosOccIterator(Position start, uint64_t bitLength, uint32_t docIdLimit, bool decode_cheap_features, - uint32_t minChunkDocs, const PostingListCounts &counts, - const PosOccFieldsParams *fieldsParams, - const TermFieldMatchDataArray &matchData) - : ZcPostingIterator<bigEndian>(minChunkDocs, false, counts, matchData, start, docIdLimit, decode_cheap_features), + : ZcRareWordPostingIterator<bigEndian, dynamic_k>(matchData, start, docIdLimit, decode_cheap_features), _decodeContextReal(start.getOccurences(), start.getBitOffset(), bitLength, fieldsParams) { assert(!matchData.valid() || (fieldsParams->getNumFields() == matchData.size())); _decodeContext = &_decodeContextReal; } - -template <bool bigEndian> -ZcRareWordPosOccIterator<bigEndian>:: -ZcRareWordPosOccIterator(Position start, uint64_t bitLength, uint32_t docIdLimit, bool decode_cheap_features, - const PosOccFieldsParams *fieldsParams, - const TermFieldMatchDataArray &matchData) - : ZcRareWordPostingIterator<bigEndian>(matchData, start, docIdLimit, decode_cheap_features), - _decodeContextReal(start.getOccurences(), start.getBitOffset(), bitLength, fieldsParams) -{ - assert(!matchData.valid() || (fieldsParams->getNumFields() == matchData.size())); - _decodeContext = &_decodeContextReal; -} - - -template <bool bigEndian> -ZcPosOccIterator<bigEndian>:: +template <bool bigEndian, bool dynamic_k> +ZcPosOccIterator<bigEndian, dynamic_k>:: ZcPosOccIterator(Position start, uint64_t bitLength, uint32_t docIdLimit, bool decode_cheap_features, uint32_t minChunkDocs, const PostingListCounts &counts, const PosOccFieldsParams *fieldsParams, const TermFieldMatchDataArray &matchData) - : ZcPostingIterator<bigEndian>(minChunkDocs, true, counts, matchData, start, docIdLimit, decode_cheap_features), + : ZcPostingIterator<bigEndian>(minChunkDocs, dynamic_k, counts, matchData, start, docIdLimit, decode_cheap_features), _decodeContextReal(start.getOccurences(), start.getBitOffset(), bitLength, fieldsParams) { assert(!matchData.valid() || (fieldsParams->getNumFields() == matchData.size())); @@ -79,15 +51,15 @@ create_zc_posocc_iterator(const PostingListCounts &counts, bitcompression::Posit assert((num_docs == counts._numDocs) || ((num_docs == posting_params._min_chunk_docs) && (num_docs < counts._numDocs))); if (num_docs < posting_params._min_skip_docs) { if (posting_params._dynamic_k) { - return std::make_unique<ZcRareWordPosOccIterator<bigEndian>>(start, bit_length, posting_params._doc_id_limit, posting_params._encode_cheap_features, &fields_params, match_data); + return std::make_unique<ZcRareWordPosOccIterator<bigEndian, true>>(start, bit_length, posting_params._doc_id_limit, posting_params._encode_cheap_features, &fields_params, match_data); } else { - return std::make_unique<Zc4RareWordPosOccIterator<bigEndian>>(start, bit_length, posting_params._doc_id_limit, posting_params._encode_cheap_features, &fields_params, match_data); + return std::make_unique<ZcRareWordPosOccIterator<bigEndian, false>>(start, bit_length, posting_params._doc_id_limit, posting_params._encode_cheap_features, &fields_params, match_data); } } else { if (posting_params._dynamic_k) { - return std::make_unique<ZcPosOccIterator<bigEndian>>(start, bit_length, posting_params._doc_id_limit, posting_params._encode_cheap_features, posting_params._min_chunk_docs, counts, &fields_params, match_data); + return std::make_unique<ZcPosOccIterator<bigEndian, true>>(start, bit_length, posting_params._doc_id_limit, posting_params._encode_cheap_features, posting_params._min_chunk_docs, counts, &fields_params, match_data); } else { - return std::make_unique<Zc4PosOccIterator<bigEndian>>(start, bit_length, posting_params._doc_id_limit, posting_params._encode_cheap_features, posting_params._min_chunk_docs, counts, &fields_params, match_data); + return std::make_unique<ZcPosOccIterator<bigEndian, false>>(start, bit_length, posting_params._doc_id_limit, posting_params._encode_cheap_features, posting_params._min_chunk_docs, counts, &fields_params, match_data); } } } @@ -102,16 +74,14 @@ create_zc_posocc_iterator(bool bigEndian, const PostingListCounts &counts, bitco } } -template class Zc4RareWordPosOccIterator<true>; -template class Zc4RareWordPosOccIterator<false>; - -template class Zc4PosOccIterator<true>; -template class Zc4PosOccIterator<false>; - -template class ZcRareWordPosOccIterator<true>; -template class ZcRareWordPosOccIterator<false>; +template class ZcRareWordPosOccIterator<false, false>; +template class ZcRareWordPosOccIterator<false, true>; +template class ZcRareWordPosOccIterator<true, false>; +template class ZcRareWordPosOccIterator<true, true>; -template class ZcPosOccIterator<true>; -template class ZcPosOccIterator<false>; +template class ZcPosOccIterator<false, false>; +template class ZcPosOccIterator<false, true>; +template class ZcPosOccIterator<true, false>; +template class ZcPosOccIterator<true, true>; } diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposocciterators.h b/searchlib/src/vespa/searchlib/diskindex/zcposocciterators.h index 76e7b384c11..107636df0bf 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcposocciterators.h +++ b/searchlib/src/vespa/searchlib/diskindex/zcposocciterators.h @@ -9,85 +9,49 @@ namespace search::diskindex { struct Zc4PostingParams; -template <bool bigEndian> -class Zc4RareWordPosOccIterator : public Zc4RareWordPostingIterator<bigEndian> +template <bool bigEndian, bool dynamic_k> +class ZcRareWordPosOccIterator : public ZcRareWordPostingIterator<bigEndian, dynamic_k> { private: - typedef Zc4RareWordPostingIterator<bigEndian> ParentClass; + using ParentClass = ZcRareWordPostingIterator<bigEndian, dynamic_k>; using ParentClass::_decodeContext; - typedef bitcompression::EG2PosOccDecodeContextCooked<bigEndian> DecodeContextReal; + using DecodeContextReal = std::conditional_t<dynamic_k, bitcompression::EGPosOccDecodeContextCooked<bigEndian>, bitcompression::EG2PosOccDecodeContextCooked<bigEndian>>; DecodeContextReal _decodeContextReal; public: - Zc4RareWordPosOccIterator(Position start, uint64_t bitLength, uint32_t docIdLimit, bool decode_cheap_features, + ZcRareWordPosOccIterator(Position start, uint64_t bitLength, uint32_t docIdLimit, bool decode_cheap_features, const bitcompression::PosOccFieldsParams *fieldsParams, const fef::TermFieldMatchDataArray &matchData); }; -template <bool bigEndian> -class Zc4PosOccIterator : public ZcPostingIterator<bigEndian> +template <bool bigEndian, bool dynamic_k> +class ZcPosOccIterator : public ZcPostingIterator<bigEndian> { private: typedef ZcPostingIterator<bigEndian> ParentClass; using ParentClass::_decodeContext; - typedef bitcompression::EG2PosOccDecodeContextCooked<bigEndian> DecodeContext; + using DecodeContext = std::conditional_t<dynamic_k, bitcompression::EGPosOccDecodeContextCooked<bigEndian>, bitcompression::EG2PosOccDecodeContextCooked<bigEndian>>; DecodeContext _decodeContextReal; public: - Zc4PosOccIterator(Position start, uint64_t bitLength, uint32_t docIdLimit, bool decode_cheap_features, + ZcPosOccIterator(Position start, uint64_t bitLength, uint32_t docIdLimit, bool decode_cheap_features, uint32_t minChunkDocs, const index::PostingListCounts &counts, const bitcompression::PosOccFieldsParams *fieldsParams, const fef::TermFieldMatchDataArray &matchData); }; - -template <bool bigEndian> -class ZcRareWordPosOccIterator : public ZcRareWordPostingIterator<bigEndian> -{ -private: - typedef ZcRareWordPostingIterator<bigEndian> ParentClass; - using ParentClass::_decodeContext; - - typedef bitcompression::EGPosOccDecodeContextCooked<bigEndian> DecodeContextReal; - DecodeContextReal _decodeContextReal; -public: - ZcRareWordPosOccIterator(Position start, uint64_t bitLength, uint32_t docidLimit, bool decode_cheap_features, - const bitcompression::PosOccFieldsParams *fieldsParams, - const fef::TermFieldMatchDataArray &matchData); -}; - - -template <bool bigEndian> -class ZcPosOccIterator : public ZcPostingIterator<bigEndian> -{ -private: - typedef ZcPostingIterator<bigEndian> ParentClass; - using ParentClass::_decodeContext; - - typedef bitcompression::EGPosOccDecodeContextCooked<bigEndian> DecodeContext; - DecodeContext _decodeContextReal; -public: - ZcPosOccIterator(Position start, uint64_t bitLength, uint32_t docidLimit, bool decode_cheap_features, - uint32_t minChunkDocs, const index::PostingListCounts &counts, - const bitcompression::PosOccFieldsParams *fieldsParams, - const fef::TermFieldMatchDataArray &matchData); -}; - - std::unique_ptr<search::queryeval::SearchIterator> create_zc_posocc_iterator(bool bigEndian, const index::PostingListCounts &counts, bitcompression::Position start, uint64_t bit_length, const Zc4PostingParams &posting_params, const bitcompression::PosOccFieldsParams &fields_params, const fef::TermFieldMatchDataArray &match_data); -extern template class Zc4RareWordPosOccIterator<true>; -extern template class Zc4RareWordPosOccIterator<false>; - -extern template class Zc4PosOccIterator<true>; -extern template class Zc4PosOccIterator<false>; - -extern template class ZcRareWordPosOccIterator<true>; -extern template class ZcRareWordPosOccIterator<false>; +extern template class ZcRareWordPosOccIterator<false, false>; +extern template class ZcRareWordPosOccIterator<false, true>; +extern template class ZcRareWordPosOccIterator<true, false>; +extern template class ZcRareWordPosOccIterator<true, true>; -extern template class ZcPosOccIterator<true>; -extern template class ZcPosOccIterator<false>; +extern template class ZcPosOccIterator<false, false>; +extern template class ZcPosOccIterator<false, true>; +extern template class ZcPosOccIterator<true, false>; +extern template class ZcPosOccIterator<true, true>; } diff --git a/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp b/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp index 8ea576b06b0..a9098b89e16 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp @@ -34,8 +34,8 @@ ZcIteratorBase::initRange(uint32_t beginid, uint32_t endid) template <bool bigEndian> -Zc4RareWordPostingIterator<bigEndian>:: -Zc4RareWordPostingIterator(const TermFieldMatchDataArray &matchData, Position start, uint32_t docIdLimit, bool decode_cheap_features) +ZcRareWordPostingIteratorBase<bigEndian>:: +ZcRareWordPostingIteratorBase(const TermFieldMatchDataArray &matchData, Position start, uint32_t docIdLimit, bool decode_cheap_features) : ZcIteratorBase(matchData, start, docIdLimit), _decodeContext(nullptr), _residue(0), @@ -47,9 +47,17 @@ Zc4RareWordPostingIterator(const TermFieldMatchDataArray &matchData, Position st { } -template <bool bigEndian> +template <bool bigEndian, bool dynamic_k> +ZcRareWordPostingIterator<bigEndian, dynamic_k>:: +ZcRareWordPostingIterator(const TermFieldMatchDataArray &matchData, Position start, uint32_t docIdLimit, bool decode_cheap_features) + : ZcRareWordPostingIteratorBase<bigEndian>(matchData, start, docIdLimit, decode_cheap_features), + _doc_id_k_param() +{ +} + +template <bool bigEndian, bool dynamic_k> void -Zc4RareWordPostingIterator<bigEndian>::doSeek(uint32_t docId) +ZcRareWordPostingIterator<bigEndian, dynamic_k>::doSeek(uint32_t docId) { typedef FeatureEncodeContext<bigEndian> EC; uint32_t length; @@ -63,7 +71,7 @@ Zc4RareWordPostingIterator<bigEndian>::doSeek(uint32_t docId) if (__builtin_expect(--_residue == 0, false)) { goto atbreak; } - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_DELTA_DOCID, EC); + UC64_DECODEEXPGOLOMB_NS(o, _doc_id_k_param.get_doc_id_k(), EC); oDocId += 1 + static_cast<uint32_t>(val64); #if DEBUG_ZCPOSTING_PRINTF printf("Decode docId=%d\n", @@ -83,7 +91,7 @@ Zc4RareWordPostingIterator<bigEndian>::doSeek(uint32_t docId) if (__builtin_expect(--_residue == 0, false)) { goto atbreak; } - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_DELTA_DOCID, EC); + UC64_DECODEEXPGOLOMB_NS(o, _doc_id_k_param.get_doc_id_k(), EC); oDocId += 1 + static_cast<uint32_t>(val64); #if DEBUG_ZCPOSTING_PRINTF printf("Decode docId=%d\n", @@ -107,7 +115,7 @@ Zc4RareWordPostingIterator<bigEndian>::doSeek(uint32_t docId) template <bool bigEndian> void -Zc4RareWordPostingIterator<bigEndian>::doUnpack(uint32_t docId) +ZcRareWordPostingIteratorBase<bigEndian>::doUnpack(uint32_t docId) { if (!_matchData.valid() || getUnpacked()) { return; @@ -118,14 +126,14 @@ Zc4RareWordPostingIterator<bigEndian>::doUnpack(uint32_t docId) } template <bool bigEndian> -void Zc4RareWordPostingIterator<bigEndian>::rewind(Position start) +void ZcRareWordPostingIteratorBase<bigEndian>::rewind(Position start) { _decodeContext->setPosition(start); } -template <bool bigEndian> +template <bool bigEndian, bool dynamic_k> void -Zc4RareWordPostingIterator<bigEndian>::readWordStart(uint32_t docIdLimit) +ZcRareWordPostingIterator<bigEndian, dynamic_k>::readWordStart(uint32_t docIdLimit) { (void) docIdLimit; typedef FeatureEncodeContext<bigEndian> EC; @@ -136,102 +144,8 @@ Zc4RareWordPostingIterator<bigEndian>::readWordStart(uint32_t docIdLimit) UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC); _numDocs = static_cast<uint32_t>(val64) + 1; - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_DELTA_DOCID, EC); - uint32_t docId = static_cast<uint32_t>(val64) + 1; - if (_decode_cheap_features) { - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_FIELD_LENGTH, EC); - _field_length = static_cast<uint32_t>(val64) + 1; - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUM_OCCS, EC); - _num_occs = static_cast<uint32_t>(val64) + 1; - } - UC64_DECODECONTEXT_STORE(o, _decodeContext->_); - - setDocId(docId); - _residue = _numDocs; - clearUnpacked(); -} - - -template <bool bigEndian> -ZcRareWordPostingIterator<bigEndian>:: -ZcRareWordPostingIterator(const TermFieldMatchDataArray &matchData, Position start, uint32_t docIdLimit, bool decode_cheap_features) - : Zc4RareWordPostingIterator<bigEndian>(matchData, start, docIdLimit, decode_cheap_features), - _docIdK(0) -{ -} - - -template <bool bigEndian> -void -ZcRareWordPostingIterator<bigEndian>::doSeek(uint32_t docId) -{ - typedef FeatureEncodeContext<bigEndian> EC; - uint32_t length; - uint64_t val64; - - uint32_t oDocId = getDocId(); - - UC64_DECODECONTEXT_CONSTRUCTOR(o, _decodeContext->_); - if (getUnpacked()) { - clearUnpacked(); - if (__builtin_expect(--_residue == 0, false)) { - goto atbreak; - } - UC64_DECODEEXPGOLOMB_NS(o, _docIdK, EC); - oDocId += 1 + static_cast<uint32_t>(val64); -#if DEBUG_ZCPOSTING_PRINTF - printf("Decode docId=%d\n", - oDocId); -#endif - if (_decode_cheap_features) { - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_FIELD_LENGTH, EC); - _field_length = static_cast<uint32_t>(val64) + 1; - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUM_OCCS, EC); - _num_occs = static_cast<uint32_t>(val64) + 1; - } - } - while (__builtin_expect(oDocId < docId, true)) { - UC64_DECODECONTEXT_STORE(o, _decodeContext->_); - _decodeContext->skipFeatures(1); - UC64_DECODECONTEXT_LOAD(o, _decodeContext->_); - if (__builtin_expect(--_residue == 0, false)) { - goto atbreak; - } - UC64_DECODEEXPGOLOMB_NS(o, _docIdK, EC); - oDocId += 1 + static_cast<uint32_t>(val64); -#if DEBUG_ZCPOSTING_PRINTF - printf("Decode docId=%d\n", - oDocId); -#endif - if (_decode_cheap_features) { - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_FIELD_LENGTH, EC); - _field_length = static_cast<uint32_t>(val64) + 1; - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUM_OCCS, EC); - _num_occs = static_cast<uint32_t>(val64) + 1; - } - } - UC64_DECODECONTEXT_STORE(o, _decodeContext->_); - setDocId(oDocId); - return; - atbreak: - setAtEnd(); // Mark end of data - return; -} - - -template <bool bigEndian> -void -ZcRareWordPostingIterator<bigEndian>::readWordStart(uint32_t docIdLimit) -{ - typedef FeatureEncodeContext<bigEndian> EC; - UC64_DECODECONTEXT_CONSTRUCTOR(o, _decodeContext->_); - uint32_t length; - uint64_t val64; - - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC); - _numDocs = static_cast<uint32_t>(val64) + 1; - _docIdK = EC::calcDocIdK(_numDocs, docIdLimit); - UC64_DECODEEXPGOLOMB_NS(o, _docIdK, EC); + _doc_id_k_param.setup(_numDocs, docIdLimit); + UC64_DECODEEXPGOLOMB_NS(o, _doc_id_k_param.get_doc_id_k(), EC); uint32_t docId = static_cast<uint32_t>(val64) + 1; if (_decode_cheap_features) { UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_FIELD_LENGTH, EC); @@ -652,14 +566,12 @@ void ZcPostingIterator<bigEndian>::rewind(Position start) _chunkNo = 0; } - -template class Zc4RareWordPostingIterator<true>; -template class Zc4RareWordPostingIterator<false>; +template class ZcRareWordPostingIterator<false, false>; +template class ZcRareWordPostingIterator<false, true>; +template class ZcRareWordPostingIterator<true, false>; +template class ZcRareWordPostingIterator<true, true>; template class ZcPostingIterator<true>; template class ZcPostingIterator<false>; -template class ZcRareWordPostingIterator<true>; -template class ZcRareWordPostingIterator<false>; - } diff --git a/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h b/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h index 222fb404a7d..584de2786f0 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h +++ b/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h @@ -56,7 +56,7 @@ private: }; template <bool bigEndian> -class Zc4RareWordPostingIterator : public ZcIteratorBase +class ZcRareWordPostingIteratorBase : public ZcIteratorBase { private: typedef ZcIteratorBase ParentClass; @@ -72,19 +72,41 @@ public: uint32_t _field_length; uint32_t _num_occs; - Zc4RareWordPostingIterator(const fef::TermFieldMatchDataArray &matchData, Position start, uint32_t docIdLimit, bool decode_cheap_features); + ZcRareWordPostingIteratorBase(const fef::TermFieldMatchDataArray &matchData, Position start, uint32_t docIdLimit, bool decode_cheap_features); void doUnpack(uint32_t docId) override; - void doSeek(uint32_t docId) override; - void readWordStart(uint32_t docIdLimit) override; void rewind(Position start) override; }; -template <bool bigEndian> -class ZcRareWordPostingIterator : public Zc4RareWordPostingIterator<bigEndian> +template <bool dynamic_k> class ZcPostingDocIdKParam; + +template <> +class ZcPostingDocIdKParam<false> { -private: - typedef Zc4RareWordPostingIterator<bigEndian> ParentClass; +public: + ZcPostingDocIdKParam() { } + constexpr static uint32_t get_doc_id_k() { return K_VALUE_ZCPOSTING_DELTA_DOCID; } + void setup(uint32_t, uint32_t) { } +}; + +template <> +class ZcPostingDocIdKParam<true> +{ + uint32_t _doc_id_k; +public: + ZcPostingDocIdKParam() : _doc_id_k(0) { } + uint32_t get_doc_id_k() const { return _doc_id_k; } + void setup(uint32_t num_docs, uint32_t doc_id_limit) { + using EC = bitcompression::FeatureEncodeContext<true>; + _doc_id_k = EC::calcDocIdK(num_docs, doc_id_limit); + } +}; + + +template <bool bigEndian, bool dynamic_k> +class ZcRareWordPostingIterator : public ZcRareWordPostingIteratorBase<bigEndian> +{ + using ParentClass = ZcRareWordPostingIteratorBase<bigEndian>; using ParentClass::getDocId; using ParentClass::getUnpacked; using ParentClass::clearUnpacked; @@ -95,18 +117,14 @@ private: using ParentClass::_decode_cheap_features; using ParentClass::_field_length; using ParentClass::_num_occs; - - uint32_t _docIdK; - + ZcPostingDocIdKParam<dynamic_k> _doc_id_k_param; public: using ParentClass::_decodeContext; - ZcRareWordPostingIterator(const search::fef::TermFieldMatchDataArray &matchData, Position start, uint32_t docIdLimit, bool decode_cheap_features); - + ZcRareWordPostingIterator(const fef::TermFieldMatchDataArray &matchData, Position start, uint32_t docIdLimit, bool decode_cheap_features); void doSeek(uint32_t docId) override; void readWordStart(uint32_t docIdLimit) override; }; - class ZcPostingIteratorBase : public ZcIteratorBase { protected: @@ -308,13 +326,12 @@ public: }; -extern template class Zc4RareWordPostingIterator<true>; -extern template class Zc4RareWordPostingIterator<false>; +extern template class ZcRareWordPostingIterator<false, false>; +extern template class ZcRareWordPostingIterator<false, true>; +extern template class ZcRareWordPostingIterator<true, false>; +extern template class ZcRareWordPostingIterator<true, true>; extern template class ZcPostingIterator<true>; extern template class ZcPostingIterator<false>; -extern template class ZcRareWordPostingIterator<true>; -extern template class ZcRareWordPostingIterator<false>; - } diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp index 0f914e2e3b1..154a05ebebe 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp @@ -1144,8 +1144,8 @@ SearchIterator * FakeEGCompr64PosOcc<bigEndian>:: createIterator(const TermFieldMatchDataArray &matchData) const { - return new ZcRareWordPosOccIterator<bigEndian>(Position(_compressed.first, 0), - _compressedBits, _posting_params._doc_id_limit, false, &_fieldsParams, matchData); + return new ZcRareWordPosOccIterator<bigEndian, true>(Position(_compressed.first, 0), + _compressedBits, _posting_params._doc_id_limit, false, &_fieldsParams, matchData); } @@ -1197,8 +1197,8 @@ SearchIterator * FakeEG2Compr64PosOcc<bigEndian>:: createIterator(const TermFieldMatchDataArray &matchData) const { - return new Zc4RareWordPosOccIterator<bigEndian>(Position(_compressed.first, 0), - _compressedBits, _posting_params._doc_id_limit, false, &_fieldsParams, matchData); + return new ZcRareWordPosOccIterator<bigEndian, false>(Position(_compressed.first, 0), + _compressedBits, _posting_params._doc_id_limit, false, &_fieldsParams, matchData); } @@ -1255,7 +1255,7 @@ SearchIterator * FakeZcSkipPosOcc<bigEndian>:: createIterator(const TermFieldMatchDataArray &matchData) const { - return new ZcPosOccIterator<bigEndian>(Position(_compressed.first, 0), _compressedBits, _posting_params._doc_id_limit, false, + return new ZcPosOccIterator<bigEndian, true>(Position(_compressed.first, 0), _compressedBits, _posting_params._doc_id_limit, false, static_cast<uint32_t>(-1), _counts, &_fieldsParams, |