diff options
author | Tor Egge <Tor.Egge@broadpark.no> | 2019-05-15 12:08:29 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@broadpark.no> | 2019-05-15 12:08:29 +0200 |
commit | 345afc7be5ca149d8e28a4eaddf16ef765e57fa6 (patch) | |
tree | f895820f792e2e7be700a61a43cec8cc91b7609c /searchlib | |
parent | 20f644c175b73230373cec00f8c80ffbf22193e6 (diff) |
Unpack cheap features in zc posting iterators.
Diffstat (limited to 'searchlib')
11 files changed, 60 insertions, 19 deletions
diff --git a/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp b/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp index 6d4f483fecc..421beb10386 100644 --- a/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp +++ b/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp @@ -458,6 +458,7 @@ void randReadField(FakeWordSet &wordSet, const std::string &namepref, bool dynamicK, + bool decode_cheap_features, bool verbose) { const char *dynamicKStr = dynamicK ? "true" : "false"; @@ -469,9 +470,10 @@ randReadField(FakeWordSet &wordSet, LOG(info, "enter randReadField," - " namepref=%s, dynamicK=%s", + " namepref=%s, dynamicK=%s, decode_cheap_features=%s", namepref.c_str(), - dynamicKStr); + dynamicKStr, + bool_to_str(decode_cheap_features)); tv.SetNow(); before = tv.Secs(); @@ -533,12 +535,12 @@ randReadField(FakeWordSet &wordSet, sb(handle.createIterator(counts, tfmda)); // LOG(info, "loop=%d, wordNum=%u", loop, wordNum); - word->validate(sb.get(), tfmda, verbose); - word->validate(sb.get(), tfmda, 19, verbose); - word->validate(sb.get(), tfmda, 99, verbose); - word->validate(sb.get(), tfmda, 799, verbose); - word->validate(sb.get(), tfmda, 6399, verbose); - word->validate(sb.get(), tfmda, 11999, verbose); + word->validate(sb.get(), tfmda, decode_cheap_features, verbose); + word->validate(sb.get(), tfmda, 19, decode_cheap_features, verbose); + word->validate(sb.get(), tfmda, 99, decode_cheap_features, verbose); + word->validate(sb.get(), tfmda, 799, decode_cheap_features, verbose); + word->validate(sb.get(), tfmda, 6399, decode_cheap_features, verbose); + word->validate(sb.get(), tfmda, 11999, decode_cheap_features, verbose); ++wordNum; } } @@ -552,10 +554,11 @@ randReadField(FakeWordSet &wordSet, after = tv.Secs(); LOG(info, "leave randReadField, namepref=%s," - " dynamicK=%s, " + " dynamicK=%s, decode_cheap_features=%s, " "elapsed=%10.6f", namepref.c_str(), dynamicKStr, + bool_to_str(decode_cheap_features), after - before); } @@ -634,7 +637,7 @@ testFieldWriterVariant(FakeWordSet &wordSet, uint32_t doc_id_limit, { writeField(wordSet, doc_id_limit, file_name_prefix, dynamic_k, encode_cheap_features); readField(wordSet, doc_id_limit, file_name_prefix, dynamic_k, encode_cheap_features, verbose); - randReadField(wordSet, file_name_prefix, dynamic_k, verbose); + randReadField(wordSet, file_name_prefix, dynamic_k, encode_cheap_features, verbose); fusionField(wordSet.getNumWords(), doc_id_limit, file_name_prefix, file_name_prefix + "x", diff --git a/searchlib/src/tests/postinglistbm/posting_list_test.cpp b/searchlib/src/tests/postinglistbm/posting_list_test.cpp index dad21ada469..c4e375e8c7d 100644 --- a/searchlib/src/tests/postinglistbm/posting_list_test.cpp +++ b/searchlib/src/tests/postinglistbm/posting_list_test.cpp @@ -27,7 +27,7 @@ validate_posting_list_for_word(const FakePosting& posting, const FakeWord& word) std::unique_ptr<SearchIterator> iterator(posting.createIterator(tfmda)); if (posting.hasWordPositions()) { - word.validate(iterator.get(), tfmda, false); + word.validate(iterator.get(), tfmda, posting.has_cheap_features(), false); } else { word.validate(iterator.get(), false); } diff --git a/searchlib/src/tests/postinglistbm/stress_runner.cpp b/searchlib/src/tests/postinglistbm/stress_runner.cpp index 98ace5e00a1..f8829d4eab6 100644 --- a/searchlib/src/tests/postinglistbm/stress_runner.cpp +++ b/searchlib/src/tests/postinglistbm/stress_runner.cpp @@ -217,9 +217,9 @@ makeSomePostings(FPFactory *postingFactory, std::unique_ptr<SearchIterator> iterator(posting->createIterator(tfmda)); if (posting->hasWordPositions()) { if (stride != 0) { - word->validate(iterator.get(), tfmda, stride, verbose); + word->validate(iterator.get(), tfmda, stride, posting->has_cheap_features(), verbose); } else { - word->validate(iterator.get(), tfmda, verbose); + word->validate(iterator.get(), tfmda, posting->has_cheap_features(), verbose); } } else { word->validate(iterator.get(), verbose); diff --git a/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp b/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp index a9098b89e16..53edae444af 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp @@ -1,12 +1,14 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "zcpostingiterators.h" +#include <vespa/searchlib/fef/termfieldmatchdata.h> #include <vespa/searchlib/fef/termfieldmatchdataarray.h> #include <vespa/searchlib/bitcompression/posocccompression.h> namespace search::diskindex { using search::fef::TermFieldMatchDataArray; +using search::fef::TermFieldMatchData; using search::bitcompression::FeatureDecodeContext; using search::bitcompression::FeatureEncodeContext; using queryeval::RankedSearchIteratorBase; @@ -122,6 +124,11 @@ ZcRareWordPostingIteratorBase<bigEndian>::doUnpack(uint32_t docId) } assert(docId == getDocId()); _decodeContext->unpackFeatures(_matchData, docId); + if (_decode_cheap_features) { + TermFieldMatchData *tfmd = _matchData[0]; + tfmd->setFieldLength(_field_length); + tfmd->setNumOccs(_num_occs); + } setUnpacked(); } @@ -521,8 +528,8 @@ ZcPostingIteratorBase::doSeek(uint32_t docId) oDocId); #endif if (_decode_cheap_features) { - ZCDECODE(oCompr, field_length =); - ZCDECODE(oCompr, num_occs =); + ZCDECODE(oCompr, field_length = 1 +); + ZCDECODE(oCompr, num_occs = 1 +); } incNeedUnpack(); } @@ -554,6 +561,11 @@ ZcPostingIterator<bigEndian>::doUnpack(uint32_t docId) _decodeContext->skipFeatures(needUnpack - 1); } _decodeContext->unpackFeatures(_matchData, docId); + if (_decode_cheap_features) { + TermFieldMatchData *tfmd = _matchData[0]; + tfmd->setFieldLength(_field_length); + tfmd->setNumOccs(_num_occs); + } setUnpacked(); } diff --git a/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h b/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h index 584de2786f0..a66c47f49fd 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h +++ b/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h @@ -273,8 +273,8 @@ protected: ZCDECODE(_valI, docId +=); setDocId(docId); if (_decode_cheap_features) { - ZCDECODE(_valI, _field_length =); - ZCDECODE(_valI, _num_occs =); + ZCDECODE(_valI, _field_length = 1 +); + ZCDECODE(_valI, _num_occs = 1 +); } } virtual void featureSeek(uint64_t offset) = 0; diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.cpp index efed07ba611..1c4ee6c6cd4 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.cpp +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.cpp @@ -49,4 +49,10 @@ FakePosting::l4SkipBitSize() const return 0; } +bool +FakePosting::has_cheap_features() const +{ + return false; +} + } diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h b/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h index cd501fdfa5d..9d55b4f375b 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h @@ -62,6 +62,8 @@ public: virtual bool hasWordPositions() const = 0; + virtual bool has_cheap_features() const; + /* * Single posting list performance, without feature unpack. */ diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp index 5076a88603a..68f0d41ad48 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp @@ -395,6 +395,7 @@ bool FakeWord::validate(search::queryeval::SearchIterator *iterator, const fef::TermFieldMatchDataArray &matchData, uint32_t stride, + bool decode_cheap_features, bool verbose) const { iterator->initFullRange(); @@ -432,6 +433,10 @@ FakeWord::validate(search::queryeval::SearchIterator *iterator, for (size_t lfi = 0; lfi < matchData.size(); ++lfi) { if (matchData[lfi]->getDocId() != docId) continue; + if (decode_cheap_features) { + assert(d->_collapsedDocWordFeatures._field_len == matchData[lfi]->getFieldLength()); + assert(d->_collapsedDocWordFeatures._num_occs == matchData[lfi]->getNumOccs()); + } TMDPI mdpe = matchData[lfi]->end(); TMDPI mdp = matchData[lfi]->begin(); while (mdp != mdpe) { @@ -462,6 +467,7 @@ FakeWord::validate(search::queryeval::SearchIterator *iterator, bool FakeWord::validate(search::queryeval::SearchIterator *iterator, const fef::TermFieldMatchDataArray &matchData, + bool decode_cheap_features, bool verbose) const { iterator->initFullRange(); @@ -487,6 +493,10 @@ FakeWord::validate(search::queryeval::SearchIterator *iterator, for (size_t lfi = 0; lfi < matchData.size(); ++lfi) { if (matchData[lfi]->getDocId() != docId) continue; + if (decode_cheap_features) { + assert(d->_collapsedDocWordFeatures._field_len == matchData[lfi]->getFieldLength()); + assert(d->_collapsedDocWordFeatures._num_occs == matchData[lfi]->getNumOccs()); + } TMDPI mdpe = matchData[lfi]->end(); TMDPI mdp = matchData[lfi]->begin(); while (mdp != mdpe) { diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h index 48aaf9f41ed..5e2d37d18d5 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h @@ -240,14 +240,17 @@ public: validate(search::queryeval::SearchIterator *iterator, const fef::TermFieldMatchDataArray &matchData, uint32_t stride, + bool decode_cheap_features, bool verbose) const; bool validate(search::queryeval::SearchIterator *iterator, const fef::TermFieldMatchDataArray &matchData, + bool decode_cheap_features, bool verbose) const; - bool validate(search::queryeval::SearchIterator *iterator, bool verbose) const; + bool validate(search::queryeval::SearchIterator *iterator, + bool verbose) const; bool validate(search::diskindex::FieldReader &fieldReader, diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp index 154a05ebebe..31e2a323781 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp @@ -316,6 +316,11 @@ FakeZcFilterOcc::hasWordPositions() const return false; } +bool +FakeZcFilterOcc::has_cheap_features() const +{ + return _posting_params._encode_cheap_features; +} size_t FakeZcFilterOcc::skipBitSize() const @@ -1267,7 +1272,6 @@ template <bool bigEndian> class FakeZc4SkipPosOcc : public FakeZcFilterOcc { search::index::PostingListCounts _counts; - bool _encode_cheap_features; protected: FakeZc4SkipPosOcc(const FakeWord &fw, const Zc4PostingParams &posting_params, const char *name_suffix); public: diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.h b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.h index 3d1673edec7..398f537c67a 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.h +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.h @@ -57,6 +57,7 @@ public: size_t bitSize() const override; bool hasWordPositions() const override; + bool has_cheap_features() const override; size_t skipBitSize() const override; size_t l1SkipBitSize() const override; size_t l2SkipBitSize() const override; |