summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@broadpark.no> 2019-05-15 12:08:29 +0200
committer Tor Egge <Tor.Egge@broadpark.no> 2019-05-15 12:08:29 +0200
commit 345afc7be5ca149d8e28a4eaddf16ef765e57fa6 (patch)
tree f895820f792e2e7be700a61a43cec8cc91b7609c /searchlib
parent 20f644c175b73230373cec00f8c80ffbf22193e6 (diff)
Unpack cheap features in zc posting iterators.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp 23
-rw-r--r-- searchlib/src/tests/postinglistbm/posting_list_test.cpp 2
-rw-r--r-- searchlib/src/tests/postinglistbm/stress_runner.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp 16
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h 4
-rw-r--r-- searchlib/src/vespa/searchlib/test/fakedata/fakeposting.cpp 6
-rw-r--r-- searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp 10
-rw-r--r-- searchlib/src/vespa/searchlib/test/fakedata/fakeword.h 5
-rw-r--r-- searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp 6
-rw-r--r-- searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.h 1
11 files changed, 60 insertions, 19 deletions
diff --git a/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp b/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp
index 6d4f483fecc..421beb10386 100644
--- a/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp
+++ b/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp
@@ -458,6 +458,7 @@ void
randReadField(FakeWordSet &wordSet,
const std::string &namepref,
bool dynamicK,
+ bool decode_cheap_features,
bool verbose)
{
const char *dynamicKStr = dynamicK ? "true" : "false";
@@ -469,9 +470,10 @@ randReadField(FakeWordSet &wordSet,
LOG(info,
"enter randReadField,"
- " namepref=%s, dynamicK=%s",
+ " namepref=%s, dynamicK=%s, decode_cheap_features=%s",
namepref.c_str(),
- dynamicKStr);
+ dynamicKStr,
+ bool_to_str(decode_cheap_features));
tv.SetNow();
before = tv.Secs();
@@ -533,12 +535,12 @@ randReadField(FakeWordSet &wordSet,
sb(handle.createIterator(counts, tfmda));
// LOG(info, "loop=%d, wordNum=%u", loop, wordNum);
- word->validate(sb.get(), tfmda, verbose);
- word->validate(sb.get(), tfmda, 19, verbose);
- word->validate(sb.get(), tfmda, 99, verbose);
- word->validate(sb.get(), tfmda, 799, verbose);
- word->validate(sb.get(), tfmda, 6399, verbose);
- word->validate(sb.get(), tfmda, 11999, verbose);
+ word->validate(sb.get(), tfmda, decode_cheap_features, verbose);
+ word->validate(sb.get(), tfmda, 19, decode_cheap_features, verbose);
+ word->validate(sb.get(), tfmda, 99, decode_cheap_features, verbose);
+ word->validate(sb.get(), tfmda, 799, decode_cheap_features, verbose);
+ word->validate(sb.get(), tfmda, 6399, decode_cheap_features, verbose);
+ word->validate(sb.get(), tfmda, 11999, decode_cheap_features, verbose);
++wordNum;
}
}
@@ -552,10 +554,11 @@ randReadField(FakeWordSet &wordSet,
after = tv.Secs();
LOG(info,
"leave randReadField, namepref=%s,"
- " dynamicK=%s, "
+ " dynamicK=%s, decode_cheap_features=%s, "
"elapsed=%10.6f",
namepref.c_str(),
dynamicKStr,
+ bool_to_str(decode_cheap_features),
after - before);
}
@@ -634,7 +637,7 @@ testFieldWriterVariant(FakeWordSet &wordSet, uint32_t doc_id_limit,
{
writeField(wordSet, doc_id_limit, file_name_prefix, dynamic_k, encode_cheap_features);
readField(wordSet, doc_id_limit, file_name_prefix, dynamic_k, encode_cheap_features, verbose);
- randReadField(wordSet, file_name_prefix, dynamic_k, verbose);
+ randReadField(wordSet, file_name_prefix, dynamic_k, encode_cheap_features, verbose);
fusionField(wordSet.getNumWords(),
doc_id_limit,
file_name_prefix, file_name_prefix + "x",
diff --git a/searchlib/src/tests/postinglistbm/posting_list_test.cpp b/searchlib/src/tests/postinglistbm/posting_list_test.cpp
index dad21ada469..c4e375e8c7d 100644
--- a/searchlib/src/tests/postinglistbm/posting_list_test.cpp
+++ b/searchlib/src/tests/postinglistbm/posting_list_test.cpp
@@ -27,7 +27,7 @@ validate_posting_list_for_word(const FakePosting& posting, const FakeWord& word)
std::unique_ptr<SearchIterator> iterator(posting.createIterator(tfmda));
if (posting.hasWordPositions()) {
- word.validate(iterator.get(), tfmda, false);
+ word.validate(iterator.get(), tfmda, posting.has_cheap_features(), false);
} else {
word.validate(iterator.get(), false);
}
diff --git a/searchlib/src/tests/postinglistbm/stress_runner.cpp b/searchlib/src/tests/postinglistbm/stress_runner.cpp
index 98ace5e00a1..f8829d4eab6 100644
--- a/searchlib/src/tests/postinglistbm/stress_runner.cpp
+++ b/searchlib/src/tests/postinglistbm/stress_runner.cpp
@@ -217,9 +217,9 @@ makeSomePostings(FPFactory *postingFactory,
std::unique_ptr<SearchIterator> iterator(posting->createIterator(tfmda));
if (posting->hasWordPositions()) {
if (stride != 0) {
- word->validate(iterator.get(), tfmda, stride, verbose);
+ word->validate(iterator.get(), tfmda, stride, posting->has_cheap_features(), verbose);
} else {
- word->validate(iterator.get(), tfmda, verbose);
+ word->validate(iterator.get(), tfmda, posting->has_cheap_features(), verbose);
}
} else {
word->validate(iterator.get(), verbose);
diff --git a/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp b/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp
index a9098b89e16..53edae444af 100644
--- a/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp
@@ -1,12 +1,14 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "zcpostingiterators.h"
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
#include <vespa/searchlib/bitcompression/posocccompression.h>
namespace search::diskindex {
using search::fef::TermFieldMatchDataArray;
+using search::fef::TermFieldMatchData;
using search::bitcompression::FeatureDecodeContext;
using search::bitcompression::FeatureEncodeContext;
using queryeval::RankedSearchIteratorBase;
@@ -122,6 +124,11 @@ ZcRareWordPostingIteratorBase<bigEndian>::doUnpack(uint32_t docId)
}
assert(docId == getDocId());
_decodeContext->unpackFeatures(_matchData, docId);
+ if (_decode_cheap_features) {
+ TermFieldMatchData *tfmd = _matchData[0];
+ tfmd->setFieldLength(_field_length);
+ tfmd->setNumOccs(_num_occs);
+ }
setUnpacked();
}
@@ -521,8 +528,8 @@ ZcPostingIteratorBase::doSeek(uint32_t docId)
oDocId);
#endif
if (_decode_cheap_features) {
- ZCDECODE(oCompr, field_length =);
- ZCDECODE(oCompr, num_occs =);
+ ZCDECODE(oCompr, field_length = 1 +);
+ ZCDECODE(oCompr, num_occs = 1 +);
}
incNeedUnpack();
}
@@ -554,6 +561,11 @@ ZcPostingIterator<bigEndian>::doUnpack(uint32_t docId)
_decodeContext->skipFeatures(needUnpack - 1);
}
_decodeContext->unpackFeatures(_matchData, docId);
+ if (_decode_cheap_features) {
+ TermFieldMatchData *tfmd = _matchData[0];
+ tfmd->setFieldLength(_field_length);
+ tfmd->setNumOccs(_num_occs);
+ }
setUnpacked();
}
diff --git a/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h b/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h
index 584de2786f0..a66c47f49fd 100644
--- a/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h
+++ b/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h
@@ -273,8 +273,8 @@ protected:
ZCDECODE(_valI, docId +=);
setDocId(docId);
if (_decode_cheap_features) {
- ZCDECODE(_valI, _field_length =);
- ZCDECODE(_valI, _num_occs =);
+ ZCDECODE(_valI, _field_length = 1 +);
+ ZCDECODE(_valI, _num_occs = 1 +);
}
}
virtual void featureSeek(uint64_t offset) = 0;
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.cpp
index efed07ba611..1c4ee6c6cd4 100644
--- a/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.cpp
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.cpp
@@ -49,4 +49,10 @@ FakePosting::l4SkipBitSize() const
return 0;
}
+bool
+FakePosting::has_cheap_features() const
+{
+ return false;
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h b/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h
index cd501fdfa5d..9d55b4f375b 100644
--- a/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeposting.h
@@ -62,6 +62,8 @@ public:
virtual bool
hasWordPositions() const = 0;
+ virtual bool has_cheap_features() const;
+
/*
* Single posting list performance, without feature unpack.
*/
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp
index 5076a88603a..68f0d41ad48 100644
--- a/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.cpp
@@ -395,6 +395,7 @@ bool
FakeWord::validate(search::queryeval::SearchIterator *iterator,
const fef::TermFieldMatchDataArray &matchData,
uint32_t stride,
+ bool decode_cheap_features,
bool verbose) const
{
iterator->initFullRange();
@@ -432,6 +433,10 @@ FakeWord::validate(search::queryeval::SearchIterator *iterator,
for (size_t lfi = 0; lfi < matchData.size(); ++lfi) {
if (matchData[lfi]->getDocId() != docId)
continue;
+ if (decode_cheap_features) {
+ assert(d->_collapsedDocWordFeatures._field_len == matchData[lfi]->getFieldLength());
+ assert(d->_collapsedDocWordFeatures._num_occs == matchData[lfi]->getNumOccs());
+ }
TMDPI mdpe = matchData[lfi]->end();
TMDPI mdp = matchData[lfi]->begin();
while (mdp != mdpe) {
@@ -462,6 +467,7 @@ FakeWord::validate(search::queryeval::SearchIterator *iterator,
bool
FakeWord::validate(search::queryeval::SearchIterator *iterator,
const fef::TermFieldMatchDataArray &matchData,
+ bool decode_cheap_features,
bool verbose) const
{
iterator->initFullRange();
@@ -487,6 +493,10 @@ FakeWord::validate(search::queryeval::SearchIterator *iterator,
for (size_t lfi = 0; lfi < matchData.size(); ++lfi) {
if (matchData[lfi]->getDocId() != docId)
continue;
+ if (decode_cheap_features) {
+ assert(d->_collapsedDocWordFeatures._field_len == matchData[lfi]->getFieldLength());
+ assert(d->_collapsedDocWordFeatures._num_occs == matchData[lfi]->getNumOccs());
+ }
TMDPI mdpe = matchData[lfi]->end();
TMDPI mdp = matchData[lfi]->begin();
while (mdp != mdpe) {
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h
index 48aaf9f41ed..5e2d37d18d5 100644
--- a/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakeword.h
@@ -240,14 +240,17 @@ public:
validate(search::queryeval::SearchIterator *iterator,
const fef::TermFieldMatchDataArray &matchData,
uint32_t stride,
+ bool decode_cheap_features,
bool verbose) const;
bool
validate(search::queryeval::SearchIterator *iterator,
const fef::TermFieldMatchDataArray &matchData,
+ bool decode_cheap_features,
bool verbose) const;
- bool validate(search::queryeval::SearchIterator *iterator, bool verbose) const;
+ bool validate(search::queryeval::SearchIterator *iterator,
+ bool verbose) const;
bool
validate(search::diskindex::FieldReader &fieldReader,
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp
index 154a05ebebe..31e2a323781 100644
--- a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp
@@ -316,6 +316,11 @@ FakeZcFilterOcc::hasWordPositions() const
return false;
}
+bool
+FakeZcFilterOcc::has_cheap_features() const
+{
+ return _posting_params._encode_cheap_features;
+}
size_t
FakeZcFilterOcc::skipBitSize() const
@@ -1267,7 +1272,6 @@ template <bool bigEndian>
class FakeZc4SkipPosOcc : public FakeZcFilterOcc
{
search::index::PostingListCounts _counts;
- bool _encode_cheap_features;
protected:
FakeZc4SkipPosOcc(const FakeWord &fw, const Zc4PostingParams &posting_params, const char *name_suffix);
public:
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.h b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.h
index 3d1673edec7..398f537c67a 100644
--- a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.h
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.h
@@ -57,6 +57,7 @@ public:
size_t bitSize() const override;
bool hasWordPositions() const override;
+ bool has_cheap_features() const override;
size_t skipBitSize() const override;
size_t l1SkipBitSize() const override;
size_t l2SkipBitSize() const override;