summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@broadpark.no> 2019-04-29 15:05:48 +0200
committer Tor Egge <Tor.Egge@broadpark.no> 2019-04-29 15:05:48 +0200
commit 2ee7b657bd8597b5fd27b133782d240d2faf62cd (patch)
tree 1fc6249cde4efae8a5c02214d7dd85d6fa1fc0f2 /searchlib
parent 7553e0390c1ceb3834cba62774b3ddc77a6944d1 (diff)
Check that posting list for fake word can be read by posting list reader.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.cpp 38
-rw-r--r-- searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp 69
-rw-r--r-- searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.h 4
-rw-r--r-- searchlib/src/vespa/searchlib/util/comprfile.cpp 19
-rw-r--r-- searchlib/src/vespa/searchlib/util/comprfile.h 1
5 files changed, 116 insertions, 15 deletions
diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.cpp b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.cpp
index c9b8cf0b017..30cef1dc258 100644
--- a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.cpp
@@ -72,22 +72,28 @@ Zc4PostingReader<bigEndian>::read_common_word_doc_id_and_features(DocIdAndFeatur
if (docId > _l1SkipDocId) {
_l1SkipDocIdPos += _l1Skip.decode() + 1;
assert(docIdPos == _l1SkipDocIdPos);
- _l1SkipFeaturesPos += _l1Skip.decode() + 1;
uint64_t featuresPos = _decodeContext->getReadOffset();
- assert(featuresPos == _l1SkipFeaturesPos);
+ if (_posting_params._encode_features) {
+ _l1SkipFeaturesPos += _l1Skip.decode() + 1;
+ assert(featuresPos == _l1SkipFeaturesPos);
+ }
(void) featuresPos;
if (docId > _l2SkipDocId) {
_l2SkipDocIdPos += _l2Skip.decode() + 1;
assert(docIdPos == _l2SkipDocIdPos);
- _l2SkipFeaturesPos += _l2Skip.decode() + 1;
- assert(featuresPos == _l2SkipFeaturesPos);
+ if (_posting_params._encode_features) {
+ _l2SkipFeaturesPos += _l2Skip.decode() + 1;
+ assert(featuresPos == _l2SkipFeaturesPos);
+ }
_l2SkipL1SkipPos += _l2Skip.decode() + 1;
assert(_l1Skip.pos() == _l2SkipL1SkipPos);
if (docId > _l3SkipDocId) {
_l3SkipDocIdPos += _l3Skip.decode() + 1;
assert(docIdPos == _l3SkipDocIdPos);
- _l3SkipFeaturesPos += _l3Skip.decode() + 1;
- assert(featuresPos == _l3SkipFeaturesPos);
+ if (_posting_params._encode_features) {
+ _l3SkipFeaturesPos += _l3Skip.decode() + 1;
+ assert(featuresPos == _l3SkipFeaturesPos);
+ }
_l3SkipL1SkipPos += _l3Skip.decode() + 1;
assert(_l1Skip.pos() == _l3SkipL1SkipPos);
_l3SkipL2SkipPos += _l3Skip.decode() + 1;
@@ -96,8 +102,10 @@ Zc4PostingReader<bigEndian>::read_common_word_doc_id_and_features(DocIdAndFeatur
_l4SkipDocIdPos += _l4Skip.decode() + 1;
assert(docIdPos == _l4SkipDocIdPos);
(void) docIdPos;
- _l4SkipFeaturesPos += _l4Skip.decode() + 1;
- assert(featuresPos == _l4SkipFeaturesPos);
+ if (_posting_params._encode_features) {
+ _l4SkipFeaturesPos += _l4Skip.decode() + 1;
+ assert(featuresPos == _l4SkipFeaturesPos);
+ }
_l4SkipL1SkipPos += _l4Skip.decode() + 1;
assert(_l1Skip.pos() == _l4SkipL1SkipPos);
_l4SkipL2SkipPos += _l4Skip.decode() + 1;
@@ -141,7 +149,9 @@ Zc4PostingReader<bigEndian>::read_common_word_doc_id_and_features(DocIdAndFeatur
_chunkNo = 0;
}
}
- _decodeContext->readFeatures(features);
+ if (_posting_params._encode_features) {
+ _decodeContext->readFeatures(features);
+ }
--_residue;
}
@@ -175,7 +185,9 @@ Zc4PostingReader<bigEndian>::read_doc_id_and_features(DocIdAndFeatures &features
if (__builtin_expect(oCompr >= d._valE, false)) {
_readContext.readComprBuffer();
}
- _decodeContext->readFeatures(features);
+ if (_posting_params._encode_features) {
+ _decodeContext->readFeatures(features);
+ }
--_residue;
}
@@ -262,8 +274,10 @@ Zc4PostingReader<bigEndian>::read_word_start_with_skip()
UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L4SKIPSIZE, EC);
l4SkipSize = val64;
}
- UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_FEATURESSIZE, EC);
- _featuresSize = val64;
+ if (_posting_params._encode_features) {
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_FEATURESSIZE, EC);
+ _featuresSize = val64;
+ }
if (__builtin_expect(oCompr >= valE, false)) {
UC64_DECODECONTEXT_STORE(o, d._);
_readContext.readComprBuffer();
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp
index 3d4567ed2ab..f4145c594e3 100644
--- a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp
@@ -3,9 +3,10 @@
#include "fakezcfilterocc.h"
#include "fpfactory.h"
#include <vespa/searchlib/diskindex/zcposocciterators.h>
-#include <vespa/searchlib/diskindex/zc4_posting_writer.h>
#include <vespa/searchlib/diskindex/zc4_posting_header.h>
#include <vespa/searchlib/diskindex/zc4_posting_params.h>
+#include <vespa/searchlib/diskindex/zc4_posting_reader.h>
+#include <vespa/searchlib/diskindex/zc4_posting_writer.h>
using search::fef::TermFieldMatchData;
using search::fef::TermFieldMatchDataArray;
@@ -125,10 +126,12 @@ void
FakeZcFilterOcc::setup(const FakeWord &fw, bool doFeatures,
bool dynamicK)
{
- if (_bigEndian)
+ if (_bigEndian) {
setupT<true>(fw, doFeatures, dynamicK);
- else
+ } else {
setupT<false>(fw, doFeatures, dynamicK);
+ }
+ validate_read(fw, doFeatures, dynamicK);
}
@@ -219,6 +222,66 @@ FakeZcFilterOcc::read_header(bool doFeatures, bool dynamicK, uint32_t min_skip_d
}
+/**
+ * Forward to the validate_read<bigEndian> instantiation selected by the
+ * _bigEndian member, passing all arguments through unchanged.
+ */
+void
+FakeZcFilterOcc::validate_read(const FakeWord &fw, bool encode_features, bool dynamic_k) const
+{
+    if (!_bigEndian) {
+        validate_read<false>(fw, encode_features, dynamic_k);
+        return;
+    }
+    validate_read<true>(fw, encode_features, dynamic_k);
+}
+
+/**
+ * Read the compressed posting list back through a Zc4PostingReader and
+ * assert that it yields the doc ids of fw._postings in order, followed
+ * by one extra read whose doc id equals -1 when cast to int32_t.
+ *
+ * For every posting the sizes of the decoded element and word position
+ * vectors are compared with the expected features (generated by
+ * fw.setupFeatures() when encode_features is set, otherwise cleared).
+ *
+ * All checks are plain assert() calls, so nothing is verified in a
+ * build with NDEBUG defined.
+ *
+ * @param fw               fake word supplying the expected postings
+ * @param encode_features  copied into the reader's posting params and
+ *                         used to pick how expected features are built
+ * @param dynamic_k        selects the decode context and is passed to
+ *                         the reader constructor
+ */
+template <bool bigEndian>
+void
+FakeZcFilterOcc::validate_read(const FakeWord &fw, bool encode_features, bool dynamic_k) const
+{
+    bitcompression::EGPosOccDecodeContextCooked<bigEndian> decode_context_dynamic_k(&_fieldsParams);
+    bitcompression::EG2PosOccDecodeContextCooked<bigEndian> decode_context_static_k(&_fieldsParams);
+    // Bind both contexts to the common base type first so that the
+    // conditional expression below has a single operand type.
+    bitcompression::FeatureDecodeContext<bigEndian> &decode_context_dynamic_k_upcast = decode_context_dynamic_k;
+    bitcompression::FeatureDecodeContext<bigEndian> &decode_context_static_k_upcast = decode_context_static_k;
+    bitcompression::FeatureDecodeContext<bigEndian> &decode_context = dynamic_k ? decode_context_dynamic_k_upcast : decode_context_static_k_upcast;
+    Zc4PostingReader<bigEndian> reader(dynamic_k);
+    reader.set_decode_features(&decode_context);
+    auto &params = reader.get_posting_params();
+    params._min_skip_docs = 1;
+    params._min_chunk_docs = 1000000000;
+    params._doc_id_limit = _docIdLimit;
+    params._encode_features = encode_features;
+    reader.get_read_context().reference_compressed_buffer(_compressed.first, _compressed.second);
+    assert(decode_context.getReadOffset() == 0u);
+    PostingListCounts counts;
+    counts._bitLength = _compressedBits;
+    counts._numDocs = _hitDocs;
+    reader.set_counts(counts);
+    auto d(fw._postings.begin());
+    auto de(fw._postings.end());
+    auto p(fw._wordPosFeatures.begin());
+    auto pe(fw._wordPosFeatures.end());
+    DocIdAndPosOccFeatures check_features;
+    DocIdAndFeatures features;
+    while (d != de) {
+        if (encode_features) {
+            fw.setupFeatures(*d, &*p, check_features);
+            p += d->_positions;
+        } else {
+            check_features.clear(d->_docId);
+        }
+        reader.read_doc_id_and_features(features);
+        assert(features._docId == d->_docId);
+        assert(features._elements.size() == check_features._elements.size());
+        assert(features._wordPositions.size() == check_features._wordPositions.size());
+        ++d;
+    }
+    if (encode_features) {
+        // Every expected word position must have been consumed.
+        assert(p == pe);
+    }
+    // pe is only referenced from assert(); keep NDEBUG builds warning free.
+    (void) pe;
+    // One more read past the last posting must report the end marker.
+    reader.read_doc_id_and_features(features);
+    assert(static_cast<int32_t>(features._docId) == -1);
+}
+
FakeZcFilterOcc::~FakeZcFilterOcc()
{
free(_compressedMalloc);
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.h b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.h
index b68e3866461..36738a0f5a8 100644
--- a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.h
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.h
@@ -40,6 +40,10 @@ protected:
template <bool bigEndian>
void read_header(bool do_features, bool dynamic_k, uint32_t min_skip_docs, uint32_t min_cunk_docs);
+ void validate_read(const FakeWord &fw, bool encode_features, bool dynamic_k) const;
+ template <bool bigEndian>
+ void validate_read(const FakeWord &fw, bool encode_features, bool dynamic_k) const;
+
public:
FakeZcFilterOcc(const FakeWord &fw);
FakeZcFilterOcc(const FakeWord &fw, bool bigEndian, const char *nameSuffix);
diff --git a/searchlib/src/vespa/searchlib/util/comprfile.cpp b/searchlib/src/vespa/searchlib/util/comprfile.cpp
index 155bb194f97..400a93acd26 100644
--- a/searchlib/src/vespa/searchlib/util/comprfile.cpp
+++ b/searchlib/src/vespa/searchlib/util/comprfile.cpp
@@ -408,6 +408,25 @@ ComprFileReadContext::referenceWriteContext(const ComprFileWriteContext &rhs)
}
}
+/**
+ * Point this read context at an existing buffer of compressed data.
+ *
+ * The buffer pointer is stored in _comprBuf and usedUnits in
+ * _comprBufSize. Buffer end file position and file size are both set
+ * to usedUnits * _unitSize. If a decode context is attached, it is
+ * told about the buffer via afterRead() and reset with setupBits(0).
+ *
+ * NOTE(review): the caller presumably has to keep buffer alive and
+ * unchanged while it is referenced here - confirm.
+ *
+ * @param buffer     start of the compressed data
+ * @param usedUnits  number of units available in buffer
+ */
+void
+ComprFileReadContext::reference_compressed_buffer(void *buffer, size_t usedUnits)
+{
+    ComprFileDecodeContext *d = getDecodeContext();
+    // Same value is used as buffer end position, file size and the
+    // position passed to afterRead(); compute it once.
+    const uint64_t usedSize = static_cast<uint64_t>(usedUnits) * _unitSize;
+
+    _comprBuf = buffer;
+    _comprBufSize = usedUnits;
+    setBufferEndFilePos(usedSize);
+    setFileSize(usedSize);
+    if (d != nullptr) {
+        d->afterRead(_comprBuf, usedUnits, usedSize, false);
+        d->setupBits(0);
+        // NOTE(review): -1 looks like a sentinel value for the bit
+        // offset - confirm against setBitOffset().
+        setBitOffset(-1);
+        assert(d->getBitPosV() == 0);
+    }
+}
ComprFileWriteContext::
ComprFileWriteContext(ComprFileEncodeContext &encodeContext)
diff --git a/searchlib/src/vespa/searchlib/util/comprfile.h b/searchlib/src/vespa/searchlib/util/comprfile.h
index d4de1d305fa..431126dee47 100644
--- a/searchlib/src/vespa/searchlib/util/comprfile.h
+++ b/searchlib/src/vespa/searchlib/util/comprfile.h
@@ -137,6 +137,7 @@ public:
* long as rhs is live and unchanged.
*/
void referenceWriteContext(const ComprFileWriteContext &rhs);
+ void reference_compressed_buffer(void *buffer, size_t usedUnits);
};