Factor out Zc4PostingReader from Zc4PostingSeqRead.

author: Tor Egge <Tor.Egge@broadpark.no> 2019-04-26 11:08:17 +0200
committer: Tor Egge <Tor.Egge@broadpark.no> 2019-04-28 23:49:16 +0200
commit: 7553e0390c1ceb3834cba62774b3ddc77a6944d1 (patch)
tree: 0f524636b34a18fa5948889d2f1b3f01a78c9881 /searchlib
parent: e287c58dce2df5eb5451a61000aab34553698a55 (diff)
10 files changed, 573 insertions, 670 deletions
diff --git a/searchlib/src/vespa/searchlib/diskindex/CMakeLists.txt b/searchlib/src/vespa/searchlib/diskindex/CMakeLists.txt
index 104994ad038..2fea4f2bab7 100644
--- a/searchlib/src/vespa/searchlib/diskindex/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/diskindex/CMakeLists.txt
@@ -19,6 +19,7 @@ vespa_add_library(searchlib_diskindex OBJECT
     pagedict4randread.cpp
     wordnummapper.cpp
     zc4_posting_header.cpp
+    zc4_posting_reader.cpp
     zc4_posting_writer.cpp
     zc4_posting_writer_base.cpp
     zcbuf.cpp
diff --git a/searchlib/src/vespa/searchlib/diskindex/extposocc.cpp b/searchlib/src/vespa/searchlib/diskindex/extposocc.cpp
index f6e4da945e0..34e64a9b558 100644
--- a/searchlib/src/vespa/searchlib/diskindex/extposocc.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/extposocc.cpp
@@ -69,7 +69,7 @@ makePosOccWrite(const vespalib::string &name,
             fileHeader.getBigEndian() &&
             fileHeader.getFormats().size() == 2 &&
             fileHeader.getFormats()[0] ==
-            ZcPosOccSeqRead::getIdentifier() &&
+            Zc4PosOccSeqRead::getIdentifier(true) &&
             fileHeader.getFormats()[1] ==
             ZcPosOccSeqRead::getSubIdentifier()) {
             dynamicK = true;
@@ -77,7 +77,7 @@ makePosOccWrite(const vespalib::string &name,
                    fileHeader.getBigEndian() &&
                    fileHeader.getFormats().size() == 2 &&
                    fileHeader.getFormats()[0] ==
-                   Zc4PosOccSeqRead::getIdentifier() &&
+                   Zc4PosOccSeqRead::getIdentifier(false) &&
                    fileHeader.getFormats()[1] ==
                    Zc4PosOccSeqRead::getSubIdentifier()) {
             dynamicK = false;
@@ -115,7 +115,7 @@ makePosOccRead(const vespalib::string &name,
             fileHeader.getBigEndian() &&
             fileHeader.getFormats().size() == 2 &&
             fileHeader.getFormats()[0] ==
-            ZcPosOccSeqRead::getIdentifier() &&
+            Zc4PosOccSeqRead::getIdentifier(true) &&
             fileHeader.getFormats()[1] ==
             ZcPosOccSeqRead::getSubIdentifier()) {
             dynamicK = true;
@@ -123,7 +123,7 @@ makePosOccRead(const vespalib::string &name,
                    fileHeader.getBigEndian() &&
                    fileHeader.getFormats().size() == 2 &&
                    fileHeader.getFormats()[0] ==
-                   Zc4PosOccSeqRead::getIdentifier() &&
+                   Zc4PosOccSeqRead::getIdentifier(false) &&
                    fileHeader.getFormats()[1] ==
                    Zc4PosOccSeqRead::getSubIdentifier()) {
             dynamicK = false;
diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.cpp b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.cpp
new file mode 100644
index 00000000000..c9b8cf0b017
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.cpp
@@ -0,0 +1,424 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "zc4_posting_reader.h"
+#include <vespa/searchlib/index/docidandfeatures.h>
+
+namespace search::diskindex {
+
+using index::PostingListCounts;
+using index::DocIdAndFeatures;
+using bitcompression::FeatureEncodeContext;
+
+// Creates a reader with no decode context attached; counters and skip validation state start at zero.
+template <bool bigEndian>
+Zc4PostingReader<bigEndian>::Zc4PostingReader(bool dynamic_k)
+    : _decodeContext(nullptr), // attached later via set_decode_features()
+      _docIdK(K_VALUE_ZCPOSTING_DELTA_DOCID),
+      _prevDocId(0),
+      _numDocs(0),
+      _readContext(sizeof(uint64_t)),
+      _has_more(false),
+      _posting_params(64, 1 << 30, 10000000, dynamic_k, true), // NOTE(review): presumably min skip docs, min chunk docs, docid limit, dynamic k, <last flag unknown> - confirm against Zc4PostingParams
+      _lastDocId(0),
+      _zcDocIds(),
+      _l1Skip(),
+      _l2Skip(),
+      _l3Skip(),
+      _l4Skip(),
+      _chunkNo(0),
+      _l1SkipDocId(0),
+      _l1SkipDocIdPos(0),
+      _l1SkipFeaturesPos(0),
+      _l2SkipDocId(0),
+      _l2SkipDocIdPos(0),
+      _l2SkipL1SkipPos(0),
+      _l2SkipFeaturesPos(0),
+      _l3SkipDocId(0),
+      _l3SkipDocIdPos(0),
+      _l3SkipL1SkipPos(0),
+      _l3SkipL2SkipPos(0),
+      _l3SkipFeaturesPos(0),
+      _l4SkipDocId(0),
+      _l4SkipDocIdPos(0),
+      _l4SkipL1SkipPos(0),
+      _l4SkipL2SkipPos(0),
+      _l4SkipL3SkipPos(0),
+      _l4SkipFeaturesPos(0),
+      _featuresSize(0),
+      _counts(),
+      _residue(0)
+{
+}
+
+// No explicit cleanup is performed by this destructor.
+template <bool bigEndian>
+Zc4PostingReader<bigEndian>::~Zc4PostingReader() = default;
+
+// Reads one docid from the separate docid buffer plus its features, checking the skip info via asserts.
+template <bool bigEndian>
+void
+Zc4PostingReader<bigEndian>::read_common_word_doc_id_and_features(DocIdAndFeatures &features)
+{
+    if ((_zcDocIds._valI >= _zcDocIds._valE) && _has_more) {
+        read_word_start();    // Read start of next chunk
+    }
+    // Split docid & features.
+    assert(_zcDocIds._valI < _zcDocIds._valE);
+    uint32_t docIdPos = _zcDocIds.pos();
+    uint32_t docId = _prevDocId + 1 + _zcDocIds.decode(); // stored value is (delta - 1)
+    features._docId = docId;
+    _prevDocId = docId;
+    assert(docId <= _lastDocId);
+    if (docId > _l1SkipDocId) { // passed the current L1 skip point: advance the expected skip state
+        _l1SkipDocIdPos += _l1Skip.decode() + 1;
+        assert(docIdPos == _l1SkipDocIdPos);
+        _l1SkipFeaturesPos += _l1Skip.decode() + 1;
+        uint64_t featuresPos = _decodeContext->getReadOffset();
+        assert(featuresPos == _l1SkipFeaturesPos);
+        (void) featuresPos;
+        if (docId > _l2SkipDocId) {
+            _l2SkipDocIdPos += _l2Skip.decode() + 1;
+            assert(docIdPos == _l2SkipDocIdPos);
+            _l2SkipFeaturesPos += _l2Skip.decode() + 1;
+            assert(featuresPos == _l2SkipFeaturesPos);
+            _l2SkipL1SkipPos += _l2Skip.decode() + 1;
+            assert(_l1Skip.pos() == _l2SkipL1SkipPos);
+            if (docId > _l3SkipDocId) {
+                _l3SkipDocIdPos += _l3Skip.decode() + 1;
+                assert(docIdPos == _l3SkipDocIdPos);
+                _l3SkipFeaturesPos += _l3Skip.decode() + 1;
+                assert(featuresPos == _l3SkipFeaturesPos);
+                _l3SkipL1SkipPos += _l3Skip.decode() + 1;
+                assert(_l1Skip.pos() == _l3SkipL1SkipPos);
+                _l3SkipL2SkipPos += _l3Skip.decode() + 1;
+                assert(_l2Skip.pos() == _l3SkipL2SkipPos);
+                if (docId > _l4SkipDocId) {
+                    _l4SkipDocIdPos += _l4Skip.decode() + 1;
+                    assert(docIdPos == _l4SkipDocIdPos);
+                    (void) docIdPos;
+                    _l4SkipFeaturesPos += _l4Skip.decode() + 1;
+                    assert(featuresPos == _l4SkipFeaturesPos);
+                    _l4SkipL1SkipPos += _l4Skip.decode() + 1;
+                    assert(_l1Skip.pos() == _l4SkipL1SkipPos);
+                    _l4SkipL2SkipPos += _l4Skip.decode() + 1;
+                    assert(_l2Skip.pos() == _l4SkipL2SkipPos);
+                    _l4SkipL3SkipPos += _l4Skip.decode() + 1;
+                    assert(_l3Skip.pos() == _l4SkipL3SkipPos);
+                    _l4SkipDocId += _l4Skip.decode() + 1;
+                    assert(_l4SkipDocId <= _lastDocId);
+                    assert(_l4SkipDocId >= docId);
+                }
+                _l3SkipDocId += _l3Skip.decode() + 1;
+                assert(_l3SkipDocId <= _lastDocId);
+                assert(_l3SkipDocId <= _l4SkipDocId);
+                assert(_l3SkipDocId >= docId);
+            }
+            _l2SkipDocId += _l2Skip.decode() + 1;
+            assert(_l2SkipDocId <= _lastDocId);
+            assert(_l2SkipDocId <= _l4SkipDocId);
+            assert(_l2SkipDocId <= _l3SkipDocId);
+            assert(_l2SkipDocId >= docId);
+        }
+        _l1SkipDocId += _l1Skip.decode() + 1;
+        assert(_l1SkipDocId <= _lastDocId);
+        assert(_l1SkipDocId <= _l4SkipDocId);
+        assert(_l1SkipDocId <= _l3SkipDocId);
+        assert(_l1SkipDocId <= _l2SkipDocId);
+        assert(_l1SkipDocId >= docId);
+    }
+    if (docId < _lastDocId) {
+        // Assert more space available when not yet at last docid
+        assert(_zcDocIds._valI < _zcDocIds._valE);
+    } else {
+        // Assert that space has been used when at last docid
+        assert(_zcDocIds._valI == _zcDocIds._valE);
+        // Assert that we've read to end of skip info
+        assert(_l1SkipDocId == _lastDocId);
+        assert(_l2SkipDocId == _lastDocId);
+        assert(_l3SkipDocId == _lastDocId);
+        assert(_l4SkipDocId == _lastDocId);
+        if (!_has_more) {
+            _chunkNo = 0; // last chunk of the word consumed: restart chunk numbering
+        }
+    }
+    _decodeContext->readFeatures(features);
+    --_residue;
+}
+
+// Reads the next docid and its features into 'features'; at end of list docid is set to ~0u.
+template <bool bigEndian>
+void
+Zc4PostingReader<bigEndian>::read_doc_id_and_features(DocIdAndFeatures &features)
+{
+    // Don't read past end of posting list: with no unread documents and
+    // no further chunk pending, report the end marker instead of decoding.
+    if (_residue == 0 && !_has_more) {
+        features.clear(static_cast<uint32_t>(-1));
+        return;
+    }
+    if (_lastDocId > 0) {
+        read_common_word_doc_id_and_features(features);
+        return;
+    }
+    // Interleaves docid & features
+    using EC = FeatureEncodeContext<bigEndian>;
+    DecodeContext &d = *_decodeContext;
+    uint32_t length;
+    uint64_t val64;
+    UC64_DECODECONTEXT_CONSTRUCTOR(o, d._);
+
+    UC64_DECODEEXPGOLOMB_SMALL_NS(o, _docIdK, EC);
+    uint32_t docId = _prevDocId + 1 + val64; // stored value is (delta - 1)
+    features._docId = docId;
+    _prevDocId = docId;
+    UC64_DECODECONTEXT_STORE(o, d._);
+    if (__builtin_expect(oCompr >= d._valE, false)) {
+        _readContext.readComprBuffer();
+    }
+    _decodeContext->readFeatures(features);
+    --_residue;
+}
+// Reads the header of a word (or chunk) that carries skip info and loads its docid and skip buffers.
+template <bool bigEndian>
+void
+Zc4PostingReader<bigEndian>::read_word_start_with_skip()
+{
+    using EC = FeatureEncodeContext<bigEndian>;
+    DecodeContext &d = *_decodeContext;
+    UC64_DECODECONTEXT_CONSTRUCTOR(o, d._);
+    uint32_t length;
+    uint64_t val64;
+    const uint64_t *valE = d._valE;
+
+    if (_has_more) {
+        ++_chunkNo;
+    } else {
+        _chunkNo = 0;
+    }
+    assert(_numDocs >= _posting_params._min_skip_docs || _has_more);
+    bool has_more = false; // flag for this chunk; committed to _has_more at the end
+    if (__builtin_expect(_numDocs >= _posting_params._min_chunk_docs, false)) { // a single has-more bit is read only in this case
+        if (bigEndian) {
+            has_more = static_cast<int64_t>(oVal) < 0;
+            oVal <<= 1;
+        } else {
+            has_more = (oVal & 1) != 0;
+            oVal >>= 1;
+        }
+        length = 1;
+        UC64_READBITS_NS(o, EC);
+    }
+    if (_posting_params._dynamic_k) {
+        _docIdK = EC::calcDocIdK((_has_more || has_more) ? 1 : _numDocs,
+                                 _posting_params._doc_id_limit);
+    }
+    if (_has_more || has_more) {
+        assert(has_more == (_chunkNo + 1 < _counts._segments.size()));
+        assert(_numDocs == _counts._segments[_chunkNo]._numDocs);
+        if (has_more) {
+            assert(_numDocs >= _posting_params._min_skip_docs);
+            assert(_numDocs >= _posting_params._min_chunk_docs);
+        }
+    } else {
+        assert(_numDocs >= _posting_params._min_skip_docs);
+        assert(_numDocs == _counts._numDocs);
+    }
+    if (__builtin_expect(oCompr >= valE, false)) {
+        UC64_DECODECONTEXT_STORE(o, d._);
+        _readContext.readComprBuffer();
+        valE = d._valE;
+        UC64_DECODECONTEXT_LOAD(o, d._);
+    }
+    UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_DOCIDSSIZE, EC);
+    uint32_t docIdsSize = val64 + 1; // stored as (size - 1)
+    UC64_DECODEEXPGOLOMB_NS(o,
+                              K_VALUE_ZCPOSTING_L1SKIPSIZE,
+                              EC);
+    uint32_t l1SkipSize = val64;
+    if (__builtin_expect(oCompr >= valE, false)) {
+        UC64_DECODECONTEXT_STORE(o, d._);
+        _readContext.readComprBuffer();
+        valE = d._valE;
+        UC64_DECODECONTEXT_LOAD(o, d._);
+    }
+    uint32_t l2SkipSize = 0;
+    if (l1SkipSize != 0) { // a higher skip level size is only read when the level below is non-empty
+        UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L2SKIPSIZE, EC);
+        l2SkipSize = val64;
+    }
+    uint32_t l3SkipSize = 0;
+    if (l2SkipSize != 0) {
+        UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L3SKIPSIZE, EC);
+        l3SkipSize = val64;
+    }
+    if (__builtin_expect(oCompr >= valE, false)) {
+        UC64_DECODECONTEXT_STORE(o, d._);
+        _readContext.readComprBuffer();
+        valE = d._valE;
+        UC64_DECODECONTEXT_LOAD(o, d._);
+    }
+    uint32_t l4SkipSize = 0;
+    if (l3SkipSize != 0) {
+        UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L4SKIPSIZE, EC);
+        l4SkipSize = val64;
+    }
+    UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_FEATURESSIZE, EC);
+    _featuresSize = val64;
+    if (__builtin_expect(oCompr >= valE, false)) {
+        UC64_DECODECONTEXT_STORE(o, d._);
+        _readContext.readComprBuffer();
+        valE = d._valE;
+        UC64_DECODECONTEXT_LOAD(o, d._);
+    }
+    if (_posting_params._dynamic_k) {
+        UC64_DECODEEXPGOLOMB_NS(o, _docIdK, EC);
+    } else {
+        UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_LASTDOCID, EC);
+    }
+    _lastDocId = _posting_params._doc_id_limit - 1 - val64; // last docid is stored as its distance below (doc_id_limit - 1)
+    if (_has_more || has_more) {
+        assert(_lastDocId == _counts._segments[_chunkNo]._lastDoc);
+    }
+
+    if (__builtin_expect(oCompr >= valE, false)) {
+        UC64_DECODECONTEXT_STORE(o, d._);
+        _readContext.readComprBuffer();
+        valE = d._valE;
+        UC64_DECODECONTEXT_LOAD(o, d._);
+    }
+    uint64_t bytePad = oPreRead & 7; // NOTE(review): presumably the bits left before the next byte boundary - confirm
+    if (bytePad > 0) {
+        length = bytePad;
+        if (bigEndian) {
+            oVal <<= length;
+        } else {
+            oVal >>= length;
+        }
+        UC64_READBITS_NS(o, EC);
+    }
+    UC64_DECODECONTEXT_STORE(o, d._);
+    if (__builtin_expect(oCompr >= valE, false)) {
+        _readContext.readComprBuffer();
+    }
+    _zcDocIds.clearReserve(docIdsSize);
+    _l1Skip.clearReserve(l1SkipSize);
+    _l2Skip.clearReserve(l2SkipSize);
+    _l3Skip.clearReserve(l3SkipSize);
+    _l4Skip.clearReserve(l4SkipSize);
+    _decodeContext->readBytes(_zcDocIds._valI, docIdsSize); // docid deltas first, then the L1..L4 skip buffers in order
+    _zcDocIds._valE = _zcDocIds._valI + docIdsSize;
+    if (l1SkipSize > 0) {
+        _decodeContext->readBytes(_l1Skip._valI, l1SkipSize);
+    }
+    _l1Skip._valE = _l1Skip._valI + l1SkipSize;
+    if (l2SkipSize > 0) {
+        _decodeContext->readBytes(_l2Skip._valI, l2SkipSize);
+    }
+    _l2Skip._valE = _l2Skip._valI + l2SkipSize;
+    if (l3SkipSize > 0) {
+        _decodeContext->readBytes(_l3Skip._valI, l3SkipSize);
+    }
+    _l3Skip._valE = _l3Skip._valI + l3SkipSize;
+    if (l4SkipSize > 0) {
+        _decodeContext->readBytes(_l4Skip._valI, l4SkipSize);
+    }
+    _l4Skip._valE = _l4Skip._valI + l4SkipSize;
+
+    if (l1SkipSize > 0) { // first skip docid per level; an empty level uses the last docid instead
+        _l1SkipDocId = _l1Skip.decode() + 1 + _prevDocId;
+    } else {
+        _l1SkipDocId = _lastDocId;
+    }
+    if (l2SkipSize > 0) {
+        _l2SkipDocId = _l2Skip.decode() + 1 + _prevDocId;
+    } else {
+        _l2SkipDocId = _lastDocId;
+    }
+    if (l3SkipSize > 0) {
+        _l3SkipDocId = _l3Skip.decode() + 1 + _prevDocId;
+    } else {
+        _l3SkipDocId = _lastDocId;
+    }
+    if (l4SkipSize > 0) {
+        _l4SkipDocId = _l4Skip.decode() + 1 + _prevDocId;
+    } else {
+        _l4SkipDocId = _lastDocId;
+    }
+    _l1SkipDocIdPos = 0; // reset the expected positions that the skip validation asserts compare against
+    _l1SkipFeaturesPos = _decodeContext->getReadOffset();
+    _l2SkipDocIdPos = 0;
+    _l2SkipL1SkipPos = 0;
+    _l2SkipFeaturesPos = _decodeContext->getReadOffset();
+    _l3SkipDocIdPos = 0;
+    _l3SkipL1SkipPos = 0;
+    _l3SkipL2SkipPos = 0;
+    _l3SkipFeaturesPos = _decodeContext->getReadOffset();
+    _l4SkipDocIdPos = 0;
+    _l4SkipL1SkipPos = 0;
+    _l4SkipL2SkipPos = 0;
+    _l4SkipL3SkipPos = 0;
+    _l4SkipFeaturesPos = _decodeContext->getReadOffset();
+    _has_more = has_more;
+    // Decode context is now positioned at start of features
+}
+// Reads the document count of the next word or chunk and prepares state for reading its documents.
+template <bool bigEndian>
+void
+Zc4PostingReader<bigEndian>::read_word_start()
+{
+    using EC = FeatureEncodeContext<bigEndian>;
+    UC64_DECODECONTEXT_CONSTRUCTOR(o, _decodeContext->_);
+    uint32_t length;
+    uint64_t val64;
+    const uint64_t *valE = _decodeContext->_valE;
+
+    UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC);
+    UC64_DECODECONTEXT_STORE(o, _decodeContext->_);
+    if (oCompr >= valE) {
+        _readContext.readComprBuffer();
+    }
+    _numDocs = static_cast<uint32_t>(val64) + 1; // stored as (numDocs - 1)
+    _residue = _numDocs;
+    _prevDocId = _has_more ? _lastDocId : 0u; // a follow-up chunk continues from the previous chunk's last docid
+    assert(_numDocs <= _counts._numDocs);
+    assert(_numDocs == _counts._numDocs ||
+           _numDocs >= _posting_params._min_chunk_docs ||
+           _has_more);
+
+    if (_numDocs >= _posting_params._min_skip_docs || _has_more) {
+        read_word_start_with_skip();
+        // Decode context is now positioned at start of features
+    } else {
+        if (_posting_params._dynamic_k) {
+            _docIdK = EC::calcDocIdK(_numDocs, _posting_params._doc_id_limit);
+        }
+        _lastDocId = 0u; // 0 makes read_doc_id_and_features() take the interleaved path
+        // Decode context is now positioned at start of docids & features
+    }
+}
+// Installs the counts for the next word and, when the word has documents, reads its header.
+template <bool bigEndian>
+void
+Zc4PostingReader<bigEndian>::set_counts(const PostingListCounts &counts)
+{
+    assert(!_has_more && _residue == 0);  // Previous words must have been read.
+    _counts = counts;
+    assert((_counts._numDocs == 0) == (_counts._bitLength == 0)); // a word is empty exactly when its bit length is zero
+    if (_counts._numDocs > 0) {
+        read_word_start();
+    }
+}
+// Attaches the feature decode context and links it with the internal read context in both directions.
+template <bool bigEndian>
+void
+Zc4PostingReader<bigEndian>::set_decode_features(DecodeContext *decode_features)
+{
+    _decodeContext = decode_features; // stored as a raw pointer; dereferenced unconditionally on the next line
+    _decodeContext->setReadContext(&_readContext);
+    _readContext.setDecodeContext(_decodeContext);
+}
+
+template class Zc4PostingReader<false>;
+template class Zc4PostingReader<true>;
+
+}
diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.h b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.h
new file mode 100644
index 00000000000..d8161da15d5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_reader.h
@@ -0,0 +1,96 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "zc4_posting_writer.h"
+#include <vespa/searchlib/index/postinglistfile.h>
+#include <vespa/fastos/file.h>
+#include "zc4_posting_params.h"
+
+namespace search::index {
+    class PostingListCountFileSeqRead;
+}
+
+namespace search::diskindex {
+
+/*
+ * Class used to read posting lists of type "Zc.4" and "Zc.5" (dynamic k).
+ *
+ * Common words have docid deltas and skip info separate from
+ * features.
+ *
+ * Rare words do not have skip info, and docid deltas and features are
+ * interleaved.
+ */
+template <bool bigEndian>
+class Zc4PostingReader
+{
+
+protected:
+    using DecodeContext = bitcompression::FeatureDecodeContext<bigEndian>;
+
+    DecodeContext *_decodeContext; // set by set_decode_features(); null until then
+    uint32_t _docIdK;       // k parameter used when decoding interleaved docid deltas
+    uint32_t _prevDocId;    // Previous document id
+    uint32_t _numDocs;      // Documents in chunk or word
+    search::ComprFileReadContext _readContext;
+    bool _has_more;         // another chunk follows the current one
+    Zc4PostingParams _posting_params;
+    uint32_t _lastDocId;    // last document in chunk or word
+
+    ZcBuf _zcDocIds;    // Document id deltas
+    ZcBuf _l1Skip;      // L1 skip info
+    ZcBuf _l2Skip;      // L2 skip info
+    ZcBuf _l3Skip;      // L3 skip info
+    ZcBuf _l4Skip;      // L4 skip info
+
+    uint64_t _numWords = 0; // Number of words in file (not set by the constructor, hence the default)
+    uint32_t _chunkNo;      // Chunk number
+
+    // Variables for validating skip information while reading
+    uint32_t _l1SkipDocId;
+    uint32_t _l1SkipDocIdPos;
+    uint64_t _l1SkipFeaturesPos;
+    uint32_t _l2SkipDocId;
+    uint32_t _l2SkipDocIdPos;
+    uint32_t _l2SkipL1SkipPos;
+    uint64_t _l2SkipFeaturesPos;
+    uint32_t _l3SkipDocId;
+    uint32_t _l3SkipDocIdPos;
+    uint32_t _l3SkipL1SkipPos;
+    uint32_t _l3SkipL2SkipPos;
+    uint64_t _l3SkipFeaturesPos;
+    uint32_t _l4SkipDocId;
+    uint32_t _l4SkipDocIdPos;
+    uint32_t _l4SkipL1SkipPos;
+    uint32_t _l4SkipL2SkipPos;
+    uint32_t _l4SkipL3SkipPos;
+    uint64_t _l4SkipFeaturesPos;
+
+    // Variable for validating chunk information while reading
+    uint64_t _featuresSize;
+    index::PostingListCounts _counts;
+
+    uint32_t _residue;            // Number of unread documents after word header
+    void read_common_word_doc_id_and_features(index::DocIdAndFeatures &features);
+    void read_word_start_with_skip();
+    void read_word_start();
+public:
+    Zc4PostingReader(bool dynamic_k);
+    Zc4PostingReader(const Zc4PostingReader &) = delete;
+    Zc4PostingReader(Zc4PostingReader &&) = delete;
+    Zc4PostingReader &operator=(const Zc4PostingReader &) = delete;
+    Zc4PostingReader &operator=(Zc4PostingReader &&) = delete;
+    ~Zc4PostingReader();
+    void read_doc_id_and_features(index::DocIdAndFeatures &features); // docid is ~0u once the list is exhausted
+    void set_counts(const index::PostingListCounts &counts);          // call once per word, before reading its documents
+    void set_decode_features(DecodeContext *decode_features);         // must be called before get_decode_features()
+    DecodeContext &get_decode_features() const { return *_decodeContext; }
+    ComprFileReadContext &get_read_context() { return _readContext; }
+    Zc4PostingParams &get_posting_params() { return _posting_params; }
+};
+
+extern template class Zc4PostingReader<false>;
+extern template class Zc4PostingReader<true>;
+
+}
diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp
index 10c08af92cb..3ae2a631cb1 100644
--- a/searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/zcposocc.cpp
@@ -16,14 +16,12 @@ using search::index::PostingListCountFileSeqRead;
 using search::index::PostingListCountFileSeqWrite;
 
 Zc4PosOccSeqRead::Zc4PosOccSeqRead(PostingListCountFileSeqRead *countFile)
-    : Zc4PostingSeqRead(countFile),
+    : Zc4PostingSeqRead(countFile, false),
       _fieldsParams(),
       _cookedDecodeContext(&_fieldsParams),
       _rawDecodeContext(&_fieldsParams)
 {
-    _decodeContext = &_cookedDecodeContext;
-    _decodeContext->setReadContext(&_readContext);
-    _readContext.setDecodeContext(_decodeContext);
+    _reader.set_decode_features(&_cookedDecodeContext);
 }
 
 
@@ -31,18 +29,17 @@ void
 Zc4PosOccSeqRead::
 setFeatureParams(const PostingListParams &params)
 {
-    bool oldCooked = _decodeContext == &_cookedDecodeContext;
+    bool oldCooked = &_reader.get_decode_features() == &_cookedDecodeContext;
     bool newCooked = oldCooked;
     params.get("cooked", newCooked);
     if (oldCooked != newCooked) {
         if (newCooked) {
             _cookedDecodeContext = _rawDecodeContext;
-            _decodeContext = &_cookedDecodeContext;
+            _reader.set_decode_features(&_cookedDecodeContext);
         } else {
             _rawDecodeContext = _cookedDecodeContext;
-            _decodeContext = &_rawDecodeContext;
+            _reader.set_decode_features(&_rawDecodeContext);
         }
-        _readContext.setDecodeContext(_decodeContext);
     }
 }
 
@@ -69,14 +66,12 @@ Zc4PosOccSeqWrite::Zc4PosOccSeqWrite(const Schema &schema,
 
 
 ZcPosOccSeqRead::ZcPosOccSeqRead(PostingListCountFileSeqRead *countFile)
-    : ZcPostingSeqRead(countFile),
+    : Zc4PostingSeqRead(countFile, true),
       _fieldsParams(),
       _cookedDecodeContext(&_fieldsParams),
       _rawDecodeContext(&_fieldsParams)
 {
-    _decodeContext = &_cookedDecodeContext;
-    _decodeContext->setReadContext(&_readContext);
-    _readContext.setDecodeContext(_decodeContext);
+    _reader.set_decode_features(&_cookedDecodeContext);
 }
 
 
@@ -84,18 +79,17 @@ void
 ZcPosOccSeqRead::
 setFeatureParams(const PostingListParams &params)
 {
-    bool oldCooked = _decodeContext == &_cookedDecodeContext;
+    bool oldCooked = &_reader.get_decode_features() == &_cookedDecodeContext;
     bool newCooked = oldCooked;
     params.get("cooked", newCooked);
     if (oldCooked != newCooked) {
         if (newCooked) {
             _cookedDecodeContext = _rawDecodeContext;
-            _decodeContext = &_cookedDecodeContext;
+            _reader.set_decode_features(&_cookedDecodeContext);
         } else {
             _rawDecodeContext = _cookedDecodeContext;
-            _decodeContext = &_rawDecodeContext;
+            _reader.set_decode_features(&_rawDecodeContext);
         }
-        _readContext.setDecodeContext(_decodeContext);
     }
 }
 
diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposocc.h b/searchlib/src/vespa/searchlib/diskindex/zcposocc.h
index cd21fb02f33..1e0555116ce 100644
--- a/searchlib/src/vespa/searchlib/diskindex/zcposocc.h
+++ b/searchlib/src/vespa/searchlib/diskindex/zcposocc.h
@@ -34,7 +34,7 @@ public:
 };
 
 
-class ZcPosOccSeqRead : public ZcPostingSeqRead
+class ZcPosOccSeqRead : public Zc4PostingSeqRead
 {
 private:
     bitcompression::PosOccFieldsParams _fieldsParams;
diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp
index e40842737c9..a0203b64197 100644
--- a/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/zcposting.cpp
@@ -29,60 +29,19 @@ using bitcompression::FeatureEncodeContextBE;
 using vespalib::getLastErrorString;
 
 
-Zc4PostingSeqRead::
-Zc4PostingSeqRead(PostingListCountFileSeqRead *countFile)
+Zc4PostingSeqRead::Zc4PostingSeqRead(PostingListCountFileSeqRead *countFile, bool dynamic_k)
     : PostingListFileSeqRead(),
-      _decodeContext(),
-      _docIdK(0),
-      _prevDocId(0),
-      _numDocs(0),
-      _readContext(sizeof(uint64_t)),
+      _reader(dynamic_k),
       _file(),
-      _hasMore(false),
-      _dynamicK(false),
-      _lastDocId(0),
-      _minChunkDocs(1 << 30),
-      _minSkipDocs(64),
-      _docIdLimit(10000000),
-      _zcDocIds(),
-      _l1Skip(),
-      _l2Skip(),
-      _l3Skip(),
-      _l4Skip(),
       _numWords(0),
       _fileBitSize(0),
-      _chunkNo(0),
-      _l1SkipDocId(0),
-      _l1SkipDocIdPos(0),
-      _l1SkipFeaturesPos(0),
-      _l2SkipDocId(0),
-      _l2SkipDocIdPos(0),
-      _l2SkipL1SkipPos(0),
-      _l2SkipFeaturesPos(0),
-      _l3SkipDocId(0),
-      _l3SkipDocIdPos(0),
-      _l3SkipL1SkipPos(0),
-      _l3SkipL2SkipPos(0),
-      _l3SkipFeaturesPos(0),
-      _l4SkipDocId(0),
-      _l4SkipDocIdPos(0),
-      _l4SkipL1SkipPos(0),
-      _l4SkipL2SkipPos(0),
-      _l4SkipL3SkipPos(0),
-      _l4SkipFeaturesPos(0),
-      _featuresSize(0),
-      _countFile(countFile),
-      _headerBitLen(0),
-      _rangeEndOffset(0),
-      _readAheadEndOffset(0),
-      _wordStart(0),
-      _residue(0)
+      _countFile(countFile)
 {
     if (_countFile != nullptr) {
         PostingListParams params;
         _countFile->getParams(params);
-        params.get("docIdLimit", _docIdLimit);
-        params.get("minChunkDocs", _minChunkDocs);
+        params.get("docIdLimit", _reader.get_posting_params()._doc_id_limit);
+        params.get("minChunkDocs", _reader.get_posting_params()._min_chunk_docs);
     }
 }
 
@@ -91,387 +50,16 @@ Zc4PostingSeqRead::~Zc4PostingSeqRead()
 {
 }
 
-
-void
-Zc4PostingSeqRead::
-readCommonWordDocIdAndFeatures(DocIdAndFeatures &features)
-{
-    if ((_zcDocIds._valI >= _zcDocIds._valE) && _hasMore) {
-        readWordStart();    // Read start of next chunk
-    }
-    // Split docid & features.
-    assert(_zcDocIds._valI < _zcDocIds._valE);
-    uint32_t docIdPos = _zcDocIds.pos();
-    uint32_t docId = _prevDocId + 1 + _zcDocIds.decode();
-    features._docId = docId;
-    _prevDocId = docId;
-    assert(docId <= _lastDocId);
-    if (docId > _l1SkipDocId) {
-        _l1SkipDocIdPos += _l1Skip.decode() + 1;
-        assert(docIdPos == _l1SkipDocIdPos);
-        _l1SkipFeaturesPos += _l1Skip.decode() + 1;
-        uint64_t featuresPos = _decodeContext->getReadOffset();
-        assert(featuresPos == _l1SkipFeaturesPos);
-        (void) featuresPos;
-        if (docId > _l2SkipDocId) {
-            _l2SkipDocIdPos += _l2Skip.decode() + 1;
-            assert(docIdPos == _l2SkipDocIdPos);
-            _l2SkipFeaturesPos += _l2Skip.decode() + 1;
-            assert(featuresPos == _l2SkipFeaturesPos);
-            _l2SkipL1SkipPos += _l2Skip.decode() + 1;
-            assert(_l1Skip.pos() == _l2SkipL1SkipPos);
-            if (docId > _l3SkipDocId) {
-                _l3SkipDocIdPos += _l3Skip.decode() + 1;
-                assert(docIdPos == _l3SkipDocIdPos);
-                _l3SkipFeaturesPos += _l3Skip.decode() + 1;
-                assert(featuresPos == _l3SkipFeaturesPos);
-                _l3SkipL1SkipPos += _l3Skip.decode() + 1;
-                assert(_l1Skip.pos() == _l3SkipL1SkipPos);
-                _l3SkipL2SkipPos += _l3Skip.decode() + 1;
-                assert(_l2Skip.pos() == _l3SkipL2SkipPos);
-                if (docId > _l4SkipDocId) {
-                    _l4SkipDocIdPos += _l4Skip.decode() + 1;
-                    assert(docIdPos == _l4SkipDocIdPos);
-                    (void) docIdPos;
-                    _l4SkipFeaturesPos += _l4Skip.decode() + 1;
-                    assert(featuresPos == _l4SkipFeaturesPos);
-                    _l4SkipL1SkipPos += _l4Skip.decode() + 1;
-                    assert(_l1Skip.pos() == _l4SkipL1SkipPos);
-                    _l4SkipL2SkipPos += _l4Skip.decode() + 1;
-                    assert(_l2Skip.pos() == _l4SkipL2SkipPos);
-                    _l4SkipL3SkipPos += _l4Skip.decode() + 1;
-                    assert(_l3Skip.pos() == _l4SkipL3SkipPos);
-                    _l4SkipDocId += _l4Skip.decode() + 1;
-                    assert(_l4SkipDocId <= _lastDocId);
-                    assert(_l4SkipDocId >= docId);
-                }
-                _l3SkipDocId += _l3Skip.decode() + 1;
-                assert(_l3SkipDocId <= _lastDocId);
-                assert(_l3SkipDocId <= _l4SkipDocId);
-                assert(_l3SkipDocId >= docId);
-            }
-            _l2SkipDocId += _l2Skip.decode() + 1;
-            assert(_l2SkipDocId <= _lastDocId);
-            assert(_l2SkipDocId <= _l4SkipDocId);
-            assert(_l2SkipDocId <= _l3SkipDocId);
-            assert(_l2SkipDocId >= docId);
-        }
-        _l1SkipDocId += _l1Skip.decode() + 1;
-        assert(_l1SkipDocId <= _lastDocId);
-        assert(_l1SkipDocId <= _l4SkipDocId);
-        assert(_l1SkipDocId <= _l3SkipDocId);
-        assert(_l1SkipDocId <= _l2SkipDocId);
-        assert(_l1SkipDocId >= docId);
-    }
-    if (docId < _lastDocId) {
-        // Assert more space available when not yet at last docid
-        assert(_zcDocIds._valI < _zcDocIds._valE);
-    } else {
-        // Assert that space has been used when at last docid
-        assert(_zcDocIds._valI == _zcDocIds._valE);
-        // Assert that we've read to end of skip info
-        assert(_l1SkipDocId == _lastDocId);
-        assert(_l2SkipDocId == _lastDocId);
-        assert(_l3SkipDocId == _lastDocId);
-        assert(_l4SkipDocId == _lastDocId);
-        if (!_hasMore) {
-            _chunkNo = 0;
-        }
-    }
-    _decodeContext->readFeatures(features);
-    --_residue;
-}
-
-
-void
-Zc4PostingSeqRead::
-readDocIdAndFeatures(DocIdAndFeatures &features)
-{
-    if (_residue == 0 && !_hasMore) {
-        if (_rangeEndOffset != 0) {
-            DecodeContext &d = *_decodeContext;
-            uint64_t curOffset = d.getReadOffset();
-            assert(curOffset <= _rangeEndOffset);
-            if (curOffset < _rangeEndOffset) {
-                readWordStart();
-            }
-        }
-        if (_residue == 0) {
-            // Don't read past end of posting list.
-            features.clear(static_cast<uint32_t>(-1));
-            return;
-        }
-    }
-    if (_lastDocId > 0) {
-        return readCommonWordDocIdAndFeatures(features);
-    }
-    // Interleaves docid & features
-    typedef FeatureEncodeContextBE EC;
-    DecodeContext &d = *_decodeContext;
-    uint32_t length;
-    uint64_t val64;
-    UC64_DECODECONTEXT_CONSTRUCTOR(o, d._);
-
-    UC64BE_DECODEEXPGOLOMB_SMALL_NS(o,
-                                    K_VALUE_ZCPOSTING_DELTA_DOCID,
-                                    EC);
-    uint32_t docId = _prevDocId + 1 + val64;
-    features._docId = docId;
-    _prevDocId = docId;
-    UC64_DECODECONTEXT_STORE(o, d._);
-    if (__builtin_expect(oCompr >= d._valE, false)) {
-        _readContext.readComprBuffer();
-    }
-    _decodeContext->readFeatures(features);
-    --_residue;
-}
-
-
-void
-Zc4PostingSeqRead::readWordStartWithSkip()
-{
-    typedef FeatureEncodeContextBE EC;
-    DecodeContext &d = *_decodeContext;
-    UC64_DECODECONTEXT_CONSTRUCTOR(o, d._);
-    uint32_t length;
-    uint64_t val64;
-    const uint64_t *valE = d._valE;
-
-    if (_hasMore) {
-        ++_chunkNo;
-    } else {
-        _chunkNo = 0;
-    }
-    assert(_numDocs >= _minSkipDocs || _hasMore);
-    bool hasMore = false;
-    if (__builtin_expect(_numDocs >= _minChunkDocs, false)) {
-        hasMore = static_cast<int64_t>(oVal) < 0;
-        oVal <<= 1;
-        length = 1;
-        UC64BE_READBITS_NS(o, EC);
-    }
-    if (_dynamicK) {
-        _docIdK = EC::calcDocIdK((_hasMore || hasMore) ? 1 : _numDocs,
-                                 _docIdLimit);
-    }
-    if (_hasMore || hasMore) {
-        if (_rangeEndOffset == 0) {
-            assert(hasMore == (_chunkNo + 1 < _counts._segments.size()));
-            assert(_numDocs == _counts._segments[_chunkNo]._numDocs);
-        }
-        if (hasMore) {
-            assert(_numDocs >= _minSkipDocs);
-            assert(_numDocs >= _minChunkDocs);
-        }
-    } else {
-        assert(_numDocs >= _minSkipDocs);
-        if (_rangeEndOffset == 0) {
-            assert(_numDocs == _counts._numDocs);
-        }
-    }
-    if (__builtin_expect(oCompr >= valE, false)) {
-        UC64_DECODECONTEXT_STORE(o, d._);
-        _readContext.readComprBuffer();
-        valE = d._valE;
-        UC64_DECODECONTEXT_LOAD(o, d._);
-    }
-    UC64BE_DECODEEXPGOLOMB_NS(o,
-                              K_VALUE_ZCPOSTING_DOCIDSSIZE,
-                              EC);
-    uint32_t docIdsSize = val64 + 1;
-    UC64BE_DECODEEXPGOLOMB_NS(o,
-                              K_VALUE_ZCPOSTING_L1SKIPSIZE,
-                              EC);
-    uint32_t l1SkipSize = val64;
-    if (__builtin_expect(oCompr >= valE, false)) {
-        UC64_DECODECONTEXT_STORE(o, d._);
-        _readContext.readComprBuffer();
-        valE = d._valE;
-        UC64_DECODECONTEXT_LOAD(o, d._);
-    }
-    uint32_t l2SkipSize = 0;
-    if (l1SkipSize != 0) {
-        UC64BE_DECODEEXPGOLOMB_NS(o,
-                                  K_VALUE_ZCPOSTING_L2SKIPSIZE,
-                                  EC);
-        l2SkipSize = val64;
-    }
-    uint32_t l3SkipSize = 0;
-    if (l2SkipSize != 0) {
-        UC64BE_DECODEEXPGOLOMB_NS(o,
-                                  K_VALUE_ZCPOSTING_L3SKIPSIZE,
-                                  EC);
-        l3SkipSize = val64;
-    }
-    if (__builtin_expect(oCompr >= valE, false)) {
-        UC64_DECODECONTEXT_STORE(o, d._);
-        _readContext.readComprBuffer();
-        valE = d._valE;
-        UC64_DECODECONTEXT_LOAD(o, d._);
-    }
-    uint32_t l4SkipSize = 0;
-    if (l3SkipSize != 0) {
-        UC64BE_DECODEEXPGOLOMB_NS(o,
-                                  K_VALUE_ZCPOSTING_L4SKIPSIZE,
-                                  EC);
-        l4SkipSize = val64;
-    }
-    UC64BE_DECODEEXPGOLOMB_NS(o,
-                              K_VALUE_ZCPOSTING_FEATURESSIZE,
-                              EC);
-    _featuresSize = val64;
-    if (__builtin_expect(oCompr >= valE, false)) {
-        UC64_DECODECONTEXT_STORE(o, d._);
-        _readContext.readComprBuffer();
-        valE = d._valE;
-        UC64_DECODECONTEXT_LOAD(o, d._);
-    }
-    if (_dynamicK) {
-        UC64BE_DECODEEXPGOLOMB_NS(o,
-                                  _docIdK,
-                                  EC);
-    } else {
-        UC64BE_DECODEEXPGOLOMB_NS(o,
-                                  K_VALUE_ZCPOSTING_LASTDOCID,
-                                  EC);
-    }
-    _lastDocId = _docIdLimit - 1 - val64;
-    if (_hasMore || hasMore) {
-        if (_rangeEndOffset == 0) {
-            assert(_lastDocId == _counts._segments[_chunkNo]._lastDoc);
-        }
-    }
-
-    if (__builtin_expect(oCompr >= valE, false)) {
-        UC64_DECODECONTEXT_STORE(o, d._);
-        _readContext.readComprBuffer();
-        valE = d._valE;
-        UC64_DECODECONTEXT_LOAD(o, d._);
-    }
-    uint64_t bytePad = oPreRead & 7;
-    if (bytePad > 0) {
-        length = bytePad;
-        oVal <<= length;
-        UC64BE_READBITS_NS(o, EC);
-    }
-    UC64_DECODECONTEXT_STORE(o, d._);
-    if (__builtin_expect(oCompr >= valE, false)) {
-        _readContext.readComprBuffer();
-    }
-    _zcDocIds.clearReserve(docIdsSize);
-    _l1Skip.clearReserve(l1SkipSize);
-    _l2Skip.clearReserve(l2SkipSize);
-    _l3Skip.clearReserve(l3SkipSize);
-    _l4Skip.clearReserve(l4SkipSize);
-    _decodeContext->readBytes(_zcDocIds._valI, docIdsSize);
-    _zcDocIds._valE = _zcDocIds._valI + docIdsSize;
-    if (l1SkipSize > 0) {
-        _decodeContext->readBytes(_l1Skip._valI, l1SkipSize);
-    }
-    _l1Skip._valE = _l1Skip._valI + l1SkipSize;
-    if (l2SkipSize > 0) {
-        _decodeContext->readBytes(_l2Skip._valI, l2SkipSize);
-    }
-    _l2Skip._valE = _l2Skip._valI + l2SkipSize;
-    if (l3SkipSize > 0) {
-        _decodeContext->readBytes(_l3Skip._valI, l3SkipSize);
-    }
-    _l3Skip._valE = _l3Skip._valI + l3SkipSize;
-    if (l4SkipSize > 0) {
-        _decodeContext->readBytes(_l4Skip._valI, l4SkipSize);
-    }
-    _l4Skip._valE = _l4Skip._valI + l4SkipSize;
-
-    if (l1SkipSize > 0) {
-        _l1SkipDocId = _l1Skip.decode() + 1 + _prevDocId;
-    } else {
-        _l1SkipDocId = _lastDocId;
-    }
-    if (l2SkipSize > 0) {
-        _l2SkipDocId = _l2Skip.decode() + 1 + _prevDocId;
-    } else {
-        _l2SkipDocId = _lastDocId;
-    }
-    if (l3SkipSize > 0) {
-        _l3SkipDocId = _l3Skip.decode() + 1 + _prevDocId;
-    } else {
-        _l3SkipDocId = _lastDocId;
-    }
-    if (l4SkipSize > 0) {
-        _l4SkipDocId = _l4Skip.decode() + 1 + _prevDocId;
-    } else {
-        _l4SkipDocId = _lastDocId;
-    }
-    _l1SkipDocIdPos = 0;
-    _l1SkipFeaturesPos = _decodeContext->getReadOffset();
-    _l2SkipDocIdPos = 0;
-    _l2SkipL1SkipPos = 0;
-    _l2SkipFeaturesPos = _decodeContext->getReadOffset();
-    _l3SkipDocIdPos = 0;
-    _l3SkipL1SkipPos = 0;
-    _l3SkipL2SkipPos = 0;
-    _l3SkipFeaturesPos = _decodeContext->getReadOffset();
-    _l4SkipDocIdPos = 0;
-    _l4SkipL1SkipPos = 0;
-    _l4SkipL2SkipPos = 0;
-    _l4SkipL3SkipPos = 0;
-    _l4SkipFeaturesPos = _decodeContext->getReadOffset();
-    _hasMore = hasMore;
-    // Decode context is now positioned at start of features
-}
-
-
 void
-Zc4PostingSeqRead::readWordStart()
+Zc4PostingSeqRead::readDocIdAndFeatures(DocIdAndFeatures &features)
 {
-    typedef FeatureEncodeContextBE EC;
-    UC64_DECODECONTEXT_CONSTRUCTOR(o, _decodeContext->_);
-    uint32_t length;
-    uint64_t val64;
-    const uint64_t *valE = _decodeContext->_valE;
-
-    UC64BE_DECODEEXPGOLOMB_NS(o,
-                              K_VALUE_ZCPOSTING_NUMDOCS,
-                              EC);
-    UC64_DECODECONTEXT_STORE(o, _decodeContext->_);
-    if (oCompr >= valE) {
-        _readContext.readComprBuffer();
-    }
-    _numDocs = static_cast<uint32_t>(val64) + 1;
-    _residue = _numDocs;
-    _prevDocId = _hasMore ? _lastDocId : 0u;
-    if (_rangeEndOffset == 0) {
-        assert(_numDocs <= _counts._numDocs);
-        assert(_numDocs == _counts._numDocs ||
-               _numDocs >= _minChunkDocs ||
-               _hasMore);
-    }
-
-    if (_numDocs >= _minSkipDocs || _hasMore) {
-        readWordStartWithSkip();
-        // Decode context is not positioned at start of features
-    } else {
-        if (_dynamicK) {
-            _docIdK = EC::calcDocIdK(_numDocs, _docIdLimit);
-        }
-        _lastDocId = 0u;
-        // Decode context is not positioned at start of docids & features
-    }
+    _reader.read_doc_id_and_features(features);
 }
 
-
 void
 Zc4PostingSeqRead::readCounts(const PostingListCounts &counts)
 {
-    assert(!_hasMore);  // Previous words must have been read.
-
-    _counts = counts;
-
-    assert((_counts._numDocs == 0) == (_counts._bitLength == 0));
-    if (_counts._numDocs > 0) {
-        _wordStart = _decodeContext->getReadOffset();
-        readWordStart();
-    }
+    _reader.set_counts(counts);
 }
 
 
@@ -484,16 +72,17 @@ Zc4PostingSeqRead::open(const vespalib::string &name,
     }
     bool res = _file.OpenReadOnly(name.c_str());
     if (res) {
-        _readContext.setFile(&_file);
-        _readContext.setFileSize(_file.GetSize());
-        DecodeContext &d = *_decodeContext;
-        _readContext.allocComprBuf(65536u, 32768u);
+        auto &readContext = _reader.get_read_context();
+        readContext.setFile(&_file);
+        readContext.setFileSize(_file.GetSize());
+        auto &d = _reader.get_decode_features();
+        readContext.allocComprBuf(65536u, 32768u);
         d.emptyBuffer(0);
-        _readContext.readComprBuffer();
+        readContext.readComprBuffer();
 
         readHeader();
         if (d._valI >= d._valE) {
-            _readContext.readComprBuffer();
+            readContext.readComprBuffer();
         }
     } else {
         LOG(error, "could not open %s: %s",
@@ -506,9 +95,10 @@ Zc4PostingSeqRead::open(const vespalib::string &name,
 bool
 Zc4PostingSeqRead::close()
 {
-    _readContext.dropComprBuf();
+    auto &readContext = _reader.get_read_context();
+    readContext.dropComprBuf();
     _file.Close();
-    _readContext.setFile(nullptr);
+    readContext.setFile(nullptr);
     return true;
 }
 
@@ -524,29 +114,30 @@ Zc4PostingSeqRead::getParams(PostingListParams &params)
         uint32_t countMinChunkDocs = 0;
         countParams.get("docIdLimit", countDocIdLimit);
         countParams.get("minChunkDocs", countMinChunkDocs);
-        assert(_docIdLimit == countDocIdLimit);
-        assert(_minChunkDocs == countMinChunkDocs);
+        assert(_reader.get_posting_params()._doc_id_limit == countDocIdLimit);
+        assert(_reader.get_posting_params()._min_chunk_docs == countMinChunkDocs);
     } else {
         params.clear();
-        params.set("docIdLimit", _docIdLimit);
-        params.set("minChunkDocs", _minChunkDocs);
+        params.set("docIdLimit", _reader.get_posting_params()._doc_id_limit);
+        params.set("minChunkDocs", _reader.get_posting_params()._min_chunk_docs);
     }
-    params.set("minSkipDocs", _minSkipDocs);
+    params.set("minSkipDocs", _reader.get_posting_params()._min_skip_docs);
 }
 
 
 void
 Zc4PostingSeqRead::getFeatureParams(PostingListParams &params)
 {
-    _decodeContext->getParams(params);
+    _reader.get_decode_features().getParams(params);
 }
 
 
 void
 Zc4PostingSeqRead::readHeader()
 {
-    FeatureDecodeContextBE &d = *_decodeContext;
-    const vespalib::string &myId = _dynamicK ? myId5 : myId4;
+    FeatureDecodeContextBE &d = _reader.get_decode_features();
+    auto &posting_params = _reader.get_posting_params();
+    const vespalib::string &myId = posting_params._dynamic_k ? myId5 : myId4;
 
     vespalib::FileHeader header;
     d.readHeader(header, _file.getSize());
@@ -571,9 +162,9 @@ Zc4PostingSeqRead::readHeader()
     (void) myId;
     assert(header.getTag("format.1").asString() == d.getIdentifier());
     _numWords = header.getTag("numWords").asInteger();
-    _minChunkDocs = header.getTag("minChunkDocs").asInteger();
-    _docIdLimit = header.getTag("docIdLimit").asInteger();
-    _minSkipDocs = header.getTag("minSkipDocs").asInteger();
+    posting_params._min_chunk_docs = header.getTag("minChunkDocs").asInteger();
+    posting_params._doc_id_limit = header.getTag("docIdLimit").asInteger();
+    posting_params._min_skip_docs = header.getTag("minSkipDocs").asInteger();
     assert(header.getTag("endian").asString() == "big");
     // Read feature decoding specific subheader
     d.readHeader(header, "features.");
@@ -585,38 +176,9 @@ Zc4PostingSeqRead::readHeader()
 
 
 const vespalib::string &
-Zc4PostingSeqRead::getIdentifier()
-{
-    return myId4;
-}
-
-
-uint64_t
-Zc4PostingSeqRead::getCurrentPostingOffset() const
+Zc4PostingSeqRead::getIdentifier(bool dynamic_k)
 {
-    FeatureDecodeContextBE &d = *_decodeContext;
-    return d.getReadOffset() - _headerBitLen;
-}
-
-
-void
-Zc4PostingSeqRead::setPostingOffset(uint64_t offset,
-                                    uint64_t endOffset,
-                                    uint64_t readAheadOffset)
-{
-    assert(_residue == 0);  // Only to be called between posting lists
-
-    FeatureDecodeContextBE &d = *_decodeContext;
-
-    _rangeEndOffset = endOffset + _headerBitLen;
-    _readAheadEndOffset = readAheadOffset +  _headerBitLen;
-    _readContext.setStopOffset(_readAheadEndOffset, false);
-    uint64_t newOffset = offset + _headerBitLen;
-    if (newOffset != d.getReadOffset()) {
-        _readContext.setPosition(newOffset);
-        assert(newOffset == d.getReadOffset());
-        _readContext.readComprBuffer();
-    }
+    return (dynamic_k ? myId5 : myId4);
 }
 
 
@@ -809,65 +371,6 @@ getFeatureParams(PostingListParams &params)
 }
 
 
-ZcPostingSeqRead::ZcPostingSeqRead(PostingListCountFileSeqRead *countFile)
-    : Zc4PostingSeqRead(countFile)
-{
-    _dynamicK = true;
-}
-
-
-void
-ZcPostingSeqRead::
-readDocIdAndFeatures(DocIdAndFeatures &features)
-{
-    if (_residue == 0 && !_hasMore) {
-        if (_rangeEndOffset != 0) {
-            DecodeContext &d = *_decodeContext;
-            uint64_t curOffset = d.getReadOffset();
-            assert(curOffset <= _rangeEndOffset);
-            if (curOffset < _rangeEndOffset) {
-                readWordStart();
-            }
-        }
-        if (_residue == 0) {
-            // Don't read past end of posting list.
-            features.clear(static_cast<uint32_t>(-1));
-            return;
-        }
-    }
-    if (_lastDocId > 0) {
-        readCommonWordDocIdAndFeatures(features);
-        return;
-    }
-    // Interleaves docid & features
-    typedef FeatureEncodeContextBE EC;
-    DecodeContext &d = *_decodeContext;
-    uint32_t length;
-    uint64_t val64;
-    UC64_DECODECONTEXT_CONSTRUCTOR(o, d._);
-
-    UC64BE_DECODEEXPGOLOMB_SMALL_NS(o,
-                                    _docIdK,
-                                    EC);
-    uint32_t docId = _prevDocId + 1 + val64;
-    features._docId = docId;
-    _prevDocId = docId;
-    UC64_DECODECONTEXT_STORE(o, d._);
-    if (__builtin_expect(oCompr >= d._valE, false)) {
-        _readContext.readComprBuffer();
-    }
-    _decodeContext->readFeatures(features);
-    --_residue;
-}
-
-
-const vespalib::string &
-ZcPostingSeqRead::getIdentifier()
-{
-    return myId5;
-}
-
-
 ZcPostingSeqWrite::ZcPostingSeqWrite(PostingListCountFileSeqWrite *countFile)
     : Zc4PostingSeqWrite(countFile)
 {
diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposting.h b/searchlib/src/vespa/searchlib/diskindex/zcposting.h
index 96cc306cea8..01049e720a9 100644
--- a/searchlib/src/vespa/searchlib/diskindex/zcposting.h
+++ b/searchlib/src/vespa/searchlib/diskindex/zcposting.h
@@ -3,8 +3,10 @@
 #pragma once
 
 #include "zc4_posting_writer.h"
+#include "zc4_posting_reader.h"
 #include <vespa/searchlib/index/postinglistfile.h>
 #include <vespa/fastos/file.h>
+#include "zc4_posting_params.h"
 
 namespace search::index {
     class PostingListCountFileSeqRead;
@@ -19,63 +21,14 @@ class Zc4PostingSeqRead : public index::PostingListFileSeqRead
     Zc4PostingSeqRead &operator=(const Zc4PostingSeqRead &);
 
 protected:
-    typedef bitcompression::FeatureDecodeContextBE DecodeContext;
-    typedef bitcompression::FeatureEncodeContextBE EncodeContext;
-
-    DecodeContext *_decodeContext;
-    uint32_t _docIdK;
-    uint32_t _prevDocId;    // Previous document id
-    uint32_t _numDocs;      // Documents in chunk or word
-    search::ComprFileReadContext _readContext;
+    Zc4PostingReader<true> _reader;
     FastOS_File _file;
-    bool _hasMore;
-    bool _dynamicK;         // Caclulate EG compression parameters ?
-    uint32_t _lastDocId;    // last document in chunk or word
-    uint32_t _minChunkDocs; // # of documents needed for chunking
-    uint32_t _minSkipDocs;  // # of documents needed for skipping
-    uint32_t _docIdLimit;   // Limit for document ids (docId < docIdLimit)
-
-    ZcBuf _zcDocIds;    // Document id deltas
-    ZcBuf _l1Skip;      // L1 skip info
-    ZcBuf _l2Skip;      // L2 skip info
-    ZcBuf _l3Skip;      // L3 skip info
-    ZcBuf _l4Skip;      // L4 skip info
-
     uint64_t _numWords;     // Number of words in file
     uint64_t _fileBitSize;
-    uint32_t _chunkNo;      // Chunk number
-
-    // Variables for validating skip information while reading
-    uint32_t _l1SkipDocId;
-    uint32_t _l1SkipDocIdPos;
-    uint64_t _l1SkipFeaturesPos;
-    uint32_t _l2SkipDocId;
-    uint32_t _l2SkipDocIdPos;
-    uint32_t _l2SkipL1SkipPos;
-    uint64_t _l2SkipFeaturesPos;
-    uint32_t _l3SkipDocId;
-    uint32_t _l3SkipDocIdPos;
-    uint32_t _l3SkipL1SkipPos;
-    uint32_t _l3SkipL2SkipPos;
-    uint64_t _l3SkipFeaturesPos;
-    uint32_t _l4SkipDocId;
-    uint32_t _l4SkipDocIdPos;
-    uint32_t _l4SkipL1SkipPos;
-    uint32_t _l4SkipL2SkipPos;
-    uint32_t _l4SkipL3SkipPos;
-    uint64_t _l4SkipFeaturesPos;
-
-    // Variable for validating chunk information while reading
-    uint64_t _featuresSize;
     index::PostingListCountFileSeqRead *const _countFile;
-
     uint64_t _headerBitLen;       // Size of file header in bits
-    uint64_t _rangeEndOffset;     // End offset for word pair
-    uint64_t _readAheadEndOffset; // Readahead end offset for word pair
-    uint64_t _wordStart;          // last word header position
-    uint32_t _residue;            // Number of unread documents after word header
 public:
-    Zc4PostingSeqRead(index::PostingListCountFileSeqRead *countFile);
+    Zc4PostingSeqRead(index::PostingListCountFileSeqRead *countFile, bool dynamic_k);
 
     ~Zc4PostingSeqRead();
 
@@ -83,11 +36,6 @@ public:
     typedef index::PostingListCounts PostingListCounts;
     typedef index::PostingListParams PostingListParams;
 
-    /**
-     * Read document id and features for common word.
-     */
-    virtual void readCommonWordDocIdAndFeatures(DocIdAndFeatures &features);
-
     void readDocIdAndFeatures(DocIdAndFeatures &features) override;
     void readCounts(const PostingListCounts &counts) override; // Fill in for next word
     bool open(const vespalib::string &name, const TuneFileSeqRead &tuneFileRead) override;
@@ -97,28 +45,7 @@ public:
     void readWordStartWithSkip();
     void readWordStart();
     void readHeader();
-    static const vespalib::string &getIdentifier();
-
-    // Methods used when generating posting list for common word pairs.
-
-    /*
-     * Get current posting offset, measured in bits.  First posting list
-     * starts at 0, i.e.  file header is not accounted for here.
-     *
-     * @return current posting offset, measured in bits.
-     */
-    uint64_t getCurrentPostingOffset() const override;
-
-    /**
-     * Set current posting offset, measured in bits.  First posting
-     * list starts at 0, i.e.  file header is not accounted for here.
-     *
-     * @param Offset start of posting lists for word pair.
-     * @param endOffset end of posting lists for word pair.
-     * @param readAheadOffset end of posting list for either this or a
-     *               later word pair, depending on disk seek cost.
-     */
-    void setPostingOffset(uint64_t offset, uint64_t endOffset, uint64_t readAheadOffset) override;
+    static const vespalib::string &getIdentifier(bool dynamic_k);
 };
 
 
@@ -161,15 +88,6 @@ public:
     void updateHeader();
 };
 
-
-class ZcPostingSeqRead : public Zc4PostingSeqRead
-{
-public:
-    ZcPostingSeqRead(index::PostingListCountFileSeqRead *countFile);
-    void readDocIdAndFeatures(DocIdAndFeatures &features) override;
-    static const vespalib::string &getIdentifier();
-};
-
 class ZcPostingSeqWrite : public Zc4PostingSeqWrite
 {
 public:
diff --git a/searchlib/src/vespa/searchlib/index/postinglistfile.cpp b/searchlib/src/vespa/searchlib/index/postinglistfile.cpp
index 0f0860f9145..52c6b85a0b8 100644
--- a/searchlib/src/vespa/searchlib/index/postinglistfile.cpp
+++ b/searchlib/src/vespa/searchlib/index/postinglistfile.cpp
@@ -6,8 +6,6 @@
 namespace search::index {
 
 PostingListFileSeqRead::PostingListFileSeqRead()
-    : _counts(),
-      _residueDocs(0)
 {
 }
 
diff --git a/searchlib/src/vespa/searchlib/index/postinglistfile.h b/searchlib/src/vespa/searchlib/index/postinglistfile.h
index 194ac519a19..1e7dde7f139 100644
--- a/searchlib/src/vespa/searchlib/index/postinglistfile.h
+++ b/searchlib/src/vespa/searchlib/index/postinglistfile.h
@@ -19,9 +19,6 @@ class DocIdAndFeatures;
  * for words.
  */
 class PostingListFileSeqRead {
-protected:
-    PostingListCounts _counts;
-    unsigned int _residueDocs;  // Docids left to read for word
 public:
     PostingListFileSeqRead();
 
@@ -63,34 +60,6 @@ public:
      * Get current (word, docid) feature parameters.
      */
     virtual void getFeatureParams(PostingListParams &params);
-
-    // Methods used when generating posting list for common word pairs.
-
-    /*
-     * Get current posting offset, measured in bits.  First posting list
-     * starts at 0, i.e.  file header is not accounted for here.
-     *
-     * @return current posting offset, measured in bits.
-     */
-    virtual uint64_t getCurrentPostingOffset() const = 0;
-
-    /**
-     * Set current posting offset, measured in bits.  First posting
-     * list starts at 0, i.e.  file header is not accounted for here.
-     *
-     * @param Offset start of posting lists for word pair.
-     * @param endOffset end of posting lists for word pair.
-     * @param readAheadOffset end of posting list for either this or a
-     *               later word pair, depending on disk seek cost.
-     */
-    virtual void setPostingOffset(uint64_t offset, uint64_t endOffset, uint64_t readAheadOffset) = 0;
-
-    /**
-     * Get counts read by last readCounts().
-     */
-    const PostingListCounts &getCounts() const { return _counts; }
-
-    PostingListCounts &getCounts() { return _counts; }
 };
 
 /**
author	Tor Egge <Tor.Egge@broadpark.no>	2019-04-26 11:08:17 +0200
committer	Tor Egge <Tor.Egge@broadpark.no>	2019-04-28 23:49:16 +0200
commit	7553e0390c1ceb3834cba62774b3ddc77a6944d1 (patch)
tree	0f524636b34a18fa5948889d2f1b3f01a78c9881 /searchlib
parent	e287c58dce2df5eb5451a61000aab34553698a55 (diff)