From b9b19ad3198d8e071130b11fd582f6edc5d17ea8 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Tue, 30 Apr 2019 13:50:31 +0200 Subject: Simplify reading of posting list header. --- .../vespa/searchlib/bitcompression/compression.h | 58 ++++++---------- .../searchlib/diskindex/zc4_posting_header.cpp | 77 ++++------------------ .../vespa/searchlib/diskindex/zc4_posting_header.h | 1 - .../searchlib/test/fakedata/fakezcfilterocc.cpp | 6 +- 4 files changed, 35 insertions(+), 107 deletions(-) (limited to 'searchlib') diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.h b/searchlib/src/vespa/searchlib/bitcompression/compression.h index 33b5e0b0975..b9166f675aa 100644 --- a/searchlib/src/vespa/searchlib/bitcompression/compression.h +++ b/searchlib/src/vespa/searchlib/bitcompression/compression.h @@ -1141,7 +1141,7 @@ public: DecodeContext64Base() : search::ComprFileDecodeContext(), _valI(nullptr), - _valE(nullptr), + _valE(static_cast(nullptr) - 1), _realValE(nullptr), _val(0), _cacheInt(0), @@ -1298,6 +1298,8 @@ public: } } virtual uint64_t readBits(uint32_t length) = 0; + virtual void align(uint32_t alignment) = 0; + virtual uint64_t decode_exp_golomb(int k) = 0; void readBytes(uint8_t *buf, size_t len); uint32_t readHeader(vespalib::GenericHeader &header, int64_t fileSize); }; @@ -1321,7 +1323,7 @@ public: DecodeContext64(const uint64_t *compr, int bitOffset) : DecodeContext64Base(compr + 1, - nullptr, + static_cast(nullptr) - 1, nullptr, 0, EC::bswap(*compr), @@ -1407,10 +1409,12 @@ public: }; void skipBits(int bits) override { + readComprBufferIfNeeded(); while (bits >= 64) { _val = 0; ReadBits(64, _val, _cacheInt, _preRead, _valI); bits -= 64; + readComprBufferIfNeeded(); } if (bits > 0) { if (bigEndian) { @@ -1419,6 +1423,7 @@ public: _val >>= bits; } ReadBits(bits, _val, _cacheInt, _preRead, _valI); + readComprBufferIfNeeded(); } } @@ -1474,20 +1479,32 @@ public: _val = 0; } UC64_READBITS(_val, _valI, _preRead, _cacheInt, EC); + 
readComprBufferIfNeeded(); return res; } + uint64_t decode_exp_golomb(int k) override { + uint32_t length; + uint64_t val64; + UC64_DECODEEXPGOLOMB(_val, _valI, _preRead, _cacheInt, k, EC); + readComprBufferIfNeeded(); + return val64; + } + void - align(uint32_t alignment) + align(uint32_t alignment) override { + readComprBufferIfNeeded(); uint64_t pad = (- getReadOffset()) & (alignment - 1); while (pad > 64) { (void) readBits(64); pad -= 64; + readComprBufferIfNeeded(); } if (pad > 0) { (void) readBits(pad); } + readComprBufferIfNeeded(); } /* @@ -1576,41 +1593,6 @@ public: */ virtual void getParams(PostingListParams &params) const; - - void skipBits(int bits) override { - readComprBufferIfNeeded(); - while (bits >= 64) { - _val = 0; - ReadBits(64, _val, _cacheInt, _preRead, _valI); - bits -= 64; - readComprBufferIfNeeded(); - } - if (bits > 0) { - if (bigEndian) { - _val <<= bits; - } else { - _val >>= bits; - } - ReadBits(bits, _val, _cacheInt, _preRead, _valI); - readComprBufferIfNeeded(); - } - } - - void - align(uint32_t alignment) - { - readComprBufferIfNeeded(); - uint64_t pad = (- getReadOffset()) & (alignment - 1); - while (pad > 64) { - (void) readBits(64); - pad -= 64; - readComprBufferIfNeeded(); - } - if (pad > 0) { - (void) readBits(pad); - } - readComprBufferIfNeeded(); - } }; typedef FeatureDecodeContext FeatureDecodeContextBE; diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.cpp b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.cpp index 5288d054ef0..2149a44f5ce 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.cpp @@ -20,35 +20,13 @@ Zc4PostingHeader::Zc4PostingHeader() { } -template void Zc4PostingHeader::read(bitcompression::DecodeContext64Base &decode_context, const Zc4PostingParams &params) { - using EC = bitcompression::FeatureEncodeContext; - UC64_DECODECONTEXT_CONSTRUCTOR(o, decode_context._); - uint32_t length; - uint64_t 
val64; - - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC); - _num_docs = static_cast(val64) + 1; - bool has_more = false; - if (__builtin_expect(_num_docs >= params._min_chunk_docs, false)) { - if (bigEndian) { - has_more = static_cast(oVal) < 0; - oVal <<= 1; - length = 1; - } else { - has_more = (oVal & 1) != 0; - oVal >>= 1; - length = 1; - } - UC64_READBITS_NS(o, EC); - } - if (params._dynamic_k) { - _doc_id_k = EC::calcDocIdK((_has_more || has_more) ? 1 : _num_docs, params._doc_id_limit); - } else { - _doc_id_k = K_VALUE_ZCPOSTING_LASTDOCID; - } + using EC = bitcompression::FeatureEncodeContext; + _num_docs = decode_context.decode_exp_golomb(K_VALUE_ZCPOSTING_NUMDOCS) + 1; + bool has_more = (_num_docs >= params._min_chunk_docs) ? (decode_context.readBits(1) != 0) : false; + _doc_id_k = params._dynamic_k ? EC::calcDocIdK((_has_more || has_more) ? 1 : _num_docs, params._doc_id_limit) : K_VALUE_ZCPOSTING_LASTDOCID; if (_num_docs < params._min_skip_docs && !_has_more) { _doc_ids_size = 0; _l1_skip_size = 0; @@ -58,47 +36,16 @@ Zc4PostingHeader::read(bitcompression::DecodeContext64Base &decode_context, cons _features_size = 0; _last_doc_id = 0; } else { - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_DOCIDSSIZE, EC); - _doc_ids_size = val64 + 1; - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L1SKIPSIZE, EC); - _l1_skip_size = val64; - if (_l1_skip_size != 0) { - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L2SKIPSIZE, EC); - _l2_skip_size = val64; - } - if (_l2_skip_size != 0) { - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L3SKIPSIZE, EC); - _l3_skip_size = val64; - } - if (_l3_skip_size != 0) { - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L4SKIPSIZE, EC); - _l4_skip_size = val64; - } - if (params._encode_features) { - UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_FEATURESSIZE, EC); - _features_size = val64; - } else { - _features_size = 0; - } - UC64_DECODEEXPGOLOMB_NS(o, _doc_id_k, EC); - _last_doc_id = params._doc_id_limit - 1 - val64; - uint64_t 
bytePad = oPreRead & 7; - if (bytePad > 0) { - length = bytePad; - UC64_READBITS_NS(o, EC); - } + _doc_ids_size = decode_context.decode_exp_golomb(K_VALUE_ZCPOSTING_DOCIDSSIZE) + 1; + _l1_skip_size = decode_context.decode_exp_golomb(K_VALUE_ZCPOSTING_L1SKIPSIZE); + _l2_skip_size = (_l1_skip_size != 0) ? decode_context.decode_exp_golomb(K_VALUE_ZCPOSTING_L2SKIPSIZE) : 0; + _l3_skip_size = (_l2_skip_size != 0) ? decode_context.decode_exp_golomb(K_VALUE_ZCPOSTING_L3SKIPSIZE) : 0; + _l4_skip_size = (_l3_skip_size != 0) ? decode_context.decode_exp_golomb(K_VALUE_ZCPOSTING_L4SKIPSIZE) : 0; + _features_size = params._encode_features ? decode_context.decode_exp_golomb(K_VALUE_ZCPOSTING_FEATURESSIZE) : 0; + _last_doc_id = params._doc_id_limit - 1 - decode_context.decode_exp_golomb(_doc_id_k); + decode_context.align(8); } - UC64_DECODECONTEXT_STORE(o, decode_context._); _has_more = has_more; } -template -void -Zc4PostingHeader::read(bitcompression::DecodeContext64Base &decode_context, const Zc4PostingParams &params); - -template -void -Zc4PostingHeader::read(bitcompression::DecodeContext64Base &decode_context, const Zc4PostingParams &params); - - } diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.h b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.h index 7382f59d176..d4032864e16 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.h +++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.h @@ -27,7 +27,6 @@ struct Zc4PostingHeader { Zc4PostingHeader(); - template void read(bitcompression::DecodeContext64Base &decode_context, const Zc4PostingParams &params); }; diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp index 25997faae24..f8fe31773c2 100644 --- a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp +++ b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp @@ -211,7 +211,7 @@ 
FakeZcFilterOcc::read_header(bool doFeatures, bool dynamicK, uint32_t min_skip_d decode_context.setPosition({ _compressed.first, 0 }); Zc4PostingParams params(min_skip_docs, min_chunk_docs, _docIdLimit, dynamicK, doFeatures); Zc4PostingHeader header; - header.read(decode_context, params); + header.read(decode_context, params); _docIdsSize = header._doc_ids_size; _l1SkipSize = header._l1_skip_size; _l2SkipSize = header._l2_skip_size; @@ -429,7 +429,7 @@ FakeFilterOccZCArrayIterator::initRange(uint32_t begin, uint32_t end) DecodeContext &d = _decodeContext; Zc4PostingParams params(1, 1000000000, _docIdLimit, true, false); Zc4PostingHeader header; - header.read(d, params); + header.read(d, params); assert((d.getBitOffset() & 7) == 0); const uint8_t *bcompr = d.getByteCompr(); _valI = bcompr; @@ -650,7 +650,7 @@ initRange(uint32_t begin, uint32_t end) DecodeContext &d = _decodeContext; Zc4PostingParams params(1, 1000000000, _docIdLimit, true, false); Zc4PostingHeader header; - header.read(d, params); + header.read(d, params); _lastDocId = header._last_doc_id; assert((d.getBitOffset() & 7) == 0); const uint8_t *bcompr = d.getByteCompr(); -- cgit v1.2.3