summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@broadpark.no> 2019-04-30 13:50:31 +0200
committer Tor Egge <Tor.Egge@broadpark.no> 2019-04-30 14:40:48 +0200
commit b9b19ad3198d8e071130b11fd582f6edc5d17ea8 (patch)
tree 53a4b65d71c89f13748df0242f67943faab5e3e4 /searchlib
parent 4ff8b123a23d91c9838979317ae434d16ef0054e (diff)
Simplify reading of posting list header.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/vespa/searchlib/bitcompression/compression.h 58
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.cpp 77
-rw-r--r-- searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.h 1
-rw-r--r-- searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp 6
4 files changed, 35 insertions, 107 deletions
diff --git a/searchlib/src/vespa/searchlib/bitcompression/compression.h b/searchlib/src/vespa/searchlib/bitcompression/compression.h
index 33b5e0b0975..b9166f675aa 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/compression.h
+++ b/searchlib/src/vespa/searchlib/bitcompression/compression.h
@@ -1141,7 +1141,7 @@ public:
DecodeContext64Base()
: search::ComprFileDecodeContext(),
_valI(nullptr),
- _valE(nullptr),
+ _valE(static_cast<const uint64_t *>(nullptr) - 1),
_realValE(nullptr),
_val(0),
_cacheInt(0),
@@ -1298,6 +1298,8 @@ public:
}
}
virtual uint64_t readBits(uint32_t length) = 0;
+ virtual void align(uint32_t alignment) = 0;
+ virtual uint64_t decode_exp_golomb(int k) = 0;
void readBytes(uint8_t *buf, size_t len);
uint32_t readHeader(vespalib::GenericHeader &header, int64_t fileSize);
};
@@ -1321,7 +1323,7 @@ public:
DecodeContext64(const uint64_t *compr,
int bitOffset)
: DecodeContext64Base(compr + 1,
- nullptr,
+ static_cast<const uint64_t *>(nullptr) - 1,
nullptr,
0,
EC::bswap(*compr),
@@ -1407,10 +1409,12 @@ public:
};
void skipBits(int bits) override {
+ readComprBufferIfNeeded();
while (bits >= 64) {
_val = 0;
ReadBits(64, _val, _cacheInt, _preRead, _valI);
bits -= 64;
+ readComprBufferIfNeeded();
}
if (bits > 0) {
if (bigEndian) {
@@ -1419,6 +1423,7 @@ public:
_val >>= bits;
}
ReadBits(bits, _val, _cacheInt, _preRead, _valI);
+ readComprBufferIfNeeded();
}
}
@@ -1474,20 +1479,32 @@ public:
_val = 0;
}
UC64_READBITS(_val, _valI, _preRead, _cacheInt, EC);
+ readComprBufferIfNeeded();
return res;
}
+ uint64_t decode_exp_golomb(int k) override {
+ uint32_t length;
+ uint64_t val64;
+ UC64_DECODEEXPGOLOMB(_val, _valI, _preRead, _cacheInt, k, EC);
+ readComprBufferIfNeeded();
+ return val64;
+ }
+
void
- align(uint32_t alignment)
+ align(uint32_t alignment) override
{
+ readComprBufferIfNeeded();
uint64_t pad = (- getReadOffset()) & (alignment - 1);
while (pad > 64) {
(void) readBits(64);
pad -= 64;
+ readComprBufferIfNeeded();
}
if (pad > 0) {
(void) readBits(pad);
}
+ readComprBufferIfNeeded();
}
/*
@@ -1576,41 +1593,6 @@ public:
*/
virtual void
getParams(PostingListParams &params) const;
-
- void skipBits(int bits) override {
- readComprBufferIfNeeded();
- while (bits >= 64) {
- _val = 0;
- ReadBits(64, _val, _cacheInt, _preRead, _valI);
- bits -= 64;
- readComprBufferIfNeeded();
- }
- if (bits > 0) {
- if (bigEndian) {
- _val <<= bits;
- } else {
- _val >>= bits;
- }
- ReadBits(bits, _val, _cacheInt, _preRead, _valI);
- readComprBufferIfNeeded();
- }
- }
-
- void
- align(uint32_t alignment)
- {
- readComprBufferIfNeeded();
- uint64_t pad = (- getReadOffset()) & (alignment - 1);
- while (pad > 64) {
- (void) readBits(64);
- pad -= 64;
- readComprBufferIfNeeded();
- }
- if (pad > 0) {
- (void) readBits(pad);
- }
- readComprBufferIfNeeded();
- }
};
typedef FeatureDecodeContext<true> FeatureDecodeContextBE;
diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.cpp b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.cpp
index 5288d054ef0..2149a44f5ce 100644
--- a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.cpp
@@ -20,35 +20,13 @@ Zc4PostingHeader::Zc4PostingHeader()
{
}
-template <bool bigEndian>
void
Zc4PostingHeader::read(bitcompression::DecodeContext64Base &decode_context, const Zc4PostingParams &params)
{
- using EC = bitcompression::FeatureEncodeContext<bigEndian>;
- UC64_DECODECONTEXT_CONSTRUCTOR(o, decode_context._);
- uint32_t length;
- uint64_t val64;
-
- UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC);
- _num_docs = static_cast<uint32_t>(val64) + 1;
- bool has_more = false;
- if (__builtin_expect(_num_docs >= params._min_chunk_docs, false)) {
- if (bigEndian) {
- has_more = static_cast<int64_t>(oVal) < 0;
- oVal <<= 1;
- length = 1;
- } else {
- has_more = (oVal & 1) != 0;
- oVal >>= 1;
- length = 1;
- }
- UC64_READBITS_NS(o, EC);
- }
- if (params._dynamic_k) {
- _doc_id_k = EC::calcDocIdK((_has_more || has_more) ? 1 : _num_docs, params._doc_id_limit);
- } else {
- _doc_id_k = K_VALUE_ZCPOSTING_LASTDOCID;
- }
+ using EC = bitcompression::FeatureEncodeContext<true>;
+ _num_docs = decode_context.decode_exp_golomb(K_VALUE_ZCPOSTING_NUMDOCS) + 1;
+ bool has_more = (_num_docs >= params._min_chunk_docs) ? (decode_context.readBits(1) != 0) : false;
+ _doc_id_k = params._dynamic_k ? EC::calcDocIdK((_has_more || has_more) ? 1 : _num_docs, params._doc_id_limit) : K_VALUE_ZCPOSTING_LASTDOCID;
if (_num_docs < params._min_skip_docs && !_has_more) {
_doc_ids_size = 0;
_l1_skip_size = 0;
@@ -58,47 +36,16 @@ Zc4PostingHeader::read(bitcompression::DecodeContext64Base &decode_context, cons
_features_size = 0;
_last_doc_id = 0;
} else {
- UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_DOCIDSSIZE, EC);
- _doc_ids_size = val64 + 1;
- UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L1SKIPSIZE, EC);
- _l1_skip_size = val64;
- if (_l1_skip_size != 0) {
- UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L2SKIPSIZE, EC);
- _l2_skip_size = val64;
- }
- if (_l2_skip_size != 0) {
- UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L3SKIPSIZE, EC);
- _l3_skip_size = val64;
- }
- if (_l3_skip_size != 0) {
- UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_L4SKIPSIZE, EC);
- _l4_skip_size = val64;
- }
- if (params._encode_features) {
- UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_FEATURESSIZE, EC);
- _features_size = val64;
- } else {
- _features_size = 0;
- }
- UC64_DECODEEXPGOLOMB_NS(o, _doc_id_k, EC);
- _last_doc_id = params._doc_id_limit - 1 - val64;
- uint64_t bytePad = oPreRead & 7;
- if (bytePad > 0) {
- length = bytePad;
- UC64_READBITS_NS(o, EC);
- }
+ _doc_ids_size = decode_context.decode_exp_golomb(K_VALUE_ZCPOSTING_DOCIDSSIZE) + 1;
+ _l1_skip_size = decode_context.decode_exp_golomb(K_VALUE_ZCPOSTING_L1SKIPSIZE);
+ _l2_skip_size = (_l1_skip_size != 0) ? decode_context.decode_exp_golomb(K_VALUE_ZCPOSTING_L2SKIPSIZE) : 0;
+ _l3_skip_size = (_l2_skip_size != 0) ? decode_context.decode_exp_golomb(K_VALUE_ZCPOSTING_L3SKIPSIZE) : 0;
+ _l4_skip_size = (_l3_skip_size != 0) ? decode_context.decode_exp_golomb(K_VALUE_ZCPOSTING_L4SKIPSIZE) : 0;
+ _features_size = params._encode_features ? decode_context.decode_exp_golomb(K_VALUE_ZCPOSTING_FEATURESSIZE) : 0;
+ _last_doc_id = params._doc_id_limit - 1 - decode_context.decode_exp_golomb(_doc_id_k);
+ decode_context.align(8);
}
- UC64_DECODECONTEXT_STORE(o, decode_context._);
_has_more = has_more;
}
-template
-void
-Zc4PostingHeader::read<false>(bitcompression::DecodeContext64Base &decode_context, const Zc4PostingParams &params);
-
-template
-void
-Zc4PostingHeader::read<true>(bitcompression::DecodeContext64Base &decode_context, const Zc4PostingParams &params);
-
-
}
diff --git a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.h b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.h
index 7382f59d176..d4032864e16 100644
--- a/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.h
+++ b/searchlib/src/vespa/searchlib/diskindex/zc4_posting_header.h
@@ -27,7 +27,6 @@ struct Zc4PostingHeader {
Zc4PostingHeader();
- template <bool bigEndian>
void
read(bitcompression::DecodeContext64Base &decode_context, const Zc4PostingParams &params);
};
diff --git a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp
index 25997faae24..f8fe31773c2 100644
--- a/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp
+++ b/searchlib/src/vespa/searchlib/test/fakedata/fakezcfilterocc.cpp
@@ -211,7 +211,7 @@ FakeZcFilterOcc::read_header(bool doFeatures, bool dynamicK, uint32_t min_skip_d
decode_context.setPosition({ _compressed.first, 0 });
Zc4PostingParams params(min_skip_docs, min_chunk_docs, _docIdLimit, dynamicK, doFeatures);
Zc4PostingHeader header;
- header.read<bigEndian>(decode_context, params);
+ header.read(decode_context, params);
_docIdsSize = header._doc_ids_size;
_l1SkipSize = header._l1_skip_size;
_l2SkipSize = header._l2_skip_size;
@@ -429,7 +429,7 @@ FakeFilterOccZCArrayIterator::initRange(uint32_t begin, uint32_t end)
DecodeContext &d = _decodeContext;
Zc4PostingParams params(1, 1000000000, _docIdLimit, true, false);
Zc4PostingHeader header;
- header.read<true>(d, params);
+ header.read(d, params);
assert((d.getBitOffset() & 7) == 0);
const uint8_t *bcompr = d.getByteCompr();
_valI = bcompr;
@@ -650,7 +650,7 @@ initRange(uint32_t begin, uint32_t end)
DecodeContext &d = _decodeContext;
Zc4PostingParams params(1, 1000000000, _docIdLimit, true, false);
Zc4PostingHeader header;
- header.read<true>(d, params);
+ header.read(d, params);
_lastDocId = header._last_doc_id;
assert((d.getBitOffset() & 7) == 0);
const uint8_t *bcompr = d.getByteCompr();