summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@yahoo-inc.com>2017-06-12 21:36:37 +0000
committerTor Egge <Tor.Egge@yahoo-inc.com>2017-06-13 13:23:25 +0000
commit1cbb534ecb5e8be7fcc46dc3ff42551a3343313d (patch)
tree49d1252bb3ee5303bf38f611472de4c8b44f4c9e
parent906e8a650e14870b8169f694ba783ad564cf4c60 (diff)
Introduce helper classes for skip info byte streams used by
iterators for disk based posting lists.
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp364
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h201
2 files changed, 296 insertions, 269 deletions
diff --git a/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp b/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp
index ef93b4653b7..16400294a38 100644
--- a/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.cpp
@@ -204,6 +204,21 @@ ZcRareWordPostingIterator<bigEndian>::readWordStart(uint32_t docIdLimit)
clearUnpacked();
}
+ZcPostingIteratorBase::ZcPostingIteratorBase(const TermFieldMatchDataArray &matchData, Position start, uint32_t docIdLimit) // forwards all arguments to ZcIteratorBase; own state starts out null/zero
+ : ZcIteratorBase(matchData, start, docIdLimit),
+ _valI(nullptr), // docid delta cursor; assigned by ZcPostingIterator::readWordStart()
+ _valIBase(nullptr), // start of docid deltas; assigned together with _valI
+ _featureSeekPos(0),
+ _l1(), // skip helpers default-construct to null pointers and zero positions
+ _l2(),
+ _l3(),
+ _l4(),
+ _chunk(), // ChunkSkip() sets _lastDocId to 0
+ _featuresSize(0),
+ _hasMore(false), // doChunkSkipSeek() will not move to another chunk until readWordStart() sets this
+ _chunkNo(0)
+{
+}
template <bool bigEndian>
ZcPostingIterator<bigEndian>::
@@ -212,44 +227,12 @@ ZcPostingIterator(uint32_t minChunkDocs,
const PostingListCounts &counts,
const search::fef::TermFieldMatchDataArray &matchData,
Position start, uint32_t docIdLimit)
- : ZcIteratorBase(matchData, start, docIdLimit),
- _valI(NULL),
- _lastDocId(0),
- _l1SkipDocId(0),
- _l2SkipDocId(0),
- _l3SkipDocId(0),
- _l4SkipDocId(0),
- _l1SkipDocIdPos(NULL),
- _l1SkipValI(NULL),
- _l1SkipFeaturePos(0),
- _valIBase(NULL),
- _l1SkipValIBase(NULL),
- _l2SkipDocIdPos(NULL),
- _l2SkipValI(NULL),
- _l2SkipFeaturePos(0),
- _l2SkipL1SkipPos(NULL),
- _l2SkipValIBase(NULL),
- _l3SkipDocIdPos(NULL),
- _l3SkipValI(NULL),
- _l3SkipFeaturePos(0),
- _l3SkipL1SkipPos(NULL),
- _l3SkipL2SkipPos(NULL),
- _l3SkipValIBase(NULL),
- _l4SkipDocIdPos(NULL),
- _l4SkipValI(NULL),
- _l4SkipFeaturePos(0),
- _l4SkipL1SkipPos(NULL),
- _l4SkipL2SkipPos(NULL),
- _l4SkipL3SkipPos(NULL),
+ : ZcPostingIteratorBase(matchData, start, docIdLimit),
_decodeContext(NULL),
_minChunkDocs(minChunkDocs),
_docIdK(0),
- _hasMore(false),
_dynamicK(dynamicK),
- _chunkNo(0),
_numDocs(0),
- _featuresSize(0),
- _featureSeekPos(0),
_featuresValI(NULL),
_featuresBitOffset(0),
_counts(counts)
@@ -266,7 +249,7 @@ ZcPostingIterator<bigEndian>::readWordStart(uint32_t docIdLimit)
uint32_t length;
uint64_t val64;
- uint32_t prevDocId = _hasMore ? _lastDocId : 0u;
+ uint32_t prevDocId = _hasMore ? _chunk._lastDocId : 0u;
UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_NUMDOCS, EC);
_numDocs = static_cast<uint32_t>(val64) + 1;
@@ -311,10 +294,10 @@ ZcPostingIterator<bigEndian>::readWordStart(uint32_t docIdLimit)
} else {
UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_ZCPOSTING_LASTDOCID, EC);
}
- _lastDocId = docIdLimit - 1 - val64;
+ _chunk._lastDocId = docIdLimit - 1 - val64;
if (_hasMore || hasMore) {
if (!_counts._segments.empty()) {
- assert(_lastDocId == _counts._segments[_chunkNo]._lastDoc);
+ assert(_chunk._lastDocId == _counts._segments[_chunkNo]._lastDoc);
}
}
@@ -328,316 +311,243 @@ ZcPostingIterator<bigEndian>::readWordStart(uint32_t docIdLimit)
assert((d.getBitOffset() & 7) == 0);
const uint8_t *bcompr = d.getByteCompr();
_valIBase = _valI = bcompr;
- _l1SkipDocIdPos = _l2SkipDocIdPos = bcompr;
- _l3SkipDocIdPos = _l4SkipDocIdPos = bcompr;
bcompr += docIdsSize;
- if (l1SkipSize != 0) {
- _l1SkipValIBase = _l1SkipValI = bcompr;
- _l2SkipL1SkipPos = _l3SkipL1SkipPos = _l4SkipL1SkipPos = bcompr;
- bcompr += l1SkipSize;
- } else {
- _l1SkipValIBase = _l1SkipValI = NULL;
- _l2SkipL1SkipPos = _l3SkipL1SkipPos = _l4SkipL1SkipPos = NULL;
- }
- if (l2SkipSize != 0) {
- _l2SkipValIBase = _l2SkipValI = bcompr;
- _l3SkipL2SkipPos = _l4SkipL2SkipPos = bcompr;
- bcompr += l2SkipSize;
- } else {
- _l2SkipValIBase = _l2SkipValI = NULL;
- _l3SkipL2SkipPos = _l4SkipL2SkipPos = NULL;
- }
- if (l3SkipSize != 0) {
- _l3SkipValIBase = _l3SkipValI = bcompr;
- _l4SkipL3SkipPos = bcompr;
- bcompr += l3SkipSize;
- } else {
- _l3SkipValIBase = _l3SkipValI = NULL;
- _l4SkipL3SkipPos = NULL;
- }
- if (l4SkipSize != 0) {
- _l4SkipValI = bcompr;
- bcompr += l4SkipSize;
- } else {
- _l4SkipValI = NULL;
- }
+ _l1.setup(prevDocId, _chunk._lastDocId, bcompr, l1SkipSize);
+ _l2.setup(prevDocId, _chunk._lastDocId, bcompr, l2SkipSize);
+ _l3.setup(prevDocId, _chunk._lastDocId, bcompr, l3SkipSize);
+ _l4.setup(prevDocId, _chunk._lastDocId, bcompr, l4SkipSize);
+ _l1.postSetup(*this);
+ _l2.postSetup(_l1);
+ _l3.postSetup(_l2);
+ _l4.postSetup(_l3);
d.setByteCompr(bcompr);
_hasMore = hasMore;
// Save information about start of next chunk
_featuresValI = d.getCompr();
_featuresBitOffset = d.getBitOffset();
- _l1SkipFeaturePos = _l2SkipFeaturePos = 0;
- _l3SkipFeaturePos = _l4SkipFeaturePos = 0;
_featureSeekPos = 0;
clearUnpacked();
// Unpack first docid delta in chunk
- uint32_t oDocId = prevDocId;
- ZCDECODE(_valI, oDocId += 1 +);
+ nextDocId(prevDocId);
#if DEBUG_ZCPOSTING_PRINTF
- printf("Decode docId=%d\n",
- oDocId);
+ printf("Decode docId=%d\n", getDocId());
#endif
- setDocId(oDocId);
- // Unpack first L1 Skip info docid delta
- if (_l1SkipValI != NULL) {
- _l1SkipDocId = prevDocId;
- ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +);
- } else
- _l1SkipDocId = _lastDocId;
- // Unpack first L2 skip info docid delta
- if (_l2SkipValI != NULL) {
- _l2SkipDocId = prevDocId;
- ZCDECODE(_l2SkipValI, _l2SkipDocId += 1 +);
- } else
- _l2SkipDocId = _lastDocId;
- // Unpack first L3 skip info docid delta
- if (_l3SkipValI != NULL) {
- _l3SkipDocId = prevDocId;
- ZCDECODE(_l3SkipValI, _l3SkipDocId += 1 +);
- } else
- _l3SkipDocId = _lastDocId;
- // Unpack first L4 skip info docid delta
- if (_l4SkipValI != NULL) {
- _l4SkipDocId = prevDocId;
- ZCDECODE(_l4SkipValI, _l4SkipDocId += 1 +);
- } else
- _l4SkipDocId = _lastDocId;
}
-template <bool bigEndian>
void
-ZcPostingIterator<bigEndian>::doChunkSkipSeek(uint32_t docId)
+ZcPostingIteratorBase::doChunkSkipSeek(uint32_t docId)
{
- while (docId > _lastDocId && _hasMore) {
+ while (docId > _chunk._lastDocId && _hasMore) {
// Skip to start of next chunk
_featureSeekPos = 0;
featureSeek(_featuresSize);
_chunkNo++;
readWordStart(getDocIdLimit()); // Read word start for next chunk
}
- if (docId > _lastDocId) {
- _l4SkipDocId = _l3SkipDocId = _l2SkipDocId = _l1SkipDocId = search::endDocId;
+ if (docId > _chunk._lastDocId) {
+ _l4._skipDocId = _l3._skipDocId = _l2._skipDocId = _l1._skipDocId = search::endDocId;
setAtEnd();
}
}
-template <bool bigEndian>
void
-ZcPostingIterator<bigEndian>::doL4SkipSeek(uint32_t docId)
+ZcPostingIteratorBase::doL4SkipSeek(uint32_t docId)
{
uint32_t lastL4SkipDocId;
- if (__builtin_expect(docId > _lastDocId, false)) {
+ if (__builtin_expect(docId > _chunk._lastDocId, false)) {
doChunkSkipSeek(docId);
- if (docId <= _l4SkipDocId)
+ if (docId <= _l4._skipDocId)
return;
}
do {
- lastL4SkipDocId = _l4SkipDocId;
- ZCDECODE(_l4SkipValI, _l4SkipDocIdPos += 1 +);
- ZCDECODE(_l4SkipValI, _l4SkipFeaturePos += 1 +);
- ZCDECODE(_l4SkipValI, _l4SkipL1SkipPos += 1 + );
- ZCDECODE(_l4SkipValI, _l4SkipL2SkipPos += 1 + );
- ZCDECODE(_l4SkipValI, _l4SkipL3SkipPos += 1 + );
- ZCDECODE(_l4SkipValI, _l4SkipDocId += 1 + );
+ lastL4SkipDocId = _l4._skipDocId;
+ _l4.decodeSkipEntry();
+ _l4.nextDocId();
#if DEBUG_ZCPOSTING_PRINTF
printf("L4Decode docId %d, docIdPos %d,"
"l1SkipPos %d, l2SkipPos %d, l3SkipPos %d, nextDocId %d\n",
lastL4SkipDocId,
- (int) (_l4SkipDocIdPos - _valIBase),
- (int) (_l4SkipL1SkipPos - _l1SkipValIBase),
- (int) (_l4SkipL2SkipPos - _l2SkipValIBase),
- (int) (_l4SkipL3SkipPos - _l3SkipValIBase),
- _l4SkipDocId);
+ (int) (_l4._docIdPos - _valIBase),
+ (int) (_l4._l1Pos - _l1._valIBase),
+ (int) (_l4._l2Pos - _l2._valIBase),
+ (int) (_l4._l3Pos - _l3._valIBase),
+ _l4._skipDocId);
#endif
- } while (docId > _l4SkipDocId);
- _valI = _l1SkipDocIdPos = _l2SkipDocIdPos = _l3SkipDocIdPos =
- _l4SkipDocIdPos;
- _l1SkipFeaturePos = _l2SkipFeaturePos = _l3SkipFeaturePos =
- _l4SkipFeaturePos;
- _l1SkipDocId = _l2SkipDocId = _l3SkipDocId = lastL4SkipDocId;
- _l1SkipValI = _l2SkipL1SkipPos = _l3SkipL1SkipPos = _l4SkipL1SkipPos;
- _l2SkipValI = _l3SkipL2SkipPos = _l4SkipL2SkipPos;
- _l3SkipValI = _l4SkipL3SkipPos;
- ZCDECODE(_valI, lastL4SkipDocId += 1 +);
- ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +);
- ZCDECODE(_l2SkipValI, _l2SkipDocId += 1 +);
- ZCDECODE(_l3SkipValI, _l3SkipDocId += 1 +);
+ } while (docId > _l4._skipDocId);
+ _valI = _l1._docIdPos = _l2._docIdPos = _l3._docIdPos =
+ _l4._docIdPos;
+ _l1._skipFeaturePos = _l2._skipFeaturePos = _l3._skipFeaturePos =
+ _l4._skipFeaturePos;
+ _l1._skipDocId = _l2._skipDocId = _l3._skipDocId = lastL4SkipDocId;
+ _l1._valI = _l2._l1Pos = _l3._l1Pos = _l4._l1Pos;
+ _l2._valI = _l3._l2Pos = _l4._l2Pos;
+ _l3._valI = _l4._l3Pos;
+ nextDocId(lastL4SkipDocId);
+ _l1.nextDocId();
+ _l2.nextDocId();
+ _l3.nextDocId();
#if DEBUG_ZCPOSTING_PRINTF
printf("L4Seek, docId %d docIdPos %d"
" L1SkipPos %d L2SkipPos %d L3SkipPos %d, nextDocId %d\n",
lastL4SkipDocId,
- (int) (_l4SkipDocIdPos - _valIBase),
- (int) (_l4SkipL1SkipPos - _l1SkipValIBase),
- (int) (_l4SkipL2SkipPos - _l2SkipValIBase),
- (int) (_l4SkipL3SkipPos - _l3SkipValIBase),
- _l4SkipDocId);
+ (int) (_l4._docIdPos - _valIBase),
+ (int) (_l4._l1Pos - _l1._valIBase),
+ (int) (_l4._l2Pos - _l2._valIBase),
+ (int) (_l4._l3Pos - _l3._valIBase),
+ _l4._skipDocId);
#endif
- setDocId(lastL4SkipDocId);
- _featureSeekPos = _l4SkipFeaturePos;
+ _featureSeekPos = _l4._skipFeaturePos;
clearUnpacked();
}
-template <bool bigEndian>
void
-ZcPostingIterator<bigEndian>::doL3SkipSeek(uint32_t docId)
+ZcPostingIteratorBase::doL3SkipSeek(uint32_t docId)
{
uint32_t lastL3SkipDocId;
- if (__builtin_expect(docId > _l4SkipDocId, false)) {
+ if (__builtin_expect(docId > _l4._skipDocId, false)) {
doL4SkipSeek(docId);
- if (docId <= _l3SkipDocId)
+ if (docId <= _l3._skipDocId)
return;
}
do {
- lastL3SkipDocId = _l3SkipDocId;
- ZCDECODE(_l3SkipValI, _l3SkipDocIdPos += 1 +);
- ZCDECODE(_l3SkipValI, _l3SkipFeaturePos += 1 +);
- ZCDECODE(_l3SkipValI, _l3SkipL1SkipPos += 1 + );
- ZCDECODE(_l3SkipValI, _l3SkipL2SkipPos += 1 + );
- ZCDECODE(_l3SkipValI, _l3SkipDocId += 1 + );
+ lastL3SkipDocId = _l3._skipDocId;
+ _l3.decodeSkipEntry();
+ _l3.nextDocId();
#if DEBUG_ZCPOSTING_PRINTF
printf("L3Decode docId %d, docIdPos %d,"
"l1SkipPos %d, l2SkipPos %d, nextDocId %d\n",
lastL3SkipDocId,
- (int) (_l3SkipDocIdPos - _valIBase),
- (int) (_l3SkipL1SkipPos - _l1SkipValIBase),
- (int) (_l3SkipL2SkipPos - _l2SkipValIBase),
- _l3SkipDocId);
+ (int) (_l3._docIdPos - _valIBase),
+ (int) (_l3._l1Pos - _l1._valIBase),
+ (int) (_l3._l2Pos - _l2._valIBase),
+ _l3._skipDocId);
#endif
- } while (docId > _l3SkipDocId);
- _valI = _l1SkipDocIdPos = _l2SkipDocIdPos = _l3SkipDocIdPos;
- _l1SkipFeaturePos = _l2SkipFeaturePos = _l3SkipFeaturePos;
- _l1SkipDocId = _l2SkipDocId = lastL3SkipDocId;
- _l1SkipValI = _l2SkipL1SkipPos = _l3SkipL1SkipPos;
- _l2SkipValI = _l3SkipL2SkipPos;
- ZCDECODE(_valI, lastL3SkipDocId += 1 +);
- ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +);
- ZCDECODE(_l2SkipValI, _l2SkipDocId += 1 +);
+ } while (docId > _l3._skipDocId);
+ _valI = _l1._docIdPos = _l2._docIdPos = _l3._docIdPos;
+ _l1._skipFeaturePos = _l2._skipFeaturePos = _l3._skipFeaturePos;
+ _l1._skipDocId = _l2._skipDocId = lastL3SkipDocId;
+ _l1._valI = _l2._l1Pos = _l3._l1Pos;
+ _l2._valI = _l3._l2Pos;
+ nextDocId(lastL3SkipDocId);
+ _l1.nextDocId();
+ _l2.nextDocId();
#if DEBUG_ZCPOSTING_PRINTF
printf("L3Seek, docId %d docIdPos %d"
" L1SkipPos %d L2SkipPos %d, nextDocId %d\n",
lastL3SkipDocId,
- (int) (_l3SkipDocIdPos - _valIBase),
- (int) (_l3SkipL1SkipPos - _l1SkipValIBase),
- (int) (_l3SkipL2SkipPos - _l2SkipValIBase),
- _l3SkipDocId);
+ (int) (_l3._docIdPos - _valIBase),
+ (int) (_l3._l1Pos - _l1._valIBase),
+ (int) (_l3._l2Pos - _l2._valIBase),
+ _l3._skipDocId);
#endif
- setDocId(lastL3SkipDocId);
- _featureSeekPos = _l3SkipFeaturePos;
+ _featureSeekPos = _l3._skipFeaturePos;
clearUnpacked();
}
-template <bool bigEndian>
void
-ZcPostingIterator<bigEndian>::doL2SkipSeek(uint32_t docId)
+ZcPostingIteratorBase::doL2SkipSeek(uint32_t docId)
{
uint32_t lastL2SkipDocId;
- if (__builtin_expect(docId > _l3SkipDocId, false)) {
+ if (__builtin_expect(docId > _l3._skipDocId, false)) {
doL3SkipSeek(docId);
- if (docId <= _l2SkipDocId)
+ if (docId <= _l2._skipDocId)
return;
}
do {
- lastL2SkipDocId = _l2SkipDocId;
- ZCDECODE(_l2SkipValI, _l2SkipDocIdPos += 1 +);
- ZCDECODE(_l2SkipValI, _l2SkipFeaturePos += 1 +);
- ZCDECODE(_l2SkipValI, _l2SkipL1SkipPos += 1 + );
- ZCDECODE(_l2SkipValI, _l2SkipDocId += 1 + );
+ lastL2SkipDocId = _l2._skipDocId;
+ _l2.decodeSkipEntry();
+ _l2.nextDocId();
#if DEBUG_ZCPOSTING_PRINTF
printf("L2Decode docId %d, docIdPos %d, l1SkipPos %d, nextDocId %d\n",
lastL2SkipDocId,
- (int) (_l2SkipDocIdPos - _valIBase),
- (int) (_l2SkipL1SkipPos - _l1SkipValIBase),
- _l2SkipDocId);
+ (int) (_l2._docIdPos - _valIBase),
+ (int) (_l2._l1Pos - _l1._valIBase),
+ _l2._skipDocId);
#endif
- } while (docId > _l2SkipDocId);
- _valI = _l1SkipDocIdPos = _l2SkipDocIdPos;
- _l1SkipFeaturePos = _l2SkipFeaturePos;
- _l1SkipDocId = lastL2SkipDocId;
- _l1SkipValI = _l2SkipL1SkipPos;
- ZCDECODE(_valI, lastL2SkipDocId += 1 +);
- ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +);
+ } while (docId > _l2._skipDocId);
+ _valI = _l1._docIdPos = _l2._docIdPos;
+ _l1._skipFeaturePos = _l2._skipFeaturePos;
+ _l1._skipDocId = lastL2SkipDocId;
+ _l1._valI = _l2._l1Pos;
+ nextDocId(lastL2SkipDocId);
+ _l1.nextDocId();
#if DEBUG_ZCPOSTING_PRINTF
printf("L2Seek, docId %d docIdPos %d L1SkipPos %d, nextDocId %d\n",
lastL2SkipDocId,
- (int) (_l2SkipDocIdPos - _valIBase),
- (int) (_l2SkipL1SkipPos - _l1SkipValIBase),
- _l2SkipDocId);
+ (int) (_l2._docIdPos - _valIBase),
+ (int) (_l2._l1Pos - _l1._valIBase),
+ _l2._skipDocId);
#endif
- setDocId(lastL2SkipDocId);
- _featureSeekPos = _l2SkipFeaturePos;
+ _featureSeekPos = _l2._skipFeaturePos;
clearUnpacked();
}
-template <bool bigEndian>
void
-ZcPostingIterator<bigEndian>::doL1SkipSeek(uint32_t docId)
+ZcPostingIteratorBase::doL1SkipSeek(uint32_t docId)
{
uint32_t lastL1SkipDocId;
- if (__builtin_expect(docId > _l2SkipDocId, false)) {
+ if (__builtin_expect(docId > _l2._skipDocId, false)) {
doL2SkipSeek(docId);
- if (docId <= _l1SkipDocId)
+ if (docId <= _l1._skipDocId)
return;
}
do {
- lastL1SkipDocId = _l1SkipDocId;
- ZCDECODE(_l1SkipValI, _l1SkipDocIdPos += 1 +);
- ZCDECODE(_l1SkipValI, _l1SkipFeaturePos += 1 +);
- ZCDECODE(_l1SkipValI, _l1SkipDocId += 1 +);
+ lastL1SkipDocId = _l1._skipDocId;
+ _l1.decodeSkipEntry();
+ _l1.nextDocId();
#if DEBUG_ZCPOSTING_PRINTF
printf("L1Decode docId %d, docIdPos %d, L1SkipPos %d, nextDocId %d\n",
lastL1SkipDocId,
- (int) (_l1SkipDocIdPos - _valIBase),
- (int) (_l1SkipValI - _l1SkipValIBase),
- _l1SkipDocId);
+ (int) (_l1._docIdPos - _valIBase),
+ (int) (_l1._valI - _l1._valIBase),
+ _l1._skipDocId);
#endif
- } while (docId > _l1SkipDocId);
- _valI = _l1SkipDocIdPos;
- ZCDECODE(_valI, lastL1SkipDocId += 1 +);
- setDocId(lastL1SkipDocId);
+ } while (docId > _l1._skipDocId);
+ _valI = _l1._docIdPos;
+ nextDocId(lastL1SkipDocId);
#if DEBUG_ZCPOSTING_PRINTF
printf("L1SkipSeek, docId %d docIdPos %d, nextDocId %d\n",
lastL1SkipDocId,
- (int) (_l1SkipDocIdPos - _valIBase),
- _l1SkipDocId);
+ (int) (_l1._docIdPos - _valIBase),
+ _l1._skipDocId);
#endif
- _featureSeekPos = _l1SkipFeaturePos;
+ _featureSeekPos = _l1._skipFeaturePos;
clearUnpacked();
}
-template <bool bigEndian>
void
-ZcPostingIterator<bigEndian>::doSeek(uint32_t docId)
+ZcPostingIteratorBase::doSeek(uint32_t docId)
{
- if (docId > _l1SkipDocId) {
+ if (docId > _l1._skipDocId) {
doL1SkipSeek(docId);
}
uint32_t oDocId = getDocId();
#if DEBUG_ZCPOSTING_ASSERT
- assert(oDocId <= _l1SkipDocId);
- assert(docId <= _l1SkipDocId);
- assert(oDocId <= _l2SkipDocId);
- assert(docId <= _l2SkipDocId);
- assert(oDocId <= _l3SkipDocId);
- assert(docId <= _l3SkipDocId);
- assert(oDocId <= _l4SkipDocId);
- assert(docId <= _l4SkipDocId);
+ assert(oDocId <= _l1._skipDocId);
+ assert(docId <= _l1._skipDocId);
+ assert(oDocId <= _l2._skipDocId);
+ assert(docId <= _l2._skipDocId);
+ assert(oDocId <= _l3._skipDocId);
+ assert(docId <= _l3._skipDocId);
+ assert(oDocId <= _l4._skipDocId);
+ assert(docId <= _l4._skipDocId);
#endif
const uint8_t *oCompr = _valI;
while (__builtin_expect(oDocId < docId, true)) {
#if DEBUG_ZCPOSTING_ASSERT
- assert(oDocId <= _l1SkipDocId);
- assert(oDocId <= _l2SkipDocId);
- assert(oDocId <= _l3SkipDocId);
- assert(oDocId <= _l4SkipDocId);
+ assert(oDocId <= _l1._skipDocId);
+ assert(oDocId <= _l2._skipDocId);
+ assert(oDocId <= _l3._skipDocId);
+ assert(oDocId <= _l4._skipDocId);
#endif
ZCDECODE(oCompr, oDocId += 1 +);
#if DEBUG_ZCPOSTING_PRINTF
@@ -676,7 +586,7 @@ void ZcPostingIterator<bigEndian>::rewind(Position start)
{
_decodeContext->setPosition(start);
_hasMore = false;
- _lastDocId = 0;
+ _chunk._lastDocId = 0;
_chunkNo = 0;
}
diff --git a/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h b/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h
index 874d3ca4cba..407c6402c63 100644
--- a/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h
+++ b/searchlib/src/vespa/searchlib/diskindex/zcpostingiterators.h
@@ -103,43 +103,170 @@ public:
};
+class ZcPostingIteratorBase : public ZcIteratorBase // non-template base: docid delta cursor, L1-L4 skip state and the skip-seek methods
+{
+protected:
+ const uint8_t *_valI; // docid deltas: current decode position, advanced by nextDocId(prevDocId)
+ const uint8_t *_valIBase; // start of docid deltas
+ uint64_t _featureSeekPos; // reset to 0 at chunk start; after a skip seek it holds that level's _skipFeaturePos
+
+ // Helper class for L1 skip info
+ class L1Skip
+ {
+ public:
+ uint32_t _skipDocId; // doc id bound compared against the seek target in the do*SkipSeek() methods
+ const uint8_t *_valI; // read position in this level's skip stream (nullptr when the level is empty)
+ const uint8_t *_docIdPos; // position in the docid delta stream, advanced by decodeSkipEntry()
+ uint64_t _skipFeaturePos; // accumulated feature position, advanced by decodeSkipEntry()
+ const uint8_t *_valIBase; // start of this level's skip stream (nullptr when the level is empty)
+
+ L1Skip()
+ : _skipDocId(0),
+ _valI(nullptr),
+ _docIdPos(nullptr),
+ _skipFeaturePos(0),
+ _valIBase(nullptr)
+ {
+ }
+
+ void setup(uint32_t prevDocId, uint32_t lastDocId, const uint8_t *&bcompr, uint32_t skipSize) { // bind this level to skipSize bytes at bcompr (bcompr is advanced past them) and prime _skipDocId; inherited unchanged by L2Skip..L4Skip
+ if (skipSize != 0) {
+ _valI = _valIBase = bcompr;
+ bcompr += skipSize; // caller's pointer now refers to whatever follows this level's stream
+ _skipDocId = prevDocId + 1; // the "+ 1" matches the "+= 1 +" form used by nextDocId()
+ ZCDECODE(_valI, _skipDocId +=); // presumably appends the decoded value to "_skipDocId +=" — ZCDECODE is defined elsewhere, confirm
+ } else {
+ _valI = _valIBase = nullptr; // empty level: there is no stream to read
+ _skipDocId = lastDocId; // bound becomes the lastDocId argument
+ }
+ _skipFeaturePos = 0;
+ }
+ void postSetup(const ZcPostingIteratorBase &l0) { // start _docIdPos at the beginning of l0's docid deltas
+ _docIdPos = l0._valIBase;
+ }
+ void decodeSkipEntry() { // read one entry's position deltas from _valI: docid stream first, then features
+ ZCDECODE(_valI, _docIdPos += 1 +);
+ ZCDECODE(_valI, _skipFeaturePos += 1 +);
+ }
+ void nextDocId() { // read the next value from _valI and advance _skipDocId by 1 plus that value (per the ZCDECODE usage)
+ ZCDECODE(_valI, _skipDocId += 1 +);
+ }
+ };
+
+ // Helper class for L2 skip info
+ class L2Skip : public L1Skip
+ {
+ public:
+ const uint8_t *_l1Pos; // position in the L1 skip stream, advanced by decodeSkipEntry()
+
+ L2Skip()
+ : L1Skip(),
+ _l1Pos(nullptr)
+ {
+ }
+
+ void postSetup(const L1Skip &l1) { // reads l1._docIdPos, so l1.postSetup() must already have run
+ _docIdPos = l1._docIdPos;
+ _l1Pos = l1._valIBase; // nullptr when the L1 level is empty
+ }
+ void decodeSkipEntry() { // hides (does not override) L1Skip::decodeSkipEntry; neither is virtual
+ L1Skip::decodeSkipEntry(); // docid and feature deltas are read first
+ ZCDECODE(_valI, _l1Pos += 1 + );
+ }
+ };
+
+ // Helper class for L3 skip info
+ class L3Skip : public L2Skip
+ {
+ public:
+ const uint8_t *_l2Pos; // position in the L2 skip stream, advanced by decodeSkipEntry()
+
+ L3Skip()
+ : L2Skip(),
+ _l2Pos(nullptr)
+ {
+ }
+
+ void postSetup(const L2Skip &l2) { // copies l2's already set up positions, so l2.postSetup() must already have run
+ _docIdPos = l2._docIdPos;
+ _l1Pos = l2._l1Pos;
+ _l2Pos = l2._valIBase; // nullptr when the L2 level is empty
+ }
+ void decodeSkipEntry() { // hides L2Skip::decodeSkipEntry; reads the L2 fields, then the L2-stream position delta
+ L2Skip::decodeSkipEntry();
+ ZCDECODE(_valI, _l2Pos += 1 + );
+ }
+ };
+
+ // Helper class for L4 skip info
+ class L4Skip : public L3Skip
+ {
+ public:
+ const uint8_t *_l3Pos; // position in the L3 skip stream, advanced by decodeSkipEntry()
+
+ L4Skip()
+ : L3Skip(),
+ _l3Pos(nullptr)
+ {
+ }
+
+ void postSetup(const L3Skip &l3) { // copies l3's already set up positions, so l3.postSetup() must already have run
+ _docIdPos = l3._docIdPos;
+ _l1Pos = l3._l1Pos;
+ _l2Pos = l3._l2Pos;
+ _l3Pos = l3._valIBase; // nullptr when the L3 level is empty
+ }
+
+ void decodeSkipEntry() { // hides L3Skip::decodeSkipEntry; reads the L3 fields, then the L3-stream position delta
+ L3Skip::decodeSkipEntry();
+ ZCDECODE(_valI, _l3Pos += 1 + );
+ }
+ };
+
+ // Helper class for chunk skip info
+ class ChunkSkip {
+ public:
+ uint32_t _lastDocId; // assigned in ZcPostingIterator::readWordStart(); targets above it trigger doChunkSkipSeek()
+
+ ChunkSkip()
+ : _lastDocId(0)
+ {
+ }
+ };
+
+ L1Skip _l1;
+ L2Skip _l2;
+ L3Skip _l3;
+ L4Skip _l4;
+ ChunkSkip _chunk;
+ uint64_t _featuresSize; // offset handed to featureSeek() when doChunkSkipSeek() moves to the next chunk
+ bool _hasMore; // doChunkSkipSeek() only moves to a following chunk while this is set
+ uint32_t _chunkNo; // incremented once per chunk skipped in doChunkSkipSeek()
+
+ void nextDocId(uint32_t prevDocId) { // read one value at _valI and call setDocId() with prevDocId + 1 plus that value (per the ZCDECODE usage)
+ uint32_t docId = prevDocId + 1;
+ ZCDECODE(_valI, docId +=);
+ setDocId(docId);
+ }
+ virtual void featureSeek(uint64_t offset) = 0; // implemented by ZcPostingIterator<bigEndian>
+ VESPA_DLL_LOCAL void doChunkSkipSeek(uint32_t docId); // moves to following chunks while docId > _chunk._lastDocId; calls setAtEnd() when none is left
+ VESPA_DLL_LOCAL void doL4SkipSeek(uint32_t docId); // falls back to doChunkSkipSeek() when docId > _chunk._lastDocId
+ VESPA_DLL_LOCAL void doL3SkipSeek(uint32_t docId); // falls back to doL4SkipSeek() when docId > _l4._skipDocId
+ VESPA_DLL_LOCAL void doL2SkipSeek(uint32_t docId); // falls back to doL3SkipSeek() when docId > _l3._skipDocId
+ VESPA_DLL_LOCAL void doL1SkipSeek(uint32_t docId); // falls back to doL2SkipSeek() when docId > _l2._skipDocId
+ void doSeek(uint32_t docId) override; // calls doL1SkipSeek() first when docId > _l1._skipDocId, then decodes docid deltas
+public:
+ ZcPostingIteratorBase(const fef::TermFieldMatchDataArray &matchData, Position start, uint32_t docIdLimit); // arguments are forwarded to ZcIteratorBase
+};
+
template <bool bigEndian>
-class ZcPostingIterator : public ZcIteratorBase
+class ZcPostingIterator : public ZcPostingIteratorBase
{
private:
- typedef ZcIteratorBase ParentClass;
+ typedef ZcPostingIteratorBase ParentClass;
using ParentClass::getDocId;
public:
- // Pointer to compressed data
- const uint8_t *_valI;
- uint32_t _lastDocId;
- uint32_t _l1SkipDocId;
- uint32_t _l2SkipDocId;
- uint32_t _l3SkipDocId;
- uint32_t _l4SkipDocId;
- const uint8_t *_l1SkipDocIdPos;
- const uint8_t *_l1SkipValI;
- uint64_t _l1SkipFeaturePos;
- const uint8_t *_valIBase;
- const uint8_t *_l1SkipValIBase;
- const uint8_t *_l2SkipDocIdPos;
- const uint8_t *_l2SkipValI;
- uint64_t _l2SkipFeaturePos;
- const uint8_t *_l2SkipL1SkipPos;
- const uint8_t *_l2SkipValIBase;
- const uint8_t *_l3SkipDocIdPos;
- const uint8_t *_l3SkipValI;
- uint64_t _l3SkipFeaturePos;
- const uint8_t *_l3SkipL1SkipPos;
- const uint8_t *_l3SkipL2SkipPos;
- const uint8_t *_l3SkipValIBase;
- const uint8_t *_l4SkipDocIdPos;
- const uint8_t *_l4SkipValI;
- uint64_t _l4SkipFeaturePos;
- const uint8_t *_l4SkipL1SkipPos;
- const uint8_t *_l4SkipL2SkipPos;
- const uint8_t *_l4SkipL3SkipPos;
typedef bitcompression::FeatureDecodeContext<bigEndian> DecodeContextBase;
typedef index::DocIdAndFeatures DocIdAndFeatures;
@@ -147,12 +274,8 @@ public:
DecodeContextBase *_decodeContext;
uint32_t _minChunkDocs;
uint32_t _docIdK;
- bool _hasMore;
bool _dynamicK;
- uint32_t _chunkNo;
uint32_t _numDocs;
- uint64_t _featuresSize;
- uint64_t _featureSeekPos;
// Start of current features block, needed for seeks
const uint64_t *_featuresValI;
int _featuresBitOffset;
@@ -167,16 +290,10 @@ public:
void doUnpack(uint32_t docId) override;
- void doSeek(uint32_t docId) override;
void readWordStart(uint32_t docIdLimit) override;
void rewind(Position start) override;
- VESPA_DLL_LOCAL void doChunkSkipSeek(uint32_t docId);
- VESPA_DLL_LOCAL void doL4SkipSeek(uint32_t docId);
- VESPA_DLL_LOCAL void doL3SkipSeek(uint32_t docId);
- VESPA_DLL_LOCAL void doL2SkipSeek(uint32_t docId);
- VESPA_DLL_LOCAL void doL1SkipSeek(uint32_t docId);
- void featureSeek(uint64_t offset) {
+ void featureSeek(uint64_t offset) override { // offset is in bits: word index (/ 64) is added to _featuresValI, the remainder (& 63) goes to setupBits()
_decodeContext->_valI = _featuresValI + (_featuresBitOffset + offset) / 64;
_decodeContext->setupBits((_featuresBitOffset + offset) & 63);
}