summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGeir Storli <geirstorli@yahoo.no>2018-04-16 11:18:33 +0200
committerGitHub <noreply@github.com>2018-04-16 11:18:33 +0200
commit0151c5266ff31979ab79b3d13dfac96ca0a2676b (patch)
treea5fb3481aacce34930a3f69ac65b43c46dc060ec
parent8765c58c09b2699f8414d51f9bdec93aecf8dff1 (diff)
parentee859d3effc13d7b3e5e4a412930266a9a114ab9 (diff)
Merge pull request #5574 from vespa-engine/toregge/validate-pagedict4-pages-during-sequential-read
Validate pagedict4 pages during sequential read
-rw-r--r--searchlib/src/tests/diskindex/pagedict4/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/diskindex/pagedict4/pagedict4_hugeword_cornercase_test.cpp190
-rw-r--r--searchlib/src/tests/diskindex/pagedict4/pagedict4test.cpp207
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp435
-rw-r--r--searchlib/src/vespa/searchlib/bitcompression/pagedict4.h109
-rw-r--r--searchlib/src/vespa/searchlib/test/diskindex/CMakeLists.txt5
-rw-r--r--searchlib/src/vespa/searchlib/test/diskindex/pagedict4_decoders.cpp20
-rw-r--r--searchlib/src/vespa/searchlib/test/diskindex/pagedict4_decoders.h24
-rw-r--r--searchlib/src/vespa/searchlib/test/diskindex/pagedict4_encoders.cpp20
-rw-r--r--searchlib/src/vespa/searchlib/test/diskindex/pagedict4_encoders.h24
-rw-r--r--searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_rand_reader.cpp73
-rw-r--r--searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_rand_reader.h38
-rw-r--r--searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_seq_reader.cpp31
-rw-r--r--searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_seq_reader.h34
-rw-r--r--searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_writer.cpp50
-rw-r--r--searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_writer.h52
16 files changed, 865 insertions, 456 deletions
diff --git a/searchlib/src/tests/diskindex/pagedict4/CMakeLists.txt b/searchlib/src/tests/diskindex/pagedict4/CMakeLists.txt
index 0ef5805c85a..eceb961bcfc 100644
--- a/searchlib/src/tests/diskindex/pagedict4/CMakeLists.txt
+++ b/searchlib/src/tests/diskindex/pagedict4/CMakeLists.txt
@@ -7,3 +7,12 @@ vespa_add_executable(searchlib_pagedict4_test_app TEST
searchlib
)
vespa_add_test(NAME searchlib_pagedict4_test_app COMMAND searchlib_pagedict4_test_app)
+
+vespa_add_executable(searchlib_pagedict4_hugeword_cornercase_test_app TEST
+ SOURCES
+ pagedict4_hugeword_cornercase_test.cpp
+ DEPENDS
+ searchlib_test
+ searchlib
+)
+vespa_add_test(NAME searchlib_pagedict4_hugeword_cornercase_test_app COMMAND searchlib_pagedict4_hugeword_cornercase_test_app)
diff --git a/searchlib/src/tests/diskindex/pagedict4/pagedict4_hugeword_cornercase_test.cpp b/searchlib/src/tests/diskindex/pagedict4/pagedict4_hugeword_cornercase_test.cpp
new file mode 100644
index 00000000000..400108e91b0
--- /dev/null
+++ b/searchlib/src/tests/diskindex/pagedict4/pagedict4_hugeword_cornercase_test.cpp
@@ -0,0 +1,190 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/bitcompression/compression.h>
+#include <vespa/searchlib/bitcompression/countcompression.h>
+#include <vespa/searchlib/bitcompression/pagedict4.h>
+#include <vespa/searchlib/test/diskindex/threelevelcountbuffers.h>
+#include <vespa/searchlib/test/diskindex/pagedict4_mem_writer.h>
+#include <vespa/searchlib/test/diskindex/pagedict4_mem_seq_reader.h>
+#include <vespa/searchlib/index/postinglistcounts.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP("pagedict4_hugeword_cornercase_test");
+
+using search::index::PostingListCounts;
+using search::ComprFileWriteContext;
+
+constexpr uint32_t minChunkDocs = 262144;
+constexpr uint32_t numWordIds = 65536;
+
+struct BitBuffer
+{
+ using EncodeContext = search::bitcompression::PostingListCountFileEncodeContext;
+
+ EncodeContext encodeCtx;
+ ComprFileWriteContext writeCtx;
+
+ BitBuffer()
+ : encodeCtx(),
+ writeCtx(encodeCtx)
+ {
+ encodeCtx._minChunkDocs = minChunkDocs;
+ encodeCtx._numWordIds = numWordIds;
+ writeCtx.allocComprBuf();
+ encodeCtx.setWriteContext(&writeCtx);
+ encodeCtx.setupWrite(writeCtx);
+ assert(encodeCtx.getWriteOffset() == 0);
+ }
+
+ void clear() { encodeCtx.setupWrite(writeCtx); }
+
+ uint64_t getSize(const PostingListCounts &counts)
+ {
+ clear();
+ encodeCtx.writeCounts(counts);
+ return encodeCtx.getWriteOffset();
+ }
+
+ ~BitBuffer() = default;
+};
+
+void addSegment(PostingListCounts &counts)
+{
+ PostingListCounts::Segment lastseg = counts._segments.back();
+ PostingListCounts::Segment fillseg;
+ fillseg._bitLength = 4000000;
+ fillseg._numDocs = minChunkDocs;
+ fillseg._lastDoc = minChunkDocs;
+ counts._bitLength += fillseg._bitLength;
+ counts._numDocs += fillseg._numDocs;
+ counts._segments.back() = fillseg;
+ uint32_t lastDoc = counts._segments.size() * fillseg._numDocs;
+ counts._segments.back()._lastDoc = lastDoc;
+ counts._segments.push_back(lastseg);
+ counts._segments.back()._lastDoc = lastDoc + lastseg._numDocs;
+}
+
+PostingListCounts makeBaseCounts()
+{
+ PostingListCounts counts;
+ PostingListCounts::Segment lastseg;
+ lastseg._bitLength = 100;
+ lastseg._numDocs = 10;
+ lastseg._lastDoc = 10;
+ counts._bitLength = lastseg._bitLength;
+ counts._numDocs = lastseg._numDocs;
+ counts._segments.push_back(lastseg);
+ addSegment(counts);
+ return counts;
+}
+
+PostingListCounts makeSegmentedCounts(uint32_t segments)
+{
+ PostingListCounts counts = makeBaseCounts();
+ while (counts._segments.size() < segments) {
+ addSegment(counts);
+ }
+ return counts;
+}
+
+uint32_t
+calcSegments(uint32_t maxLen)
+{
+ BitBuffer bb;
+ PostingListCounts counts = makeBaseCounts();
+ uint32_t len = bb.getSize(counts);
+ unsigned int i = 0;
+ while (len <= maxLen) {
+ addSegment(counts);
+ ++i;
+ len = bb.getSize(counts);
+ }
+ return counts._segments.size() - 1;
+}
+
+/*
+ * Calculate posting list counts that compresses to wantLen bits.
+ */
+PostingListCounts makeCounts(uint32_t wantLen)
+{
+ BitBuffer bb;
+ uint32_t segments = calcSegments(wantLen);
+ PostingListCounts counts = makeSegmentedCounts(segments);
+ PostingListCounts counts2 = makeSegmentedCounts(segments - 1);
+ uint32_t len = bb.getSize(counts);
+ uint32_t len2 = bb.getSize(counts2);
+ for (uint32_t i = 1; i + 2 < counts._segments.size(); ++i) {
+ counts._bitLength += counts._segments[0]._bitLength;
+ counts._segments[i]._bitLength += counts._segments[0]._bitLength;
+ counts2._bitLength += counts2._segments[0]._bitLength;
+ counts2._segments[i]._bitLength += counts2._segments[0]._bitLength;
+ len = bb.getSize(counts);
+ len2 = bb.getSize(counts2);
+ if (len == wantLen) {
+ return counts;
+ }
+ if (len2 == wantLen) {
+ return counts2;
+ }
+ }
+ LOG(info, "Could not calculate counts with wanted compressed length");
+ abort();
+}
+
+using StartOffset = search::bitcompression::PageDict4StartOffset;
+using Writer = search::diskindex::test::PageDict4MemWriter;
+using SeqReader = search::diskindex::test::PageDict4MemSeqReader;
+
+/*
+ * Test corner case where a dictionary page has a single word, and the
+ * page header and compressed counts completely fills the page.
+ */
+void testPageSizedCounts()
+{
+ uint32_t pageBitSize = 32768;
+ uint32_t startBits = 15 * 3 + 12;
+
+ uint32_t ssPad = 64;
+ uint32_t spPad = 64;
+ uint32_t pPad = 64;
+ Writer w(minChunkDocs, numWordIds, ssPad, spPad, pPad);
+ PostingListCounts baseCounts = makeBaseCounts();
+ PostingListCounts largeCounts = makeCounts(pageBitSize - startBits);
+ w.addCounts("a", baseCounts);
+ w.addCounts("b", baseCounts);
+ w.addCounts("c", largeCounts);
+ w.addCounts("d", baseCounts);
+ w.addCounts("e", baseCounts);
+ w.flush();
+
+ SeqReader r(minChunkDocs, numWordIds, w._buffers);
+
+ uint64_t checkWordNum = 0;
+ PostingListCounts counts;
+ for (uint64_t wordNum = 1; wordNum < 7; ++wordNum) {
+ vespalib::string word;
+ counts.clear();
+ r.readCounts(word, checkWordNum, counts);
+ if (wordNum < 6) {
+ EXPECT_EQUAL(checkWordNum, wordNum);
+ if (wordNum == 3) {
+ EXPECT_TRUE(counts == largeCounts);
+ } else {
+ EXPECT_TRUE(counts == baseCounts);
+ }
+ } else {
+ EXPECT_GREATER(checkWordNum, 100u);
+ }
+ }
+}
+
+
+
+TEST("require that counts exactly filling dictionary page works")
+{
+ testPageSizedCounts();
+}
+
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/diskindex/pagedict4/pagedict4test.cpp b/searchlib/src/tests/diskindex/pagedict4/pagedict4test.cpp
index cf90356c949..e914fe7c559 100644
--- a/searchlib/src/tests/diskindex/pagedict4/pagedict4test.cpp
+++ b/searchlib/src/tests/diskindex/pagedict4/pagedict4test.cpp
@@ -6,6 +6,9 @@
#include <vespa/searchlib/bitcompression/countcompression.h>
#include <vespa/searchlib/bitcompression/pagedict4.h>
#include <vespa/searchlib/test/diskindex/threelevelcountbuffers.h>
+#include <vespa/searchlib/test/diskindex/pagedict4_mem_writer.h>
+#include <vespa/searchlib/test/diskindex/pagedict4_mem_seq_reader.h>
+#include <vespa/searchlib/test/diskindex/pagedict4_mem_rand_reader.h>
#include <vespa/searchlib/index/postinglistcounts.h>
#include <vespa/searchlib/index/dummyfileheadercontext.h>
#include <vespa/searchlib/diskindex/pagedict4file.h>
@@ -44,164 +47,9 @@ using search::index::schema::DataType;
using namespace search::index;
using StartOffset = search::bitcompression::PageDict4StartOffset;
-
-namespace
-{
-
-
-class Writer : public search::diskindex::ThreeLevelCountWriteBuffers
-{
-public:
- PageDict4SSWriter *_ssw;
- PageDict4SPWriter *_spw;
- PageDict4PWriter *_pw;
-
- Writer(EC &sse,
- EC &spe,
- EC &pe)
- : ThreeLevelCountWriteBuffers(sse, spe, pe),
- _ssw(NULL),
- _spw(NULL),
- _pw(NULL)
- {
- }
-
- ~Writer()
- {
- delete _ssw;
- delete _spw;
- delete _pw;
- }
-
- void allocWriters()
- {
- _ssw = new PageDict4SSWriter(_sse);
- _spw = new PageDict4SPWriter(*_ssw, _spe);
- _pw = new PageDict4PWriter(*_spw, _pe);
- _spw->setup();
- _pw->setup();
- }
-
- void flush()
- {
- _pw->flush();
- ThreeLevelCountWriteBuffers::flush();
- }
-
- void addCounts(const std::string &word,
- const PostingListCounts &counts)
- {
- _pw->addCounts(word, counts);
- }
-};
-
-
-class SeqReader : public search::diskindex::ThreeLevelCountReadBuffers
-{
-public:
- PageDict4SSReader _ssr;
- PageDict4Reader _pr;
-
- SeqReader(DC &ssd,
- DC &spd,
- DC &pd,
- search::diskindex::ThreeLevelCountWriteBuffers &wb)
- : ThreeLevelCountReadBuffers(ssd, spd, pd, wb),
- _ssr(_rcssd,
- wb._ssHeaderLen, wb._ssFileBitSize,
- wb._spHeaderLen, wb._spFileBitSize,
- wb._pHeaderLen, wb._pFileBitSize),
- _pr(_ssr, spd, pd)
- {
- _ssr.setup(ssd);
- _pr.setup();
- }
-
- void readCounts(vespalib::string &word,
- uint64_t &wordNum,
- PostingListCounts &counts)
- {
- _pr.readCounts(word, wordNum, counts);
- }
-};
-
-class RandReader : public search::diskindex::ThreeLevelCountReadBuffers
-{
-public:
- PageDict4SSReader _ssr;
- const char *_spData;
- const char *_pData;
- size_t _pageSize;
-
- RandReader(DC &ssd,
- DC &spd,
- DC &pd,
- search::diskindex::ThreeLevelCountWriteBuffers &wb)
- : ThreeLevelCountReadBuffers(ssd, spd, pd, wb),
- _ssr(_rcssd,
- wb._ssHeaderLen, wb._ssFileBitSize,
- wb._spHeaderLen, wb._spFileBitSize,
- wb._pHeaderLen, wb._pFileBitSize),
- _spData(static_cast<const char *>(_rcspd._comprBuf)),
- _pData(static_cast<const char *>(_rcpd._comprBuf)),
- _pageSize(search::bitcompression::PageDict4PageParams::getPageByteSize())
- {
- _ssr.setup(ssd);
- }
-
- bool
- lookup(const std::string &key,
- uint64_t &wordNum,
- PostingListCounts &counts,
- StartOffset &offsets)
- {
- PageDict4SSLookupRes sslr;
-
- sslr = _ssr.lookup(key);
- if (!sslr._res) {
- counts.clear();
- offsets = sslr._l6StartOffset;
- wordNum = sslr._l6WordNum;
- return false;
- }
-
- if (sslr._overflow) {
- wordNum = sslr._l6WordNum;
- counts = sslr._counts;
- offsets = sslr._startOffset;
- return true;
- }
- PageDict4SPLookupRes splr;
- splr.lookup(_ssr,
- _spData +
- _pageSize * sslr._sparsePageNum,
- key,
- sslr._l6Word,
- sslr._lastWord,
- sslr._l6StartOffset,
- sslr._l6WordNum,
- sslr._pageNum);
-
- PageDict4PLookupRes plr;
- plr.lookup(_ssr,
- _pData + _pageSize * splr._pageNum,
- key,
- splr._l3Word,
- splr._lastWord,
- splr._l3StartOffset,
- splr._l3WordNum);
- wordNum = plr._wordNum;
- offsets = plr._startOffset;
- if (plr._res) {
- counts = plr._counts;
- return true;
- }
- counts.clear();
- return false;
- }
-};
-
-}
+using Writer = search::diskindex::test::PageDict4MemWriter;
+using SeqReader = search::diskindex::test::PageDict4MemSeqReader;
+using RandReader = search::diskindex::test::PageDict4MemRandReader;
class PageDict4TestApp : public FastOS_Application
{
@@ -518,9 +366,6 @@ testWords(const std::string &logname,
bool firstWordForcedCommon,
bool lastWordForcedCommon)
{
- typedef search::bitcompression::PostingListCountFileEncodeContext EC;
- typedef search::bitcompression::PostingListCountFileDecodeContext DC;
-
LOG(info, "%s: word test start", logname.c_str());
std::vector<WordCounts> myrand;
makeWords(myrand, rnd, numWordIds, tupleCount,
@@ -536,17 +381,7 @@ testWords(const std::string &logname,
}
LOG(info, "%s: word counts generated", logname.c_str());
- EC pe;
- EC spe;
- EC sse;
-
- sse._minChunkDocs = chunkSize;
- sse._numWordIds = numWordIds;
- spe.copyParams(sse);
- pe.copyParams(sse);
- Writer w(sse, spe, pe);
- w.startPad(ssPad, spPad, pPad);
- w.allocWriters();
+ Writer w(chunkSize, numWordIds, ssPad, spPad, pPad);
PostingListCounts counts;
for (std::vector<WordCounts>::const_iterator
@@ -563,23 +398,15 @@ testWords(const std::string &logname,
"%s: Used %" PRIu64 "+%" PRIu64 "+%" PRIu64
" bits for %d words",
logname.c_str(),
- w._pFileBitSize,
- w._spFileBitSize,
- w._ssFileBitSize,
+ w._buffers._pFileBitSize,
+ w._buffers._spFileBitSize,
+ w._buffers._ssFileBitSize,
(int) myrand.size());
StartOffset checkOffset;
{
- DC ssd;
- ssd._minChunkDocs = chunkSize;
- ssd._numWordIds = numWordIds;
- DC spd;
- spd.copyParams(ssd);
- DC pd;
- pd.copyParams(ssd);
-
- SeqReader r(ssd, spd, pd, w);
+ SeqReader r(chunkSize, numWordIds, w._buffers);
uint64_t wordNum = 1;
uint64_t checkWordNum = 0;
@@ -596,20 +423,12 @@ testWords(const std::string &logname,
checkOffset._fileOffset += counts._bitLength;
checkOffset._accNumDocs += counts._numDocs;
}
- assert(pd.getReadOffset() == w._pFileBitSize);
+ assert(r._decoders.pd.getReadOffset() == w._buffers._pFileBitSize);
LOG(info, "%s: words seqRead test OK", logname.c_str());
}
{
- DC ssd;
- ssd._minChunkDocs = chunkSize;
- ssd._numWordIds = numWordIds;
- DC spd;
- spd.copyParams(ssd);
- DC pd;
- pd.copyParams(ssd);
-
- RandReader rr(ssd, spd, pd, w);
+ RandReader rr(chunkSize, numWordIds, w._buffers);
uint64_t wordNum = 1;
uint64_t checkWordNum = 0;
diff --git a/searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp b/searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp
index 82110d354d3..50abcca96a7 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp
+++ b/searchlib/src/vespa/searchlib/bitcompression/pagedict4.cpp
@@ -63,13 +63,6 @@ PageDict4PageParams::getFileHeaderPad(uint32_t offset)
}
-std::ostream &
-operator<<(std::ostream &stream, const index::PostingListCounts &counts)
-{
- stream << "(d=" << counts._numDocs << ",b=" << counts._bitLength << ")";
- return stream;
-}
-
typedef index::PostingListCounts Counts;
typedef PageDict4StartOffset StartOffset;
@@ -202,19 +195,6 @@ PageDict4SSWriter::addL6Skip(const vespalib::stringref &word,
uint64_t pageNum,
uint32_t sparsePageNum)
{
-#if 0
- LOG(info,
- "addL6SKip, \"%s\" -> wordnum %d, page (%d,%d) startOffset %" PRId64
- ", SS bitOffset %" PRIu64,
- word.c_str(),
- (int) wordNum,
- (int) pageNum,
- (int) sparsePageNum,
- startOffset.empty() ?
- static_cast<int64_t>(0) :
- startOffset[0]._fileOffset,
- _eL6.getWriteOffset());
-#endif
_eL6.writeBits(0, 1); // Selector bit
writeStartOffset(_eL6,
startOffset,
@@ -227,12 +207,6 @@ PageDict4SSWriter::addL6Skip(const vespalib::stringref &word,
size_t lcp = getLCP(word, _l6Word);
vespalib::stringref wordSuffix = word.substr(lcp);
_eL6.smallAlign(8);
-#if 0
- LOG(info,
- "lcp=%d, at offset %" PRIu64 ,
- (int) lcp,
- _eL6.getWriteOffset());
-#endif
_eL6.writeBits(lcp, 8);
_eL6.writeComprBufferIfNeeded();
_eL6.writeString(wordSuffix);
@@ -247,10 +221,6 @@ PageDict4SSWriter::addL6Skip(const vespalib::stringref &word,
_l6StartOffset = startOffset;
_l6Word = word;
_l6WordNum = wordNum;
-#if 0
- LOG(info, "after .. SS bit Offset %" PRId64,
- _eL6.getWriteOffset());
-#endif
}
@@ -261,21 +231,6 @@ addOverflowCounts(const vespalib::stringref &word,
const StartOffset &startOffset,
uint64_t wordNum)
{
-#if 0
- std::ostringstream txtCounts;
- std::ostringstream txtStartOffset;
- std::ostringstream txtL6StartOffset;
- txtCounts << counts;
- txtStartOffset << startOffset;
- txtL6StartOffset << _l6StartOffset;
- LOG(info,
- "addL6Overflow, \"%s\" wordNum %d, counts %s fileoffset %s l6startOffset %s",
- word.c_str(),
- (int) wordNum,
- txtCounts.str().c_str(),
- txtStartOffset.str().c_str(),
- txtL6StartOffset.str().c_str());
-#endif
_eL6.writeBits(1, 1); // Selector bit
writeStartOffset(_eL6,
startOffset,
@@ -504,11 +459,6 @@ PageDict4SPWriter::addL3Skip(const vespalib::stringref &word,
uint64_t wordNum,
uint64_t pageNum)
{
-#if 0
- LOG(info,
- "addL3Skip(\"%s\"), wordNum=%d pageNum=%d",
- word.c_str(), (int) wordNum, (int) pageNum);
-#endif
assert(_l3WordOffset == _words.size());
/*
* Update notion of previous size, converting tentative writes to
@@ -530,10 +480,6 @@ PageDict4SPWriter::addL3Skip(const vespalib::stringref &word,
_l3StartOffset,
K_VALUE_COUNTFILE_L3_FILEOFFSET,
K_VALUE_COUNTFILE_L3_ACCNUMDOCS);
-#if 0
- LOG(info,
- "Adding l3 delta %d", (int) (wordNum - _l3WordNum));
-#endif
_eL3.encodeExpGolomb(wordNum - _l3WordNum,
K_VALUE_COUNTFILE_L3_WORDNUM);
_eL3.writeComprBufferIfNeeded();
@@ -555,7 +501,7 @@ PageDict4SPWriter::addL3Skip(const vespalib::stringref &word,
// Flush existing full writes.
flushPage();
- // Compensate for elided entry.
+ // Promote elided L3 entry to L6 entry
_l6Word = word;
_l6StartOffset = startOffset;
_l6WordNum = wordNum;
@@ -572,11 +518,6 @@ PageDict4SPWriter::addL3Skip(const vespalib::stringref &word,
void
PageDict4SPWriter::addL4Skip(size_t &lcp)
{
-#if 0
- LOG(info,
- "addL4Skip(\"%s\")",
- _l3Word.c_str());
-#endif
size_t tlcp = getLCP(_l3Word, _l4Word);
assert(tlcp <= lcp);
if (tlcp < lcp)
@@ -613,11 +554,6 @@ PageDict4SPWriter::addL4Skip(size_t &lcp)
void
PageDict4SPWriter::addL5Skip(size_t &lcp)
{
-#if 0
- LOG(info,
- "addL5Skip(\"%s\")",
- _l3Word.c_str());
-#endif
size_t tlcp = getLCP(_l3Word, _l5Word);
assert(tlcp <= lcp);
if (tlcp < lcp)
@@ -838,15 +774,6 @@ PageDict4PWriter::
addCounts(const vespalib::stringref &word,
const Counts &counts)
{
-#if 0
- std::ostringstream txtcounts;
- txtcounts << counts;
- LOG(info,
- "addCounts(\"%s\", %s), wordNum=%d",
- word.c_str(),
- txtcounts.str().c_str(),
- (int) _wordNum);
-#endif
assert(_countsWordOffset == _words.size());
size_t lcp = getLCP(_pendingCountsWord, _countsWord);
if (_l1StrideCheck >= getL1SkipStride())
@@ -858,14 +785,6 @@ addCounts(const vespalib::stringref &word,
if (eCountsOffset + _l1Size + _l2Size + _headerSize +
8 * (_countsWordOffset + 2 + _pendingCountsWord.size() - lcp) >
getPageBitSize()) {
-#if 0
- LOG(info,
- "Backtrack: eCountsOffset=%d, l1size=%d, l2size=%d, hdrsize=%d",
- (int) eCountsOffset,
- (int) _l1Size,
- (int) _l2Size,
- (int) _headerSize);
-#endif
if (_l1StrideCheck == 0u) {
_l1Size = _prevL1Size; // Undo L1
_l2Size = _prevL2Size; // Undo L2
@@ -890,11 +809,6 @@ addCounts(const vespalib::stringref &word,
_l3WordNum,
getPageNum());
resetPage();
-#if 0
- std::ostringstream txtoffsets;
- txtoffsets << _countsStartOffset;
- LOG(info, "countsStartOffsets=%s", txtoffsets.str().c_str());
-#endif
return;
}
}
@@ -902,11 +816,6 @@ addCounts(const vespalib::stringref &word,
++_countsEntries;
++_l1StrideCheck;
_countsStartOffset.adjust(counts);
-#if 0
- std::ostringstream txtoffsets;
- txtoffsets << _countsStartOffset;
- LOG(info, "countsStartOffsets=%s", txtoffsets.str().c_str());
-#endif
_countsWord = _pendingCountsWord;
_countsWordOffset = _words.size();
_pendingCountsWord = word;
@@ -936,10 +845,6 @@ PageDict4PWriter::addOverflowCounts(const vespalib::stringref &word,
e.smallAlign(64);
e.writeComprBufferIfNeeded();
e.writeBits(_wordNum, 64); // Identifies overflow for later read
-#if 0
- LOG(info,
- "AddOverflowCounts wordnum %d", (int) _wordNum);
-#endif
uint32_t alignedHeaderSize = (_headerSize + 63) & -64;
uint32_t padding = getPageBitSize() - alignedHeaderSize - 64;
e.padBits(padding);
@@ -963,14 +868,6 @@ PageDict4PWriter::addL1Skip(size_t &lcp)
if (tlcp < lcp)
lcp = tlcp;
_l1StrideCheck = 0u;
-#if 0
- LOG(info,
- "addL1SKip(\"%s\"), lcp=%d, offset=%d -> %d",
- _pendingCountsWord.c_str(),
- (int) lcp,
- (int) _l1WordOffset,
- (int) _countsWordOffset);
-#endif
_eL1.encodeExpGolomb(_countsWordOffset - _l1WordOffset,
K_VALUE_COUNTFILE_L1_WORDOFFSET);
_eL1.writeComprBufferIfNeeded();
@@ -1000,14 +897,6 @@ PageDict4PWriter::addL2Skip(size_t &lcp)
if (tlcp < lcp)
lcp = tlcp;
_l2StrideCheck = 0;
-#if 0
- LOG(info,
- "addL2SKip(\"%s\"), lcp=%d, offset=%d -> %d",
- _pendingCountsWord.c_str(),
- (int) lcp,
- (int) _l2WordOffset,
- (int) _countsWordOffset);
-#endif
_eL2.encodeExpGolomb(_countsWordOffset - _l2WordOffset,
K_VALUE_COUNTFILE_L2_WORDOFFSET);
_eL2.writeComprBufferIfNeeded();
@@ -1101,12 +990,6 @@ PageDict4SSReader::setup(DC &ssd)
DC dL6;
-#if 0
- LOG(info,
- "comprBuf=%p, comprBufSize=%d",
- static_cast<const void *>(_cb._comprBuf),
- (int) _cb._comprBufSize);
-#endif
setDecoderPosition(dL6, _cb, _ssStartOffset);
dL6.copyParams(_ssd);
@@ -1127,12 +1010,6 @@ PageDict4SSReader::setup(DC &ssd)
bool overflow = false;
while (l6Offset < _ssFileBitLen) {
-#if 0
- LOG(info,
- "L6Offset=%" PRIu32 ", bitLen=%" PRIu64,
- l6Offset,
- _ssFileBitLen);
-#endif
UC64_DECODECONTEXT(o);
uint32_t length;
uint64_t val64;
@@ -1166,11 +1043,6 @@ PageDict4SSReader::setup(DC &ssd)
UC64_DECODEEXPGOLOMB_NS(o,
K_VALUE_COUNTFILE_L6_WORDNUM,
EC);
-#if 0
- LOG(info,
- "Bumping l6wordnum from %d to %d (delta %d)",
- (int) l6WordNum, (int) (l6WordNum + val64) , (int) val64);
-#endif
l6WordNum += val64;
UC64_DECODECONTEXT_STORE(o, dL6._);
dL6.smallAlign(8);
@@ -1182,10 +1054,6 @@ PageDict4SSReader::setup(DC &ssd)
word += reinterpret_cast<const char *>(bytes);
dL6.setByteCompr(bytes + word.size() + 1 - lcp);
if (overflow) {
-#if 0
- LOG(info,
- "AddOverflowRef2 wordnum %d", (int) (l6WordNum - 1));
-#endif
_overflows.push_back(OverflowRef(l6WordNum - 1, _l7.size()));
dL6.readCounts(counts);
startOffset.adjust(counts);
@@ -1199,18 +1067,6 @@ PageDict4SSReader::setup(DC &ssd)
++sparsePageNum;
UC64_DECODECONTEXT_STORE(o, dL6._);
}
-#if 0
- std::ostringstream txtfileoffset;
- txtfileoffset << startOffset;
- LOG(info,
- "ssreader::setup "
- "word=%s, l6offset=%d->%d, startOffsets=%s overflow=%s",
- word.c_str(),
- (int) l6Offset,
- (int) dL6.getReadOffset(),
- txtfileoffset.str().c_str(),
- overflow ? "true" : "false");
-#endif
++l7StrideCheck;
l6Offset = dL6.getReadOffset();
}
@@ -1281,14 +1137,6 @@ lookup(const vespalib::stringref &key)
l6WordNum = l7e._l7WordNum;
}
-#if 0
- LOG(info,
- "sslookup1: l6WordNum=%d, l6Word=\"%s\", key=\"%s\", l6Offset=%d",
- (int) l6WordNum,
- l6Word.c_str(),
- key.c_str(),
- (int) l6Offset);
-#endif
setDecoderPosition(dL6, _cb, l6Offset);
@@ -1325,13 +1173,6 @@ lookup(const vespalib::stringref &key)
word += reinterpret_cast<const char *>(bytes);
dL6.setByteCompr(bytes + word.size() + 1 - lcp);
if (overflow) {
-#if 0
- LOG(info,
- "sslookup: wordNum=%d, word=\"%s\", key=\"%s\"",
- (int) wordNum,
- word.c_str(),
- key.c_str());
-#endif
bool l6NotLessThanKey = !(word < key);
if (l6NotLessThanKey) {
if (key == word) {
@@ -1398,13 +1239,6 @@ lookupOverflow(uint64_t wordNum) const
assert(l7Ref < _l7.size());
const vespalib::string &word = _l7[l7Ref]._l7Word;
-#if 0
- LOG(info,
- "lookupOverflow: wordNum %d -> word %s, next l7 Pos is %d",
- (int) wordNum,
- word.c_str(),
- (int) l7Ref);
-#endif
uint64_t l6Offset = _ssStartOffset;
StartOffset startOffset;
if (l7Ref > 0) {
@@ -1428,18 +1262,6 @@ lookupOverflow(uint64_t wordNum) const
dL6.copyParams(_ssd);
setDecoderPosition(dL6, _cb, l6Offset);
-#if 0
- std::ostringstream txtStartOffset;
- std::ostringstream txtL6StartOffset;
- txtStartOffset << startOffset;
- txtL6StartOffset << l6StartOffset;
- LOG(info,
- "Lookupoverflow l6Offset=%d, l6fileoffset=%s, fileoffset=%s",
- (int) l6Offset,
- txtL6StartOffset.str().c_str(),
- txtStartOffset.str().c_str());
-#endif
-
UC64_DECODECONTEXT(o);
uint32_t length;
const bool bigEndian = true;
@@ -1474,16 +1296,6 @@ lookupOverflow(uint64_t wordNum) const
(void) lcp;
Counts counts;
dL6.readCounts(counts);
-#if 0
- std::ostringstream txtCounts;
- txtStartOffset.str("");
- txtStartOffset << startOffset;
- txtCounts << counts;
- LOG(info,
- "Lookupoverflow fileoffset=%s, counts=%s",
- txtStartOffset.str().c_str(),
- txtCounts.str().c_str());
-#endif
res._overflow = true;
res._counts = counts;
res._startOffset = startOffset;
@@ -1911,7 +1723,12 @@ PageDict4Reader::PageDict4Reader(const SSReader &ssReader,
_spwc(),
_spwe(),
_ssd(),
- _wordNum(1u)
+ _wordNum(1u),
+ _l1SkipChecks(),
+ _l2SkipChecks(),
+ _l3SkipChecks(),
+ _l4SkipChecks(),
+ _l5SkipChecks()
{
}
@@ -1929,8 +1746,8 @@ PageDict4Reader::setup()
_spd.skipBits(getFileHeaderPad(_ssReader._spStartOffset));
assert(_pFileBitLen >= _pd.getReadOffset());
if (_pFileBitLen > _pd.getReadOffset()) {
- setupPage();
setupSPage();
+ setupPage();
}
const ComprBuffer &sscb = _ssReader._cb;
@@ -1943,25 +1760,62 @@ PageDict4Reader::~PageDict4Reader()
{
}
+namespace
+{
+
+template <typename CheckVector>
+void checkWordOffset(CheckVector &skip, uint32_t &skipAdjust, uint32_t wordOffset, uint32_t wordEntryLen)
+{
+ if (skip.valid() && skip->wordOffset + skipAdjust <= wordOffset) {
+ assert(skip->wordOffset + skipAdjust == wordOffset);
+ skipAdjust += wordEntryLen;
+ skip.step();
+ }
+}
+
+}
+
+
+template <typename Entry1, typename Entry2>
+void
+PageDict4Reader::checkWordOffsets(const std::vector<char> &words,
+ CheckVector<Entry1> &skip1,
+ CheckVector<Entry2> &skip2)
+{
+ skip1.setup();
+ skip2.setup();
+ uint32_t wordOffset = 0;
+ uint32_t skip1Adjust = 0;
+ uint32_t skip2Adjust = 0;
+ auto c = words.cbegin();
+ auto ce = words.cend();
+ while (c != ce) {
+ wordOffset = c - words.cbegin();
+ ++c; // skip lcp
+ assert(c != ce);
+ while (*c != '\0') {
+ ++c;
+ assert(c != ce);
+ }
+ assert(c != ce);
+ ++c;
+ uint32_t wordEntryLen = c - words.cbegin() - wordOffset;
+ checkWordOffset(skip1, skip1Adjust, wordOffset, wordEntryLen);
+ checkWordOffset(skip2, skip2Adjust, wordOffset, wordEntryLen);
+ }
+ assert(!skip1.valid());
+ assert(!skip2.valid());
+}
void
PageDict4Reader::setupPage()
{
-#if 0
- LOG(info,
- "setupPage(%ld), "
- (long int) _pd.getReadOffset());
-#endif
uint32_t l2Size = _pd.readBits(15);
uint32_t l1Size = _pd.readBits(15);
uint32_t countsEntries = _pd.readBits(15);
uint32_t wordsSize = _pd.readBits(12);
_countsResidue = countsEntries;
-#if 0
- _pd.skipBits(l2Size + l1Size);
- Counts counts;
-#else
if (countsEntries == 0 && l1Size == 0 && l2Size == 0) {
_pd.smallAlign(64);
_overflowPage = true;
@@ -1974,138 +1828,214 @@ PageDict4Reader::setupPage()
uint64_t beforePos = _pd.getReadOffset();
Counts counts;
- StartOffset startOffset;
+ _l2SkipChecks.clear();
+ L2SkipCheck l2SkipCheck(_startOffset);
while (l2Residue > 0) {
UC64_DECODECONTEXT(o);
uint32_t length;
+ uint64_t val64;
const bool bigEndian = true;
UC64_DECODECONTEXT_LOAD(o, _pd._);
- UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L2_WORDOFFSET, EC);
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L2_WORDOFFSET, EC);
+ l2SkipCheck.wordOffset += val64;
UC64_DECODECONTEXT_STORE(o, _pd._);
readStartOffset(_pd,
- startOffset,
+ l2SkipCheck.startOffset,
K_VALUE_COUNTFILE_L2_FILEOFFSET,
K_VALUE_COUNTFILE_L2_ACCNUMDOCS);
UC64_DECODECONTEXT_LOAD(o, _pd._);
- UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L2_COUNTOFFSET, EC);
- UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L2_L1OFFSET, EC);
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L2_COUNTOFFSET, EC);
+ l2SkipCheck.countOffset += val64;
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L2_L1OFFSET, EC);
+ l2SkipCheck.l1Offset += val64;
UC64_DECODECONTEXT_STORE(o, _pd._);
--l2Residue;
+ _l2SkipChecks.push_back(l2SkipCheck);
}
+ _l2SkipChecks.setup();
assert(_pd.getReadOffset() == beforePos + l2Size);
+ _l1SkipChecks.clear();
+ L1SkipCheck l1SkipCheck(_startOffset);
while (l1Residue > 0) {
UC64_DECODECONTEXT(o);
uint32_t length;
+ uint64_t val64;
const bool bigEndian = true;
UC64_DECODECONTEXT_LOAD(o, _pd._);
- UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L1_WORDOFFSET, EC);
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L1_WORDOFFSET, EC);
+ l1SkipCheck.wordOffset += val64;
UC64_DECODECONTEXT_STORE(o, _pd._);
readStartOffset(_pd,
- startOffset,
+ l1SkipCheck.startOffset,
K_VALUE_COUNTFILE_L1_FILEOFFSET,
K_VALUE_COUNTFILE_L1_ACCNUMDOCS);
UC64_DECODECONTEXT_LOAD(o, _pd._);
- UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L1_COUNTOFFSET, EC);
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L1_COUNTOFFSET, EC);
+ l1SkipCheck.countOffset += val64;
UC64_DECODECONTEXT_STORE(o, _pd._);
--l1Residue;
+ _l1SkipChecks.push_back(l1SkipCheck);
+ if (_l2SkipChecks.valid()) {
+ uint64_t l1CheckOffset = beforePos + l2Size + _l2SkipChecks->l1Offset;
+ uint64_t l1Offset = _pd.getReadOffset();
+ if (l1Offset >= l1CheckOffset) {
+ assert(l1Offset == l1CheckOffset);
+ assert(_l2SkipChecks->check(l1SkipCheck));
+ _l2SkipChecks.step();
+ }
+ }
}
+ assert(!_l2SkipChecks.valid());
+ _l1SkipChecks.setup();
assert(_pd.getReadOffset() == beforePos + l2Size + l1Size);
(void) beforePos;
-#endif
_counts.clear();
+ StartOffset startOffset(_startOffset);
while (countsEntries > 0) {
_pd.readCounts(counts);
_counts.push_back(counts);
+ startOffset.adjust(counts);
--countsEntries;
+ if (_l1SkipChecks.valid()) {
+ uint64_t countsCheckOffset = beforePos + l2Size + l1Size + _l1SkipChecks->countOffset;
+ uint64_t countsOffset = _pd.getReadOffset();
+ if (countsOffset >= countsCheckOffset) {
+ assert(countsOffset == countsCheckOffset);
+ assert(startOffset == _l1SkipChecks->startOffset);
+ _l1SkipChecks.step();
+ }
+ }
+ }
+ assert(!_l1SkipChecks.valid());
+ if (_l3SkipChecks.valid()) {
+ assert(_l3SkipChecks->startOffset == startOffset);
+ assert(_l3SkipChecks->wordNum == _wordNum + _countsResidue);
+ _l3SkipChecks.step();
}
_cc = _counts.begin();
_ce = _counts.end();
uint32_t pageOffset = _pd.getReadOffset() & (getPageBitSize() - 1);
- uint32_t padding = getPageBitSize() - wordsSize * 8 - pageOffset;
+ uint32_t padding = (getPageBitSize() - wordsSize * 8 - pageOffset) & (getPageBitSize() - 1);
_pd.skipBits(padding);
_words.resize(wordsSize);
_pd.readBytes(reinterpret_cast<uint8_t *>(&_words[0]), wordsSize);
_wc = _words.begin();
_we = _words.end();
+ checkWordOffsets(_words, _l1SkipChecks, _l2SkipChecks);
}
void
PageDict4Reader::setupSPage()
{
-#if 0
- LOG(info, "setupSPage(%d),", (int) _spd.getReadOffset());
-#endif
uint32_t l5Size = _spd.readBits(15);
uint32_t l4Size = _spd.readBits(15);
uint32_t l3Entries = _spd.readBits(15);
uint32_t wordsSize = _spd.readBits(12);
_l3Residue = l3Entries;
-
-#if 0
- _spd.skipBits(l5Size + l4Size);
-#else
+ assert(!_l3SkipChecks.valid());
assert(l3Entries > 0);
uint32_t l4Residue = getL4Entries(l3Entries);
uint32_t l5Residue = getL5Entries(l4Residue);
uint64_t beforePos = _spd.getReadOffset();
- StartOffset startOffset;
+ _l5SkipChecks.clear();
+ L5SkipCheck l5SkipCheck(_startOffset, _wordNum);
while (l5Residue > 0) {
UC64_DECODECONTEXT(o);
uint32_t length;
+ uint64_t val64;
const bool bigEndian = true;
UC64_DECODECONTEXT_LOAD(o, _spd._);
- UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L5_WORDOFFSET, EC);
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L5_WORDOFFSET, EC);
+ l5SkipCheck.wordOffset += val64;
UC64_DECODECONTEXT_STORE(o, _spd._);
readStartOffset(_spd,
- startOffset,
+ l5SkipCheck.startOffset,
K_VALUE_COUNTFILE_L5_FILEOFFSET,
K_VALUE_COUNTFILE_L5_ACCNUMDOCS);
UC64_DECODECONTEXT_LOAD(o, _spd._);
- UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L5_WORDNUM, EC);
- UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L5_L3OFFSET, EC);
- UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L5_L4OFFSET, EC);
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L5_WORDNUM, EC);
+ l5SkipCheck.wordNum += val64;
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L5_L3OFFSET, EC);
+ l5SkipCheck.l3Offset += val64;
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L5_L4OFFSET, EC);
+ l5SkipCheck.l4Offset += val64;
UC64_DECODECONTEXT_STORE(o, _spd._);
--l5Residue;
+ _l5SkipChecks.push_back(l5SkipCheck);
}
+ _l5SkipChecks.setup();
assert(_spd.getReadOffset() == beforePos + l5Size);
+ _l4SkipChecks.clear();
+ L4SkipCheck l4SkipCheck(_startOffset, _wordNum);
while (l4Residue > 0) {
UC64_DECODECONTEXT(o);
uint32_t length;
+ uint64_t val64;
const bool bigEndian = true;
UC64_DECODECONTEXT_LOAD(o, _spd._);
- UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L4_WORDOFFSET, EC);
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L4_WORDOFFSET, EC);
+ l4SkipCheck.wordOffset += val64;
UC64_DECODECONTEXT_STORE(o, _spd._);
readStartOffset(_spd,
- startOffset,
+ l4SkipCheck.startOffset,
K_VALUE_COUNTFILE_L4_FILEOFFSET,
K_VALUE_COUNTFILE_L4_ACCNUMDOCS);
UC64_DECODECONTEXT_LOAD(o, _spd._);
- UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L4_WORDNUM, EC);
- UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L4_L3OFFSET, EC);
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L4_WORDNUM, EC);
+ l4SkipCheck.wordNum += val64;
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L4_L3OFFSET, EC);
+ l4SkipCheck.l3Offset += val64;
UC64_DECODECONTEXT_STORE(o, _spd._);
--l4Residue;
+ _l4SkipChecks.push_back(l4SkipCheck);
+ if (_l5SkipChecks.valid()) {
+ uint64_t l4CheckOffset = beforePos + l5Size + _l5SkipChecks->l4Offset;
+ uint64_t l4Offset = _spd.getReadOffset();
+ if (l4Offset >= l4CheckOffset) {
+ assert(l4Offset == l4CheckOffset);
+ assert(_l5SkipChecks->check(l4SkipCheck));
+ _l5SkipChecks.step();
+ }
+ }
}
+ assert(!_l5SkipChecks.valid());
+ _l4SkipChecks.setup();
assert(_spd.getReadOffset() == beforePos + l5Size + l4Size);
(void) l4Size;
(void) l5Size;
(void) beforePos;
-#endif
+ _l3SkipChecks.clear();
+ L3SkipCheck l3SkipCheck(_startOffset, _wordNum);
while (l3Entries > 1) {
readStartOffset(_spd,
- startOffset,
+ l3SkipCheck.startOffset,
K_VALUE_COUNTFILE_L3_FILEOFFSET,
K_VALUE_COUNTFILE_L3_ACCNUMDOCS);
UC64_DECODECONTEXT(o);
uint32_t length;
+ uint64_t val64;
const bool bigEndian = true;
UC64_DECODECONTEXT_LOAD(o, _spd._);
- UC64_SKIPEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L3_WORDNUM, EC);
+ UC64_DECODEEXPGOLOMB_NS(o, K_VALUE_COUNTFILE_L3_WORDNUM, EC);
+ l3SkipCheck.wordNum += val64;
UC64_DECODECONTEXT_STORE(o, _spd._);
--l3Entries;
+ _l3SkipChecks.push_back(l3SkipCheck);
+ if (_l4SkipChecks.valid()) {
+ uint64_t l3CheckOffset = beforePos + l5Size + l4Size + _l4SkipChecks->l3Offset;
+ uint64_t l3Offset = _spd.getReadOffset();
+ if (l3Offset >= l3CheckOffset) {
+ assert(l3Offset == l3CheckOffset);
+ assert(_l4SkipChecks->check(l3SkipCheck));
+ _l4SkipChecks.step();
+ }
+ }
}
+ assert(!_l4SkipChecks.valid());
+ _l3SkipChecks.setup();
uint32_t pageOffset = _spd.getReadOffset() & (getPageBitSize() - 1);
uint32_t padding = getPageBitSize() - wordsSize * 8 - pageOffset;
_spd.skipBits(padding);
@@ -2113,6 +2043,7 @@ PageDict4Reader::setupSPage()
_spd.readBytes(reinterpret_cast<uint8_t *>(&_spwords[0]), wordsSize);
_spwc = _spwords.begin();
_spwe = _spwords.end();
+ checkWordOffsets(_spwords, _l4SkipChecks, _l5SkipChecks);
}
@@ -2188,13 +2119,6 @@ PageDict4Reader::decodeSSWord(vespalib::string &word)
word += reinterpret_cast<const char *>(bytes);
_ssd.setByteCompr(bytes + word.size() + 1 - lcp);
_lastSSWord = word;
-#if 0
- LOG(info,
- "word is %s LCP %d, overflow=%s",
- word.c_str(),
- (int) lcp,
- overflow ? "true" : "false");
-#endif
if (overflow) {
Counts counts;
_ssd.readCounts(counts);
@@ -2243,17 +2167,18 @@ PageDict4Reader::readCounts(vespalib::string &word,
--_l3Residue;
}
--_countsResidue;
+ wordNum = _wordNum++;
if (_countsResidue == 0) {
assert((_pd.getReadOffset() & (getPageBitSize() - 1)) == 0);
if (_pd.getReadOffset() < _pFileBitLen) {
- setupPage();
- if (_l3Residue == 0)
+ if (_l3Residue == 0) {
setupSPage();
+ }
+ setupPage();
} else {
assert(_pd.getReadOffset() == _pFileBitLen);
}
}
- wordNum = _wordNum++;
} else if (_overflowPage) {
readOverflowCounts(word, counts);
_overflowPage = false;
@@ -2266,15 +2191,16 @@ PageDict4Reader::readCounts(vespalib::string &word,
assert(tword == word);
--_l3Residue;
_lastWord = word;
+ wordNum = _wordNum++;
_pd.align(getPageBitSize());
if (_pd.getReadOffset() < _pFileBitLen) {
- setupPage();
- if (_l3Residue == 0)
+ if (_l3Residue == 0) {
setupSPage();
+ }
+ setupPage();
} else {
assert(_pd.getReadOffset() == _pFileBitLen);
}
- wordNum = _wordNum++;
} else {
// Mark end of file.
word.clear();
@@ -2298,25 +2224,14 @@ PageDict4Reader::readOverflowCounts(vespalib::string &word,
word = wtsslr._lastWord;
counts = wtsslr._counts;
-#if 0
- std::ostringstream txtCounts;
- std::ostringstream txtStartOffset;
- std::ostringstream txtLRStartOffset;
-
- txtCounts << counts;
- txtStartOffset << _startOffset;
- txtLRStartOffset << wtsslr._startOffset;
- LOG(info,
- "readOverflowCounts _wordNum=%" PRIu64
- ", counts=%s, startOffset=%s (should be %s)",
- _wordNum,
- txtCounts.str().c_str(),
- txtLRStartOffset.str().c_str(),
- txtStartOffset.str().c_str());
-#endif
-
+ assert(wordNum == _wordNum);
assert(wtsslr._startOffset == _startOffset);
_startOffset.adjust(counts);
+ if (_l3SkipChecks.valid()) {
+ assert(_l3SkipChecks->startOffset == _startOffset);
+ assert(_l3SkipChecks->wordNum == _wordNum + 1);
+ _l3SkipChecks.step();
+ }
}
} // namespace bitcompression
diff --git a/searchlib/src/vespa/searchlib/bitcompression/pagedict4.h b/searchlib/src/vespa/searchlib/bitcompression/pagedict4.h
index 47f2354bcc6..717702d4ef7 100644
--- a/searchlib/src/vespa/searchlib/bitcompression/pagedict4.h
+++ b/searchlib/src/vespa/searchlib/bitcompression/pagedict4.h
@@ -201,7 +201,7 @@ private:
uint32_t _l4WordOffset; // Offset for last L4 word written
uint32_t _l5WordOffset; // Offset for last L5 word written
- // file offsets
+ // Offsets in data files for last L3 entry
StartOffset _l3StartOffset;
// Offsets in data files for last L4 entry
@@ -213,7 +213,7 @@ private:
// Offsets in data files for last L6 entry
StartOffset _l6StartOffset;
- uint64_t _l3WordNum; // word number next L3 entry to write
+ uint64_t _l3WordNum; // word number last L3 entry
uint64_t _l4WordNum; // word number last L4 entry
uint64_t _l5WordNum; // word number last L5 entry
uint64_t _l6WordNum; // word number last L6 entry
@@ -663,6 +663,106 @@ public:
StartOffset _startOffset;
bool _overflowPage;
typedef std::vector<Counts> PCV;
+ struct L1SkipCheck
+ {
+ uint32_t wordOffset;
+ StartOffset startOffset;
+ uint32_t countOffset;
+ L1SkipCheck(const StartOffset &startOffset_)
+ : wordOffset(0),
+ startOffset(startOffset_),
+ countOffset(0)
+ {
+ }
+ };
+ struct L2SkipCheck : public L1SkipCheck
+ {
+ uint32_t l1Offset;
+ L2SkipCheck(const StartOffset &startOffset_)
+ : L1SkipCheck(startOffset_),
+ l1Offset(0)
+ {
+ }
+
+ bool check(const L1SkipCheck &rhs) const {
+ return startOffset == rhs.startOffset &&
+ countOffset == rhs.countOffset;
+ }
+ };
+ struct L3SkipCheck
+ {
+ StartOffset startOffset;
+ uint64_t wordNum;
+ L3SkipCheck(const StartOffset &startOffset_, uint64_t wordNum_)
+ : startOffset(startOffset_),
+ wordNum(wordNum_)
+ {
+ }
+ };
+ struct L4SkipCheck : public L3SkipCheck
+ {
+ uint32_t wordOffset;
+ uint32_t l3Offset;
+ L4SkipCheck(const StartOffset &startOffset_, uint64_t wordNum_)
+ : L3SkipCheck(startOffset_, wordNum_),
+ wordOffset(0),
+ l3Offset(0)
+ {
+ }
+ bool check(const L3SkipCheck &rhs) const {
+ return startOffset == rhs.startOffset &&
+ wordNum == rhs.wordNum;
+ }
+ };
+ struct L5SkipCheck : public L4SkipCheck
+ {
+ uint32_t l4Offset;
+ L5SkipCheck(const StartOffset &startOffset_, uint64_t wordNum_)
+ : L4SkipCheck(startOffset_, wordNum_),
+ l4Offset(0)
+ {
+ }
+ bool check(const L4SkipCheck &rhs) const {
+ return startOffset == rhs.startOffset &&
+ wordNum == rhs.wordNum &&
+ l3Offset == rhs.l3Offset;
+ }
+ };
+ template <typename Element>
+ class CheckVector
+ {
+ using Vector = std::vector<Element>;
+ Vector _vector;
+ typename Vector::const_iterator _cur;
+ typename Vector::const_iterator _end;
+ public:
+ CheckVector()
+ : _vector(),
+ _cur(),
+ _end()
+ {
+ }
+ void clear() {
+ _vector.clear();
+ }
+ void setup() {
+ _cur = _vector.cbegin();
+ _end = _vector.cend();
+ }
+ bool valid() const { return _cur != _end; }
+ const Element *operator->() const { return _cur.operator->(); }
+ void step() {
+ ++_cur;
+ }
+ void push_back(const Element &element) {
+ _vector.push_back(element);
+ }
+ };
+ template <typename Entry1, typename Entry2>
+ void
+ checkWordOffsets(const std::vector<char> &words,
+ CheckVector<Entry1> &skip1,
+ CheckVector<Entry2> &skip2);
PCV _counts;
PCV::const_iterator _cc;
PCV::const_iterator _ce;
@@ -681,6 +781,11 @@ public:
DC _ssd;
uint64_t _wordNum;
+ CheckVector<L1SkipCheck> _l1SkipChecks;
+ CheckVector<L2SkipCheck> _l2SkipChecks;
+ CheckVector<L3SkipCheck> _l3SkipChecks;
+ CheckVector<L4SkipCheck> _l4SkipChecks;
+ CheckVector<L5SkipCheck> _l5SkipChecks;
PageDict4Reader(const SSReader &ssReader,
diff --git a/searchlib/src/vespa/searchlib/test/diskindex/CMakeLists.txt b/searchlib/src/vespa/searchlib/test/diskindex/CMakeLists.txt
index 5b698234b90..f4d8853aeca 100644
--- a/searchlib/src/vespa/searchlib/test/diskindex/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/test/diskindex/CMakeLists.txt
@@ -1,6 +1,11 @@
# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
vespa_add_library(searchlib_searchlib_test_diskindex OBJECT
SOURCES
+ pagedict4_decoders.cpp
+ pagedict4_encoders.cpp
+ pagedict4_mem_seq_reader.cpp
+ pagedict4_mem_rand_reader.cpp
+ pagedict4_mem_writer.cpp
threelevelcountbuffers.cpp
testdiskindex.cpp
DEPENDS
diff --git a/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_decoders.cpp b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_decoders.cpp
new file mode 100644
index 00000000000..122ce582850
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_decoders.cpp
@@ -0,0 +1,20 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "pagedict4_decoders.h"
+
+namespace search::diskindex::test {
+
+PageDict4Decoders::PageDict4Decoders(uint32_t chunkSize, uint64_t numWordIds)
+ : ssd(),
+ spd(),
+ pd()
+{
+ ssd._minChunkDocs = chunkSize;
+ ssd._numWordIds = numWordIds;
+ spd.copyParams(ssd);
+ pd.copyParams(ssd);
+}
+
+PageDict4Decoders::~PageDict4Decoders() = default;
+
+}
diff --git a/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_decoders.h b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_decoders.h
new file mode 100644
index 00000000000..c92364ba585
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_decoders.h
@@ -0,0 +1,24 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/bitcompression/countcompression.h>
+
+namespace search::diskindex::test {
+
+/*
+ * Decode contexts for the three levels (ss, sp, p) of a memory based pagedict4 structure
+ */
+struct PageDict4Decoders
+{
+ using DC = search::bitcompression::PostingListCountFileDecodeContext;
+
+ DC ssd;
+ DC spd;
+ DC pd;
+
+ PageDict4Decoders(uint32_t chunkSize, uint64_t numWordIds);
+ ~PageDict4Decoders();
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_encoders.cpp b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_encoders.cpp
new file mode 100644
index 00000000000..9d5cc7be9d4
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_encoders.cpp
@@ -0,0 +1,20 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "pagedict4_encoders.h"
+
+namespace search::diskindex::test {
+
+PageDict4Encoders::PageDict4Encoders(uint32_t chunkSize, uint64_t numWordIds)
+ : sse(),
+ spe(),
+ pe()
+{
+ sse._minChunkDocs = chunkSize;
+ sse._numWordIds = numWordIds;
+ spe.copyParams(sse);
+ pe.copyParams(sse);
+}
+
+PageDict4Encoders::~PageDict4Encoders() = default;
+
+}
diff --git a/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_encoders.h b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_encoders.h
new file mode 100644
index 00000000000..78885150eff
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_encoders.h
@@ -0,0 +1,24 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/bitcompression/countcompression.h>
+
+namespace search::diskindex::test {
+
+/*
+ * Encode contexts for the three levels (ss, sp, p) of a memory based pagedict4 structure
+ */
+struct PageDict4Encoders
+{
+ using EC = search::bitcompression::PostingListCountFileEncodeContext;
+
+ EC sse;
+ EC spe;
+ EC pe;
+
+ PageDict4Encoders(uint32_t chunkSize, uint64_t numWordIds);
+ ~PageDict4Encoders();
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_rand_reader.cpp b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_rand_reader.cpp
new file mode 100644
index 00000000000..fdee620d12d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_rand_reader.cpp
@@ -0,0 +1,73 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "pagedict4_mem_rand_reader.h"
+
+namespace search::diskindex::test {
+
+PageDict4MemRandReader::PageDict4MemRandReader(uint32_t chunkSize, uint64_t numWordIds,
+ ThreeLevelCountWriteBuffers &wb)
+ : _decoders(chunkSize, numWordIds),
+ _buffers(_decoders.ssd, _decoders.spd, _decoders.pd, wb),
+ _ssr(_buffers._rcssd,
+ wb._ssHeaderLen, wb._ssFileBitSize,
+ wb._spHeaderLen, wb._spFileBitSize,
+ wb._pHeaderLen, wb._pFileBitSize),
+ _spData(static_cast<const char *>(_buffers._rcspd._comprBuf)),
+ _pData(static_cast<const char *>(_buffers._rcpd._comprBuf)),
+ _pageSize(search::bitcompression::PageDict4PageParams::getPageByteSize())
+{
+ _ssr.setup(_decoders.ssd);
+}
+
+PageDict4MemRandReader::~PageDict4MemRandReader() = default;
+
+bool
+PageDict4MemRandReader::lookup(const std::string &key, uint64_t &wordNum,
+ PostingListCounts &counts, StartOffset &offsets)
+{
+ PageDict4SSLookupRes sslr;
+
+ sslr = _ssr.lookup(key);
+ if (!sslr._res) {
+ counts.clear();
+ offsets = sslr._l6StartOffset;
+ wordNum = sslr._l6WordNum;
+ return false;
+ }
+
+ if (sslr._overflow) {
+ wordNum = sslr._l6WordNum;
+ counts = sslr._counts;
+ offsets = sslr._startOffset;
+ return true;
+ }
+ PageDict4SPLookupRes splr;
+ splr.lookup(_ssr,
+ _spData +
+ _pageSize * sslr._sparsePageNum,
+ key,
+ sslr._l6Word,
+ sslr._lastWord,
+ sslr._l6StartOffset,
+ sslr._l6WordNum,
+ sslr._pageNum);
+
+ PageDict4PLookupRes plr;
+ plr.lookup(_ssr,
+ _pData + _pageSize * splr._pageNum,
+ key,
+ splr._l3Word,
+ splr._lastWord,
+ splr._l3StartOffset,
+ splr._l3WordNum);
+ wordNum = plr._wordNum;
+ offsets = plr._startOffset;
+ if (plr._res) {
+ counts = plr._counts;
+ return true;
+ }
+ counts.clear();
+ return false;
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_rand_reader.h b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_rand_reader.h
new file mode 100644
index 00000000000..f51c6bfb6da
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_rand_reader.h
@@ -0,0 +1,38 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "pagedict4_decoders.h"
+#include "threelevelcountbuffers.h"
+#include <vespa/searchlib/bitcompression/pagedict4.h>
+
+namespace search::diskindex::test {
+
+/*
+ * Class for performing random lookups in memory based pagedict4 structure
+ */
+class PageDict4MemRandReader
+{
+public:
+ using PageDict4SSReader = search::bitcompression::PageDict4SSReader;
+ using PageDict4SSLookupRes = search::bitcompression::PageDict4SSLookupRes;
+ using PageDict4SPLookupRes = search::bitcompression::PageDict4SPLookupRes;
+ using PageDict4PLookupRes = search::bitcompression::PageDict4PLookupRes;
+ using StartOffset = search::bitcompression::PageDict4StartOffset;
+ using PostingListCounts = search::index::PostingListCounts;
+
+ PageDict4Decoders _decoders;
+ ThreeLevelCountReadBuffers _buffers;
+ PageDict4SSReader _ssr;
+ const char *_spData;
+ const char *_pData;
+ size_t _pageSize;
+
+ PageDict4MemRandReader(uint32_t chunkSize, uint64_t numWordIds,
+ ThreeLevelCountWriteBuffers &wb);
+ ~PageDict4MemRandReader();
+ bool lookup(const std::string &key, uint64_t &wordNum,
+ PostingListCounts &counts, StartOffset &offsets);
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_seq_reader.cpp b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_seq_reader.cpp
new file mode 100644
index 00000000000..e33a0a1af0e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_seq_reader.cpp
@@ -0,0 +1,31 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "pagedict4_mem_seq_reader.h"
+
+namespace search::diskindex::test {
+
+PageDict4MemSeqReader::PageDict4MemSeqReader(uint32_t chunkSize, uint64_t numWordIds,
+ ThreeLevelCountWriteBuffers &wb)
+ : _decoders(chunkSize, numWordIds),
+ _buffers(_decoders.ssd, _decoders.spd, _decoders.pd, wb),
+ _ssr(_buffers._rcssd,
+ wb._ssHeaderLen, wb._ssFileBitSize,
+ wb._spHeaderLen, wb._spFileBitSize,
+ wb._pHeaderLen, wb._pFileBitSize),
+ _pr(_ssr, _decoders.spd, _decoders.pd)
+{
+ _ssr.setup(_decoders.ssd);
+ _pr.setup();
+}
+
+PageDict4MemSeqReader::~PageDict4MemSeqReader() = default;
+
+void
+PageDict4MemSeqReader::readCounts(vespalib::string &word,
+ uint64_t &wordNum,
+ PostingListCounts &counts)
+{
+ _pr.readCounts(word, wordNum, counts);
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_seq_reader.h b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_seq_reader.h
new file mode 100644
index 00000000000..c9709f63796
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_seq_reader.h
@@ -0,0 +1,34 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "pagedict4_decoders.h"
+#include "threelevelcountbuffers.h"
+#include <vespa/searchlib/bitcompression/pagedict4.h>
+
+namespace search::diskindex::test {
+
+/*
+ * Class for performing sequential reads in memory based pagedict4 structure
+ */
+class PageDict4MemSeqReader
+{
+public:
+ using PageDict4SSReader = search::bitcompression::PageDict4SSReader;
+ using PageDict4Reader = search::bitcompression::PageDict4Reader;
+ using PostingListCounts = search::index::PostingListCounts;
+
+ PageDict4Decoders _decoders;
+ ThreeLevelCountReadBuffers _buffers;
+ PageDict4SSReader _ssr;
+ PageDict4Reader _pr;
+
+ PageDict4MemSeqReader(uint32_t chunkSize, uint64_t numWordIds,
+ ThreeLevelCountWriteBuffers &wb);
+ ~PageDict4MemSeqReader();
+ void readCounts(vespalib::string &word,
+ uint64_t &wordNum,
+ PostingListCounts &counts);
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_writer.cpp b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_writer.cpp
new file mode 100644
index 00000000000..d82f2967a0b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_writer.cpp
@@ -0,0 +1,50 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "pagedict4_mem_writer.h"
+#include <vespa/searchlib/bitcompression/pagedict4.h>
+
+namespace search::diskindex::test {
+
+PageDict4MemWriter::PageDict4MemWriter(uint32_t chunkSize, uint64_t numWordIds, uint32_t ssPad, uint32_t spPad, uint32_t pPad)
+ : _encoders(chunkSize, numWordIds),
+ _buffers(_encoders.sse, _encoders.spe, _encoders.pe),
+ _ssw(NULL),
+ _spw(NULL),
+ _pw(NULL)
+{
+ _buffers.startPad(ssPad, spPad, pPad);
+ allocWriters();
+}
+
+PageDict4MemWriter::~PageDict4MemWriter()
+{
+ delete _ssw;
+ delete _spw;
+ delete _pw;
+}
+
+void
+PageDict4MemWriter::allocWriters()
+{
+ _ssw = new PageDict4SSWriter(_buffers._sse);
+ _spw = new PageDict4SPWriter(*_ssw, _buffers._spe);
+ _pw = new PageDict4PWriter(*_spw, _buffers._pe);
+ _spw->setup();
+ _pw->setup();
+}
+
+void
+PageDict4MemWriter::flush()
+{
+ _pw->flush();
+ _buffers.flush();
+}
+
+void
+PageDict4MemWriter::addCounts(const std::string &word,
+ const PostingListCounts &counts)
+{
+ _pw->addCounts(word, counts);
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_writer.h b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_writer.h
new file mode 100644
index 00000000000..ae36883f844
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/diskindex/pagedict4_mem_writer.h
@@ -0,0 +1,52 @@
+// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "pagedict4_encoders.h"
+#include "threelevelcountbuffers.h"
+
+namespace search::bitcompression {
+
+class PageDict4SSWriter;
+class PageDict4SPWriter;
+class PageDict4PWriter;
+
+}
+
+namespace search::index { class PostingListCounts; }
+
+namespace search::diskindex::test {
+
+/*
+ * Class for writing to memory based pagedict4 structure
+ */
+class PageDict4MemWriter
+{
+public:
+ using PageDict4SSWriter = search::bitcompression::PageDict4SSWriter;
+ using PageDict4SPWriter = search::bitcompression::PageDict4SPWriter;
+ using PageDict4PWriter = search::bitcompression::PageDict4PWriter;
+ using PostingListCounts = search::index::PostingListCounts;
+
+private:
+ PageDict4Encoders _encoders;
+public:
+ ThreeLevelCountWriteBuffers _buffers;
+private:
+ PageDict4SSWriter *_ssw;
+ PageDict4SPWriter *_spw;
+ PageDict4PWriter *_pw;
+
+ void allocWriters();
+public:
+ PageDict4MemWriter(uint32_t chunkSize, uint64_t numWordIds, uint32_t ssPad, uint32_t spPad, uint32_t pPad);
+ ~PageDict4MemWriter();
+ void flush();
+ void addCounts(const std::string &word, const PostingListCounts &counts);
+ void startPad(uint32_t ssHeaderLen, uint32_t spHeaderLen, uint32_t pHeaderLen)
+ {
+ _buffers.startPad(ssHeaderLen, spHeaderLen, pHeaderLen);
+ }
+};
+
+}