aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/bitcompression/pagedict4.h
blob: ba53b41536827075f97ae377be1e0db58a64d040 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include "countcompression.h"
#include <vespa/vespalib/stllike/string.h>
#include <cassert>

namespace search::bitcompression {

/*
 * Position pair used by the paged dictionary: a file offset together
 * with an accumulated document count.
 *
 * The comparison operators decide on _fileOffset alone; the asserts
 * verify that _accNumDocs orders the same way as _fileOffset.
 */
class PageDict4StartOffset
{
public:
    uint64_t _fileOffset;
    uint64_t _accNumDocs;

    // Start at the origin: both components zero.
    PageDict4StartOffset()
        : PageDict4StartOffset(0u, 0u)
    {
    }

    // Start at an explicit file offset / accumulated document count.
    PageDict4StartOffset(uint64_t fileOffset, uint64_t accNumDocs)
        : _fileOffset(fileOffset),
          _accNumDocs(accNumDocs)
    {
    }

    // True when this position is at or after rhs.
    bool
    operator>=(const PageDict4StartOffset &rhs) const
    {
        if (_fileOffset < rhs._fileOffset) {
            assert(_accNumDocs < rhs._accNumDocs);
            return false;
        }
        assert(_accNumDocs >= rhs._accNumDocs);
        return true;
    }

    // True when this position is strictly after rhs.
    bool
    operator>(const PageDict4StartOffset &rhs) const
    {
        if (_fileOffset <= rhs._fileOffset) {
            assert(_accNumDocs <= rhs._accNumDocs);
            return false;
        }
        assert(_accNumDocs > rhs._accNumDocs);
        return true;
    }

    // True when both positions are the same.
    bool
    operator==(const PageDict4StartOffset &rhs) const
    {
        if (_fileOffset != rhs._fileOffset) {
            assert(_accNumDocs != rhs._accNumDocs);
            // The document count must differ in the same direction
            // as the file offset.
            if (_fileOffset < rhs._fileOffset) {
                assert(_accNumDocs < rhs._accNumDocs);
            } else {
                assert(_accNumDocs > rhs._accNumDocs);
            }
            return false;
        }
        assert(_accNumDocs == rhs._accNumDocs);
        return true;
    }

    // Advance past one posting list: add its bit length to the file
    // offset and its document count to the accumulated count.
    void
    adjust(const index::PostingListCounts &counts)
    {
        _accNumDocs += counts._numDocs;
        _fileOffset += counts._bitLength;
    }
};

// Stream insertion operator for posting list counts.
// Only declared in this header; the definition is not part of it.
std::ostream &
operator<<(std::ostream &stream, const index::PostingListCounts &counts);

/*
 * Shared parameters for the paged dictionary classes: page size, page
 * header size, skip strides, and helpers that derive the number of
 * skip entries from the number of lower level entries.
 */
class PageDict4PageParams
{
public:
    using Counts = index::PostingListCounts;
    using StartOffset = PageDict4StartOffset;

    // Page size in bytes / bits.
    static uint32_t getPageByteSize()      { return 4096; }
    static uint32_t getPageBitSize()       { return getPageByteSize() * 8; }
    // Page header: three 15-bit fields followed by one 12-bit field.
    static uint32_t getPageHeaderBitSize() { return 15u + 15u + 15u + 12u; }
    static uint32_t getMaxFileHeaderPad()  { return 999u; }
    // Defined outside this header.
    static uint32_t getFileHeaderPad(uint32_t offset);
    // Skip strides for the different skip levels.
    static uint32_t getL1SkipStride()      { return 16; }
    static uint32_t getL2SkipStride()      { return 8; }
    static uint32_t getL4SkipStride()      { return 16; }
    static uint32_t getL5SkipStride()      { return 8; }
    static uint32_t getL7SkipStride()      { return 8; }
    // Largest uint32_t value; presumably the "no L7 entry" marker
    // (NOTE(review): confirm against the users of _l7Ref).
    static uint32_t noL7Ref() { return std::numeric_limits<uint32_t>::max(); }

    // Number of L1 entries for a page with countsEntries count entries.
    // Zero entries yield zero; without the guard the unsigned
    // subtraction would wrap and report a huge entry count.
    static uint32_t getL1Entries(uint32_t countsEntries) {
        return (countsEntries == 0u) ? 0u : (countsEntries - 1) / getL1SkipStride();
    }
    // Number of L2 entries for a page with l1Entries L1 entries.
    static uint32_t getL2Entries(uint32_t l1Entries) { return l1Entries / getL2SkipStride(); }
    // Number of L4 entries for a sparse page with l3Entries L3 entries.
    // Guarded against unsigned wrap-around in the same way as getL1Entries().
    static uint32_t getL4Entries(uint32_t l3Entries) {
        return (l3Entries == 0u) ? 0u : (l3Entries - 1) / getL4SkipStride();
    }
    // Number of L5 entries for a sparse page with l4Entries L4 entries.
    static uint32_t getL5Entries(uint32_t l4Entries) { return l4Entries / getL5SkipStride(); }
};
/*
 * Sparse sparse layout for random access word counts:
 *
 * selector bit
 * 0 => L6 entry, with word, data file deltas
 * 1 => overflow entry, with word, data file deltas, sparse counts
 *
 * Read from file to memory (compressed mix of L6 entries and overflow entries)
 *
 * Uncompressed L7 array in memory, usable for binary search.
 *
 * File header should contain number of entries
 */

/*
 * Writer for the sparse sparse file.
 *
 * Accepts L6 skip entries and overflow counts entries. All member
 * functions are defined outside this header.
 */
class PageDict4SSWriter : public PageDict4PageParams
{
    using EC = PostingListCountFileEncodeContext;
    using SSEC = EC;

private:
    EC &_eL6;           // L6 stream
    vespalib::string _l6Word;   // L6 word
    StartOffset _l6StartOffset; // file offsets + accnum
    uint64_t _l6PageNum;    // Page number for last L6 entry
    uint32_t _l6SparsePageNum;  // Sparse page number for last L6 entry
    uint64_t _l6WordNum;    // presumably word number for last L6 entry (TODO confirm)

public:
    // NOTE(review): sse is presumably the encode context that _eL6
    // refers to; confirm in the constructor definition.
    PageDict4SSWriter(SSEC &sse);

    ~PageDict4SSWriter();

    /*
     * Add L6 skip entry.
     *
     * startOffset represents file position / accNumDocs after word.
     */
    void
    addL6Skip(vespalib::stringref word,
              const StartOffset &startOffset,
              uint64_t wordNum,
              uint64_t pageNum,
              uint32_t sparsePageNum);

    /*
     * Add overflow counts entry.
     *
     * startOffset represents file position / accNumDocs at start of entry.
     */
    void
    addOverflowCounts(vespalib::stringref word,
                      const Counts &counts,
                      const StartOffset &startOffset,
                      uint64_t wordNum);

    // Defined outside this header.
    void flush();
};


/*
 * Sparse page layout for random access word counts:
 *
 * 15 bits L5 size
 * 15 bits L4 size
 * 15 bits number of L3 entries in page
 *     this can be used to derive number of L4 and L5 entries, using
 *     skip stride info.
 * 12 bits word string size
 * L5 data (word ref delta, offset to L4 and L3 data, data file delta)
 * L4 data (word ref delta, offset to L3 data, data file delta)
 * L3 data (word ref delta, offset to full page file is implicit, data file delta)
 * padding
 * word strings (LCP + suffix + NUL)
 *
 * File header should be defined
 */

/*
 * Writer for sparse pages.
 *
 * Keeps separate streams and buffers for L3, L4 and L5 data plus a
 * word buffer for the current page. Overflow counts entries are passed
 * on to the sparse sparse writer given at construction.
 */
class PageDict4SPWriter : public PageDict4PageParams
{
    using EC = PostingListCountFileEncodeContext;
    using SSWriter = PageDict4SSWriter;

private:
    EC _eL3;            // L3 stream
    ComprFileWriteContext _wcL3;// L3 buffer
    EC _eL4;            // L4 stream
    ComprFileWriteContext _wcL4;// L4 buffer
    EC _eL5;            // L5 stream
    ComprFileWriteContext _wcL5;// L5 buffer
    vespalib::string _l3Word;   // last L3 word written
    vespalib::string _l4Word;   // last L4 word written
    vespalib::string _l5Word;   // last L5 word written
    vespalib::string _l6Word;   // word before this sparse page
    uint32_t _l3WordOffset; // Offset for next L3 word to write
    uint32_t _l4WordOffset; // Offset for last L4 word written
    uint32_t _l5WordOffset; // Offset for last L5 word written

    // Offsets in data files for last L3 entry
    StartOffset _l3StartOffset;

    // Offsets in data files for last L4 entry
    StartOffset _l4StartOffset;

    // Offsets in data files for last L5 entry
    StartOffset _l5StartOffset;

    // Offsets in data files for last L6 entry
    StartOffset _l6StartOffset;

    uint64_t _l3WordNum;    // word number last L3 entry
    uint64_t _l4WordNum;    // word number last L4 entry
    uint64_t _l5WordNum;    // word number last L5 entry
    uint64_t _l6WordNum;    // word number last L6 entry

    uint32_t _curL3OffsetL4;    // Offset in L3 for last L4 entry
    uint32_t _curL3OffsetL5;    // Offset in L3 for last L5 entry
    uint32_t _curL4OffsetL5;    // Offset in L4 for last L5 entry

    uint32_t _headerSize;   // Size of page header

    uint32_t _l3Entries;    // Number of L3 entries on page
    uint32_t _l4StrideCheck;    // L3 entries since last L4 entry
    uint32_t _l5StrideCheck;    // L4 entries since last L5 entry

    uint32_t _l3Size;       // Size of L3 entries
    uint32_t _l4Size;       // Size of L4 entries
    uint32_t _l5Size;       // Size of L5 entries
    uint32_t _prevL3Size;   // Previous size of L3 entries
    uint32_t _prevL4Size;   // Previous size of L4 entries
    uint32_t _prevL5Size;   // Previous size of L5 entries
    uint32_t _prevWordsSize;    // previous size of words
    uint32_t _sparsePageNum;    // value reported by getSparsePageNum()
    uint32_t _l3PageNum;    // Page number for last L3 entry
    std::vector<char> _words;   // Word buffer

    // Sparse sparse entries and counts that don't fit in a page
    SSWriter &_ssWriter;
    // Encode context where paged sparse counts go
    EC &_spe;

public:
    PageDict4SPWriter(SSWriter &sparseSparsewriter, EC &spe);

    ~PageDict4SPWriter();
    // The functions below are defined outside this header.
    void setup();
    void flushPage();
    void flush();
    void resetPage();
    void addL3Skip(vespalib::stringref word, const StartOffset &startOffset, uint64_t wordNum, uint64_t pageNum);
    void addL4Skip(size_t &lcp);
    void addL5Skip(size_t &lcp);

    // True when the current page holds no L3 entries.
    bool empty() const { return _l3Entries == 0; }
    uint32_t getSparsePageNum() const { return _sparsePageNum; }

    /*
     * Add overflow counts entry.
     *
     * startOffset represents file position / accNumDocs at start of entry.
     *
     * Forwards all arguments unchanged to the sparse sparse writer.
     */
    void addOverflowCounts(vespalib::stringref word, const Counts &counts, const StartOffset &startOffset, uint64_t wordNum)
    {
        _ssWriter.addOverflowCounts(word, counts, startOffset, wordNum);
    }
};

/*
 * Page layout for random access word counts:
 *
 * 15 bits L2 size
 * 15 bits L1 size
 * 15 bits number of words in page
 *     this can be used to derive number of L1 and L2 entries, using
 *     skip stride info.
 * 12 bits word string size
 * L2 data (word ref delta, offset to L1 and counts data, data file delta)
 * L1 data (word ref delta, offset to counts, data file delta)
 * counts (sparse count)
 * padding
 * word strings (LCP + suffix + NUL)
 *
 * Alternate layout for overflow page:
 *
 * 15 bits L2 size hardcoded to 0
 * 15 bits L1 size hardcoded to 0
 * 15 bits number of words in page, hardcoded to 0
 * 12 bits word string size, hardcoded to 0
 * More info in sparse sparse file.
 *
 * File header should be defined
 */

/*
 * Writer for full pages of word counts.
 *
 * Keeps separate streams and buffers for counts, L1 and L2 data plus a
 * word buffer for the current page. Holds a reference to the sparse
 * page writer given at construction.
 */
class PageDict4PWriter : public PageDict4PageParams
{
public:
    using SPWriter = PageDict4SPWriter;
    using EC = PostingListCountFileEncodeContext;

private:
    EC _eCounts;    // counts stream (sparse counts)
    ComprFileWriteContext _wcCounts;// counts buffer
    EC _eL1;            // L1 stream
    ComprFileWriteContext _wcL1;// L1 buffer
    EC _eL2;            // L2 stream
    ComprFileWriteContext _wcL2;// L2 buffer
    vespalib::string _countsWord;   // last counts on page
    vespalib::string _l1Word;   // Last L1 word written
    vespalib::string _l2Word;   // Last L2 word written
    vespalib::string _l3Word;   // word before this page
    vespalib::string _pendingCountsWord; // pending counts word (counts written)
    uint32_t _countsWordOffset; // Offset for next counts word to write
    uint32_t _l1WordOffset; // Offset of last L1 word written
    uint32_t _l2WordOffset; // Offset of last L2 word written

    // file offsets
    StartOffset _countsStartOffset;

    // Offsets in data files for last L1 entry
    StartOffset _l1StartOffset;

    // Offsets in data files for last L2 entry
    StartOffset _l2StartOffset;

    // Offsets in data files for last L3 entry
    StartOffset _l3StartOffset;

    uint32_t _curCountOffsetL1; // Offset in eCounts for last L1 entry
    uint32_t _curCountOffsetL2; // Offset in eCounts for last L2 entry
    uint32_t _curL1OffsetL2;    // Offset in eL1 for last L2 entry

    uint32_t _headerSize;   // Size of page header

    uint32_t _countsEntries;    // Number of count entries on page
    uint32_t _l1StrideCheck;    // Count entries since last L1 entry
    uint32_t _l2StrideCheck;    // L1 entries since last L2 entry

    uint32_t _countsSize;   // Size of counts
    uint32_t _l1Size;       // Size of L1 entries
    uint32_t _l2Size;       // Size of L2 entries
    uint32_t _prevL1Size;   // Previous size of L1 entries
    uint32_t _prevL2Size;   // Previous size of L2 entries
    uint64_t _pageNum;      // Page number.
    uint64_t _l3WordNum;    // last L3 word num written
    uint64_t _wordNum;      // current word number
    std::vector<char> _words;   // Word buffer
    // Sparse page writer supplied to the constructor
    SPWriter &_spWriter;
    // Encode context where paged counts go
    EC &_pe;

    // Private overflow helper; defined outside this header.
    void
    addOverflowCounts(vespalib::stringref word,
                      const Counts &counts);

public:
    PageDict4PWriter(SPWriter &spWriter, EC &pe);
    ~PageDict4PWriter();

    // The functions below are defined outside this header.
    void setup();
    void flushPage();
    void flush();
    void resetPage();
    void addCounts(vespalib::stringref word, const Counts &counts);
    void addL1Skip(size_t &lcp);
    void addL2Skip(size_t &lcp);
    // True when the current page holds no count entries.
    bool empty() const { return _countsEntries == 0;}
    uint64_t getPageNum() const { return _pageNum; }
    // One less than the current word number; presumably the number of
    // the most recently added word (TODO confirm against addCounts()).
    uint64_t getWordNum() const { return _wordNum - 1; }
};


/*
 * Result record returned by PageDict4SSReader::lookup() and
 * PageDict4SSReader::lookupOverflow().
 */
class PageDict4SSLookupRes
{
public:
    using Counts = index::PostingListCounts;
    using StartOffset = PageDict4StartOffset;

    vespalib::string _l6Word;   // last L6 word before key
    vespalib::string _lastWord; // L6 or overflow word >= key
    StartOffset      _l6StartOffset;    // File offsets
    Counts      _counts;    // Counts valid if overflow
    uint64_t _pageNum;
    uint64_t _sparsePageNum;
    uint64_t _l6WordNum;            // wordnum if overflow
    StartOffset      _startOffset;      // valid if overflow
    bool _res;          // NOTE(review): presumably true when the lookup matched; confirm
    bool _overflow;     // NOTE(review): presumably true when the result is an overflow entry; confirm

    PageDict4SSLookupRes();
    ~PageDict4SSLookupRes();
};

/* Reader for sparse sparse file.
 *
 * Read from file to memory (compressed mix of L6 entries and overflow entries)
 *
 * Uncompressed L7 array in memory, usable for binary search.
 */

/*
 * In-memory reader for the sparse sparse file.
 *
 * Holds the compressed L6/overflow stream (_cb), the uncompressed L7
 * table (_l7) and a table of overflow references (_overflows), along
 * with size and start offset information for the sparse page file and
 * the page file.
 */
class PageDict4SSReader : public PageDict4PageParams
{
    using EC = PostingListCountFileEncodeContext;
    using DC = PostingListCountFileDecodeContext;
public:
    // One entry in the uncompressed L7 table.
    class L7Entry
    {
    public:
        vespalib::string _l7Word;
        StartOffset      _l7StartOffset;    // Offsets in data files
        uint64_t _l7WordNum;
        uint64_t _l6Offset; // Offset in L6+overflow stream
        uint32_t _sparsePageNum;// page number for sparse file
        uint64_t _pageNum;  // page number in full file
        uint32_t _l7Ref;    // L7 entry before overflow, or self-ref if L6

        // Empty word, zeroed offsets and numbers.
        L7Entry()
            : _l7Word(),
              _l7StartOffset(),
              _l7WordNum(0),
              _l6Offset(0),
              _sparsePageNum(0),
              _pageNum(0),
              _l7Ref(0)
        {
        }

        // Member-wise construction from the given values.
        L7Entry(vespalib::stringref l7Word,
                const StartOffset         &l7StartOffset,
                uint64_t l7WordNum,
                uint64_t l6Offset,
                uint32_t sparsePageNum,
                uint64_t pageNum,
                uint32_t l7Ref)
            : _l7Word(l7Word),
              _l7StartOffset(l7StartOffset),
              _l7WordNum(l7WordNum),
              _l6Offset(l6Offset),
              _sparsePageNum(sparsePageNum),
              _pageNum(pageNum),
              _l7Ref(l7Ref)
        {
        }

        // Orders an entry against a word by comparing _l7Word.
        bool operator<(vespalib::stringref word) const {
            return _l7Word < word;
        }
    };

    // Maps a word number to an overflow entry in the L7 table.
    class OverflowRef
    {
    public:
        uint64_t _wordNum;
        uint32_t _l7Ref;    // overflow entry in L7 table

        OverflowRef()
            : _wordNum(0),
              _l7Ref(0)
        {
        }

        OverflowRef(uint64_t wordNum, uint32_t l7Ref)
            : _wordNum(wordNum),
              _l7Ref(l7Ref)
        {
        }

        // Orders a reference against a word number by comparing _wordNum.
        bool operator<(uint64_t wordNum) const {
            return _wordNum < wordNum;
        }
    };

    ComprBuffer _cb;
    uint64_t _ssFileBitLen; // File size in bits
    uint32_t _ssStartOffset;    // Header size in bits

    using L7Vector = std::vector<L7Entry>;
    L7Vector _l7;// Uncompressed skip list for sparse sparse file

    DC _ssd;    // used to store compression parameters
    // Sizes and start offsets for the sparse page file (sp) and page
    // file (p). NOTE(review): units are presumably bits, matching the
    // *BitLen constructor parameters; confirm in the .cpp.
    uint64_t _spFileBitLen;
    uint64_t _pFileBitLen;
    uint32_t _spStartOffset;
    uint32_t _pStartOffset;
    uint32_t _spFirstPageNum;
    uint32_t _spFirstPageOffset;
    uint32_t _pFirstPageNum;
    uint32_t _pFirstPageOffset;

    using OverflowVector = std::vector<OverflowRef>;
    OverflowVector _overflows;

    PageDict4SSReader(ComprBuffer &cb,
                      uint32_t ssFileHeaderSize,
                      uint64_t ssFileBitLen,
                      uint32_t spFileHeaderSize,
                      uint64_t spFileBitLen,
                      uint32_t pFileHeaderSize,
                      uint64_t pFileBitLen);
    ~PageDict4SSReader();

    // The functions below are defined outside this header.
    void setup(DC &ssd);
    // Look up by word.
    PageDict4SSLookupRes lookup(vespalib::stringref key);
    // Look up an overflow entry by word number.
    PageDict4SSLookupRes lookupOverflow(uint64_t wordNum) const;
    // Read-only access to the stored decode context.
    const DC & getSSD() const { return _ssd; }
};


/*
 * Result of a lookup in a sparse page. The lookup() member function
 * takes the sparse page and the surrounding L6 information as input;
 * it is defined outside this header.
 */
class PageDict4SPLookupRes : public PageDict4PageParams
{
    using EC = PostingListCountFileEncodeContext;
    using DC = PostingListCountFileDecodeContext;
    using SSReader = PageDict4SSReader;

public:
    vespalib::string _l3Word;
    vespalib::string _lastWord; // L3 word >= key
    StartOffset      _l3StartOffset;
    uint64_t _pageNum;
    uint64_t _l3WordNum;

public:
    PageDict4SPLookupRes();

    ~PageDict4SPLookupRes();

    // Look up key in the sparse page pointed to by sparsePage.
    // NOTE(review): which members are filled in, and how, is not
    // visible in this header; see the definition.
    void
    lookup(const SSReader &ssReader,
           const void *sparsePage,
           vespalib::stringref key,
           vespalib::stringref l6Word,
           vespalib::stringref lastSPWord,
           const StartOffset         &l6StartOffset,
           uint64_t l6WordNum,
           uint64_t lowestPageNum);
};


/*
 * Result of a lookup in a full page. The lookup() member function
 * takes the page and the surrounding L3 information as input; it is
 * defined outside this header.
 */
class PageDict4PLookupRes : public PageDict4PageParams
{
public:
    using EC = PostingListCountFileEncodeContext;
    using DC = PostingListCountFileDecodeContext;
    using SSReader = PageDict4SSReader;

public:
    Counts _counts;
    StartOffset _startOffset;
    uint64_t _wordNum;
    bool _res;      // NOTE(review): presumably true when the key was found; confirm
    // NOTE(review): raw pointer; ownership is not visible in this
    // header. Presumably non-owning; confirm before changing.
    vespalib::string *_nextWord;

public:
    PageDict4PLookupRes();
    ~PageDict4PLookupRes();

    // Look up key in the page pointed to by page.
    // NOTE(review): the meaning of the bool return value is not visible
    // in this header; see the definition.
    bool
    lookup(const SSReader &ssReader,
           const void *page,
           vespalib::stringref key,
           vespalib::stringref l3Word,
           vespalib::stringref lastPWord,
           const StartOffset &l3StartOffset,
           uint64_t l3WordNum);
};


/*
 * Reader that works on top of a PageDict4SSReader and two decode
 * contexts supplied at construction.
 *
 * Keeps decoded counts and word buffers with cursors into them, plus
 * vectors of skip check records for the L1 to L5 levels. All member
 * functions except the nested helper types are defined outside this
 * header.
 */
class PageDict4Reader : public PageDict4PageParams
{
public:
    using DC = PostingListCountFileDecodeContext;
    using EC = PostingListCountFileEncodeContext;
    using SSReader = PageDict4SSReader;

    DC &_pd;        // NOTE(review): presumably the page file decode context (ctor parameter pd); confirm
    uint32_t _countsResidue;
    const SSReader &_ssReader;
    uint64_t _pFileBitLen;
    StartOffset _startOffset;
    bool _overflowPage;
    using PCV = std::vector<Counts>;
    // Skip check record for an L1 entry; offsets start at zero.
    struct L1SkipCheck
    {
        uint32_t    wordOffset;
        StartOffset startOffset;
        uint32_t    countOffset;
        L1SkipCheck(const StartOffset &startOffset_)
            : wordOffset(0),
              startOffset(startOffset_),
              countOffset(0)
        {
        }
    };
    // Skip check record for an L2 entry; adds an offset into L1 data.
    struct L2SkipCheck : public L1SkipCheck
    {
        uint32_t    l1Offset;
        L2SkipCheck(const StartOffset &startOffset_)
            : L1SkipCheck(startOffset_),
              l1Offset(0)
        {
        }

        // True when start offset and count offset agree with rhs.
        // wordOffset and l1Offset are not compared.
        bool check(const L1SkipCheck &rhs) const {
            return startOffset == rhs.startOffset &&
                countOffset == rhs.countOffset;
        }
    };
    // Skip check record for an L3 entry.
    struct L3SkipCheck
    {
        StartOffset startOffset;
        uint64_t    wordNum;
        L3SkipCheck(const StartOffset &startOffset_, uint64_t wordNum_)
            : startOffset(startOffset_),
              wordNum(wordNum_)
        {
        }
    };
    // Skip check record for an L4 entry; adds word and L3 offsets.
    struct L4SkipCheck : public L3SkipCheck
    {
        uint32_t    wordOffset;
        uint32_t    l3Offset;
        L4SkipCheck(const StartOffset &startOffset_, uint64_t wordNum_)
            : L3SkipCheck(startOffset_, wordNum_),
              wordOffset(0),
              l3Offset(0)
        {
        }
        // True when start offset and word number agree with rhs.
        bool check(const L3SkipCheck &rhs) const {
            return startOffset == rhs.startOffset &&
                wordNum == rhs.wordNum;
        }
    };
    // Skip check record for an L5 entry; adds an offset into L4 data.
    struct L5SkipCheck : public L4SkipCheck
    {
        uint32_t    l4Offset;
        L5SkipCheck(const StartOffset &startOffset_, uint64_t wordNum_)
            : L4SkipCheck(startOffset_, wordNum_),
              l4Offset(0)
        {
        }
        // True when start offset, word number and L3 offset agree with
        // rhs. wordOffset and l4Offset are not compared.
        bool check(const L4SkipCheck &rhs) const {
            return startOffset == rhs.startOffset &&
                wordNum == rhs.wordNum &&
                l3Offset == rhs.l3Offset;
        }
    };
    /*
     * Vector of check records with a read cursor.
     *
     * Fill with push_back(), then call setup() to place the cursor at
     * the first element. clear() and push_back() do not update the
     * cursor, so setup() must be called again before reading.
     */
    template <typename Element>
    class CheckVector
    {
        using Vector = std::vector<Element>;
        Vector _vector;
        typename Vector::const_iterator _cur;
        typename Vector::const_iterator _end;
    public:
        CheckVector()
            : _vector(),
              _cur(),
              _end()
        {}
        void clear() { _vector.clear(); }
        // Position the cursor at the first element.
        void setup() {
            _cur = _vector.cbegin();
            _end = _vector.cend();
        }
        // True while the cursor has not reached the end.
        bool valid() const { return _cur != _end; }
        // Access the element at the cursor.
        const Element *operator->() const { return _cur.operator->(); }
        // Advance the cursor by one element.
        void step() { ++_cur; }
        void push_back(const Element &element) {
            _vector.push_back(element);
        }
    };
    // Defined outside this header.
    template <typename Entry1, typename Entry2>
    void
    checkWordOffsets(const std::vector<char> &words,
                     CheckVector<Entry1> &skip1,
                     CheckVector<Entry2> &skip2);
    // Counts with current / end cursors.
    PCV _counts;
    PCV::const_iterator _cc;
    PCV::const_iterator _ce;
    using WV = std::vector<char>;
    // Word buffer with current / end cursors.
    WV _words;
    WV::const_iterator _wc;
    WV::const_iterator _we;
    vespalib::string _lastWord;
    vespalib::string _lastSSWord;

    DC &_spd;       // NOTE(review): presumably the sparse page file decode context (ctor parameter spd); confirm
    uint32_t _l3Residue;
    // Sparse page word buffer with current / end cursors.
    WV _spwords;
    WV::const_iterator _spwc;
    WV::const_iterator _spwe;

    DC _ssd;
    uint64_t _wordNum;
    CheckVector<L1SkipCheck> _l1SkipChecks;
    CheckVector<L2SkipCheck> _l2SkipChecks;
    CheckVector<L3SkipCheck> _l3SkipChecks;
    CheckVector<L4SkipCheck> _l4SkipChecks;
    CheckVector<L5SkipCheck> _l5SkipChecks;

    PageDict4Reader(const SSReader &ssReader, DC &spd,DC &pd);
    ~PageDict4Reader();

    // The functions below are defined outside this header.
    void setup();
    void setupPage();
    void setupSPage();
    void decodePWord(vespalib::string &word);
    void decodeSPWord(vespalib::string &word);
    void decodeSSWord(vespalib::string &word);
    void readCounts(vespalib::string &word, uint64_t &wordNum, Counts &counts);
    void readOverflowCounts(vespalib::string &word, Counts &counts);
};

}