aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/diskindex/fieldwriter.h
blob: 45ab32bc91748eb2c9f3483bbb3fa448ea2a103e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include "bitvectorfile.h"
#include <vespa/searchlib/index/dictionaryfile.h>
#include <vespa/searchlib/index/postinglistfile.h>
#include <vespa/searchlib/bitcompression/posocccompression.h>
#include <vespa/searchlib/bitcompression/countcompression.h>

// Forward declaration: this header only names Schema in a type alias and as a
// reference parameter, so the full definition is not needed here.
namespace search::index { class Schema; }

namespace search::diskindex {

/**
 * FieldWriter is used to write a dictionary and posting list file together.
 *
 * It is used by the fusion code to write the merged output for a field,
 * and by the memory index dump code to write a field to disk.
 *
 * Instances can be neither copied nor moved.
 */
class FieldWriter {
public:
    using DocIdAndFeatures = index::DocIdAndFeatures;
    using Schema = index::Schema;
    using PostingListParams = index::PostingListParams;

    // Copying and moving are disabled. The move operations use the canonical
    // non-const rvalue reference signatures.
    FieldWriter(const FieldWriter &) = delete;
    FieldWriter(FieldWriter &&) = delete;
    FieldWriter &operator=(const FieldWriter &) = delete;
    FieldWriter &operator=(FieldWriter &&) = delete;

    /**
     * @param docIdLimit  exclusive upper bound for the document ids passed
     *                    to add() (enforced there by an assert).
     * @param numWordIds  number of word ids; presumably the upper bound for
     *                    word numbers passed to newWord() - confirm in the
     *                    implementation file.
     * @param prefix      presumably the common prefix of the names of the
     *                    files being written - confirm in the implementation
     *                    file.
     */
    FieldWriter(uint32_t docIdLimit, uint64_t numWordIds, vespalib::stringref prefix);
    ~FieldWriter();

    /**
     * Start a new word identified by both word number and word text.
     * Defined out of line.
     */
    void newWord(uint64_t wordNum, vespalib::stringref word);

    /**
     * Start a new word identified by its text only.
     * Defined out of line.
     */
    void newWord(vespalib::stringref word);

    /**
     * Add the features for one document to the posting list being written.
     *
     * The document id must be below the doc id limit given at construction
     * and strictly greater than the previously recorded document id; both
     * preconditions are checked with assert() only.
     */
    void add(const DocIdAndFeatures &features) {
        // Read the document id once; it is needed for both checks and for
        // the bookkeeping below.
        const auto docId = features.doc_id();
        assert(docId < _docIdLimit);
        assert(docId > _prevDocId);
        _posoccfile->writeDocIdAndFeatures(features);
        _bvc.add(docId);
        _prevDocId = docId;
    }

    /** Returns the current value of the word number member (_wordNum). */
    uint64_t getSparseWordNum() const { return _wordNum; }

    /**
     * Open the writer with the given tuning, format and schema settings.
     * Defined out of line.
     *
     * @return presumably true on success - confirm in the implementation
     *         file.
     */
    bool open(uint32_t minSkipDocs, uint32_t minChunkDocs,
              bool dynamicKPosOccFormat,
              bool encode_interleaved_features,
              const Schema &schema, uint32_t indexId,
              const index::FieldLengthInfo &field_length_info,
              const TuneFileSeqWrite &tuneFileWrite,
              const search::common::FileHeaderContext &fileHeaderContext);

    /**
     * Close the writer. Defined out of line.
     *
     * @return presumably true on success - confirm in the implementation
     *         file.
     */
    bool close();

    /** Takes a parameter object by non-const reference. Defined out of line. */
    void getFeatureParams(PostingListParams &params);

    /**
     * Static helper taking a prefix; presumably removes the files that a
     * writer constructed with the same prefix would create - confirm in the
     * implementation file.
     */
    static void remove(const vespalib::string &prefix);
private:
    using DictionaryFileSeqWrite = index::DictionaryFileSeqWrite;
    using PostingListFileSeqWrite = index::PostingListFileSeqWrite;
    using PostingListCounts = index::PostingListCounts;

    // NOTE: keep the declaration order of the data members below; member
    // initialization follows declaration order.
    std::unique_ptr<DictionaryFileSeqWrite>  _dictFile;    // dictionary file writer
    std::unique_ptr<PostingListFileSeqWrite> _posoccfile;  // posting list file writer used by add()
    BitVectorCandidate      _bvc;             // receives every document id passed to add()
    BitVectorFileWrite      _bmapfile;
    const vespalib::string  _prefix;
    vespalib::string        _word;
    const uint64_t          _numWordIds;
    uint64_t                _compactWordNum;
    uint64_t                _wordNum;         // value reported by getSparseWordNum()
    uint32_t                _prevDocId;       // last document id accepted by add()
    const uint32_t          _docIdLimit;      // exclusive upper bound asserted in add()

    void flush();

    /** Word number value 0; going by the name, it denotes "no word". */
    static uint64_t noWordNum() { return 0u; }
};

}