blob: 45ab32bc91748eb2c9f3483bbb3fa448ea2a103e (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
|
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
#include "bitvectorfile.h"
#include <vespa/searchlib/index/dictionaryfile.h>
#include <vespa/searchlib/index/postinglistfile.h>
#include <vespa/searchlib/bitcompression/posocccompression.h>
#include <vespa/searchlib/bitcompression/countcompression.h>
namespace search::index { class Schema; }
namespace search::diskindex {
/**
 * FieldWriter is used to write a dictionary and posting list file together.
 *
 * It is used by the fusion code to write the merged output for a field,
 * and by the memory index dump code to write a field to disk.
 *
 * Instances are neither copyable nor movable.
 */
class FieldWriter {
public:
    using DocIdAndFeatures = index::DocIdAndFeatures;
    using Schema = index::Schema;
    using PostingListParams = index::PostingListParams;

    // Copying and moving are both disabled.
    FieldWriter(const FieldWriter &rhs) = delete;
    FieldWriter(FieldWriter &&rhs) = delete;
    FieldWriter &operator=(const FieldWriter &rhs) = delete;
    FieldWriter &operator=(FieldWriter &&rhs) = delete;

    /**
     * @param docIdLimit  exclusive upper bound for document ids passed to add().
     * @param numWordIds  NOTE(review): presumably the number of word ids in the
     *                    source numbering; confirm against the implementation.
     * @param prefix      NOTE(review): presumably the file name prefix for the
     *                    files written; confirm against the implementation.
     */
    FieldWriter(uint32_t docIdLimit, uint64_t numWordIds, vespalib::stringref prefix);
    ~FieldWriter();

    /** Start a new word, with an explicit word number. */
    void newWord(uint64_t wordNum, vespalib::stringref word);

    /** Start a new word, without an explicit word number. */
    void newWord(vespalib::stringref word);

    /**
     * Append one document's features to the posting list being written.
     *
     * The document id must be below the doc id limit given at construction
     * and strictly greater than the previously added document id; both
     * conditions are checked with assert().
     *
     * NOTE(review): dereferences the posting list writer unconditionally, so
     * this presumably requires a successful open() first - confirm.
     */
    void add(const DocIdAndFeatures &features) {
        const uint32_t docId = features.doc_id();
        assert(docId < _docIdLimit);
        assert(docId > _prevDocId);
        _posoccfile->writeDocIdAndFeatures(features);
        // Record the document id in the bit vector candidate as well.
        _bvc.add(docId);
        _prevDocId = docId;
    }

    /** @return the current value of the word number member. */
    uint64_t getSparseWordNum() const { return _wordNum; }

    /**
     * Open the writer for output.
     *
     * NOTE(review): the meaning of each argument and of the return value is
     * defined by the out-of-line implementation, which is not visible here.
     */
    bool open(uint32_t minSkipDocs, uint32_t minChunkDocs,
              bool dynamicKPosOccFormat,
              bool encode_interleaved_features,
              const Schema &schema, uint32_t indexId,
              const index::FieldLengthInfo &field_length_info,
              const TuneFileSeqWrite &tuneFileWrite,
              const search::common::FileHeaderContext &fileHeaderContext);

    /** Close the writer. NOTE(review): presumably returns true on success - confirm. */
    bool close();

    /** Out-parameter accessor for feature parameters (filled into @p params). */
    void getFeatureParams(PostingListParams &params);

    /** NOTE(review): presumably removes the files written for @p prefix - confirm. */
    static void remove(const vespalib::string &prefix);

private:
    using DictionaryFileSeqWrite = index::DictionaryFileSeqWrite;
    using PostingListFileSeqWrite = index::PostingListFileSeqWrite;
    using PostingListCounts = index::PostingListCounts;

    // NOTE: member order is significant for constructor initialization order;
    // keep it in sync with the constructor's initializer list.
    std::unique_ptr<DictionaryFileSeqWrite>  _dictFile;
    std::unique_ptr<PostingListFileSeqWrite> _posoccfile;   // receives features in add()
    BitVectorCandidate                       _bvc;          // receives doc ids in add()
    BitVectorFileWrite                       _bmapfile;
    const vespalib::string                   _prefix;
    vespalib::string                         _word;
    const uint64_t                           _numWordIds;
    uint64_t                                 _compactWordNum;
    uint64_t                                 _wordNum;      // returned by getSparseWordNum()
    uint32_t                                 _prevDocId;    // last doc id passed to add()
    const uint32_t                           _docIdLimit;   // exclusive bound checked in add()

    void flush();

    /** Sentinel word number value (0). */
    static uint64_t noWordNum() { return 0u; }
};
}
|