1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
#include "bitvectoridxfile.h"
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/searchlib/common/tunefileinfo.h>
#include <vespa/vespalib/stllike/string.h>
#include <vespa/vespalib/stllike/allocator.h>
#include <vector>
class Fast_BufferedFile;
namespace search::diskindex {
/*
 * Writer built on top of BitVectorIdxFileWrite that additionally owns a
 * buffered file (_datFile) together with a header length for it
 * (_datHeaderLen).
 *
 * NOTE(review): the names suggest that the parent class maintains the index
 * file while this class appends the bit vector payload to a separate "dat"
 * file -- confirm against the implementation file.
 *
 * Instances can be neither copied nor moved.
 */
class BitVectorFileWrite : public BitVectorIdxFileWrite
{
private:
using Parent = BitVectorIdxFileWrite;
// Buffered file handle owned by this writer.
std::unique_ptr<Fast_BufferedFile> _datFile;
// Presumably the size of the header at the start of _datFile; verify unit
// (bytes vs. bits) against makeDatHeader()/updateDatHeader().
uint32_t _datHeaderLen;
public:
// Copying and moving are disabled (all four operations are deleted).
BitVectorFileWrite(const BitVectorFileWrite &) = delete;
BitVectorFileWrite(const BitVectorFileWrite &&) = delete;
BitVectorFileWrite& operator=(const BitVectorFileWrite &) = delete;
BitVectorFileWrite& operator=(const BitVectorFileWrite &&) = delete;
// Construct a writer for the given key scope.
BitVectorFileWrite(BitVectorKeyScope scope);
~BitVectorFileWrite() override;
// Overrides the parent's open().
//   name              - file name (presumably a base name to which the
//                       file suffixes are appended; TODO confirm)
//   docIdLimit        - document id limit for the bit vectors written
//   tuneFileWrite     - tuning parameters for sequential writing
//   fileHeaderContext - context used when producing file headers
void open(const vespalib::string &name, uint32_t docIdLimit,
const TuneFileSeqWrite &tuneFileWrite,
const common::FileHeaderContext &fileHeaderContext) override;
// Add the bit vector belonging to a single word number.
void addWordSingle(uint64_t wordNum, const BitVector &bitVector);
// Overrides of the parent's flush(), sync() and close().
void flush() override;
void sync() override;
void close() override;
// Header maintenance for _datFile.
// NOTE(review): presumably makeDatHeader() writes the initial header and
// updateDatHeader() patches it with the final file size in bits -- confirm.
void makeDatHeader(const common::FileHeaderContext &fileHeaderContext);
void updateDatHeader(uint64_t fileBitSize);
};
/*
* Buffer document ids for a candidate bitvector.
*/
class BitVectorCandidate
{
private:
std::vector<uint32_t, vespalib::allocator_large<uint32_t>> _array;
BitVector::UP _bv;
uint64_t _numDocs;
const uint32_t _bitVectorLimit;
public:
BitVectorCandidate(uint32_t docIdLimit, uint32_t bitVectorLimit)
: _array(),
_bv(BitVector::create(docIdLimit)),
_numDocs(0u),
_bitVectorLimit(bitVectorLimit)
{
_array.reserve(_bitVectorLimit);
}
BitVectorCandidate(uint32_t docIdLimit)
: BitVectorCandidate(docIdLimit, BitVectorFileWrite::getBitVectorLimit(docIdLimit))
{ }
~BitVectorCandidate();
void clear() {
if (__builtin_expect(_numDocs > _bitVectorLimit, false)) {
_bv->clear();
}
_numDocs = 0;
_array.clear();
}
void flush(BitVector &obv) {
if (__builtin_expect(_numDocs > _bitVectorLimit, false)) {
obv.orWith(*_bv);
} else {
for (uint32_t i : _array) {
obv.setBit(i);
}
}
clear();
}
void add(uint32_t docId) {
if (_numDocs < _bitVectorLimit) {
_array.push_back(docId);
} else {
if (__builtin_expect(_numDocs == _bitVectorLimit, false)) {
for (uint32_t i : _array) {
_bv->setBit(i);
}
_array.clear();
}
_bv->setBit(docId);
}
++_numDocs;
}
/*
* Get number of documents buffered. This might include duplicates.
*/
uint64_t getNumDocs() const { return _numDocs; }
bool empty() const { return _numDocs == 0; }
/*
* Return true if array limit has been exceeded and bitvector has been
* populated.
*/
bool getCrossedBitVectorLimit() const {
return _numDocs > _bitVectorLimit;
}
BitVector &getBitVector() { return *_bv; }
};
}
|