aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/attribute/postingstore.h
blob: 033deb4e2805fb1a6b6df1a8e846f0f9234fb5fa (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include "enum_store_dictionary.h"
#include "postinglisttraits.h"
#include "posting_store_compaction_spec.h"
#include <set>

namespace search {
    class BitVector;
    class GrowableBitVector;
}

namespace search::fef       { class TermFieldMatchData; }
namespace search::queryeval { class SearchIterator; }

namespace search::attribute {

class Status;
class Config;

class BitVectorEntry
{
public:
    vespalib::datastore::EntryRef _tree; // Daisy chained reference to tree based posting list
    std::shared_ptr<GrowableBitVector> _bv; // bitvector

public:
    BitVectorEntry() noexcept
        : _tree(),
          _bv()
    { }
};


class PostingStoreBase2
{
protected:
    static constexpr uint32_t BUFFERTYPE_BITVECTOR = 9u;
    uint32_t _bvSize;
    uint32_t _bvCapacity;
    uint32_t _minBvDocFreq; // Less than this ==> destroy bv
    uint32_t _maxBvDocFreq; // Greater than or equal to this ==> create bv
    std::set<uint32_t>         _bvs; // Current bitvectors
    IEnumStoreDictionary&      _dictionary;
    Status                    &_status;
    uint64_t                   _bvExtraBytes;
    PostingStoreCompactionSpec _compaction_spec;
private:
    bool                       _isFilter;

public:
    bool isFilter() const noexcept { return _isFilter; }
    PostingStoreBase2(IEnumStoreDictionary& dictionary, Status &status, const Config &config);
    virtual ~PostingStoreBase2();
    bool resizeBitVectors(uint32_t newSize, uint32_t newCapacity);
    virtual bool removeSparseBitVectors() = 0;
};

template <typename DataT>
class PostingStore : public PostingListTraits<DataT>::PostingStoreBase,
    public PostingStoreBase2
{
    vespalib::datastore::BufferType<BitVectorEntry> _bvType;
public:
    using DataType = DataT;
    using Parent = typename PostingListTraits<DataT>::PostingStoreBase;
    using AddIter = typename Parent::AddIter;
    using RemoveIter = typename Parent::RemoveIter;
    using RefType = typename Parent::RefType;
    using BTreeType = typename Parent::BTreeType;
    using Iterator = typename Parent::Iterator;
    using ConstIterator = typename Parent::ConstIterator;
    using KeyDataType = typename Parent::KeyDataType;
    using AggregatedType = typename Parent::AggregatedType;
    using BTreeTypeRefPair = typename Parent::BTreeTypeRefPair;
    using Builder = typename Parent::Builder;
    using CompactionSpec = vespalib::datastore::CompactionSpec;
    using CompactionStrategy = vespalib::datastore::CompactionStrategy;
    using EntryRef = vespalib::datastore::EntryRef;
    using CompareT = std::less<uint32_t>;
    using Parent::applyNewArray;
    using Parent::applyNewTree;
    using Parent::applyCluster;
    using Parent::applyTree;
    using Parent::normalizeTree;
    using Parent::getTypeId;
    using Parent::getClusterSize;
    using Parent::getWTreeEntry;
    using Parent::getTreeEntry;
    using Parent::getKeyDataEntry;
    using Parent::isBTree;
    using Parent::clusterLimit;
    using Parent::allocBTree;
    using Parent::allocBTreeCopy;
    using Parent::allocKeyDataCopy;
    using Parent::_builder;
    using Parent::_store;
    using Parent::_allocator;
    using Parent::_aggrCalc;
    using Parent::BUFFERTYPE_BTREE;
    using BitVectorRefPair = vespalib::datastore::Handle<BitVectorEntry>;


    PostingStore(IEnumStoreDictionary& dictionary, Status &status, const Config &config);
    ~PostingStore();

    bool removeSparseBitVectors() override;
    void consider_remove_sparse_bitvector(std::vector<EntryRef> &refs);
    static bool isBitVector(uint32_t typeId) noexcept { return typeId == BUFFERTYPE_BITVECTOR; }

    void applyNew(EntryRef &ref, AddIter a, AddIter ae);

    BitVectorRefPair allocBitVector() {
        return _store.template freeListAllocator<BitVectorEntry,
            vespalib::datastore::DefaultReclaimer<BitVectorEntry> >(BUFFERTYPE_BITVECTOR).alloc();
    }

    BitVectorRefPair allocBitVectorCopy(const BitVectorEntry& bve) {
        return _store.template freeListAllocator<BitVectorEntry,
            vespalib::datastore::DefaultReclaimer<BitVectorEntry> >(BUFFERTYPE_BITVECTOR).alloc(bve);
    }

    /*
     * Recreate btree from bitvector. Weight information is not recreated.
     */
    void makeDegradedTree(EntryRef &ref, const BitVector &bv);
    void dropBitVector(EntryRef &ref);
    void makeBitVector(EntryRef &ref);

    void applyNewBitVector(EntryRef &ref, AddIter aOrg, AddIter ae);
    void apply(BitVector &bv, AddIter a, AddIter ae, RemoveIter r, RemoveIter re);

    /**
     * Apply multiple changes at once.
     *
     * additions and removals should be sorted on key without duplicates.
     * Overlap between additions and removals indicates updates.
     */
    void apply(EntryRef &ref, AddIter a, AddIter ae, RemoveIter r, RemoveIter re);
    void clear(const EntryRef ref);
    size_t size(const EntryRef ref) const {
        if (!ref.valid())
            return 0;
        RefType iRef(ref);
        uint32_t typeId = getTypeId(iRef);
        uint32_t clusterSize = getClusterSize(typeId);
        if (clusterSize == 0) {
            return internalSize(typeId, iRef);
        }
        return clusterSize;
    }

    size_t frozenSize(const EntryRef ref) const {
        if (!ref.valid())
            return 0;
        RefType iRef(ref);
        uint32_t typeId = getTypeId(iRef);
        uint32_t clusterSize = getClusterSize(typeId);
        if (clusterSize == 0) {
            return internalFrozenSize(typeId, iRef);
        }
        return clusterSize;
    }

    Iterator begin(const EntryRef ref) const;
    ConstIterator beginFrozen(const EntryRef ref) const;
    void beginFrozen(const EntryRef ref, std::vector<ConstIterator> &where) const;

    template <typename FunctionType>
    VESPA_DLL_LOCAL void foreach_frozen_key(EntryRef ref, FunctionType func) const;

    template <typename FunctionType>
    VESPA_DLL_LOCAL void foreach_frozen(EntryRef ref, FunctionType func) const;

    AggregatedType getAggregated(const EntryRef ref) const;

    const BitVectorEntry *getBitVectorEntry(RefType ref) const {
        return _store.template getEntry<BitVectorEntry>(ref);
    }

    BitVectorEntry *getWBitVectorEntry(RefType ref) {
        return _store.template getEntry<BitVectorEntry>(ref);
    }
    bool has_btree(const EntryRef ref) const noexcept {
        return !ref.valid() || !isBitVector(getTypeId(RefType(ref))) || !isFilter();
    }

    std::unique_ptr<queryeval::SearchIterator> make_bitvector_iterator(RefType ref, uint32_t doc_id_limit, fef::TermFieldMatchData &match_data, bool strict) const;

    static inline DataT bitVectorWeight();
    vespalib::MemoryUsage getMemoryUsage() const;
    vespalib::MemoryUsage update_stat(const CompactionStrategy& compaction_strategy);

    void move_btree_nodes(const std::vector<EntryRef> &refs);
    void move(std::vector<EntryRef>& refs);

    void compact_worst_btree_nodes(const CompactionStrategy& compaction_strategy);
    void compact_worst_buffers(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy);
    bool consider_compact_worst_btree_nodes(const CompactionStrategy& compaction_strategy);
    bool consider_compact_worst_buffers(const CompactionStrategy& compaction_strategy);
private:
    size_t internalSize(uint32_t typeId, const RefType & iRef) const;
    size_t internalFrozenSize(uint32_t typeId, const RefType & iRef) const;
};

template <>
inline vespalib::btree::BTreeNoLeafData
PostingStore<vespalib::btree::BTreeNoLeafData>::bitVectorWeight()
{
    return vespalib::btree::BTreeNoLeafData();
}

template <>
inline int32_t
PostingStore<int32_t>::bitVectorWeight()
{
    return 1;
}

}