// path: root/searchlib/src/vespa/searchlib/attribute/enumstore.h
// blob: 3489afdd3f8fc797be303fa411ceb8b099b93290
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include "enum_store_compaction_spec.h"
#include "enum_store_dictionary.h"
#include "enumcomparator.h"
#include "i_enum_store.h"
#include "loadedenumvalue.h"
#include <vespa/searchcommon/common/dictionary_config.h>
#include <vespa/vespalib/btree/btreenode.h>
#include <vespa/vespalib/btree/btreenodeallocator.h>
#include <vespa/vespalib/btree/btree.h>
#include <vespa/vespalib/btree/btreebuilder.h>
#include <vespa/vespalib/datastore/entryref.h>
#include <vespa/vespalib/datastore/unique_store.h>
#include <vespa/vespalib/datastore/unique_store_string_allocator.h>
#include <vespa/vespalib/util/buffer.h>
#include <vespa/vespalib/stllike/allocator.h>
#include <vespa/vespalib/util/stringfmt.h>
#include <cmath>

namespace search {

/**
 * Class storing and providing access to all unique values stored in an enumerated attribute vector.
 *
 * It uses an instance of vespalib::datastore::UniqueStore to store the actual values.
 * It also exposes the dictionary used for fast lookups into the set of unique values.
 *
 * The default value is always present except for a short time window
 * during attribute vector load.
 *
 * @tparam EntryT The type of the entries/values stored.
 *                It has special handling of type 'const char *' for strings.
 */
template <class EntryT>
class EnumStoreT final : public IEnumStore {
public:
    using EntryType = EntryT;
    // True when the stored entry type is 'const char *', i.e. the store holds strings.
    static constexpr bool has_string_type = std::is_same_v<EntryType, const char *>;
    // Comparator and allocator are selected at compile time:
    // the string-specific variants when has_string_type is true, the generic ones otherwise.
    using ComparatorType = std::conditional_t<has_string_type,
                                              EnumStoreStringComparator,
                                              EnumStoreComparator<EntryT>>;
    using AllocatorType = std::conditional_t<has_string_type,
                                             vespalib::datastore::UniqueStoreStringAllocator<InternalIndex>,
                                             vespalib::datastore::UniqueStoreAllocator<EntryT, InternalIndex>>;
    using UniqueStoreType = vespalib::datastore::UniqueStore<EntryT, InternalIndex, ComparatorType, AllocatorType>;

    using EnumStoreType = EnumStoreT<EntryT>;
    using EntryRef = vespalib::datastore::EntryRef;
    using EntryComparator = vespalib::datastore::EntryComparator;
    using generation_t = vespalib::GenerationHandler::generation_t;

private:
    // Underlying unique store holding the actual values.
    UniqueStoreType        _store;
    // Dictionary handed out by get_dictionary().
    // NOTE(review): raw pointer; ownership is not visible in this header — presumably non-owning, confirm in the constructor.
    IEnumStoreDictionary*  _dict;
    // Value reported by is_folded().
    bool                   _is_folded;
    // Comparator returned by get_folded_comparator() and used as basis for the folded lookup comparators.
    ComparatorType         _foldedComparator;
    enumstore::EnumStoreCompactionSpec _compaction_spec;
    EntryType              _default_value;
    // Reference returned by get_default_value_ref(); see clear_default_value_ref() / setup_default_value_ref().
    AtomicIndex            _default_value_ref;

    void free_value_if_unused(Index idx, IndexList &unused) override;

    // Returns the wrapped entry (the part carrying the ref count) for the given index,
    // looked up through the store's allocator.
    const vespalib::datastore::UniqueStoreEntryBase& get_entry_base(Index idx) const {
        return _store.get_allocator().get_wrapped(idx);
    }

    ssize_t load_unique_values_internal(const void* src, size_t available, IndexVector& idx);
    // Has an explicit specialization for 'const char *' declared after this class.
    ssize_t load_unique_value(const void* src, size_t available, Index& idx);

    std::unique_ptr<EntryComparator> allocate_optionally_folded_comparator(bool folded) const;
    ComparatorType make_optionally_folded_comparator(bool folded) const;
public:
    // Not copyable.
    EnumStoreT(const EnumStoreT & rhs) = delete;
    EnumStoreT & operator=(const EnumStoreT & rhs) = delete;
    EnumStoreT(bool has_postings, const search::DictionaryConfig& dict_cfg, std::shared_ptr<vespalib::alloc::MemoryAllocator> memory_allocator, EntryType default_value);
    EnumStoreT(bool has_postings, const search::DictionaryConfig & dict_cfg);
    ~EnumStoreT() override;

    // Reference count accessors; both operate on the wrapped entry for idx.
    uint32_t get_ref_count(Index idx) const { return get_entry_base(idx).get_ref_count(); }
    void inc_ref_count(Index idx) { return get_entry_base(idx).inc_ref_count(); }

    // Only use when reading from enumerated attribute save files
    void set_ref_count(Index idx, uint32_t ref_count) override {
        get_entry_base(idx).set_ref_count(ref_count);
    }

    // Number of unique values, as reported by the dictionary.
    uint32_t get_num_uniques() const override { return _dict->get_num_uniques(); }
    bool is_folded() const { return _is_folded;}

    // Memory usage of the data store holding the values (obtained via the store's allocator).
    vespalib::MemoryUsage get_values_memory_usage() const override {
        return _store.get_allocator().get_data_store().getMemoryUsage();
    }
    // Same data store as above, but reporting getDynamicMemoryUsage().
    vespalib::MemoryUsage get_dynamic_values_memory_usage() const {
        return _store.get_allocator().get_data_store().getDynamicMemoryUsage();
    }
    // Memory usage of the dictionary, as reported by the dictionary itself.
    vespalib::MemoryUsage get_dictionary_memory_usage() const override { return _dict->get_memory_usage(); }

    vespalib::AddressSpace get_values_address_space_usage() const override;

    void assign_generation(generation_t current_gen);
    void reclaim_memory(generation_t first_used);

    ssize_t load_unique_values(const void* src, size_t available, IndexVector& idx) override;

    // Forwards to freeze() on the underlying unique store.
    void freeze_dictionary() { _store.freeze(); }

    IEnumStoreDictionary& get_dictionary() override { return *_dict; }
    const IEnumStoreDictionary& get_dictionary() const override { return *_dict; }

    // NOTE(review): defined elsewhere; presumably the bool signals whether idx referred to a value — confirm against the definition.
    bool get_value(Index idx, EntryType& value) const;
    // Convenience overload: wraps the raw 32-bit value in an EntryRef/Index and forwards to get_value(Index).
    EntryType get_value(uint32_t idx) const { return get_value(Index(EntryRef(idx))); }
    // Returns the value stored for idx in the underlying unique store.
    EntryType get_value(Index idx) const { return _store.get(idx); }

    /**
     * Helper class used to load an enum store from non-enumerated save files.
     */
    class NonEnumeratedLoader {
    private:
        AllocatorType& _allocator;
        vespalib::datastore::IUniqueStoreDictionary& _dict;
        // _refs and _payloads grow in lockstep: insert() appends one element to each.
        std::vector<EntryRef, vespalib::allocator_large<EntryRef>> _refs;
        std::vector<EntryRef, vespalib::allocator_large<EntryRef>> _payloads;

    public:
        NonEnumeratedLoader(AllocatorType& allocator, vespalib::datastore::IUniqueStoreDictionary& dict)
            : _allocator(allocator),
              _dict(dict),
              _refs(),
              _payloads()
        {
        }
        ~NonEnumeratedLoader();
        // Allocates a new entry for value (the dictionary is not consulted here),
        // records its reference together with posting_idx, and returns the new reference.
        Index insert(const EntryType& value, uint32_t posting_idx) {
            EntryRef new_ref = _allocator.allocate(value);
            _refs.emplace_back(new_ref);
            _payloads.emplace_back(posting_idx);
            return new_ref;
        }
        // Sets the ref count of the most recently inserted value; requires at least one prior insert().
        void set_ref_count_for_last_value(uint32_t ref_count) {
            assert(!_refs.empty());
            _allocator.get_wrapped(_refs.back()).set_ref_count(ref_count);
        }
        // Hands all collected references and payloads to the dictionary in one call.
        void build_dictionary() {
            _dict.build_with_payload(_refs, _payloads);
        }
    };

    // Creates a loader bound to this store's allocator and dictionary.
    NonEnumeratedLoader make_non_enumerated_loader() {
        return NonEnumeratedLoader(_store.get_allocator(), *_dict);
    }

    // Helper that applies ref count changes to the enum store and collects the indexes
    // whose ref count dropped to zero, so they can be handed over together in commit().
    class BatchUpdater {
    private:
        EnumStoreType& _store;
        // Indexes observed with ref count zero in dec_ref_count().
        IndexList _possibly_unused;

    public:
        explicit BatchUpdater(EnumStoreType& store)
            : _store(store),
              _possibly_unused()
        {}
        Index insert(EntryType value);
        void inc_ref_count(Index idx) {
            _store.get_entry_base(idx).inc_ref_count();
        }
        // Decrements the ref count and remembers idx if the count is zero afterwards.
        void dec_ref_count(Index idx) {
            auto& entry = _store.get_entry_base(idx);
            entry.dec_ref_count();
            if (entry.get_ref_count() == 0) {
                _possibly_unused.push_back(idx);
            }
        }
        // Moves the collected indexes into EnumStoreT::free_unused_values(IndexList).
        void commit() {
            _store.free_unused_values(std::move(_possibly_unused));
        }
    };

    // Creates a batch updater operating on this store.
    BatchUpdater make_batch_updater() {
        return BatchUpdater(*this);
    }

    // Comparator owned by the underlying unique store.
    const EntryComparator & get_comparator() const noexcept {
        return _store.get_comparator();
    }

    // Builds a comparator for looking up lookup_value, derived from the store's comparator.
    ComparatorType make_comparator(const EntryType& lookup_value) const {
        return _store.get_comparator().make_for_lookup(lookup_value);
    }

    const EntryComparator & get_folded_comparator() const {
        return _foldedComparator;
    }

    // Has an explicit specialization for 'const char *' declared after this class.
    void write_value(BufferWriter& writer, Index idx) const override;
    bool is_folded_change(Index idx1, Index idx2) const override;
    bool find_enum(EntryType value, IEnumStore::EnumHandle& e) const;
    Index insert(EntryType value);
    bool find_index(EntryType value, Index& idx) const;
    void free_unused_values() override;
    void free_unused_values(IndexList to_remove);
    void clear_default_value_ref() override;
    void setup_default_value_ref() override;
    const AtomicIndex& get_default_value_ref() const noexcept { return _default_value_ref; }
    vespalib::MemoryUsage update_stat(const CompactionStrategy& compaction_strategy) override;
    std::unique_ptr<EnumIndexRemapper> consider_compact_values(const CompactionStrategy& compaction_strategy) override;
    std::unique_ptr<EnumIndexRemapper> compact_worst_values(CompactionSpec compaction_spec, const CompactionStrategy& compaction_strategy) override;
    bool consider_compact_dictionary(const CompactionStrategy& compaction_strategy) override;
    uint64_t get_compaction_count() const override {
        return _store.get_data_store().get_compaction_count();
    }
    // NOTE(review): reaches the data store via get_allocator(), while get_compaction_count() uses
    // _store.get_data_store(); presumably both refer to the same data store — confirm.
    void inc_compaction_count() override {
        _store.get_allocator().get_data_store().inc_compaction_count();
    }
    std::unique_ptr<Enumerator> make_enumerator() override;
    std::unique_ptr<EntryComparator> allocate_comparator() const override;

    // Methods below are only relevant for strings, and are templated to only be instantiated on demand.
    // Builds a lookup comparator for lookup_value from the folded comparator.
    template <typename Type>
    ComparatorType
    make_folded_comparator(const Type& lookup_value) const {
        return _foldedComparator.make_for_lookup(lookup_value);
    }
    // As make_folded_comparator(), but built with make_for_prefix_lookup().
    template<typename Type>
    ComparatorType
    make_folded_comparator_prefix(const Type& lookup_value) const {
        return _foldedComparator.make_for_prefix_lookup(lookup_value);
    }
    // Returns the enum handles the dictionary reports as matching value under the folded comparator.
    template<typename Type>
    std::vector<IEnumStore::EnumHandle>
    find_folded_enums(Type value) const {
        auto cmp = make_folded_comparator(value);
        return _dict->find_matching_enums(cmp);
    }
    // Read-only access to the data store of the underlying unique store.
    const vespalib::datastore::DataStoreT<IEnumStore::InternalIndex>& get_data_store() const noexcept {
        return _store.get_data_store();
    }
};

// Declaration of an explicit specialization of write_value() for the string
// entry type ('const char *'). Only declared here; the definition is provided elsewhere.
template <>
void
EnumStoreT<const char*>::write_value(BufferWriter& writer, Index idx) const;

// Declaration of an explicit specialization of load_unique_value() for the string
// entry type ('const char *'). Only declared here; the definition is provided elsewhere.
template <>
ssize_t
EnumStoreT<const char*>::load_unique_value(const void* src, size_t available, Index& idx);

}

namespace vespalib::datastore {

// Explicit instantiation declaration: including this header does not implicitly
// instantiate DataStoreT for the enum store index type; the matching explicit
// instantiation definition is expected in some other translation unit.
extern template
class DataStoreT<search::IEnumStore::Index>;

}

namespace vespalib::btree {

// Explicit instantiation declarations for the two BTreeBuilder variants used with
// enum store indexes and the EnumTreeTraits slot counts: one with BTreeNoLeafData
// and one with vespalib::datastore::EntryRef as the second template argument.
// The matching explicit instantiation definitions are expected in some other translation unit.
extern template
class BTreeBuilder<search::IEnumStore::Index, BTreeNoLeafData, NoAggregated,
                   search::EnumTreeTraits::INTERNAL_SLOTS, search::EnumTreeTraits::LEAF_SLOTS>;
extern template
class BTreeBuilder<search::IEnumStore::Index, vespalib::datastore::EntryRef, NoAggregated,
                   search::EnumTreeTraits::INTERNAL_SLOTS, search::EnumTreeTraits::LEAF_SLOTS>;

}

namespace search {

// Explicit instantiation declarations of EnumStoreT for the listed entry types
// (strings, signed integers of 8/16/32/64 bits, float and double).
// Including this header therefore does not implicitly instantiate these classes;
// the matching explicit instantiation definitions are expected in some other translation unit.
extern template class EnumStoreT<const char*>;
extern template class EnumStoreT<int8_t>;
extern template class EnumStoreT<int16_t>;
extern template class EnumStoreT<int32_t>;
extern template class EnumStoreT<int64_t>;
extern template class EnumStoreT<float>;
extern template class EnumStoreT<double>;

} // namespace search