diff options
author | Tor Egge <Tor.Egge@online.no> | 2021-12-06 15:44:26 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2021-12-06 15:44:26 +0100 |
commit | 1330d2c3d3b8647b6053ac37e95503cd0278e2e3 (patch) | |
tree | 1434a85c72ac28563db3802f69fe99f50f86412e /searchlib | |
parent | 2ad949884ee12126f00b18d6e8890af8cbc61391 (diff) |
Add EntryRefFilter class.
Diffstat (limited to 'searchlib')
5 files changed, 64 insertions, 51 deletions
diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp index c2a3139d83a..40ff25ff976 100644 --- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp +++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp @@ -8,6 +8,7 @@ LOG_SETUP("enumstore_test"); using Type = search::DictionaryConfig::Type; using vespalib::datastore::EntryRef; +using vespalib::datastore::EntryRefFilter; using RefT = vespalib::datastore::EntryRefT<22>; namespace vespalib::datastore { @@ -714,6 +715,20 @@ EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::clear_sample_values(uin } } +namespace { + +EntryRefFilter make_entry_ref_filter(bool one_filter) +{ + if (one_filter) { + EntryRefFilter filter(RefT::numBuffers(), RefT::offset_bits); + filter.add_buffer(3); + return filter; + } + return EntryRefFilter::create_all_filter(RefT::numBuffers(), RefT::offset_bits); +} + +} + template <typename EnumStoreTypeAndDictionaryType> void EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::test_normalize_posting_lists(bool use_filter, bool one_filter) @@ -734,18 +749,12 @@ EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::test_normalize_posting_ } EXPECT_EQ(exp_refs, get_sample_values(large_population)); if (use_filter) { - std::vector<bool> filter; - if (one_filter) { - filter = std::vector<bool>(RefT::numBuffers()); - filter[3] = true; - } else { - filter = std::vector<bool>(RefT::numBuffers(), true); - } + auto filter = make_entry_ref_filter(one_filter); auto dummy = [](std::vector<EntryRef>&) noexcept { }; auto adjust_refs = [](std::vector<EntryRef> &refs) noexcept { for (auto &ref : refs) { ref = adjust_fake_pidx(ref); } }; - EXPECT_FALSE(dict.normalize_posting_lists(dummy, filter, RefT::offset_bits)); + EXPECT_FALSE(dict.normalize_posting_lists(dummy, filter)); EXPECT_EQ(exp_refs, get_sample_values(large_population)); - EXPECT_TRUE(dict.normalize_posting_lists(adjust_refs, filter, RefT::offset_bits)); + EXPECT_TRUE(dict.normalize_posting_lists(adjust_refs, filter)); } else { auto dummy = [](EntryRef posting_idx) noexcept { return posting_idx; }; auto adjust_refs = [](EntryRef ref) noexcept { return adjust_fake_pidx(ref); }; @@ -761,21 +770,15 @@ template <typename EnumStoreTypeAndDictionaryType> void EnumStoreDictionaryTest<EnumStoreTypeAndDictionaryType>::test_foreach_posting_list(bool one_filter) { - std::vector<bool> filter; - if (one_filter) { - filter = std::vector<bool>(RefT::numBuffers()); - filter[3] = true; - } else { - filter = std::vector<bool>(RefT::numBuffers(), true); - } + auto filter = make_entry_ref_filter(one_filter); populate_sample_data(large_population); auto& dict = store.get_dictionary(); std::vector<EntryRef> exp_refs; auto save_exp_refs = [&exp_refs](std::vector<EntryRef>& refs) { exp_refs.insert(exp_refs.end(), refs.begin(), refs.end()); }; - EXPECT_FALSE(dict.normalize_posting_lists(save_exp_refs, filter, RefT::offset_bits)); + EXPECT_FALSE(dict.normalize_posting_lists(save_exp_refs, filter)); std::vector<EntryRef> act_refs; auto save_act_refs = [&act_refs](const std::vector<EntryRef>& refs) { act_refs.insert(act_refs.end(), refs.begin(), refs.end()); }; - dict.foreach_posting_list(save_act_refs, filter, RefT::offset_bits); + dict.foreach_posting_list(save_act_refs, filter); EXPECT_EQ(exp_refs, act_refs); clear_sample_values(large_population); } diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp index f77b9426b32..8bc28abc238 100644 --- a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp @@ -312,7 +312,7 @@ EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::normalize_posting_lists( template <> bool -EnumStoreDictionary<EnumTree>::normalize_posting_lists(std::function<void(std::vector<EntryRef>&)>, const std::vector<bool> &, uint32_t) +EnumStoreDictionary<EnumTree>::normalize_posting_lists(std::function<void(std::vector<EntryRef>&)>, const EntryRefFilter&) { LOG_ABORT("should not be reached"); } @@ -399,7 +399,7 @@ ChangeWriter<HashDictionaryT>::write(const std::vector<EntryRef> &refs) template <typename BTreeDictionaryT, typename HashDictionaryT> bool -EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::normalize_posting_lists(std::function<void(std::vector<EntryRef>&)> normalize, const std::vector<bool>& filter, uint32_t entry_ref_offset_bits) +EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::normalize_posting_lists(std::function<void(std::vector<EntryRef>&)> normalize, const EntryRefFilter& filter) { if constexpr (has_btree_dictionary) { std::vector<EntryRef> refs; @@ -413,8 +413,7 @@ EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::normalize_posting_lists( for (auto itr = dict.begin(); itr.valid(); ++itr) { EntryRef ref(itr.getData()); if (ref.valid()) { - uint32_t buffer_id = ref.buffer_id(entry_ref_offset_bits); - if (filter[buffer_id]) { + if (filter.has(ref)) { refs.emplace_back(ref); change_writer.emplace_back(itr.getKey(), itr.getWData()); if (refs.size() >= refs.capacity()) { @@ -431,20 +430,20 @@ EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::normalize_posting_lists( } return changed; } else { - return this->_hash_dict.normalize_values(normalize, filter, entry_ref_offset_bits); + return this->_hash_dict.normalize_values(normalize, filter); } } template <> void -EnumStoreDictionary<EnumTree>::foreach_posting_list(std::function<void(const std::vector<EntryRef>&)>, const std::vector<bool>&, uint32_t) +EnumStoreDictionary<EnumTree>::foreach_posting_list(std::function<void(const std::vector<EntryRef>&)>, const EntryRefFilter&) { LOG_ABORT("should not be reached"); } template <typename BTreeDictionaryT, typename HashDictionaryT> void -EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::foreach_posting_list(std::function<void(const std::vector<EntryRef>&)> callback, const std::vector<bool>& filter, uint32_t entry_ref_offset_bits) +EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::foreach_posting_list(std::function<void(const std::vector<EntryRef>&)> callback, const EntryRefFilter& filter) { if constexpr (has_btree_dictionary) { std::vector<EntryRef> refs; @@ -453,8 +452,7 @@ EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::foreach_posting_list(std for (auto itr = dict.begin(); itr.valid(); ++itr) { EntryRef ref(itr.getData()); if (ref.valid()) { - uint32_t buffer_id = ref.buffer_id(entry_ref_offset_bits); - if (filter[buffer_id]) { + if (filter.has(ref)) { refs.emplace_back(ref); if (refs.size() >= refs.capacity()) { callback(refs); @@ -467,7 +465,7 @@ EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::foreach_posting_list(std callback(refs); } } else { - this->_hash_dict.foreach_value(callback, filter, entry_ref_offset_bits); + this->_hash_dict.foreach_value(callback, filter); } } diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h index a17ffd14864..db1176c5484 100644 --- a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h +++ b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h @@ -16,6 +16,7 @@ template <typename BTreeDictionaryT, typename HashDictionaryT = vespalib::datast class EnumStoreDictionary : public vespalib::datastore::UniqueStoreDictionary<BTreeDictionaryT, IEnumStoreDictionary, HashDictionaryT> { protected: using EntryRef = IEnumStoreDictionary::EntryRef; + using EntryRefFilter = IEnumStoreDictionary::EntryRefFilter; using Index = IEnumStoreDictionary::Index; using BTreeDictionaryType = BTreeDictionaryT; using EntryComparator = IEnumStoreDictionary::EntryComparator; @@ -54,8 +55,8 @@ public: void clear_all_posting_lists(std::function<void(EntryRef)> clearer) override; void update_posting_list(Index idx, const EntryComparator& cmp, std::function<EntryRef(EntryRef)> updater) override; bool normalize_posting_lists(std::function<EntryRef(EntryRef)> normalize) override; - bool normalize_posting_lists(std::function<void(std::vector<EntryRef>&)> normalize, const std::vector<bool>& filter, uint32_t entry_ref_offset_bits) override; - void foreach_posting_list(std::function<void(const std::vector<EntryRef>&)> callback, const std::vector<bool>& filter, uint32_t entry_ref_offset_bits) override; + bool normalize_posting_lists(std::function<void(std::vector<EntryRef>&)> normalize, const EntryRefFilter& filter) override; + void foreach_posting_list(std::function<void(const std::vector<EntryRef>&)> callback, const EntryRefFilter& filter) override; const EnumPostingTree& get_posting_dictionary() const override; }; diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h index 4a00c72d6ba..a9716ec5d05 100644 --- a/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h +++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h @@ -30,6 +30,7 @@ class IEnumStoreDictionary : public vespalib::datastore::IUniqueStoreDictionary public: using EntryRef = vespalib::datastore::EntryRef; using EntryComparator = vespalib::datastore::EntryComparator; + using EntryRefFilter = vespalib::datastore::EntryRefFilter; using EnumVector = IEnumStore::EnumVector; using Index = IEnumStore::Index; using IndexList = IEnumStore::IndexList; @@ -52,9 +53,25 @@ public: virtual Index remap_index(Index idx) = 0; virtual void clear_all_posting_lists(std::function<void(EntryRef)> clearer) = 0; virtual void update_posting_list(Index idx, const EntryComparator& cmp, std::function<EntryRef(EntryRef)> updater) = 0; + /* + * Scan dictionary and call normalize function for each value. If + * returned value is different then write back the modified value to + * the dictionary. Only used by unit tests. + */ virtual bool normalize_posting_lists(std::function<EntryRef(EntryRef)> normalize) = 0; - virtual bool normalize_posting_lists(std::function<void(std::vector<EntryRef>&)> normalize, const std::vector<bool>& filter, uint32_t entry_ref_offset_bits) = 0; - virtual void foreach_posting_list(std::function<void(const std::vector<EntryRef>&)> callback, const std::vector<bool>& filter, uint32_t entry_ref_offset_bits) = 0; + /* + * Scan dictionary and call normalize function for batches of values + * that pass the filter. Write back modified values to the dictionary. + * Used by compaction of posting lists when moving short arrays, + * bitvectors or btree roots. + */ + virtual bool normalize_posting_lists(std::function<void(std::vector<EntryRef>&)> normalize, const EntryRefFilter& filter) = 0; + /* + * Scan dictionary and call callback function for batches of values + * that pass the filter. Used by compaction of posting lists when + * moving btree nodes. + */ + virtual void foreach_posting_list(std::function<void(const std::vector<EntryRef>&)> callback, const EntryRefFilter& filter) = 0; virtual const EnumPostingTree& get_posting_dictionary() const = 0; }; diff --git a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp index 2592a2889e4..8ed8a0cfbee 100644 --- a/searchlib/src/vespa/searchlib/attribute/postingstore.cpp +++ b/searchlib/src/vespa/searchlib/attribute/postingstore.cpp @@ -7,11 +7,13 @@ #include <vespa/vespalib/btree/btreeiterator.hpp> #include <vespa/vespalib/btree/btreerootbase.cpp> #include <vespa/vespalib/datastore/datastore.hpp> +#include <vespa/vespalib/datastore/entry_ref_filter.h> #include <vespa/vespalib/datastore/buffer_type.hpp> namespace search::attribute { using vespalib::btree::BTreeNoLeafData; +using vespalib::datastore::EntryRefFilter; // #define FORCE_BITVECTORS @@ -127,13 +129,11 @@ PostingStore<DataT>::removeSparseBitVectors() } } if (needscan) { - std::vector<bool> filter(RefType::numBuffers()); - for (uint32_t buffer_id : _bvType.get_active_buffers()) { - filter[buffer_id] = true; - } + EntryRefFilter filter(RefType::numBuffers(), RefType::offset_bits); + filter.add_buffers(_bvType.get_active_buffers()); res = _dictionary.normalize_posting_lists([this](std::vector<EntryRef>& refs) { consider_remove_sparse_bitvector(refs); }, - filter, RefType::offset_bits); + filter); } return res; } @@ -727,16 +727,12 @@ void PostingStore<DataT>::compact_worst_btree_nodes() { auto to_hold = this->start_compact_worst_btree_nodes(); - std::vector<bool> filter(RefType::numBuffers()); + EntryRefFilter filter(RefType::numBuffers(), RefType::offset_bits); // Only look at buffers containing bitvectors and btree roots - for (uint32_t buffer_id : this->_treeType.get_active_buffers()) { - filter[buffer_id] = true; - } - for (uint32_t buffer_id : _bvType.get_active_buffers()) { - filter[buffer_id] = true; - } + filter.add_buffers(this->_treeType.get_active_buffers()); + filter.add_buffers(_bvType.get_active_buffers()); _dictionary.foreach_posting_list([this](const std::vector<EntryRef>& refs) - { move_btree_nodes(refs); }, filter, RefType::offset_bits); + { move_btree_nodes(refs); }, filter); this->finish_compact_worst_btree_nodes(to_hold); } @@ -746,24 +742,22 @@ PostingStore<DataT>::compact_worst_buffers() { auto to_hold = this->start_compact_worst_buffers(); bool compact_btree_roots = false; - std::vector<bool> filter(RefType::numBuffers()); + EntryRefFilter filter(RefType::numBuffers(), RefType::offset_bits); + filter.add_buffers(to_hold); // Start with looking at buffers being compacted for (uint32_t buffer_id : to_hold) { if (isBTree(_store.getBufferState(buffer_id).getTypeId())) { compact_btree_roots = true; } - filter[buffer_id] = true; } if (compact_btree_roots) { // If we are compacting btree roots then we also have to look at bitvector // buffers - for (uint32_t buffer_id : _bvType.get_active_buffers()) { - filter[buffer_id] = true; - } + filter.add_buffers(_bvType.get_active_buffers()); } _dictionary.normalize_posting_lists([this](std::vector<EntryRef>& refs) { return move(refs); }, - filter, RefType::offset_bits); + filter); this->finishCompact(to_hold); } |