diff options
author | Tor Egge <Tor.Egge@broadpark.no> | 2021-03-30 11:13:52 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@broadpark.no> | 2021-03-30 11:13:52 +0200 |
commit | d1024ae63d03f72b5053427753d0c5236ad6293a (patch) | |
tree | e44a0547f892e4544f999b1143057b8bd79d5e83 /vespalib | |
parent | fe03be062c4ce87aab521237f975b375e54b2906 (diff) |
Handle UniqueStoreDictionary without B-tree.
Diffstat (limited to 'vespalib')
15 files changed, 384 insertions, 77 deletions
diff --git a/vespalib/src/tests/datastore/unique_store/unique_store_test.cpp b/vespalib/src/tests/datastore/unique_store/unique_store_test.cpp index 3e8b144cf32..57fe326342d 100644 --- a/vespalib/src/tests/datastore/unique_store/unique_store_test.cpp +++ b/vespalib/src/tests/datastore/unique_store/unique_store_test.cpp @@ -150,9 +150,19 @@ TestBase<UniqueStoreTypeAndDictionaryType>::TestBase() { switch (UniqueStoreTypeAndDictionaryType::dictionary_type) { case DictionaryType::BTREE: + EXPECT_TRUE(store.get_dictionary().get_has_btree_dictionary()); + EXPECT_FALSE(store.get_dictionary().get_has_hash_dictionary()); break; - default: + case DictionaryType::BTREE_AND_HASH: store.set_dictionary(std::make_unique<UniqueStoreDictionary<uniquestore::DefaultDictionary, IUniqueStoreDictionary, ShardedHashMap>>(std::make_unique<CompareType>(store.get_data_store()))); + EXPECT_TRUE(store.get_dictionary().get_has_btree_dictionary()); + EXPECT_TRUE(store.get_dictionary().get_has_hash_dictionary()); + break; + case DictionaryType::HASH: + default: + store.set_dictionary(std::make_unique<UniqueStoreDictionary<NoBTreeDictionary, IUniqueStoreDictionary, ShardedHashMap>>(std::make_unique<CompareType>(store.get_data_store()))); + EXPECT_FALSE(store.get_dictionary().get_has_btree_dictionary()); + EXPECT_TRUE(store.get_dictionary().get_has_hash_dictionary()); } } @@ -204,37 +214,67 @@ struct OrderedSmallOffsetNumberUniqueStore static constexpr DictionaryType dictionary_type = DictionaryType::BTREE; }; -struct UnorderedNumberUniqueStore +struct HybridNumberUniqueStore { using UniqueStoreType = NumberUniqueStore; static constexpr DictionaryType dictionary_type = DictionaryType::BTREE_AND_HASH; }; -struct UnorderedStringUniqueStore +struct HybridStringUniqueStore { using UniqueStoreType = StringUniqueStore; static constexpr DictionaryType dictionary_type = DictionaryType::BTREE_AND_HASH; }; -struct UnorderedCStringUniqueStore +struct HybridCStringUniqueStore { using UniqueStoreType = CStringUniqueStore; static constexpr DictionaryType dictionary_type = DictionaryType::BTREE_AND_HASH; }; -struct UnorderedDoubleUniqueStore +struct HybridDoubleUniqueStore { using UniqueStoreType = DoubleUniqueStore; static constexpr DictionaryType dictionary_type = DictionaryType::BTREE_AND_HASH; }; -struct UnorderedSmallOffsetNumberUniqueStore +struct HybridSmallOffsetNumberUniqueStore { using UniqueStoreType = SmallOffsetNumberUniqueStore; static constexpr DictionaryType dictionary_type = DictionaryType::BTREE_AND_HASH; }; -using UniqueStoreTestTypes = ::testing::Types<OrderedNumberUniqueStore, OrderedStringUniqueStore, OrderedCStringUniqueStore, OrderedDoubleUniqueStore, UnorderedNumberUniqueStore, UnorderedStringUniqueStore, UnorderedCStringUniqueStore, UnorderedDoubleUniqueStore>; +struct UnorderedNumberUniqueStore +{ + using UniqueStoreType = NumberUniqueStore; + static constexpr DictionaryType dictionary_type = DictionaryType::HASH; +}; + +struct UnorderedStringUniqueStore +{ + using UniqueStoreType = StringUniqueStore; + static constexpr DictionaryType dictionary_type = DictionaryType::HASH; +}; + +struct UnorderedCStringUniqueStore +{ + using UniqueStoreType = CStringUniqueStore; + static constexpr DictionaryType dictionary_type = DictionaryType::HASH; +}; + +struct UnorderedDoubleUniqueStore +{ + using UniqueStoreType = DoubleUniqueStore; + static constexpr DictionaryType dictionary_type = DictionaryType::HASH; +}; + +struct UnorderedSmallOffsetNumberUniqueStore +{ + using UniqueStoreType = SmallOffsetNumberUniqueStore; + static constexpr DictionaryType dictionary_type = DictionaryType::HASH; +}; + +using UniqueStoreTestTypes = ::testing::Types<OrderedNumberUniqueStore, OrderedStringUniqueStore, OrderedCStringUniqueStore, OrderedDoubleUniqueStore, HybridNumberUniqueStore, HybridStringUniqueStore, HybridCStringUniqueStore, HybridDoubleUniqueStore, UnorderedNumberUniqueStore, UnorderedStringUniqueStore, UnorderedCStringUniqueStore, UnorderedDoubleUniqueStore>; VESPA_GTEST_TYPED_TEST_SUITE(TestBase, UniqueStoreTestTypes); // Disable warnings emitted by gtest generated files when using typed tests diff --git a/vespalib/src/tests/datastore/unique_store_dictionary/unique_store_dictionary_test.cpp b/vespalib/src/tests/datastore/unique_store_dictionary/unique_store_dictionary_test.cpp index 664d4fe8acd..ce1ebe395ce 100644 --- a/vespalib/src/tests/datastore/unique_store_dictionary/unique_store_dictionary_test.cpp +++ b/vespalib/src/tests/datastore/unique_store_dictionary/unique_store_dictionary_test.cpp @@ -56,10 +56,12 @@ struct DictionaryReadTest : public ::testing::Test { void take_snapshot() { dict.freeze(); snapshot = dict.get_read_snapshot(); + snapshot->fill(); + snapshot->sort(); } }; -using DictionaryReadTestTypes = ::testing::Types<DefaultUniqueStoreDictionary, UniqueStoreDictionary<DefaultDictionary, IUniqueStoreDictionary, ShardedHashMap>>; +using DictionaryReadTestTypes = ::testing::Types<DefaultUniqueStoreDictionary, UniqueStoreDictionary<DefaultDictionary, IUniqueStoreDictionary, ShardedHashMap>, UniqueStoreDictionary<NoBTreeDictionary, IUniqueStoreDictionary, ShardedHashMap>>; VESPA_GTEST_TYPED_TEST_SUITE(DictionaryReadTest, DictionaryReadTestTypes); // Disable warnings emitted by gtest generated files when using typed tests @@ -79,6 +81,9 @@ TYPED_TEST(DictionaryReadTest, can_count_occurrences_of_a_key) TYPED_TEST(DictionaryReadTest, can_count_occurrences_of_keys_in_a_range) { + if (!this->dict.get_has_btree_dictionary()) { + return; + } this->add(3).add(5).add(7).add(9).take_snapshot(); EXPECT_EQ(1, this->snapshot->count_in_range(Comparator(3), Comparator(3))); EXPECT_EQ(1, this->snapshot->count_in_range(Comparator(3), Comparator(4))); diff --git a/vespalib/src/vespa/vespalib/datastore/fixed_size_hash_map.cpp b/vespalib/src/vespa/vespalib/datastore/fixed_size_hash_map.cpp index cca61e3c852..a8d5fd5ac5f 100644 --- a/vespalib/src/vespa/vespalib/datastore/fixed_size_hash_map.cpp +++ b/vespalib/src/vespa/vespalib/datastore/fixed_size_hash_map.cpp @@ -187,4 +187,54 @@ FixedSizeHashMap::get_memory_usage() const nodes_hold_size); } +void +FixedSizeHashMap::foreach_key(const std::function<void(EntryRef)>& callback) const +{ + for (auto& chain_head : _chain_heads) { + uint32_t node_idx = chain_head.load_relaxed(); + while (node_idx != no_node_idx) { + auto& node = _nodes[node_idx]; + callback(node.get_kv().first.load_relaxed()); + node_idx = node.get_next_node_idx().load(std::memory_order_relaxed); + } + } +} + +void +FixedSizeHashMap::move_keys(const std::function<EntryRef(EntryRef)>& callback) +{ + for (auto& chain_head : _chain_heads) { + uint32_t node_idx = chain_head.load_relaxed(); + while (node_idx != no_node_idx) { + auto& node = _nodes[node_idx]; + EntryRef old_ref = node.get_kv().first.load_relaxed(); + EntryRef new_ref = callback(old_ref); + if (new_ref != old_ref) { + node.get_kv().first.store_release(new_ref); + } + node_idx = node.get_next_node_idx().load(std::memory_order_relaxed); + } + } +} + +bool +FixedSizeHashMap::normalize_values(const std::function<EntryRef(EntryRef)>& normalize) +{ + bool changed = false; + for (auto& chain_head : _chain_heads) { + uint32_t node_idx = chain_head.load_relaxed(); + while (node_idx != no_node_idx) { + auto& node = _nodes[node_idx]; + EntryRef old_ref = node.get_kv().second.load_relaxed(); + EntryRef new_ref = normalize(old_ref); + if (new_ref != old_ref) { + node.get_kv().second.store_release(new_ref); + changed = true; + } + node_idx = node.get_next_node_idx().load(std::memory_order_relaxed); + } + } + return changed; +} + } diff --git a/vespalib/src/vespa/vespalib/datastore/fixed_size_hash_map.h b/vespalib/src/vespa/vespalib/datastore/fixed_size_hash_map.h index a0ca1bf0568..05a1006a5d5 100644 --- a/vespalib/src/vespa/vespalib/datastore/fixed_size_hash_map.h +++ b/vespalib/src/vespa/vespalib/datastore/fixed_size_hash_map.h @@ -118,6 +118,9 @@ public: bool full() const noexcept { return _nodes.size() == _nodes.capacity() && _free_count == 0u; } size_t size() const noexcept { return _count; } MemoryUsage get_memory_usage() const; + void foreach_key(const std::function<void(EntryRef)>& callback) const; + void move_keys(const std::function<EntryRef(EntryRef)>& callback); + bool normalize_values(const std::function<EntryRef(EntryRef)>& normalize); }; } diff --git a/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary.h b/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary.h index 3758dea9cfe..8493e8990e3 100644 --- a/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary.h +++ b/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary.h @@ -35,6 +35,7 @@ public: virtual void build(vespalib::ConstArrayRef<EntryRef> refs) = 0; virtual void build_with_payload(vespalib::ConstArrayRef<EntryRef> refs, vespalib::ConstArrayRef<uint32_t> payloads) = 0; virtual std::unique_ptr<IUniqueStoreDictionaryReadSnapshot> get_read_snapshot() const = 0; + virtual bool get_has_btree_dictionary() const = 0; virtual bool get_has_hash_dictionary() const = 0; }; diff --git a/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary_read_snapshot.h b/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary_read_snapshot.h index 62b2b330a67..b1ea05ffa4d 100644 --- a/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary_read_snapshot.h +++ b/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary_read_snapshot.h @@ -17,6 +17,8 @@ class EntryComparator; class IUniqueStoreDictionaryReadSnapshot { public: virtual ~IUniqueStoreDictionaryReadSnapshot() = default; + virtual void fill() = 0; + virtual void sort() = 0; virtual size_t count(const EntryComparator& comp) const = 0; virtual size_t count_in_range(const EntryComparator& low, const EntryComparator& high) const = 0; virtual void foreach_key(std::function<void(EntryRef)> callback) const = 0; diff --git a/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.cpp b/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.cpp index 7c609f89972..6bb372ff212 100644 --- a/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.cpp +++ b/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.cpp @@ -165,4 +165,39 @@ ShardedHashMap::get_memory_usage() const return memory_usage; } +void +ShardedHashMap::foreach_key(std::function<void(EntryRef)> callback) const +{ + for (size_t i = 0; i < num_shards; ++i) { + auto map = _maps[i].load(std::memory_order_relaxed); + if (map != nullptr) { + map->foreach_key(callback); + } + } +} + +void +ShardedHashMap::move_keys(std::function<EntryRef(EntryRef)> callback) +{ + for (size_t i = 0; i < num_shards; ++i) { + auto map = _maps[i].load(std::memory_order_relaxed); + if (map != nullptr) { + map->move_keys(callback); + } + } +} + +bool +ShardedHashMap::normalize_values(std::function<EntryRef(EntryRef)> normalize) +{ + bool changed = false; + for (size_t i = 0; i < num_shards; ++i) { + auto map = _maps[i].load(std::memory_order_relaxed); + if (map != nullptr) { + changed |= map->normalize_values(normalize); + } + } + return changed; +} + } diff --git a/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.h b/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.h index 7ea60646b06..30bbb2fbe4d 100644 --- a/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.h +++ b/vespalib/src/vespa/vespalib/datastore/sharded_hash_map.h @@ -56,6 +56,9 @@ public: size_t size() const noexcept; const EntryComparator &get_default_comparator() const noexcept { return *_comp; } MemoryUsage get_memory_usage() const; + void foreach_key(std::function<void(EntryRef)> callback) const; + void move_keys(std::function<EntryRef(EntryRef)> callback); + bool normalize_values(std::function<EntryRef(EntryRef)> normalize); }; } diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_btree_dictionary_read_snapshot.h b/vespalib/src/vespa/vespalib/datastore/unique_store_btree_dictionary_read_snapshot.h index 05406e5704d..2f958744f5f 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_btree_dictionary_read_snapshot.h +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_btree_dictionary_read_snapshot.h @@ -20,6 +20,8 @@ private: public: UniqueStoreBTreeDictionaryReadSnapshot(FrozenView frozen_view); + void fill() override; + void sort() override; size_t count(const EntryComparator& comp) const override; size_t count_in_range(const EntryComparator& low, const EntryComparator& high) const override; void foreach_key(std::function<void(EntryRef)> callback) const override; diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_btree_dictionary_read_snapshot.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store_btree_dictionary_read_snapshot.hpp index ec1ae63aa4b..8fb542b2999 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_btree_dictionary_read_snapshot.hpp +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_btree_dictionary_read_snapshot.hpp @@ -13,6 +13,18 @@ UniqueStoreBTreeDictionaryReadSnapshot<BTreeDictionaryT>::UniqueStoreBTreeDictio } template <typename BTreeDictionaryT> +void +UniqueStoreBTreeDictionaryReadSnapshot<BTreeDictionaryT>::fill() +{ +} + +template <typename BTreeDictionaryT> +void +UniqueStoreBTreeDictionaryReadSnapshot<BTreeDictionaryT>::sort() +{ +} + +template <typename BTreeDictionaryT> size_t UniqueStoreBTreeDictionaryReadSnapshot<BTreeDictionaryT>::count(const EntryComparator& comp) const { diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h index 66b82ace1c1..612188b4653 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h @@ -10,6 +10,7 @@ namespace vespalib::datastore { class EntryComparatorWrapper; class IUniqueStoreDictionaryReadSnapshot; +class NoBTreeDictionary { }; class NoHashDictionary; template <typename BTreeDictionaryT> @@ -25,6 +26,16 @@ public: } }; +template <> +class UniqueStoreBTreeDictionaryBase<NoBTreeDictionary> +{ +public: + static constexpr bool has_btree_dictionary = false; + UniqueStoreBTreeDictionaryBase() + { + } +}; + template <typename HashDictionaryT> class UniqueStoreHashDictionaryBase { @@ -55,8 +66,6 @@ template <typename BTreeDictionaryT, typename ParentT = IUniqueStoreDictionary, class UniqueStoreDictionary : public ParentT, public UniqueStoreBTreeDictionaryBase<BTreeDictionaryT>, public UniqueStoreHashDictionaryBase<HashDictionaryT> { protected: using BTreeDictionaryType = BTreeDictionaryT; - using DataType = typename BTreeDictionaryType::DataType; - using FrozenView = typename BTreeDictionaryType::FrozenView; using generation_t = typename ParentT::generation_t; public: @@ -77,6 +86,7 @@ public: void build(vespalib::ConstArrayRef<EntryRef> refs) override; void build_with_payload(vespalib::ConstArrayRef<EntryRef>, vespalib::ConstArrayRef<uint32_t> payloads) override; std::unique_ptr<IUniqueStoreDictionaryReadSnapshot> get_read_snapshot() const override; + bool get_has_btree_dictionary() const override; bool get_has_hash_dictionary() const override; }; diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.hpp index 9503a54c645..825db1a8cb5 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.hpp +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.hpp @@ -8,6 +8,7 @@ #include "unique_store_add_result.h" #include "unique_store_dictionary.h" #include "unique_store_btree_dictionary_read_snapshot.hpp" +#include "unique_store_hash_dictionary_read_snapshot.hpp" #include <vespa/vespalib/btree/btree.hpp> #include <vespa/vespalib/btree/btreebuilder.hpp> #include <vespa/vespalib/btree/btreeiterator.hpp> @@ -32,14 +33,18 @@ template <typename BTreeDictionaryT, typename ParentT, typename HashDictionaryT> void UniqueStoreDictionary<BTreeDictionaryT, ParentT, HashDictionaryT>::freeze() { - this->_btree_dict.getAllocator().freeze(); + if constexpr (has_btree_dictionary) { + this->_btree_dict.getAllocator().freeze(); + } } template <typename BTreeDictionaryT, typename ParentT, typename HashDictionaryT> void UniqueStoreDictionary<BTreeDictionaryT, ParentT, HashDictionaryT>::transfer_hold_lists(generation_t generation) { - this->_btree_dict.getAllocator().transferHoldLists(generation); + if constexpr (has_btree_dictionary) { + this->_btree_dict.getAllocator().transferHoldLists(generation); + } if constexpr (has_hash_dictionary) { this->_hash_dict.transfer_hold_lists(generation); } @@ -49,7 +54,9 @@ template <typename BTreeDictionaryT, typename ParentT, typename HashDictionaryT> void UniqueStoreDictionary<BTreeDictionaryT, ParentT, HashDictionaryT>::trim_hold_lists(generation_t firstUsed) { - this->_btree_dict.getAllocator().trimHoldLists(firstUsed); + if constexpr (has_btree_dictionary) { + this->_btree_dict.getAllocator().trimHoldLists(firstUsed); + } if constexpr (has_hash_dictionary) { this->_hash_dict.trim_hold_lists(firstUsed); } @@ -60,23 +67,32 @@ UniqueStoreAddResult UniqueStoreDictionary<BTreeDictionaryT, ParentT, HashDictionaryT>::add(const EntryComparator &comp, std::function<EntryRef(void)> insertEntry) { - auto itr = this->_btree_dict.lowerBound(EntryRef(), comp); - if (itr.valid() && !comp.less(EntryRef(), itr.getKey())) { - if constexpr (has_hash_dictionary) { - auto* result = this->_hash_dict.find(comp, EntryRef()); - assert(result != nullptr && result->first.load_relaxed() == itr.getKey()); + if constexpr (has_btree_dictionary) { + using DataType = typename BTreeDictionaryType::DataType; + auto itr = this->_btree_dict.lowerBound(EntryRef(), comp); + if (itr.valid() && !comp.less(EntryRef(), itr.getKey())) { + if constexpr (has_hash_dictionary) { + auto* result = this->_hash_dict.find(comp, EntryRef()); + assert(result != nullptr && result->first.load_relaxed() == itr.getKey()); + } + return UniqueStoreAddResult(itr.getKey(), false); + } else { + EntryRef newRef = insertEntry(); + this->_btree_dict.insert(itr, newRef, DataType()); + if constexpr (has_hash_dictionary) { + std::function<EntryRef(void)> insert_hash_entry([newRef]() noexcept -> EntryRef { return newRef; }); + auto& add_result = this->_hash_dict.add(comp, newRef, insert_hash_entry); + assert(add_result.first.load_relaxed() == newRef); + } + return UniqueStoreAddResult(newRef, true); } - return UniqueStoreAddResult(itr.getKey(), false); - } else { - EntryRef newRef = insertEntry(); - this->_btree_dict.insert(itr, newRef, DataType()); - if constexpr (has_hash_dictionary) { - std::function<EntryRef(void)> insert_hash_entry([newRef]() noexcept -> EntryRef { return newRef; }); - auto& add_result = this->_hash_dict.add(comp, newRef, insert_hash_entry); - assert(add_result.first.load_relaxed() == newRef); - } - return UniqueStoreAddResult(newRef, true); + bool inserted = false; + std::function<EntryRef(void)> insert_hash_entry([&inserted,&insertEntry]() { inserted = true; return insertEntry(); }); + auto& add_result = this->_hash_dict.add(comp, EntryRef(), insert_hash_entry); + EntryRef newRef = add_result.first.load_relaxed(); + assert(newRef.valid()); + return UniqueStoreAddResult(newRef, inserted); } } @@ -84,19 +100,24 @@ template <typename BTreeDictionaryT, typename ParentT, typename HashDictionaryT> EntryRef UniqueStoreDictionary<BTreeDictionaryT, ParentT, HashDictionaryT>::find(const EntryComparator &comp) { - auto itr = this->_btree_dict.lowerBound(EntryRef(), comp); - if (itr.valid() && !comp.less(EntryRef(), itr.getKey())) { - if constexpr (has_hash_dictionary) { - auto* result = this->_hash_dict.find(comp, EntryRef()); - assert(result != nullptr && result->first.load_relaxed() == itr.getKey()); + if constexpr (has_btree_dictionary) { + auto itr = this->_btree_dict.lowerBound(EntryRef(), comp); + if (itr.valid() && !comp.less(EntryRef(), itr.getKey())) { + if constexpr (has_hash_dictionary) { + auto* result = this->_hash_dict.find(comp, EntryRef()); + assert(result != nullptr && result->first.load_relaxed() == itr.getKey()); + } + return itr.getKey(); + } else { + if constexpr (has_hash_dictionary) { + auto* result = this->_hash_dict.find(comp, EntryRef()); + assert(result == nullptr); + } + return EntryRef(); } - return itr.getKey(); } else { - if constexpr (has_hash_dictionary) { - auto* result = this->_hash_dict.find(comp, EntryRef()); - assert(result == nullptr); - } - return EntryRef(); + auto* result = this->_hash_dict.find(comp, EntryRef()); + return (result == nullptr) ? EntryRef() : result->first.load_relaxed(); } } @@ -105,9 +126,11 @@ void UniqueStoreDictionary<BTreeDictionaryT, ParentT, HashDictionaryT>::remove(const EntryComparator &comp, EntryRef ref) { assert(ref.valid()); - auto itr = this->_btree_dict.lowerBound(ref, comp); - assert(itr.valid() && itr.getKey() == ref); - this->_btree_dict.remove(itr); + if constexpr (has_btree_dictionary) { + auto itr = this->_btree_dict.lowerBound(ref, comp); + assert(itr.valid() && itr.getKey() == ref); + this->_btree_dict.remove(itr); + } if constexpr (has_hash_dictionary) { auto *result = this->_hash_dict.remove(comp, ref); assert(result != nullptr && result->first.load_relaxed() == ref); @@ -118,20 +141,24 @@ template <typename BTreeDictionaryT, typename ParentT, typename HashDictionaryT> void UniqueStoreDictionary<BTreeDictionaryT, ParentT, HashDictionaryT>::move_entries(ICompactable &compactable) { - auto itr = this->_btree_dict.begin(); - while (itr.valid()) { - EntryRef oldRef(itr.getKey()); - EntryRef newRef(compactable.move(oldRef)); - if (newRef != oldRef) { - this->_btree_dict.thaw(itr); - itr.writeKey(newRef); - if constexpr (has_hash_dictionary) { - auto result = this->_hash_dict.find(this->_hash_dict.get_default_comparator(), oldRef); - assert(result != nullptr && result->first.load_relaxed() == oldRef); - result->first.store_release(newRef); + if constexpr (has_btree_dictionary) { + auto itr = this->_btree_dict.begin(); + while (itr.valid()) { + EntryRef oldRef(itr.getKey()); + EntryRef newRef(compactable.move(oldRef)); + if (newRef != oldRef) { + this->_btree_dict.thaw(itr); + itr.writeKey(newRef); + if constexpr (has_hash_dictionary) { + auto result = this->_hash_dict.find(this->_hash_dict.get_default_comparator(), oldRef); + assert(result != nullptr && result->first.load_relaxed() == oldRef); + result->first.store_release(newRef); + } } + ++itr; } - ++itr; + } else { + this->_hash_dict.move_keys([&compactable](EntryRef old_ref) { return compactable.move(old_ref); }); } } @@ -139,7 +166,11 @@ template <typename BTreeDictionaryT, typename ParentT, typename HashDictionaryT> uint32_t UniqueStoreDictionary<BTreeDictionaryT, ParentT, HashDictionaryT>::get_num_uniques() const { - return this->_btree_dict.size(); + if constexpr (has_btree_dictionary) { + return this->_btree_dict.size(); + } else { + return this->_hash_dict.size(); + } } template <typename BTreeDictionaryT, typename ParentT, typename HashDictionaryT> @@ -164,15 +195,18 @@ UniqueStoreDictionary<BTreeDictionaryT, ParentT, HashDictionaryT>::build(vespali { assert(refs.size() == ref_counts.size()); assert(!refs.empty()); - typename BTreeDictionaryType::Builder builder(this->_btree_dict.getAllocator()); - for (size_t i = 1; i < refs.size(); ++i) { - if (ref_counts[i] != 0u) { - builder.insert(refs[i], DataType()); - } else { - hold(refs[i]); + if constexpr (has_btree_dictionary) { + using DataType = typename BTreeDictionaryType::DataType; + typename BTreeDictionaryType::Builder builder(this->_btree_dict.getAllocator()); + for (size_t i = 1; i < refs.size(); ++i) { + if (ref_counts[i] != 0u) { + builder.insert(refs[i], DataType()); + } else { + hold(refs[i]); + } } + this->_btree_dict.assign(builder); } - this->_btree_dict.assign(builder); if constexpr (has_hash_dictionary) { for (size_t i = 1; i < refs.size(); ++i) { if (ref_counts[i] != 0u) { @@ -180,6 +214,8 @@ UniqueStoreDictionary<BTreeDictionaryT, ParentT, HashDictionaryT>::build(vespali std::function<EntryRef(void)> insert_hash_entry([ref]() noexcept -> EntryRef { return ref; }); auto& add_result = this->_hash_dict.add(this->_hash_dict.get_default_comparator(), ref, insert_hash_entry); assert(add_result.first.load_relaxed() == ref); + } else if constexpr (!has_btree_dictionary) { + hold(refs[i]); } } } @@ -189,11 +225,14 @@ template <typename BTreeDictionaryT, typename ParentT, typename HashDictionaryT> void UniqueStoreDictionary<BTreeDictionaryT, ParentT, HashDictionaryT>::build(vespalib::ConstArrayRef<EntryRef> refs) { - typename BTreeDictionaryType::Builder builder(this->_btree_dict.getAllocator()); - for (const auto& ref : refs) { - builder.insert(ref, DataType()); + if constexpr (has_btree_dictionary) { + using DataType = typename BTreeDictionaryType::DataType; + typename BTreeDictionaryType::Builder builder(this->_btree_dict.getAllocator()); + for (const auto& ref : refs) { + builder.insert(ref, DataType()); + } + this->_btree_dict.assign(builder); } - this->_btree_dict.assign(builder); if constexpr (has_hash_dictionary) { for (const auto& ref : refs) { std::function<EntryRef(void)> insert_hash_entry([ref]() noexcept -> EntryRef { return ref; }); @@ -209,24 +248,25 @@ UniqueStoreDictionary<BTreeDictionaryT, ParentT, HashDictionaryT>::build_with_pa vespalib::ConstArrayRef<uint32_t> payloads) { assert(refs.size() == payloads.size()); - typename BTreeDictionaryType::Builder builder(this->_btree_dict.getAllocator()); - for (size_t i = 0; i < refs.size(); ++i) { - if constexpr (std::is_same_v<DataType, uint32_t>) { - builder.insert(refs[i], payloads[i]); - } else { - builder.insert(refs[i], DataType()); + if constexpr (has_btree_dictionary) { + using DataType = typename BTreeDictionaryType::DataType; + typename BTreeDictionaryType::Builder builder(this->_btree_dict.getAllocator()); + for (size_t i = 0; i < refs.size(); ++i) { + if constexpr (std::is_same_v<DataType, uint32_t>) { + builder.insert(refs[i], payloads[i]); + } else { + builder.insert(refs[i], DataType()); + } } + this->_btree_dict.assign(builder); } - this->_btree_dict.assign(builder); if constexpr (has_hash_dictionary) { for (size_t i = 0; i < refs.size(); ++i) { EntryRef ref = refs[i]; std::function<EntryRef(void)> insert_hash_entry([ref]() noexcept -> EntryRef { return ref; }); auto& add_result = this->_hash_dict.add(this->_hash_dict.get_default_comparator(), ref, insert_hash_entry); assert(add_result.first.load_relaxed() == refs[i]); - if constexpr (std::is_same_v<DataType, uint32_t>) { - add_result.second.store_relaxed(EntryRef(payloads[i])); - } + add_result.second.store_relaxed(EntryRef(payloads[i])); } } } @@ -235,7 +275,20 @@ template <typename BTreeDictionaryT, typename ParentT, typename HashDictionaryT> std::unique_ptr<IUniqueStoreDictionaryReadSnapshot> UniqueStoreDictionary<BTreeDictionaryT, ParentT, HashDictionaryT>::get_read_snapshot() const { - return std::make_unique<UniqueStoreBTreeDictionaryReadSnapshot<BTreeDictionaryT>>(this->_btree_dict.getFrozenView()); + if constexpr (has_btree_dictionary) { + return std::make_unique<UniqueStoreBTreeDictionaryReadSnapshot<BTreeDictionaryT>>(this->_btree_dict.getFrozenView()); + } + if constexpr (has_hash_dictionary) { + return std::make_unique<UniqueStoreHashDictionaryReadSnapshot<HashDictionaryT>>(this->_hash_dict); + } + return std::unique_ptr<IUniqueStoreDictionaryReadSnapshot>(); +} + +template <typename BTreeDictionaryT, typename ParentT, typename HashDictionaryT> +bool +UniqueStoreDictionary<BTreeDictionaryT, ParentT, HashDictionaryT>::get_has_btree_dictionary() const +{ + return has_btree_dictionary; } template <typename BTreeDictionaryT, typename ParentT, typename HashDictionaryT> diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp index dbcb2c10b86..7a43b16e66a 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp @@ -15,6 +15,8 @@ UniqueStoreEnumerator<RefT>::UniqueStoreEnumerator(const IUniqueStoreDictionary _enumValues(), _next_enum_val(1) { + _dict_snapshot->fill(); + _dict_snapshot->sort(); allocate_enum_values(); } diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_hash_dictionary_read_snapshot.h b/vespalib/src/vespa/vespalib/datastore/unique_store_hash_dictionary_read_snapshot.h new file mode 100644 index 00000000000..947d8e00f95 --- /dev/null +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_hash_dictionary_read_snapshot.h @@ -0,0 +1,34 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "i_unique_store_dictionary_read_snapshot.h" + +namespace vespalib::datastore { + +/** + * Class that provides a read snapshot of a unique store dictionary using a hash. + * + * A generation guard that must be taken and held while the snapshot is considered valid. + * + * fill() must be called by the writer thread. + * sort() must be called if order of refs should correspond to sorted dictionary order. + * + */ +template <typename HashDictionaryT> +class UniqueStoreHashDictionaryReadSnapshot : public IUniqueStoreDictionaryReadSnapshot { +private: + using HashDictionaryType = HashDictionaryT; + const HashDictionaryType& _hash; + std::vector<EntryRef> _refs; + +public: + UniqueStoreHashDictionaryReadSnapshot(const HashDictionaryType &hash); + void fill() override; + void sort() override; + size_t count(const EntryComparator& comp) const override; + size_t count_in_range(const EntryComparator& low, const EntryComparator& high) const override; + void foreach_key(std::function<void(EntryRef)> callback) const override; +}; + +} diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_hash_dictionary_read_snapshot.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store_hash_dictionary_read_snapshot.hpp new file mode 100644 index 00000000000..4353b415aa4 --- /dev/null +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_hash_dictionary_read_snapshot.hpp @@ -0,0 +1,55 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "unique_store_hash_dictionary_read_snapshot.h" + +namespace vespalib::datastore { + +template <typename HashDictionaryT> +UniqueStoreHashDictionaryReadSnapshot<HashDictionaryT>::UniqueStoreHashDictionaryReadSnapshot(const HashDictionaryType &hash) + : _hash(hash), + _refs() +{ +} + +template <typename HashDictionaryT> +void +UniqueStoreHashDictionaryReadSnapshot<HashDictionaryT>::fill() +{ + _hash.foreach_key([this](EntryRef ref) { _refs.push_back(ref); }); +} + +template <typename HashDictionaryT> +void +UniqueStoreHashDictionaryReadSnapshot<HashDictionaryT>::sort() +{ + auto& comp = _hash.get_default_comparator(); + std::sort(_refs.begin(), _refs.end(), [&comp](EntryRef lhs, EntryRef rhs) { return comp.less(lhs, rhs); }); +} + +template <typename HashDictionaryT> +size_t +UniqueStoreHashDictionaryReadSnapshot<HashDictionaryT>::count(const EntryComparator& comp) const +{ + auto result = _hash.find(comp, EntryRef()); + return ((result != nullptr) ? 1u : 0u); +} + +template <typename HashDictionaryT> +size_t +UniqueStoreHashDictionaryReadSnapshot<HashDictionaryT>::count_in_range(const EntryComparator&, const EntryComparator&) const +{ + return 1u; +} + +template <typename HashDictionaryT> +void +UniqueStoreHashDictionaryReadSnapshot<HashDictionaryT>::foreach_key(std::function<void(EntryRef)> callback) const +{ + for (auto ref : _refs) { + callback(ref); + } +} + +} |