diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-03-30 14:48:04 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-30 14:48:04 +0200 |
commit | 10a837db4266489ad1f9bc7e91779bba37e3ccb2 (patch) | |
tree | 274fa449f0e0666b4275dddfc2b5c43e650cad83 /searchlib | |
parent | cd172bdb107e9bf34ac24ae8165d237c1ed1b960 (diff) | |
parent | d1024ae63d03f72b5053427753d0c5236ad6293a (diff) |
Merge pull request #17234 from vespa-engine/oregge/hash-only-unique-store-dictionary
Handle UniqueStoreDictionary without B-tree.
Diffstat (limited to 'searchlib')
6 files changed, 171 insertions, 75 deletions
diff --git a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp index 6eb13ea8f7d..61d5b3795e4 100644 --- a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp +++ b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp @@ -164,7 +164,7 @@ private: void testReload(AttributePtr v0, AttributePtr v1, AttributePtr v2, MemAttr::SP mv0, MemAttr::SP mv1, MemAttr::SP mv2, MemAttr::SP emv0, MemAttr::SP emv1, MemAttr::SP emv2, - Config cfg, const vespalib::string &pref, bool fastSearch); + Config cfg, const vespalib::string &pref, bool fastSearch, search::DictionaryConfig dictionary_config); public: template <typename VectorType, typename BufferType> @@ -596,7 +596,8 @@ EnumeratedSaveTest::testReload(AttributePtr v0, MemAttr::SP emv2, Config cfg, const vespalib::string &pref, - bool fastSearch) + bool fastSearch, + search::DictionaryConfig dictionary_config) { // typedef AttributePtr AVP; @@ -611,6 +612,7 @@ EnumeratedSaveTest::testReload(AttributePtr v0, Config check_cfg(cfg); check_cfg.setFastSearch(fastSearch); + check_cfg.set_dictionary_config(dictionary_config); TEST_DO((checkLoad<VectorType, BufferType>(check_cfg, pref + "0", v0))); TEST_DO((checkLoad<VectorType, BufferType>(check_cfg, pref + "1", v1))); TEST_DO((checkLoad<VectorType, BufferType>(check_cfg, pref + "2", v2))); @@ -695,11 +697,27 @@ EnumeratedSaveTest::test(BasicType bt, CollectionType ct, TEST_DO((testReload<VectorType, BufferType>(v0, v1, v2, mv0, mv1, mv2, emv0, emv1, emv2, - cfg, pref, false))); + cfg, pref, false, search::DictionaryConfig(search::DictionaryConfig::Type::BTREE)))); TEST_DO((testReload<VectorType, BufferType>(v0, v1, v2, mv0, mv1, mv2, emv0, emv1, emv2, - cfg, pref, true))); + cfg, pref, true, search::DictionaryConfig(search::DictionaryConfig::Type::BTREE)))); + TEST_DO((testReload<VectorType, BufferType>(v0, v1, v2, + mv0, mv1, mv2, + emv0, emv1, emv2, + cfg, pref, false, search::DictionaryConfig(search::DictionaryConfig::Type::BTREE_AND_HASH)))); + TEST_DO((testReload<VectorType, BufferType>(v0, v1, v2, + mv0, mv1, mv2, + emv0, emv1, emv2, + cfg, pref, true, search::DictionaryConfig(search::DictionaryConfig::Type::BTREE_AND_HASH)))); + TEST_DO((testReload<VectorType, BufferType>(v0, v1, v2, + mv0, mv1, mv2, + emv0, emv1, emv2, + cfg, pref, false, search::DictionaryConfig(search::DictionaryConfig::Type::HASH)))); + TEST_DO((testReload<VectorType, BufferType>(v0, v1, v2, + mv0, mv1, mv2, + emv0, emv1, emv2, + cfg, pref, true, search::DictionaryConfig(search::DictionaryConfig::Type::HASH)))); } TEST_F("Test enumerated save with single value int8", EnumeratedSaveTest) diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp index e3dce6d8b09..2499a219de3 100644 --- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp +++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp @@ -22,41 +22,61 @@ struct OrderedDoubleEnumStore { static constexpr Type type = Type::BTREE; }; -struct UnorderedDoubleEnumStore { +struct HybridDoubleEnumStore { using EnumStoreType = DoubleEnumStore; static constexpr Type type = Type::BTREE_AND_HASH; }; +struct UnorderedDoubleEnumStore { + using EnumStoreType = DoubleEnumStore; + static constexpr Type type = Type::HASH; +}; + struct OrderedFloatEnumStore { using EnumStoreType = FloatEnumStore; static constexpr Type type = Type::BTREE; }; -struct UnorderedFloatEnumStore { +struct HybridFloatEnumStore { using EnumStoreType = FloatEnumStore; static constexpr Type type = Type::BTREE_AND_HASH; }; +struct UnorderedFloatEnumStore { + using EnumStoreType = FloatEnumStore; + static constexpr Type type = Type::HASH; +}; + struct OrderedNumericEnumStore { using EnumStoreType = NumericEnumStore; static constexpr Type type = Type::BTREE; }; -struct UnorderedNumericEnumStore { +struct HybridNumericEnumStore { using EnumStoreType = NumericEnumStore; static constexpr Type type = Type::BTREE_AND_HASH; }; +struct UnorderedNumericEnumStore { + using EnumStoreType = NumericEnumStore; + static constexpr Type type = Type::HASH; +}; + struct OrderedStringEnumStore { using EnumStoreType = StringEnumStore; static constexpr Type type = Type::BTREE; }; -struct UnorderedStringEnumStore { +struct HybridStringEnumStore { using EnumStoreType = StringEnumStore; static constexpr Type type = Type::BTREE_AND_HASH; }; +struct UnorderedStringEnumStore { + using EnumStoreType = StringEnumStore; + static constexpr Type type = Type::HASH; +}; + using StringVector = std::vector<std::string>; using generation_t = vespalib::GenerationHandler::generation_t; @@ -116,7 +136,7 @@ public: #pragma GCC diagnostic ignored "-Wsuggest-override" #endif -using FloatEnumStoreTestTypes = ::testing::Types<OrderedFloatEnumStore, OrderedDoubleEnumStore, UnorderedFloatEnumStore, UnorderedDoubleEnumStore>; +using FloatEnumStoreTestTypes = ::testing::Types<OrderedFloatEnumStore, OrderedDoubleEnumStore, HybridFloatEnumStore, HybridDoubleEnumStore, UnorderedFloatEnumStore, UnorderedDoubleEnumStore>; VESPA_GTEST_TYPED_TEST_SUITE(FloatEnumStoreTest, FloatEnumStoreTestTypes); TYPED_TEST(FloatEnumStoreTest, numbers_can_be_inserted_and_retrieved) @@ -180,6 +200,8 @@ void testUniques(const StringEnumStore& ses, const std::vector<std::string>& unique) { auto read_snapshot = ses.get_dictionary().get_read_snapshot(); + read_snapshot->fill(); + read_snapshot->sort(); std::vector<EnumIndex> saved_indexes; read_snapshot->foreach_key([&saved_indexes](EntryRef idx) { saved_indexes.push_back(idx); }); uint32_t i = 0; @@ -503,7 +525,7 @@ public: #pragma GCC diagnostic ignored "-Wsuggest-override" #endif -using LoaderTestTypes = ::testing::Types<OrderedNumericEnumStore, OrderedFloatEnumStore, OrderedStringEnumStore, UnorderedNumericEnumStore, UnorderedFloatEnumStore, UnorderedStringEnumStore>; +using LoaderTestTypes = ::testing::Types<OrderedNumericEnumStore, OrderedFloatEnumStore, OrderedStringEnumStore, HybridNumericEnumStore, HybridFloatEnumStore, HybridStringEnumStore, UnorderedNumericEnumStore, UnorderedFloatEnumStore, UnorderedStringEnumStore>; VESPA_GTEST_TYPED_TEST_SUITE(LoaderTest, LoaderTestTypes); TYPED_TEST(LoaderTest, store_is_instantiated_with_enumerated_loader) @@ -604,7 +626,7 @@ EnumStoreDictionaryTest<EnumStoreTypeAndOrdering>::insert_value(size_t value_idx #pragma GCC diagnostic ignored "-Wsuggest-override" #endif -using EnumStoreDictionaryTestTypes = ::testing::Types<OrderedNumericEnumStore, UnorderedNumericEnumStore>; +using EnumStoreDictionaryTestTypes = ::testing::Types<OrderedNumericEnumStore, HybridNumericEnumStore, UnorderedNumericEnumStore>; VESPA_GTEST_TYPED_TEST_SUITE(EnumStoreDictionaryTest, EnumStoreDictionaryTestTypes); TYPED_TEST(EnumStoreDictionaryTest, find_frozen_index_works) diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp index 5a65d4937e4..8bb4e3fc47c 100644 --- a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp @@ -53,8 +53,12 @@ EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::free_unused_values(const IndexSet unused; // find unused enums - for (auto iter = this->_btree_dict.begin(); iter.valid(); ++iter) { - _enumStore.free_value_if_unused(iter.getKey(), unused); + if constexpr (has_btree_dictionary) { + for (auto iter = this->_btree_dict.begin(); iter.valid(); ++iter) { + _enumStore.free_value_if_unused(iter.getKey(), unused); + } + } else { + this->_hash_dict.foreach_key([this, &unused](EntryRef ref) { _enumStore.free_value_if_unused(ref, unused); }); } remove_unused_values(unused, cmp); } @@ -76,12 +80,14 @@ void EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::remove(const EntryComparator &comp, EntryRef ref) { assert(ref.valid()); - auto itr = this->_btree_dict.lowerBound(ref, comp); - assert(itr.valid() && itr.getKey() == ref); - if constexpr (std::is_same_v<BTreeDictionaryT, EnumPostingTree>) { - assert(EntryRef(itr.getData()) == EntryRef()); + if constexpr (has_btree_dictionary) { + auto itr = this->_btree_dict.lowerBound(ref, comp); + assert(itr.valid() && itr.getKey() == ref); + if constexpr (std::is_same_v<BTreeDictionaryT, EnumPostingTree>) { + assert(EntryRef(itr.getData()) == EntryRef()); + } + this->_btree_dict.remove(itr); } - this->_btree_dict.remove(itr); if constexpr (has_hash_dictionary) { auto *result = this->_hash_dict.remove(comp, ref); assert(result != nullptr && result->first.load_relaxed() == ref); @@ -93,12 +99,21 @@ bool EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::find_index(const vespalib::datastore::EntryComparator& cmp, Index& idx) const { - auto itr = this->_btree_dict.find(Index(), cmp); - if (!itr.valid()) { + if constexpr (has_hash_dictionary) { + auto find_result = this->_hash_dict.find(cmp, EntryRef()); + if (find_result != nullptr) { + idx = find_result->first.load_acquire(); + return true; + } return false; + } else { + auto itr = this->_btree_dict.find(Index(), cmp); + if (!itr.valid()) { + return false; + } + idx = itr.getKey(); + return true; } - idx = itr.getKey(); - return true; } template <typename BTreeDictionaryT, typename HashDictionaryT> @@ -113,13 +128,14 @@ EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::find_frozen_index(const return true; } return false; + } else { + auto itr = this->_btree_dict.getFrozenView().find(Index(), cmp); + if (!itr.valid()) { + return false; + } + idx = itr.getKey(); + return true; } - auto itr = this->_btree_dict.getFrozenView().find(Index(), cmp); - if (!itr.valid()) { - return false; - } - idx = itr.getKey(); - return true; } template <typename BTreeDictionaryT, typename HashDictionaryT> @@ -127,10 +143,17 @@ std::vector<IEnumStore::EnumHandle> EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::find_matching_enums(const vespalib::datastore::EntryComparator& cmp) const { std::vector<IEnumStore::EnumHandle> result; - auto itr = this->_btree_dict.getFrozenView().find(Index(), cmp); - while (itr.valid() && !cmp.less(Index(), itr.getKey())) { - result.push_back(itr.getKey().ref()); - ++itr; + if constexpr (has_btree_dictionary) { + auto itr = this->_btree_dict.getFrozenView().find(Index(), cmp); + while (itr.valid() && !cmp.less(Index(), itr.getKey())) { + result.push_back(itr.getKey().ref()); + ++itr; + } + } else { + auto find_result = this->_hash_dict.find(cmp, EntryRef()); + if (find_result != nullptr) { + result.push_back(find_result->first.load_acquire().ref()); + } } return result; } @@ -139,7 +162,11 @@ template <typename BTreeDictionaryT, typename HashDictionaryT> EntryRef EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::get_frozen_root() const { - return this->_btree_dict.getFrozenView().getRoot(); + if constexpr (has_btree_dictionary) { + return this->_btree_dict.getFrozenView().getRoot(); + } else { + return EntryRef(); + } } template <> @@ -154,18 +181,20 @@ std::pair<IEnumStore::Index, EntryRef> EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::find_posting_list(const vespalib::datastore::EntryComparator& cmp, EntryRef root) const { if constexpr (has_hash_dictionary) { + (void) root; auto find_result = this->_hash_dict.find(cmp, EntryRef()); if (find_result != nullptr) { return std::make_pair(find_result->first.load_acquire(), find_result->second.load_acquire()); } return std::make_pair(Index(), EntryRef()); + } else { + typename BTreeDictionaryType::ConstIterator itr(vespalib::btree::BTreeNode::Ref(), this->_btree_dict.getAllocator()); + itr.lower_bound(root, Index(), cmp); + if (itr.valid() && !cmp.less(Index(), itr.getKey())) { + return std::make_pair(itr.getKey(), EntryRef(itr.getData())); + } + return std::make_pair(Index(), EntryRef()); } - typename BTreeDictionaryType::ConstIterator itr(vespalib::btree::BTreeNode::Ref(), this->_btree_dict.getAllocator()); - itr.lower_bound(root, Index(), cmp); - if (itr.valid() && !cmp.less(Index(), itr.getKey())) { - return std::make_pair(itr.getKey(), EntryRef(itr.getData())); - } - return std::make_pair(Index(), EntryRef()); } template <typename BTreeDictionaryT, typename HashDictionaryT> @@ -193,19 +222,23 @@ template <typename BTreeDictionaryT, typename HashDictionaryT> void EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::clear_all_posting_lists(std::function<void(EntryRef)> clearer) { - auto& dict = this->_btree_dict; - auto itr = dict.begin(); - EntryRef prev; - while (itr.valid()) { - EntryRef ref(itr.getData()); - if (ref.ref() != prev.ref()) { - if (ref.valid()) { - clearer(ref); + if constexpr (has_btree_dictionary) { + auto& dict = this->_btree_dict; + auto itr = dict.begin(); + EntryRef prev; + while (itr.valid()) { + EntryRef ref(itr.getData()); + if (ref.ref() != prev.ref()) { + if (ref.valid()) { + clearer(ref); + } + prev = ref; } - prev = ref; + itr.writeData(EntryRef().ref()); + ++itr; } - itr.writeData(EntryRef().ref()); - ++itr; + } else { + this->_hash_dict.normalize_values([&clearer](EntryRef ref) { clearer(ref); return EntryRef(); }); } } @@ -220,17 +253,25 @@ template <typename BTreeDictionaryT, typename HashDictionaryT> void EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::update_posting_list(Index idx, const vespalib::datastore::EntryComparator& cmp, std::function<EntryRef(EntryRef)> updater) { - auto& dict = this->_btree_dict; - auto itr = dict.lowerBound(idx, cmp); - assert(itr.valid() && itr.getKey() == idx); - EntryRef old_posting_idx(itr.getData()); - EntryRef new_posting_idx = updater(old_posting_idx); - dict.thaw(itr); - itr.writeData(new_posting_idx.ref()); - if constexpr (has_hash_dictionary) { + if constexpr (has_btree_dictionary) { + auto& dict = this->_btree_dict; + auto itr = dict.lowerBound(idx, cmp); + assert(itr.valid() && itr.getKey() == idx); + EntryRef old_posting_idx(itr.getData()); + EntryRef new_posting_idx = updater(old_posting_idx); + dict.thaw(itr); + itr.writeData(new_posting_idx.ref()); + if constexpr (has_hash_dictionary) { + auto find_result = this->_hash_dict.find(this->_hash_dict.get_default_comparator(), idx); + assert(find_result != nullptr && find_result->first.load_relaxed() == idx); + assert(find_result->second.load_relaxed() == old_posting_idx); + find_result->second.store_release(new_posting_idx); + } + } else { auto find_result = this->_hash_dict.find(this->_hash_dict.get_default_comparator(), idx); assert(find_result != nullptr && find_result->first.load_relaxed() == idx); - assert(find_result->second.load_relaxed() == old_posting_idx); + EntryRef old_posting_idx = find_result->second.load_relaxed(); + EntryRef new_posting_idx = updater(old_posting_idx); find_result->second.store_release(new_posting_idx); } } @@ -246,24 +287,28 @@ template <typename BTreeDictionaryT, typename HashDictionaryT> bool EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::normalize_posting_lists(std::function<EntryRef(EntryRef)> normalize) { - bool changed = false; - auto& dict = this->_btree_dict; - for (auto itr = dict.begin(); itr.valid(); ++itr) { - EntryRef old_posting_idx(itr.getData()); - EntryRef new_posting_idx = normalize(old_posting_idx); - if (new_posting_idx != old_posting_idx) { - changed = true; - dict.thaw(itr); - itr.writeData(new_posting_idx.ref()); - if constexpr (has_hash_dictionary) { - auto find_result = this->_hash_dict.find(this->_hash_dict.get_default_comparator(), itr.getKey()); - assert(find_result != nullptr && find_result->first.load_relaxed() == itr.getKey()); - assert(find_result->second.load_relaxed() == old_posting_idx); - find_result->second.store_release(new_posting_idx); + if constexpr (has_btree_dictionary) { + bool changed = false; + auto& dict = this->_btree_dict; + for (auto itr = dict.begin(); itr.valid(); ++itr) { + EntryRef old_posting_idx(itr.getData()); + EntryRef new_posting_idx = normalize(old_posting_idx); + if (new_posting_idx != old_posting_idx) { + changed = true; + dict.thaw(itr); + itr.writeData(new_posting_idx.ref()); + if constexpr (has_hash_dictionary) { + auto find_result = this->_hash_dict.find(this->_hash_dict.get_default_comparator(), itr.getKey()); + assert(find_result != nullptr && find_result->first.load_relaxed() == itr.getKey()); + assert(find_result->second.load_relaxed() == old_posting_idx); + find_result->second.store_release(new_posting_idx); + } } } + return changed; + } else { + return this->_hash_dict.normalize_values(normalize); } - return changed; } template <> @@ -273,6 +318,13 @@ EnumStoreDictionary<EnumTree>::get_posting_dictionary() const LOG_ABORT("should not be reached"); } +template <> +const EnumPostingTree & +EnumStoreDictionary<vespalib::datastore::NoBTreeDictionary, vespalib::datastore::ShardedHashMap>::get_posting_dictionary() const +{ + LOG_ABORT("should not be reached"); +} + template <typename BTreeDictionaryT, typename HashDictionaryT> const EnumPostingTree & EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::get_posting_dictionary() const @@ -357,6 +409,8 @@ template class EnumStoreDictionary<EnumPostingTree>; template class EnumStoreDictionary<EnumPostingTree, vespalib::datastore::ShardedHashMap>; +template class EnumStoreDictionary<vespalib::datastore::NoBTreeDictionary, vespalib::datastore::ShardedHashMap>; + } namespace vespalib::btree { diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h index 53ba81b1949..a39ff524618 100644 --- a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h +++ b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h @@ -25,6 +25,7 @@ private: using ParentUniqueStoreDictionary = vespalib::datastore::UniqueStoreDictionary<BTreeDictionaryT, IEnumStoreDictionary, HashDictionaryT>; using generation_t = IEnumStoreDictionary::generation_t; protected: + using ParentUniqueStoreDictionary::has_btree_dictionary; using ParentUniqueStoreDictionary::has_hash_dictionary; private: IEnumStore& _enumStore; diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.cpp b/searchlib/src/vespa/searchlib/attribute/enumstore.cpp index 5065a4a6cdc..12dd504bad8 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.cpp @@ -50,6 +50,7 @@ make_enum_store_dictionary(IEnumStore &store, bool has_postings, search::Diction } else { switch (type) { case search::DictionaryConfig::Type::HASH: + return std::make_unique<EnumStoreDictionary<vespalib::datastore::NoBTreeDictionary, vespalib::datastore::ShardedHashMap>>(store, std::move(compare)); case search::DictionaryConfig::Type::BTREE_AND_HASH: return std::make_unique<EnumStoreDictionary<EnumPostingTree, vespalib::datastore::ShardedHashMap>>(store, std::move(compare)); default: diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp index 08db7b2b6b7..576506aeea9 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.cpp @@ -19,7 +19,7 @@ PostingListSearchContext(const IEnumStoreDictionary& dictionary, bool useBitVector, const ISearchContext &baseSearchCtx) : _dictionary(dictionary), - _frozenDictionary(_dictionary.get_posting_dictionary().getFrozenView()), + _frozenDictionary(_dictionary.get_has_btree_dictionary() ? _dictionary.get_posting_dictionary().getFrozenView() : FrozenDictionary(EnumIndex(), *((Dictionary::NodeAllocatorType *) 0))), _lowerDictItr(BTreeNode::Ref(), _frozenDictionary.getAllocator()), _upperDictItr(BTreeNode::Ref(), _frozenDictionary.getAllocator()), _uniqueValues(0u), |