diff options
author | Geir Storli <geirst@verizonmedia.com> | 2019-08-27 23:28:20 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-08-27 23:28:20 +0200 |
commit | bd65431ca39696ec242d63e4d378bae1a333e0c9 (patch) | |
tree | 31325629aa98b8ece0c8791a2b347f6987de0efe | |
parent | 40aa4999570c56de4af1fed7ef800761f8170062 (diff) | |
parent | ea038f953d5892fe1a61161b1da38290668e81a9 (diff) |
Merge pull request #10433 from vespa-engine/geirst/refactor-enum-store-dictionary
Refactor enum store dictionary
25 files changed, 611 insertions, 573 deletions
diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp index 6fc90786c7d..29c35f11e65 100644 --- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp +++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp @@ -361,10 +361,8 @@ void EnumStoreTest::testUniques (const EnumStoreType &ses, const std::vector<std::string> &unique) { - const EnumStoreDict<Dictionary> *enumDict = - dynamic_cast<const EnumStoreDict<Dictionary> *> - (&ses.getEnumStoreDict()); - assert(enumDict != NULL); + const auto* enumDict = dynamic_cast<const EnumStoreDictionary<Dictionary> *>(&ses.getEnumStoreDict()); + assert(enumDict != nullptr); const Dictionary &dict = enumDict->getDictionary(); uint32_t i = 0; EnumIndex idx; diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt index 41132bddc64..ea33a4d552c 100644 --- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt @@ -39,6 +39,7 @@ vespa_add_library(searchlib_attribute OBJECT enumattributesaver.cpp enumcomparator.cpp enumhintsearchcontext.cpp + enum_store_dictionary.cpp enumstore.cpp enumstorebase.cpp extendableattributes.cpp diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp new file mode 100644 index 00000000000..690b267ec0c --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp @@ -0,0 +1,318 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "enum_store_dictionary.h" +#include "enumstore.h" +#include "enumstorebase.h" +#include <vespa/vespalib/btree/btree.hpp> +#include <vespa/vespalib/btree/btreeiterator.hpp> +#include <vespa/vespalib/btree/btreenode.hpp> +#include <vespa/vespalib/btree/btreenodeallocator.hpp> +#include <vespa/vespalib/btree/btreeroot.hpp> +#include <vespa/vespalib/datastore/datastore.hpp> +#include <vespa/vespalib/datastore/unique_store_dictionary.hpp> +#include <vespa/vespalib/util/bufferwriter.h> + +#include <vespa/log/log.h> +LOG_SETUP(".searchlib.attribute.enum_store_dictionary"); + +namespace search { + +using btree::BTreeNode; + +template <typename DictionaryT> +EnumStoreDictionary<DictionaryT>::EnumStoreDictionary(EnumStoreBase& enumStore) + : ParentUniqueStoreDictionary(), + _enumStore(enumStore) +{ +} + +template <typename DictionaryT> +EnumStoreDictionary<DictionaryT>::~EnumStoreDictionary() = default; + +template <typename DictionaryT> +uint32_t +EnumStoreDictionary<DictionaryT>::getNumUniques() const +{ + return this->_dict.size(); +} + +template <typename DictionaryT> +void +EnumStoreDictionary<DictionaryT>::writeAllValues(BufferWriter& writer, + BTreeNode::Ref rootRef) const +{ + constexpr size_t BATCHSIZE = 1000; + std::vector<Index> idxs; + idxs.reserve(BATCHSIZE); + typename DictionaryT::Iterator it(rootRef, this->_dict.getAllocator()); + while (it.valid()) { + if (idxs.size() >= idxs.capacity()) { + _enumStore.writeValues(writer, &idxs[0], idxs.size()); + idxs.clear(); + } + idxs.push_back(it.getKey()); + ++it; + } + if (!idxs.empty()) { + _enumStore.writeValues(writer, &idxs[0], idxs.size()); + } +} + +template <typename DictionaryT> +ssize_t +EnumStoreDictionary<DictionaryT>::deserialize(const void* src, + size_t available, + IndexVector& idx) +{ + return _enumStore.deserialize(src, available, idx, this->_dict); +} + +template <typename DictionaryT> +void +EnumStoreDictionary<DictionaryT>::fixupRefCounts(const EnumVector& hist) +{ + _enumStore.fixupRefCounts(hist, this->_dict); +} + +template <typename DictionaryT> +void +EnumStoreDictionary<DictionaryT>::removeUnusedEnums(const IndexSet& unused, + const datastore::EntryComparator& cmp, + const datastore::EntryComparator* fcmp) +{ + using Iterator = typename DictionaryT::Iterator; + if (unused.empty()) { + return; + } + Iterator it(BTreeNode::Ref(), this->_dict.getAllocator()); + for (const auto& idx : unused) { + it.lower_bound(this->_dict.getRoot(), idx, cmp); + assert(it.valid() && !cmp(idx, it.getKey())); + if (Iterator::hasData() && fcmp != nullptr) { + typename DictionaryT::DataType pidx(it.getData()); + this->_dict.remove(it); + if (!it.valid() || (*fcmp)(idx, it.getKey())) { + continue; // Next entry does not use same posting list + } + --it; + if (it.valid() && !(*fcmp)(it.getKey(), idx)) { + continue; // Previous entry uses same posting list + } + if (it.valid()) { + ++it; + } else { + it.begin(); + } + this->_dict.thaw(it); + it.writeData(pidx); + } else { + this->_dict.remove(it); + } + } +} + +template <typename DictionaryT> +void +EnumStoreDictionary<DictionaryT>::freeUnusedEnums(const datastore::EntryComparator& cmp, + const datastore::EntryComparator* fcmp) +{ + IndexSet unused; + + // find unused enums + for (auto iter = this->_dict.begin(); iter.valid(); ++iter) { + _enumStore.freeUnusedEnum(iter.getKey(), unused); + } + removeUnusedEnums(unused, cmp, fcmp); +} + +template <typename DictionaryT> +void +EnumStoreDictionary<DictionaryT>::freeUnusedEnums(const IndexSet& toRemove, + const datastore::EntryComparator& cmp, + const datastore::EntryComparator* fcmp) +{ + IndexSet unused; + for (const auto& index : toRemove) { + _enumStore.freeUnusedEnum(index, unused); + } + removeUnusedEnums(unused, cmp, fcmp); +} + +template <typename DictionaryT> +bool +EnumStoreDictionary<DictionaryT>::findIndex(const datastore::EntryComparator& cmp, + Index& idx) const +{ + auto itr = this->_dict.find(Index(), cmp); + if (!itr.valid()) { + return false; + } + idx = itr.getKey(); + return true; +} + +template <typename DictionaryT> +bool +EnumStoreDictionary<DictionaryT>::findFrozenIndex(const datastore::EntryComparator& cmp, + Index& idx) const +{ + auto itr = this->_dict.getFrozenView().find(Index(), cmp); + if (!itr.valid()) { + return false; + } + idx = itr.getKey(); + return true; +} + +template <typename DictionaryT> +std::vector<EnumStoreBase::EnumHandle> +EnumStoreDictionary<DictionaryT>::findMatchingEnums(const datastore::EntryComparator& cmp) const +{ + std::vector<EnumStoreBase::EnumHandle> result; + auto itr = this->_dict.getFrozenView().find(Index(), cmp); + while (itr.valid() && !cmp(Index(), itr.getKey())) { + result.push_back(itr.getKey().ref()); + ++itr; + } + return result; +} + +template <typename DictionaryT> +void +EnumStoreDictionary<DictionaryT>::onReset() +{ + this->_dict.clear(); +} + +template <> +EnumPostingTree & +EnumStoreDictionary<EnumTree>::getPostingDictionary() +{ + LOG_ABORT("should not be reached"); +} + +template <> +EnumPostingTree & +EnumStoreDictionary<EnumPostingTree>::getPostingDictionary() +{ + return _dict; +} + +template <> +const EnumPostingTree & +EnumStoreDictionary<EnumTree>::getPostingDictionary() const +{ + LOG_ABORT("should not be reached"); +} + +template <> +const EnumPostingTree & +EnumStoreDictionary<EnumPostingTree>::getPostingDictionary() const +{ + return _dict; +} + +template <typename DictionaryT> +bool +EnumStoreDictionary<DictionaryT>::hasData() const +{ + return DictionaryT::LeafNodeType::hasData(); +} + + +template class EnumStoreDictionary<EnumTree>; + +template class EnumStoreDictionary<EnumPostingTree>; + +template +class btree::BTreeNodeT<EnumStoreBase::Index, EnumTreeTraits::INTERNAL_SLOTS>; + +template +class btree::BTreeNodeTT<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, EnumTreeTraits::INTERNAL_SLOTS>; + +template +class btree::BTreeNodeTT<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; + +template +class btree::BTreeInternalNode<EnumStoreBase::Index, btree::NoAggregated, EnumTreeTraits::INTERNAL_SLOTS>; + +template +class btree::BTreeLeafNode<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; + +template +class btree::BTreeLeafNode<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; + +template +class btree::BTreeLeafNodeTemp<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; + +template +class btree::BTreeLeafNodeTemp<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; + +template +class btree::BTreeNodeStore<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; + +template +class btree::BTreeNodeStore<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; + +template +class btree::BTreeRoot<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +template +class btree::BTreeRoot<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +template +class btree::BTreeRootT<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +template +class btree::BTreeRootT<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +template +class btree::BTreeRootBase<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; + +template +class btree::BTreeRootBase<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; + +template +class btree::BTreeNodeAllocator<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; + +template +class btree::BTreeNodeAllocator<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; + +template +class btree::BTreeIteratorBase<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS, EnumTreeTraits::PATH_SIZE>; +template +class btree::BTreeIteratorBase<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS, EnumTreeTraits::PATH_SIZE>; + +template class btree::BTreeConstIterator<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +template class btree::BTreeConstIterator<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +template +class btree::BTreeIterator<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; +template +class btree::BTreeIterator<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +template +class btree::BTree<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; +template +class btree::BTree<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h new file mode 100644 index 00000000000..cd28f10a3cd --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h @@ -0,0 +1,158 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "i_enum_store_dictionary.h" +#include <vespa/vespalib/btree/btree.h> + +namespace search { + +class EnumStoreBase; + +/** + * Concrete dictionary for an enum store that extends the functionality of a unique store dictionary. + */ +template <typename DictionaryT> +class EnumStoreDictionary : public datastore::UniqueStoreDictionary<DictionaryT, IEnumStoreDictionary> { +private: + using EnumVector = IEnumStoreDictionary::EnumVector; + using Index = IEnumStoreDictionary::Index; + using IndexSet = IEnumStoreDictionary::IndexSet; + using IndexVector = IEnumStoreDictionary::IndexVector; + using ParentUniqueStoreDictionary = datastore::UniqueStoreDictionary<DictionaryT, IEnumStoreDictionary>; + using generation_t = IEnumStoreDictionary::generation_t; + + EnumStoreBase& _enumStore; + +public: + EnumStoreDictionary(EnumStoreBase& enumStore); + + ~EnumStoreDictionary() override; + + const DictionaryT &getDictionary() const { return this->_dict; } + DictionaryT &getDictionary() { return this->_dict; } + + uint32_t getNumUniques() const override; + void writeAllValues(BufferWriter& writer, btree::BTreeNode::Ref rootRef) const override; + ssize_t deserialize(const void* src, size_t available, IndexVector& idx) override; + void fixupRefCounts(const EnumVector& hist) override; + + void removeUnusedEnums(const IndexSet& unused, + const datastore::EntryComparator& cmp, + const datastore::EntryComparator* fcmp); + + void freeUnusedEnums(const datastore::EntryComparator& cmp, + const datastore::EntryComparator* fcmp) override; + + void freeUnusedEnums(const IndexSet& toRemove, + const datastore::EntryComparator& cmp, + const datastore::EntryComparator* fcmp) override; + + bool findIndex(const datastore::EntryComparator& cmp, Index& idx) const override; + bool findFrozenIndex(const datastore::EntryComparator& cmp, Index& idx) const override; + std::vector<attribute::IAttributeVector::EnumHandle> + findMatchingEnums(const datastore::EntryComparator& cmp) const override; + + void onReset() override; + btree::BTreeNode::Ref getFrozenRootRef() const override { return this->get_frozen_root(); } + + EnumPostingTree & getPostingDictionary() override; + const EnumPostingTree & getPostingDictionary() const override; + + bool hasData() const override; +}; + +extern template +class btree::BTreeNodeT<EnumStoreIndex, EnumTreeTraits::INTERNAL_SLOTS>; + +extern template +class btree::BTreeNodeTT<EnumStoreIndex, datastore::EntryRef, btree::NoAggregated, EnumTreeTraits::INTERNAL_SLOTS>; + +extern template +class btree::BTreeNodeTT<EnumStoreIndex, btree::BTreeNoLeafData, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; + +extern template +class btree::BTreeInternalNode<EnumStoreIndex, btree::NoAggregated, EnumTreeTraits::INTERNAL_SLOTS>; + +extern template +class btree::BTreeLeafNode<EnumStoreIndex, btree::BTreeNoLeafData, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; + +extern template +class btree::BTreeLeafNode<EnumStoreIndex, datastore::EntryRef, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; + +extern template +class btree::BTreeLeafNodeTemp<EnumStoreIndex, btree::BTreeNoLeafData, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; + +extern template +class btree::BTreeLeafNodeTemp<EnumStoreIndex, datastore::EntryRef, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; + +extern template +class btree::BTreeNodeStore<EnumStoreIndex, btree::BTreeNoLeafData, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; + +extern template +class btree::BTreeNodeStore<EnumStoreIndex, datastore::EntryRef, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; + +extern template +class btree::BTreeRoot<EnumStoreIndex, btree::BTreeNoLeafData, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +extern template +class btree::BTreeRoot<EnumStoreIndex, datastore::EntryRef, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +extern template +class btree::BTreeRootT<EnumStoreIndex, btree::BTreeNoLeafData, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +extern template +class btree::BTreeRootT<EnumStoreIndex, datastore::EntryRef, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +extern template +class btree::BTreeRootBase<EnumStoreIndex, btree::BTreeNoLeafData, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; + +extern template +class btree::BTreeRootBase<EnumStoreIndex, datastore::EntryRef, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; + +extern template +class btree::BTreeNodeAllocator<EnumStoreIndex, btree::BTreeNoLeafData, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; + +extern template +class btree::BTreeNodeAllocator<EnumStoreIndex, datastore::EntryRef, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; + + +extern template +class btree::BTreeIteratorBase<EnumStoreIndex, btree::BTreeNoLeafData, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS, EnumTreeTraits::PATH_SIZE>; +extern template +class btree::BTreeIteratorBase<EnumStoreIndex, datastore::EntryRef, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS, EnumTreeTraits::PATH_SIZE>; + +extern template class btree::BTreeConstIterator<EnumStoreIndex, btree::BTreeNoLeafData, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +extern template class btree::BTreeConstIterator<EnumStoreIndex, datastore::EntryRef, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +extern template +class btree::BTreeIterator<EnumStoreIndex, btree::BTreeNoLeafData, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; +extern template +class btree::BTreeIterator<EnumStoreIndex, datastore::EntryRef, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +extern template +class btree::BTree<EnumStoreIndex, btree::BTreeNoLeafData, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; +extern template +class btree::BTree<EnumStoreIndex, datastore::EntryRef, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + + +} diff --git a/searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp index a1c7a343ac8..75cd504faad 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp @@ -24,7 +24,7 @@ EnumAttributeSaver::writeUdat(IAttributeSaveTarget &saveTarget) if (saveTarget.getEnumerated()) { std::unique_ptr<BufferWriter> udatWriter(saveTarget.udatWriter().allocBufferWriter()); - const EnumStoreDictBase &enumDict = _enumStore.getEnumStoreDict(); + const auto& enumDict = _enumStore.getEnumStoreDict(); enumDict.writeAllValues(*udatWriter, _enumerator.get_frozen_root()); udatWriter->flush(); } diff --git a/searchlib/src/vespa/searchlib/attribute/enumcomparator.h b/searchlib/src/vespa/searchlib/attribute/enumcomparator.h index a9e76082a34..66dd9dd5e6c 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumcomparator.h +++ b/searchlib/src/vespa/searchlib/attribute/enumcomparator.h @@ -3,6 +3,7 @@ #pragma once #include "enumstore.h" +#include <vespa/vespalib/datastore/entry_comparator.h> namespace search { diff --git a/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.cpp b/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.cpp index 4fec660468b..f221ac858bd 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.cpp @@ -10,7 +10,7 @@ using btree::BTreeNode; using fef::TermFieldMatchData; EnumHintSearchContext:: -EnumHintSearchContext(const EnumStoreDictBase &dictionary, +EnumHintSearchContext(const IEnumStoreDictionary &dictionary, uint32_t docIdLimit, uint64_t numValues) : _dict_snapshot(dictionary.get_read_snapshot()), diff --git a/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h index 1844c228014..6f766bec386 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h +++ b/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h @@ -15,13 +15,13 @@ namespace search::attribute { class EnumHintSearchContext : public IPostingListSearchContext { - const EnumStoreDictBase::ReadSnapshot::UP _dict_snapshot; + const IEnumStoreDictionary::ReadSnapshot::UP _dict_snapshot; uint32_t _uniqueValues; uint32_t _docIdLimit; uint64_t _numValues; // attr.getStatus().getNumValues(); protected: - EnumHintSearchContext(const EnumStoreDictBase &dictionary, + EnumHintSearchContext(const IEnumStoreDictionary &dictionary, uint32_t docIdLimit, uint64_t numValues); ~EnumHintSearchContext(); diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h index 7cf9ada8064..56bf257b046 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.h +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h @@ -2,6 +2,7 @@ #pragma once +#include "enum_store_dictionary.h" #include "enumstorebase.h" #include <vespa/searchlib/util/foldedstringcompare.h> #include <vespa/vespalib/btree/btreenode.h> diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp index b826f1ca088..4627c7bc03e 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp @@ -274,9 +274,9 @@ void EnumStoreT<EntryType>::addEnum(Type value, Index & newIdx) { if (_enumDict->hasData()) { - addEnum(value, newIdx, static_cast<EnumStoreDict<EnumPostingTree> *>(_enumDict)->getDictionary()); + addEnum(value, newIdx, static_cast<EnumStoreDictionary<EnumPostingTree> *>(_enumDict)->getDictionary()); } else { - addEnum(value, newIdx, static_cast<EnumStoreDict<EnumTree> *>(_enumDict)->getDictionary()); + addEnum(value, newIdx, static_cast<EnumStoreDictionary<EnumTree> *>(_enumDict)->getDictionary()); } } @@ -339,9 +339,9 @@ void EnumStoreT<EntryType>::reset(Builder &builder) { if (_enumDict->hasData()) { - reset(builder, static_cast<EnumStoreDict<EnumPostingTree> *>(_enumDict)->getDictionary()); + reset(builder, static_cast<EnumStoreDictionary<EnumPostingTree> *>(_enumDict)->getDictionary()); } else { - reset(builder, static_cast<EnumStoreDict<EnumTree> *>(_enumDict)->getDictionary()); + reset(builder, static_cast<EnumStoreDictionary<EnumTree> *>(_enumDict)->getDictionary()); } } @@ -405,9 +405,9 @@ EnumStoreT<EntryType>::performCompaction(uint64_t bytesNeeded, EnumIndexMap & ol return false; } if (_enumDict->hasData()) { - performCompaction(static_cast<EnumStoreDict<EnumPostingTree> *>(_enumDict)->getDictionary(), old2New); + performCompaction(static_cast<EnumStoreDictionary<EnumPostingTree> *>(_enumDict)->getDictionary(), old2New); } else { - performCompaction(static_cast<EnumStoreDict<EnumTree> *>(_enumDict)->getDictionary(), old2New); + performCompaction(static_cast<EnumStoreDictionary<EnumTree> *>(_enumDict)->getDictionary(), old2New); } return true; } diff --git a/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp b/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp index f54f7a26941..71b55689048 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp @@ -2,6 +2,7 @@ #include "enumstorebase.h" #include "enumstore.h" +#include "enum_store_dictionary.h" #include <vespa/vespalib/btree/btree.hpp> #include <vespa/vespalib/btree/btreeiterator.hpp> #include <vespa/vespalib/btree/btreenode.hpp> @@ -74,9 +75,9 @@ EnumStoreBase::EnumStoreBase(uint64_t initBufferSize, bool hasPostings) _toHoldBuffers() { if (hasPostings) - _enumDict = new EnumStoreDict<EnumPostingTree>(*this); + _enumDict = new EnumStoreDictionary<EnumPostingTree>(*this); else - _enumDict = new EnumStoreDict<EnumTree>(*this); + _enumDict = new EnumStoreDictionary<EnumTree>(*this); _store.addType(&_type); _type.setSizeNeededAndDead(initBufferSize, 0); _store.initActiveBuffers(); @@ -249,220 +250,7 @@ vespalib::asciistream & operator << (vespalib::asciistream & os, const EnumStore } -EnumStoreDictBase::EnumStoreDictBase() - : datastore::UniqueStoreDictionaryBase() -{ -} - -EnumStoreDictBase::~EnumStoreDictBase() = default; - -template <typename Dictionary> -EnumStoreDict<Dictionary>::EnumStoreDict(EnumStoreBase &enumStore) - : ParentUniqueStoreDictionary(), - _enumStore(enumStore) -{ -} - -template <typename Dictionary> -EnumStoreDict<Dictionary>::~EnumStoreDict() = default; - -template <typename Dictionary> -uint32_t -EnumStoreDict<Dictionary>::getNumUniques() const -{ - return this->_dict.size(); -} - -template <typename Dictionary> -void -EnumStoreDict<Dictionary>:: -writeAllValues(BufferWriter &writer, - btree::BTreeNode::Ref rootRef) const -{ - constexpr size_t BATCHSIZE = 1000; - std::vector<Index> idxs; - idxs.reserve(BATCHSIZE); - typename Dictionary::Iterator it(rootRef, this->_dict.getAllocator()); - while (it.valid()) { - if (idxs.size() >= idxs.capacity()) { - _enumStore.writeValues(writer, &idxs[0], idxs.size()); - idxs.clear(); - } - idxs.push_back(it.getKey()); - ++it; - } - if (!idxs.empty()) { - _enumStore.writeValues(writer, &idxs[0], idxs.size()); - } -} - -template <typename Dictionary> -ssize_t -EnumStoreDict<Dictionary>::deserialize(const void *src, - size_t available, - IndexVector &idx) -{ - return _enumStore.deserialize(src, available, idx, this->_dict); -} - -template <typename Dictionary> -void -EnumStoreDict<Dictionary>::fixupRefCounts(const EnumVector & hist) -{ - _enumStore.fixupRefCounts(hist, this->_dict); -} - -template <typename Dictionary> -void -EnumStoreDict<Dictionary>::removeUnusedEnums(const IndexSet &unused, - const datastore::EntryComparator &cmp, - const datastore::EntryComparator *fcmp) -{ - typedef typename Dictionary::Iterator Iterator; - if (unused.empty()) - return; - Iterator it(BTreeNode::Ref(), this->_dict.getAllocator()); - for (const auto& idx : unused) { - it.lower_bound(this->_dict.getRoot(), idx, cmp); - assert(it.valid() && !cmp(idx, it.getKey())); - if (Iterator::hasData() && fcmp != nullptr) { - typename Dictionary::DataType pidx(it.getData()); - this->_dict.remove(it); - if (!it.valid() || (*fcmp)(idx, it.getKey())) { - continue; // Next entry does not use same posting list - } - --it; - if (it.valid() && !(*fcmp)(it.getKey(), idx)) { - continue; // Previous entry uses same posting list - } - if (it.valid()) { - ++it; - } else { - it.begin(); - } - this->_dict.thaw(it); - it.writeData(pidx); - } else { - this->_dict.remove(it); - } - } -} - -template <typename Dictionary> -void -EnumStoreDict<Dictionary>::freeUnusedEnums(const datastore::EntryComparator &cmp, - const datastore::EntryComparator *fcmp) -{ - IndexSet unused; - - // find unused enums - for (auto iter = this->_dict.begin(); iter.valid(); ++iter) { - _enumStore.freeUnusedEnum(iter.getKey(), unused); - } - removeUnusedEnums(unused, cmp, fcmp); -} - -template <typename Dictionary> -void -EnumStoreDict<Dictionary>::freeUnusedEnums(const IndexSet& toRemove, - const datastore::EntryComparator& cmp, - const datastore::EntryComparator* fcmp) -{ - IndexSet unused; - for (const auto& index : toRemove) { - _enumStore.freeUnusedEnum(index, unused); - } - removeUnusedEnums(unused, cmp, fcmp); -} - -template <typename Dictionary> -bool -EnumStoreDict<Dictionary>::findIndex(const datastore::EntryComparator &cmp, - Index &idx) const -{ - auto itr = this->_dict.find(Index(), cmp); - if (!itr.valid()) { - return false; - } - idx = itr.getKey(); - return true; -} - -template <typename Dictionary> -bool -EnumStoreDict<Dictionary>::findFrozenIndex(const datastore::EntryComparator &cmp, - Index &idx) const -{ - auto itr = this->_dict.getFrozenView().find(Index(), cmp); - if (!itr.valid()) { - return false; - } - idx = itr.getKey(); - return true; -} - -template <typename Dictionary> -std::vector<EnumStoreBase::EnumHandle> -EnumStoreDict<Dictionary>::findMatchingEnums(const datastore::EntryComparator &cmp) const -{ - std::vector<EnumStoreBase::EnumHandle> result; - auto itr = this->_dict.getFrozenView().find(Index(), cmp); - while (itr.valid() && !cmp(Index(), itr.getKey())) { - result.push_back(itr.getKey().ref()); - ++itr; - } - return result; -} - -template <typename Dictionary> -void -EnumStoreDict<Dictionary>::onReset() -{ - this->_dict.clear(); -} - - -template <> -EnumPostingTree & -EnumStoreDict<EnumTree>::getPostingDictionary() -{ - LOG_ABORT("should not be reached"); -} - - -template <> -EnumPostingTree & -EnumStoreDict<EnumPostingTree>::getPostingDictionary() -{ - return _dict; -} - - -template <> -const EnumPostingTree & -EnumStoreDict<EnumTree>::getPostingDictionary() const -{ - LOG_ABORT("should not be reached"); -} - - -template <> -const EnumPostingTree & -EnumStoreDict<EnumPostingTree>::getPostingDictionary() const -{ - return _dict; -} - - -template <typename Dictionary> -bool -EnumStoreDict<Dictionary>::hasData() const -{ - return Dictionary::LeafNodeType::hasData(); -} - - -template class datastore::DataStoreT<datastore::AlignedEntryRefT<31, 4> >; +template class datastore::DataStoreT<EnumStoreIndex>; template ssize_t @@ -480,103 +268,6 @@ template void EnumStoreBase::fixupRefCounts<EnumPostingTree>(const EnumVector &hist, EnumPostingTree &tree); -template class EnumStoreDict<EnumTree>; - -template class EnumStoreDict<EnumPostingTree>; - -template -class btree::BTreeNodeT<EnumStoreBase::Index, EnumTreeTraits::INTERNAL_SLOTS>; - -template -class btree::BTreeNodeTT<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, EnumTreeTraits::INTERNAL_SLOTS>; - -template -class btree::BTreeNodeTT<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; - -template -class btree::BTreeInternalNode<EnumStoreBase::Index, btree::NoAggregated, EnumTreeTraits::INTERNAL_SLOTS>; - -template -class btree::BTreeLeafNode<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; - -template -class btree::BTreeLeafNode<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; - -template -class btree::BTreeLeafNodeTemp<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; - -template -class btree::BTreeLeafNodeTemp<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; - -template -class btree::BTreeNodeStore<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; - -template -class btree::BTreeNodeStore<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; - - -template -class btree::BTreeRoot<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -template -class btree::BTreeRoot<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -template -class btree::BTreeRootT<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -template -class btree::BTreeRootT<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -template -class btree::BTreeRootBase<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; - -template -class btree::BTreeRootBase<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; - -template -class btree::BTreeNodeAllocator<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; - -template -class btree::BTreeNodeAllocator<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; - -template -class btree::BTreeIteratorBase<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS, EnumTreeTraits::PATH_SIZE>; -template -class btree::BTreeIteratorBase<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS, EnumTreeTraits::PATH_SIZE>; - -template class btree::BTreeConstIterator<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -template class btree::BTreeConstIterator<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -template -class btree::BTreeIterator<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; -template -class btree::BTreeIterator<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -template -class btree::BTree<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; -template -class btree::BTree<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - - } namespace vespalib { diff --git a/searchlib/src/vespa/searchlib/attribute/enumstorebase.h b/searchlib/src/vespa/searchlib/attribute/enumstorebase.h index 8d9ff28669d..32e7e5134ca 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstorebase.h +++ b/searchlib/src/vespa/searchlib/attribute/enumstorebase.h @@ -2,12 +2,10 @@ #pragma once +#include "enum_store_dictionary.h" #include <vespa/searchcommon/attribute/iattributevector.h> -#include <vespa/vespalib/btree/btree.h> #include <vespa/vespalib/datastore/datastore.h> -#include <vespa/vespalib/datastore/entry_comparator_wrapper.h> #include <vespa/vespalib/datastore/entryref.h> -#include <vespa/vespalib/datastore/unique_store_dictionary.h> #include <vespa/vespalib/stllike/hash_map.h> #include <vespa/vespalib/util/address_space.h> #include <vespa/vespalib/util/array.h> @@ -22,127 +20,13 @@ class BufferWriter; namespace attribute { class Status; } -class EnumStoreBase; - using EnumStoreComparator = datastore::EntryComparator; -using EnumStoreDataStoreType = datastore::DataStoreT<datastore::AlignedEntryRefT<31, 4> >; -using EnumStoreIndex = EnumStoreDataStoreType::RefType; -using EnumStoreIndexVector = vespalib::Array<EnumStoreIndex>; -using EnumStoreEnumVector = vespalib::Array<uint32_t>; - -using EnumTreeTraits = btree::BTreeTraits<16, 16, 10, true>; - -using EnumTree = btree::BTree<EnumStoreIndex, btree::BTreeNoLeafData, - btree::NoAggregated, - const datastore::EntryComparatorWrapper, - EnumTreeTraits>; - -using EnumPostingTree = btree::BTree<EnumStoreIndex, datastore::EntryRef, - btree::NoAggregated, - const datastore::EntryComparatorWrapper, - EnumTreeTraits>; - -struct CompareEnumIndex -{ - using Index = EnumStoreIndex; - - bool operator()(const Index &lhs, const Index &rhs) const { - return lhs.ref() < rhs.ref(); - } -}; - -class EnumStoreDictBase : public datastore::UniqueStoreDictionaryBase { -public: - using EnumVector = EnumStoreEnumVector; - using Index = EnumStoreIndex; - using IndexSet = std::set<Index, CompareEnumIndex>; - using IndexVector = EnumStoreIndexVector; - using generation_t = vespalib::GenerationHandler::generation_t; - -public: - EnumStoreDictBase(); - virtual ~EnumStoreDictBase(); - - virtual uint32_t getNumUniques() const = 0; - virtual void writeAllValues(BufferWriter &writer, btree::BTreeNode::Ref rootRef) const = 0; - virtual ssize_t deserialize(const void *src, size_t available, IndexVector &idx) = 0; - - virtual void fixupRefCounts(const EnumVector &hist) = 0; - virtual void freeUnusedEnums(const datastore::EntryComparator &cmp, - const datastore::EntryComparator *fcmp) = 0; - virtual void freeUnusedEnums(const IndexSet& toRemove, - const datastore::EntryComparator& cmp, - const datastore::EntryComparator* fcmp) = 0; - virtual bool findIndex(const datastore::EntryComparator &cmp, Index &idx) const = 0; - virtual bool findFrozenIndex(const datastore::EntryComparator &cmp, Index &idx) const = 0; - virtual std::vector<attribute::IAttributeVector::EnumHandle> - findMatchingEnums(const datastore::EntryComparator &cmp) const = 0; - - virtual void onReset() = 0; - virtual btree::BTreeNode::Ref getFrozenRootRef() const = 0; - - virtual EnumPostingTree &getPostingDictionary() = 0; - virtual const EnumPostingTree &getPostingDictionary() const = 0; - virtual bool hasData() const = 0; -}; - - -template <typename Dictionary> -class EnumStoreDict : public datastore::UniqueStoreDictionary<Dictionary, EnumStoreDictBase> -{ -private: - using EnumVector = EnumStoreDictBase::EnumVector; - using Index = EnumStoreDictBase::Index; - using IndexSet = EnumStoreDictBase::IndexSet; - using IndexVector = EnumStoreDictBase::IndexVector; - using ParentUniqueStoreDictionary = datastore::UniqueStoreDictionary<Dictionary, EnumStoreDictBase>; - using generation_t = EnumStoreDictBase::generation_t; - - EnumStoreBase& _enumStore; - -public: - EnumStoreDict(EnumStoreBase &enumStore); - - ~EnumStoreDict() override; - - const Dictionary &getDictionary() const { return this->_dict; } - Dictionary &getDictionary() { return this->_dict; } - - uint32_t getNumUniques() const override; - void writeAllValues(BufferWriter &writer, btree::BTreeNode::Ref rootRef) const override; - ssize_t deserialize(const void *src, size_t available, IndexVector &idx) override; - void fixupRefCounts(const EnumVector &hist) override; - - void removeUnusedEnums(const IndexSet &unused, - const datastore::EntryComparator &cmp, - const datastore::EntryComparator *fcmp); - - void freeUnusedEnums(const datastore::EntryComparator &cmp, - const datastore::EntryComparator *fcmp) override; - - void freeUnusedEnums(const IndexSet& toRemove, - const datastore::EntryComparator& cmp, - const datastore::EntryComparator* fcmp) override; - - bool findIndex(const datastore::EntryComparator &cmp, Index &idx) const override; - bool findFrozenIndex(const datastore::EntryComparator &cmp, Index &idx) const override; - std::vector<attribute::IAttributeVector::EnumHandle> - findMatchingEnums(const datastore::EntryComparator &cmp) const override; - - void onReset() override; - btree::BTreeNode::Ref getFrozenRootRef() const override { return this->get_frozen_root(); } - - EnumPostingTree & getPostingDictionary() override; - const EnumPostingTree & getPostingDictionary() const override; - - bool hasData() const override; -}; class EnumStoreBase { public: - using DataStoreType = EnumStoreDataStoreType; + using DataStoreType = datastore::DataStoreT<EnumStoreIndex>; using EnumHandle = attribute::IAttributeVector::EnumHandle; using EnumVector = EnumStoreEnumVector; using Index = EnumStoreIndex; @@ -211,7 +95,7 @@ protected: void clearPendingCompact() { _pendingCompact = false; } }; - EnumStoreDictBase *_enumDict; + IEnumStoreDictionary *_enumDict; DataStoreType _store; EnumBufferType _type; std::vector<uint32_t> _toHoldBuffers; // used during compaction @@ -304,8 +188,8 @@ public: virtual bool performCompaction(uint64_t bytesNeeded, EnumIndexMap & old2New) = 0; - EnumStoreDictBase &getEnumStoreDict() { return *_enumDict; } - const EnumStoreDictBase &getEnumStoreDict() const { return *_enumDict; } + IEnumStoreDictionary &getEnumStoreDict() { return *_enumDict; } + const IEnumStoreDictionary &getEnumStoreDict() const { return *_enumDict; } EnumPostingTree &getPostingDictionary() { return _enumDict->getPostingDictionary(); } const EnumPostingTree &getPostingDictionary() const { @@ -316,100 +200,8 @@ public: vespalib::asciistream & operator << (vespalib::asciistream & os, const EnumStoreBase::Index & idx); - extern template -class datastore::DataStoreT<datastore::AlignedEntryRefT<31, 4> >; +class datastore::DataStoreT<EnumStoreIndex>; -extern template -class btree::BTreeNodeT<EnumStoreBase::Index, EnumTreeTraits::INTERNAL_SLOTS>; - -extern template -class btree::BTreeNodeTT<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, EnumTreeTraits::INTERNAL_SLOTS>; - -extern template -class btree::BTreeNodeTT<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; - -extern template -class btree::BTreeInternalNode<EnumStoreBase::Index, btree::NoAggregated, EnumTreeTraits::INTERNAL_SLOTS>; - -extern template -class btree::BTreeLeafNode<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; - -extern template -class btree::BTreeLeafNode<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; - -extern template -class btree::BTreeLeafNodeTemp<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; - -extern template -class btree::BTreeLeafNodeTemp<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; - -extern template -class btree::BTreeNodeStore<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; - -extern template -class btree::BTreeNodeStore<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; - -extern template -class btree::BTreeRoot<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -extern template -class btree::BTreeRoot<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -extern template -class btree::BTreeRootT<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -extern template -class btree::BTreeRootT<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -extern template -class btree::BTreeRootBase<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; - -extern template -class btree::BTreeRootBase<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; - -extern template -class btree::BTreeNodeAllocator<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; - -extern template -class btree::BTreeNodeAllocator<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; - - -extern template -class btree::BTreeIteratorBase<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS, EnumTreeTraits::PATH_SIZE>; -extern template -class btree::BTreeIteratorBase<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS, EnumTreeTraits::PATH_SIZE>; - -extern template class btree::BTreeConstIterator<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -extern template class btree::BTreeConstIterator<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -extern template -class btree::BTreeIterator<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; -extern template -class btree::BTreeIterator<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -extern template -class btree::BTree<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; -extern template -class btree::BTree<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; } diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h new file mode 100644 index 00000000000..3c324a30779 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h @@ -0,0 +1,75 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/searchcommon/attribute/iattributevector.h> +#include <vespa/vespalib/datastore/entry_comparator_wrapper.h> +#include <vespa/vespalib/datastore/unique_store_dictionary.h> +#include <set> + +namespace search { + +class BufferWriter; + +using EnumStoreIndex = datastore::AlignedEntryRefT<31, 4>; +using EnumStoreIndexVector = vespalib::Array<EnumStoreIndex>; +using EnumStoreEnumVector = vespalib::Array<uint32_t>; + +using EnumTreeTraits = btree::BTreeTraits<16, 16, 10, true>; + +using EnumTree = btree::BTree<EnumStoreIndex, btree::BTreeNoLeafData, + btree::NoAggregated, + const datastore::EntryComparatorWrapper, + EnumTreeTraits>; + +using EnumPostingTree = btree::BTree<EnumStoreIndex, datastore::EntryRef, + btree::NoAggregated, + const datastore::EntryComparatorWrapper, + EnumTreeTraits>; + +struct CompareEnumIndex { + using Index = EnumStoreIndex; + + bool operator()(const Index &lhs, const Index &rhs) const { + return lhs.ref() < rhs.ref(); + } +}; + +/** + * Interface for the dictionary used by an enum store. + */ +class IEnumStoreDictionary : public datastore::IUniqueStoreDictionary { +public: + using EnumVector = EnumStoreEnumVector; + using Index = EnumStoreIndex; + using IndexSet = std::set<Index, CompareEnumIndex>; + using IndexVector = EnumStoreIndexVector; + using generation_t = vespalib::GenerationHandler::generation_t; + +public: + virtual ~IEnumStoreDictionary() = default; + + virtual uint32_t getNumUniques() const = 0; + virtual void writeAllValues(BufferWriter& writer, btree::BTreeNode::Ref rootRef) const = 0; + virtual ssize_t deserialize(const void* src, size_t available, IndexVector& idx) = 0; + + virtual void fixupRefCounts(const EnumVector& hist) = 0; + virtual void freeUnusedEnums(const datastore::EntryComparator& cmp, + const datastore::EntryComparator* fcmp) = 0; + virtual void freeUnusedEnums(const IndexSet& toRemove, + const datastore::EntryComparator& cmp, + const datastore::EntryComparator* fcmp) = 0; + virtual bool findIndex(const datastore::EntryComparator& cmp, Index& idx) const = 0; + virtual bool findFrozenIndex(const datastore::EntryComparator& cmp, Index& idx) const = 0; + virtual std::vector<attribute::IAttributeVector::EnumHandle> + findMatchingEnums(const datastore::EntryComparator& cmp) const = 0; + + virtual void onReset() = 0; + virtual btree::BTreeNode::Ref getFrozenRootRef() const = 0; + + virtual EnumPostingTree& getPostingDictionary() = 0; + virtual const EnumPostingTree& getPostingDictionary() const = 0; + virtual bool hasData() const = 0; +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/ipostinglistattributebase.h b/searchlib/src/vespa/searchlib/attribute/ipostinglistattributebase.h index 4652d26f7a3..4876f43cd5d 100644 --- a/searchlib/src/vespa/searchlib/attribute/ipostinglistattributebase.h +++ b/searchlib/src/vespa/searchlib/attribute/ipostinglistattributebase.h @@ -2,6 +2,8 @@ #pragma once +#include <vespa/searchcommon/attribute/iattributevector.h> + namespace vespalib { class MemoryUsage; } namespace search::attribute { diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h index 539b362534b..b9346daefa2 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h @@ -2,16 +2,17 @@ #pragma once +#include "dociditerator.h" +#include "ipostinglistattributebase.h" +#include "postingchange.h" +#include "postinglistsearchcontext.h" +#include <vespa/searchlib/attribute/enumattribute.h> #include <vespa/searchlib/attribute/numericbase.h> #include <vespa/searchlib/attribute/stringbase.h> -#include <vespa/searchlib/attribute/enumattribute.h> #include <vespa/searchlib/queryeval/searchiterator.h> -#include <vespa/vespalib/datastore/entryref.h> #include <vespa/vespalib/btree/btreestore.h> -#include "dociditerator.h" -#include "postinglistsearchcontext.h" -#include "postingchange.h" -#include "ipostinglistattributebase.h" +#include <vespa/vespalib/datastore/entry_comparator.h> +#include <vespa/vespalib/datastore/entryref.h> namespace search { @@ -19,9 +20,9 @@ class EnumPostingPair { private: EnumStoreBase::Index _idx; - const EnumStoreComparator *_cmp; + const datastore::EntryComparator *_cmp; public: - EnumPostingPair(EnumStoreBase::Index idx, const EnumStoreComparator *cmp) + EnumPostingPair(EnumStoreBase::Index idx, const datastore::EntryComparator *cmp) : _idx(idx), _cmp(cmp) { } diff --git a/vespalib/src/tests/datastore/unique_store_dictionary/unique_store_dictionary_test.cpp b/vespalib/src/tests/datastore/unique_store_dictionary/unique_store_dictionary_test.cpp index 8b963dbf007..1ec89aea69e 100644 --- a/vespalib/src/tests/datastore/unique_store_dictionary/unique_store_dictionary_test.cpp +++ b/vespalib/src/tests/datastore/unique_store_dictionary/unique_store_dictionary_test.cpp @@ -32,7 +32,7 @@ public: struct DictionaryReadTest : public ::testing::Test { DefaultUniqueStoreDictionary dict; - UniqueStoreDictionaryBase::ReadSnapshot::UP snapshot; + IUniqueStoreDictionary::ReadSnapshot::UP snapshot; DictionaryReadTest() : dict(), diff --git a/vespalib/src/vespa/vespalib/datastore/entry_comparator.h b/vespalib/src/vespa/vespalib/datastore/entry_comparator.h index a1b95d656d5..a598ca195ae 100644 --- a/vespalib/src/vespa/vespalib/datastore/entry_comparator.h +++ b/vespalib/src/vespa/vespalib/datastore/entry_comparator.h @@ -2,9 +2,9 @@ #pragma once -namespace search::datastore { +#include "entryref.h" -class EntryRef; +namespace search::datastore { /* * Compare two entries based on entry refs. Valid entry ref is mapped diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary_base.h b/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary.h index e09fdc6093c..cda62884318 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary_base.h +++ b/vespalib/src/vespa/vespalib/datastore/i_unique_store_dictionary.h @@ -15,7 +15,7 @@ class UniqueStoreAddResult; /** * Interface class for unique store dictionary. */ -class UniqueStoreDictionaryBase { +class IUniqueStoreDictionary { public: /** * Class that provides a read snapshot of the dictionary. @@ -34,7 +34,7 @@ public: }; using generation_t = vespalib::GenerationHandler::generation_t; - virtual ~UniqueStoreDictionaryBase() = default; + virtual ~IUniqueStoreDictionary() = default; virtual void freeze() = 0; virtual void transfer_hold_lists(generation_t generation) = 0; virtual void trim_hold_lists(generation_t firstUsed) = 0; diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store.h b/vespalib/src/vespa/vespalib/datastore/unique_store.h index bbf5f9c90a4..bf7808e9325 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store.h +++ b/vespalib/src/vespa/vespalib/datastore/unique_store.h @@ -5,14 +5,14 @@ #include "buffer_type.h" #include "bufferstate.h" #include "datastore.h" -#include "entryref.h" #include "entry_comparator_wrapper.h" +#include "entryref.h" +#include "i_compaction_context.h" +#include "i_unique_store_dictionary.h" #include "unique_store_add_result.h" -#include "unique_store_entry.h" #include "unique_store_allocator.h" #include "unique_store_comparator.h" -#include "unique_store_dictionary_base.h" -#include "i_compaction_context.h" +#include "unique_store_entry.h" namespace search::datastore { @@ -39,7 +39,7 @@ public: private: Allocator _allocator; DataStoreType &_store; - std::unique_ptr<UniqueStoreDictionaryBase> _dict; + std::unique_ptr<IUniqueStoreDictionary> _dict; using generation_t = vespalib::GenerationHandler::generation_t; public: diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store.hpp index 1ebdd65d87a..f1b60845403 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store.hpp +++ b/vespalib/src/vespa/vespalib/datastore/unique_store.hpp @@ -82,7 +82,7 @@ private: EntryComparatorWrapper, DictionaryTraits>; DataStoreBase &_dataStore; - UniqueStoreDictionaryBase &_dict; + IUniqueStoreDictionary &_dict; ICompactable &_store; std::vector<uint32_t> _bufferIdsToCompact; std::vector<std::vector<EntryRef>> _mapping; @@ -121,7 +121,7 @@ private: public: CompactionContext(DataStoreBase &dataStore, - UniqueStoreDictionaryBase &dict, + IUniqueStoreDictionary &dict, ICompactable &store, std::vector<uint32_t> bufferIdsToCompact) : ICompactionContext(), diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_builder.h b/vespalib/src/vespa/vespalib/datastore/unique_store_builder.h index 58ecf61eb82..a0e9f3d63a7 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_builder.h +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_builder.h @@ -6,7 +6,7 @@ namespace search::datastore { -class UniqueStoreDictionaryBase; +class IUniqueStoreDictionary; /** * Builder for related UniqueStore class. @@ -19,12 +19,12 @@ template <typename Allocator> class UniqueStoreBuilder { using EntryType = typename Allocator::EntryType; - Allocator &_allocator; - UniqueStoreDictionaryBase &_dict; + Allocator& _allocator; + IUniqueStoreDictionary& _dict; std::vector<EntryRef> _refs; std::vector<uint32_t> _refCounts; public: - UniqueStoreBuilder(Allocator& allocator, UniqueStoreDictionaryBase& dict, uint32_t uniqueValuesHint); + UniqueStoreBuilder(Allocator& allocator, IUniqueStoreDictionary& dict, uint32_t uniqueValuesHint); ~UniqueStoreBuilder(); void setupRefCounts(); void makeDictionary(); diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_builder.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store_builder.hpp index 030c8b6d6ba..7ea61f4e0ea 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_builder.hpp +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_builder.hpp @@ -3,13 +3,13 @@ #pragma once #include "unique_store_builder.h" -#include "unique_store_dictionary_base.h" +#include "i_unique_store_dictionary.h" #include "datastore.hpp" namespace search::datastore { template <typename Allocator> -UniqueStoreBuilder<Allocator>::UniqueStoreBuilder(Allocator& allocator, UniqueStoreDictionaryBase& dict, uint32_t uniqueValuesHint) +UniqueStoreBuilder<Allocator>::UniqueStoreBuilder(Allocator& allocator, IUniqueStoreDictionary& dict, uint32_t uniqueValuesHint) : _allocator(allocator), _dict(dict), _refs(), diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h index cd13e88c77e..4ae32c45dea 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_dictionary.h @@ -1,7 +1,7 @@ // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/vespalib/btree/btree.h> -#include "unique_store_dictionary_base.h" +#include "i_unique_store_dictionary.h" #pragma once @@ -12,7 +12,7 @@ class EntryComparatorWrapper; /** * A dictionary for unique store. Mostly accessed via base class. */ -template <typename DictionaryT, typename ParentT = UniqueStoreDictionaryBase> +template <typename DictionaryT, typename ParentT = IUniqueStoreDictionary> class UniqueStoreDictionary : public ParentT { protected: using DictionaryType = DictionaryT; diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.h b/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.h index 3f260bfbc15..40cc295e76d 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.h +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.h @@ -2,7 +2,7 @@ #pragma once -#include "unique_store_dictionary_base.h" +#include "i_unique_store_dictionary.h" namespace search::datastore { @@ -19,12 +19,12 @@ public: using EnumValues = std::vector<std::vector<uint32_t>>; private: - UniqueStoreDictionaryBase::ReadSnapshot::UP _dict_snapshot; + IUniqueStoreDictionary::ReadSnapshot::UP _dict_snapshot; const DataStoreBase &_store; EnumValues _enumValues; uint32_t _next_enum_val; public: - UniqueStoreEnumerator(const UniqueStoreDictionaryBase &dict, const DataStoreBase &store); + UniqueStoreEnumerator(const IUniqueStoreDictionary &dict, const DataStoreBase &store); ~UniqueStoreEnumerator(); EntryRef get_frozen_root() const { return _dict_snapshot->get_frozen_root(); } void enumerateValue(EntryRef ref); diff --git a/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp b/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp index 10ca0944519..8867a40228a 100644 --- a/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp +++ b/vespalib/src/vespa/vespalib/datastore/unique_store_enumerator.hpp @@ -7,7 +7,7 @@ namespace search::datastore { template <typename RefT> -UniqueStoreEnumerator<RefT>::UniqueStoreEnumerator(const UniqueStoreDictionaryBase &dict, const DataStoreBase &store) +UniqueStoreEnumerator<RefT>::UniqueStoreEnumerator(const IUniqueStoreDictionary &dict, const DataStoreBase &store) : _dict_snapshot(dict.get_read_snapshot()), _store(store), _enumValues(), |