diff options
author | Geir Storli <geirst@verizonmedia.com> | 2019-08-27 14:14:33 +0000 |
---|---|---|
committer | Geir Storli <geirst@verizonmedia.com> | 2019-08-27 14:17:47 +0000 |
commit | 217b71bbc8c48544d01bcc5aab60c1859f4e03c0 (patch) | |
tree | 304d63fbaa2ef44b249e76b17d874d441f79bf58 /searchlib | |
parent | 8d0aa26af1e26bd9a108af6c3f8ec5a83457ba69 (diff) |
Rename enum store dictionary api and implementation and move to separate files.
Diffstat (limited to 'searchlib')
15 files changed, 589 insertions, 549 deletions
diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp index 6fc90786c7d..29c35f11e65 100644 --- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp +++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp @@ -361,10 +361,8 @@ void EnumStoreTest::testUniques (const EnumStoreType &ses, const std::vector<std::string> &unique) { - const EnumStoreDict<Dictionary> *enumDict = - dynamic_cast<const EnumStoreDict<Dictionary> *> - (&ses.getEnumStoreDict()); - assert(enumDict != NULL); + const auto* enumDict = dynamic_cast<const EnumStoreDictionary<Dictionary> *>(&ses.getEnumStoreDict()); + assert(enumDict != nullptr); const Dictionary &dict = enumDict->getDictionary(); uint32_t i = 0; EnumIndex idx; diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt index 41132bddc64..ea33a4d552c 100644 --- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt @@ -39,6 +39,7 @@ vespa_add_library(searchlib_attribute OBJECT enumattributesaver.cpp enumcomparator.cpp enumhintsearchcontext.cpp + enum_store_dictionary.cpp enumstore.cpp enumstorebase.cpp extendableattributes.cpp diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp new file mode 100644 index 00000000000..9222b9340ca --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp @@ -0,0 +1,320 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "enum_store_dictionary.h" +#include "enumstore.h" +#include "enumstorebase.h" +#include <vespa/vespalib/btree/btree.hpp> +#include <vespa/vespalib/btree/btreeiterator.hpp> +#include <vespa/vespalib/btree/btreenode.hpp> +#include <vespa/vespalib/btree/btreenodeallocator.hpp> +#include <vespa/vespalib/btree/btreeroot.hpp> +#include <vespa/vespalib/datastore/datastore.hpp> +#include <vespa/vespalib/datastore/unique_store_dictionary.hpp> +#include <vespa/vespalib/util/bufferwriter.h> + +#include <vespa/log/log.h> +LOG_SETUP(".searchlib.attribute.enum_store_dictionary"); + +namespace search { + +using btree::BTreeNode; + +template <typename DictionaryT> +EnumStoreDictionary<DictionaryT>::EnumStoreDictionary(EnumStoreBase& enumStore) + : ParentUniqueStoreDictionary(), + _enumStore(enumStore) +{ +} + +template <typename DictionaryT> +EnumStoreDictionary<DictionaryT>::~EnumStoreDictionary() = default; + +template <typename DictionaryT> +uint32_t +EnumStoreDictionary<DictionaryT>::getNumUniques() const +{ + return this->_dict.size(); +} + +template <typename DictionaryT> +void +EnumStoreDictionary<DictionaryT>::writeAllValues(BufferWriter& writer, + BTreeNode::Ref rootRef) const +{ + constexpr size_t BATCHSIZE = 1000; + std::vector<Index> idxs; + idxs.reserve(BATCHSIZE); + typename DictionaryT::Iterator it(rootRef, this->_dict.getAllocator()); + while (it.valid()) { + if (idxs.size() >= idxs.capacity()) { + _enumStore.writeValues(writer, &idxs[0], idxs.size()); + idxs.clear(); + } + idxs.push_back(it.getKey()); + ++it; + } + if (!idxs.empty()) { + _enumStore.writeValues(writer, &idxs[0], idxs.size()); + } +} + +template <typename DictionaryT> +ssize_t +EnumStoreDictionary<DictionaryT>::deserialize(const void* src, + size_t available, + IndexVector& idx) +{ + return _enumStore.deserialize(src, available, idx, this->_dict); +} + +template <typename DictionaryT> +void +EnumStoreDictionary<DictionaryT>::fixupRefCounts(const EnumVector& hist) +{ + _enumStore.fixupRefCounts(hist, this->_dict); +} + +template <typename DictionaryT> +void +EnumStoreDictionary<DictionaryT>::removeUnusedEnums(const IndexSet& unused, + const datastore::EntryComparator& cmp, + const datastore::EntryComparator* fcmp) +{ + using Iterator = typename DictionaryT::Iterator; + if (unused.empty()) { + return; + } + Iterator it(BTreeNode::Ref(), this->_dict.getAllocator()); + for (const auto& idx : unused) { + it.lower_bound(this->_dict.getRoot(), idx, cmp); + assert(it.valid() && !cmp(idx, it.getKey())); + if (Iterator::hasData() && fcmp != nullptr) { + typename DictionaryT::DataType pidx(it.getData()); + this->_dict.remove(it); + if (!it.valid() || (*fcmp)(idx, it.getKey())) { + continue; // Next entry does not use same posting list + } + --it; + if (it.valid() && !(*fcmp)(it.getKey(), idx)) { + continue; // Previous entry uses same posting list + } + if (it.valid()) { + ++it; + } else { + it.begin(); + } + this->_dict.thaw(it); + it.writeData(pidx); + } else { + this->_dict.remove(it); + } + } +} + +template <typename DictionaryT> +void +EnumStoreDictionary<DictionaryT>::freeUnusedEnums(const datastore::EntryComparator& cmp, + const datastore::EntryComparator* fcmp) +{ + IndexSet unused; + + // find unused enums + for (auto iter = this->_dict.begin(); iter.valid(); ++iter) { + _enumStore.freeUnusedEnum(iter.getKey(), unused); + } + removeUnusedEnums(unused, cmp, fcmp); +} + +template <typename DictionaryT> +void +EnumStoreDictionary<DictionaryT>::freeUnusedEnums(const IndexSet& toRemove, + const datastore::EntryComparator& cmp, + const datastore::EntryComparator* fcmp) +{ + IndexSet unused; + for (const auto& index : toRemove) { + _enumStore.freeUnusedEnum(index, unused); + } + removeUnusedEnums(unused, cmp, fcmp); +} + +template <typename DictionaryT> +bool +EnumStoreDictionary<DictionaryT>::findIndex(const datastore::EntryComparator& cmp, + Index& idx) const +{ + auto itr = this->_dict.find(Index(), cmp); + if (!itr.valid()) { + return false; + } + idx = itr.getKey(); + return true; +} + +template <typename DictionaryT> +bool +EnumStoreDictionary<DictionaryT>::findFrozenIndex(const datastore::EntryComparator& cmp, + Index& idx) const +{ + auto itr = this->_dict.getFrozenView().find(Index(), cmp); + if (!itr.valid()) { + return false; + } + idx = itr.getKey(); + return true; +} + +template <typename DictionaryT> +std::vector<EnumStoreBase::EnumHandle> +EnumStoreDictionary<DictionaryT>::findMatchingEnums(const datastore::EntryComparator& cmp) const +{ + std::vector<EnumStoreBase::EnumHandle> result; + auto itr = this->_dict.getFrozenView().find(Index(), cmp); + while (itr.valid() && !cmp(Index(), itr.getKey())) { + result.push_back(itr.getKey().ref()); + ++itr; + } + return result; +} + +template <typename DictionaryT> +void +EnumStoreDictionary<DictionaryT>::onReset() +{ + this->_dict.clear(); +} + +template <> +EnumPostingTree & +EnumStoreDictionary<EnumTree>::getPostingDictionary() +{ + LOG_ABORT("should not be reached"); +} + +template <> +EnumPostingTree & +EnumStoreDictionary<EnumPostingTree>::getPostingDictionary() +{ + return _dict; +} + +template <> +const EnumPostingTree & +EnumStoreDictionary<EnumTree>::getPostingDictionary() const +{ + LOG_ABORT("should not be reached"); +} + +template <> +const EnumPostingTree & +EnumStoreDictionary<EnumPostingTree>::getPostingDictionary() const +{ + return _dict; +} + +template <typename DictionaryT> +bool +EnumStoreDictionary<DictionaryT>::hasData() const +{ + return DictionaryT::LeafNodeType::hasData(); +} + + +template class datastore::DataStoreT<datastore::AlignedEntryRefT<31, 4> >; + +template class EnumStoreDictionary<EnumTree>; + +template class EnumStoreDictionary<EnumPostingTree>; + +template +class btree::BTreeNodeT<EnumStoreBase::Index, EnumTreeTraits::INTERNAL_SLOTS>; + +template +class btree::BTreeNodeTT<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, EnumTreeTraits::INTERNAL_SLOTS>; + +template +class btree::BTreeNodeTT<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; + +template +class btree::BTreeInternalNode<EnumStoreBase::Index, btree::NoAggregated, EnumTreeTraits::INTERNAL_SLOTS>; + +template +class btree::BTreeLeafNode<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; + +template +class btree::BTreeLeafNode<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; + +template +class btree::BTreeLeafNodeTemp<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; + +template +class btree::BTreeLeafNodeTemp<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; + +template +class btree::BTreeNodeStore<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; + +template +class btree::BTreeNodeStore<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; + +template +class btree::BTreeRoot<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +template +class btree::BTreeRoot<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +template +class btree::BTreeRootT<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +template +class btree::BTreeRootT<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +template +class btree::BTreeRootBase<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; + +template +class btree::BTreeRootBase<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; + +template +class btree::BTreeNodeAllocator<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; + +template +class btree::BTreeNodeAllocator<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; + +template +class btree::BTreeIteratorBase<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS, EnumTreeTraits::PATH_SIZE>; +template +class btree::BTreeIteratorBase<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS, EnumTreeTraits::PATH_SIZE>; + +template class btree::BTreeConstIterator<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +template class btree::BTreeConstIterator<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +template +class btree::BTreeIterator<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; +template +class btree::BTreeIterator<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +template +class btree::BTree<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; +template +class btree::BTree<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h new file mode 100644 index 00000000000..cd28f10a3cd --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h @@ -0,0 +1,158 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "i_enum_store_dictionary.h" +#include <vespa/vespalib/btree/btree.h> + +namespace search { + +class EnumStoreBase; + +/** + * Concrete dictionary for an enum store that extends the functionality of a unique store dictionary. + */ +template <typename DictionaryT> +class EnumStoreDictionary : public datastore::UniqueStoreDictionary<DictionaryT, IEnumStoreDictionary> { +private: + using EnumVector = IEnumStoreDictionary::EnumVector; + using Index = IEnumStoreDictionary::Index; + using IndexSet = IEnumStoreDictionary::IndexSet; + using IndexVector = IEnumStoreDictionary::IndexVector; + using ParentUniqueStoreDictionary = datastore::UniqueStoreDictionary<DictionaryT, IEnumStoreDictionary>; + using generation_t = IEnumStoreDictionary::generation_t; + + EnumStoreBase& _enumStore; + +public: + EnumStoreDictionary(EnumStoreBase& enumStore); + + ~EnumStoreDictionary() override; + + const DictionaryT &getDictionary() const { return this->_dict; } + DictionaryT &getDictionary() { return this->_dict; } + + uint32_t getNumUniques() const override; + void writeAllValues(BufferWriter& writer, btree::BTreeNode::Ref rootRef) const override; + ssize_t deserialize(const void* src, size_t available, IndexVector& idx) override; + void fixupRefCounts(const EnumVector& hist) override; + + void removeUnusedEnums(const IndexSet& unused, + const datastore::EntryComparator& cmp, + const datastore::EntryComparator* fcmp); + + void freeUnusedEnums(const datastore::EntryComparator& cmp, + const datastore::EntryComparator* fcmp) override; + + void freeUnusedEnums(const IndexSet& toRemove, + const datastore::EntryComparator& cmp, + const datastore::EntryComparator* fcmp) override; + + bool findIndex(const datastore::EntryComparator& cmp, Index& idx) const override; + bool findFrozenIndex(const datastore::EntryComparator& cmp, Index& idx) const override; + std::vector<attribute::IAttributeVector::EnumHandle> + findMatchingEnums(const datastore::EntryComparator& cmp) const override; + + void onReset() override; + btree::BTreeNode::Ref getFrozenRootRef() const override { return this->get_frozen_root(); } + + EnumPostingTree & getPostingDictionary() override; + const EnumPostingTree & getPostingDictionary() const override; + + bool hasData() const override; +}; + +extern template +class btree::BTreeNodeT<EnumStoreIndex, EnumTreeTraits::INTERNAL_SLOTS>; + +extern template +class btree::BTreeNodeTT<EnumStoreIndex, datastore::EntryRef, btree::NoAggregated, EnumTreeTraits::INTERNAL_SLOTS>; + +extern template +class btree::BTreeNodeTT<EnumStoreIndex, btree::BTreeNoLeafData, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; + +extern template +class btree::BTreeInternalNode<EnumStoreIndex, btree::NoAggregated, EnumTreeTraits::INTERNAL_SLOTS>; + +extern template +class btree::BTreeLeafNode<EnumStoreIndex, btree::BTreeNoLeafData, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; + +extern template +class btree::BTreeLeafNode<EnumStoreIndex, datastore::EntryRef, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; + +extern template +class btree::BTreeLeafNodeTemp<EnumStoreIndex, btree::BTreeNoLeafData, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; + +extern template +class btree::BTreeLeafNodeTemp<EnumStoreIndex, datastore::EntryRef, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; + +extern template +class btree::BTreeNodeStore<EnumStoreIndex, btree::BTreeNoLeafData, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; + +extern template +class btree::BTreeNodeStore<EnumStoreIndex, datastore::EntryRef, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; + +extern template +class btree::BTreeRoot<EnumStoreIndex, btree::BTreeNoLeafData, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +extern template +class btree::BTreeRoot<EnumStoreIndex, datastore::EntryRef, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +extern template +class btree::BTreeRootT<EnumStoreIndex, btree::BTreeNoLeafData, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +extern template +class btree::BTreeRootT<EnumStoreIndex, datastore::EntryRef, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +extern template +class btree::BTreeRootBase<EnumStoreIndex, btree::BTreeNoLeafData, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; + +extern template +class btree::BTreeRootBase<EnumStoreIndex, datastore::EntryRef, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; + +extern template +class btree::BTreeNodeAllocator<EnumStoreIndex, btree::BTreeNoLeafData, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; + +extern template +class btree::BTreeNodeAllocator<EnumStoreIndex, datastore::EntryRef, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; + + +extern template +class btree::BTreeIteratorBase<EnumStoreIndex, btree::BTreeNoLeafData, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS, EnumTreeTraits::PATH_SIZE>; +extern template +class btree::BTreeIteratorBase<EnumStoreIndex, datastore::EntryRef, btree::NoAggregated, + EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS, EnumTreeTraits::PATH_SIZE>; + +extern template class btree::BTreeConstIterator<EnumStoreIndex, btree::BTreeNoLeafData, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +extern template class btree::BTreeConstIterator<EnumStoreIndex, datastore::EntryRef, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +extern template +class btree::BTreeIterator<EnumStoreIndex, btree::BTreeNoLeafData, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; +extern template +class btree::BTreeIterator<EnumStoreIndex, datastore::EntryRef, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + +extern template +class btree::BTree<EnumStoreIndex, btree::BTreeNoLeafData, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; +extern template +class btree::BTree<EnumStoreIndex, datastore::EntryRef, btree::NoAggregated, + const datastore::EntryComparatorWrapper, EnumTreeTraits>; + + +} diff --git a/searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp b/searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp index a1c7a343ac8..75cd504faad 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enumattributesaver.cpp @@ -24,7 +24,7 @@ EnumAttributeSaver::writeUdat(IAttributeSaveTarget &saveTarget) if (saveTarget.getEnumerated()) { std::unique_ptr<BufferWriter> udatWriter(saveTarget.udatWriter().allocBufferWriter()); - const EnumStoreDictBase &enumDict = _enumStore.getEnumStoreDict(); + const auto& enumDict = _enumStore.getEnumStoreDict(); enumDict.writeAllValues(*udatWriter, _enumerator.get_frozen_root()); udatWriter->flush(); } diff --git a/searchlib/src/vespa/searchlib/attribute/enumcomparator.h b/searchlib/src/vespa/searchlib/attribute/enumcomparator.h index a9e76082a34..66dd9dd5e6c 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumcomparator.h +++ b/searchlib/src/vespa/searchlib/attribute/enumcomparator.h @@ -3,6 +3,7 @@ #pragma once #include "enumstore.h" +#include <vespa/vespalib/datastore/entry_comparator.h> namespace search { diff --git a/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.cpp b/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.cpp index 4fec660468b..f221ac858bd 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.cpp @@ -10,7 +10,7 @@ using btree::BTreeNode; using fef::TermFieldMatchData; EnumHintSearchContext:: -EnumHintSearchContext(const EnumStoreDictBase &dictionary, +EnumHintSearchContext(const IEnumStoreDictionary &dictionary, uint32_t docIdLimit, uint64_t numValues) : _dict_snapshot(dictionary.get_read_snapshot()), diff --git a/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h index 1844c228014..6f766bec386 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h +++ b/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h @@ -15,13 +15,13 @@ namespace search::attribute { class EnumHintSearchContext : public IPostingListSearchContext { - const EnumStoreDictBase::ReadSnapshot::UP _dict_snapshot; + const IEnumStoreDictionary::ReadSnapshot::UP _dict_snapshot; uint32_t _uniqueValues; uint32_t _docIdLimit; uint64_t _numValues; // attr.getStatus().getNumValues(); protected: - EnumHintSearchContext(const EnumStoreDictBase &dictionary, + EnumHintSearchContext(const IEnumStoreDictionary &dictionary, uint32_t docIdLimit, uint64_t numValues); ~EnumHintSearchContext(); diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h index 7cf9ada8064..56bf257b046 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.h +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h @@ -2,6 +2,7 @@ #pragma once +#include "enum_store_dictionary.h" #include "enumstorebase.h" #include <vespa/searchlib/util/foldedstringcompare.h> #include <vespa/vespalib/btree/btreenode.h> diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp index b826f1ca088..4627c7bc03e 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp @@ -274,9 +274,9 @@ void EnumStoreT<EntryType>::addEnum(Type value, Index & newIdx) { if (_enumDict->hasData()) { - addEnum(value, newIdx, static_cast<EnumStoreDict<EnumPostingTree> *>(_enumDict)->getDictionary()); + addEnum(value, newIdx, static_cast<EnumStoreDictionary<EnumPostingTree> *>(_enumDict)->getDictionary()); } else { - addEnum(value, newIdx, static_cast<EnumStoreDict<EnumTree> *>(_enumDict)->getDictionary()); + addEnum(value, newIdx, static_cast<EnumStoreDictionary<EnumTree> *>(_enumDict)->getDictionary()); } } @@ -339,9 +339,9 @@ void EnumStoreT<EntryType>::reset(Builder &builder) { if (_enumDict->hasData()) { - reset(builder, static_cast<EnumStoreDict<EnumPostingTree> *>(_enumDict)->getDictionary()); + reset(builder, static_cast<EnumStoreDictionary<EnumPostingTree> *>(_enumDict)->getDictionary()); } else { - reset(builder, static_cast<EnumStoreDict<EnumTree> *>(_enumDict)->getDictionary()); + reset(builder, static_cast<EnumStoreDictionary<EnumTree> *>(_enumDict)->getDictionary()); } } @@ -405,9 +405,9 @@ EnumStoreT<EntryType>::performCompaction(uint64_t bytesNeeded, EnumIndexMap & ol return false; } if (_enumDict->hasData()) { - performCompaction(static_cast<EnumStoreDict<EnumPostingTree> *>(_enumDict)->getDictionary(), old2New); + performCompaction(static_cast<EnumStoreDictionary<EnumPostingTree> *>(_enumDict)->getDictionary(), old2New); } else { - performCompaction(static_cast<EnumStoreDict<EnumTree> *>(_enumDict)->getDictionary(), old2New); + performCompaction(static_cast<EnumStoreDictionary<EnumTree> *>(_enumDict)->getDictionary(), old2New); } return true; } diff --git a/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp b/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp index f54f7a26941..71b55689048 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstorebase.cpp @@ -2,6 +2,7 @@ #include "enumstorebase.h" #include "enumstore.h" +#include "enum_store_dictionary.h" #include <vespa/vespalib/btree/btree.hpp> #include <vespa/vespalib/btree/btreeiterator.hpp> #include <vespa/vespalib/btree/btreenode.hpp> @@ -74,9 +75,9 @@ EnumStoreBase::EnumStoreBase(uint64_t initBufferSize, bool hasPostings) _toHoldBuffers() { if (hasPostings) - _enumDict = new EnumStoreDict<EnumPostingTree>(*this); + _enumDict = new EnumStoreDictionary<EnumPostingTree>(*this); else - _enumDict = new EnumStoreDict<EnumTree>(*this); + _enumDict = new EnumStoreDictionary<EnumTree>(*this); _store.addType(&_type); _type.setSizeNeededAndDead(initBufferSize, 0); _store.initActiveBuffers(); @@ -249,220 +250,7 @@ vespalib::asciistream & operator << (vespalib::asciistream & os, const EnumStore } -EnumStoreDictBase::EnumStoreDictBase() - : datastore::UniqueStoreDictionaryBase() -{ -} - -EnumStoreDictBase::~EnumStoreDictBase() = default; - -template <typename Dictionary> -EnumStoreDict<Dictionary>::EnumStoreDict(EnumStoreBase &enumStore) - : ParentUniqueStoreDictionary(), - _enumStore(enumStore) -{ -} - -template <typename Dictionary> -EnumStoreDict<Dictionary>::~EnumStoreDict() = default; - -template <typename Dictionary> -uint32_t -EnumStoreDict<Dictionary>::getNumUniques() const -{ - return this->_dict.size(); -} - -template <typename Dictionary> -void -EnumStoreDict<Dictionary>:: -writeAllValues(BufferWriter &writer, - btree::BTreeNode::Ref rootRef) const -{ - constexpr size_t BATCHSIZE = 1000; - std::vector<Index> idxs; - idxs.reserve(BATCHSIZE); - typename Dictionary::Iterator it(rootRef, this->_dict.getAllocator()); - while (it.valid()) { - if (idxs.size() >= idxs.capacity()) { - _enumStore.writeValues(writer, &idxs[0], idxs.size()); - idxs.clear(); - } - idxs.push_back(it.getKey()); - ++it; - } - if (!idxs.empty()) { - _enumStore.writeValues(writer, &idxs[0], idxs.size()); - } -} - -template <typename Dictionary> -ssize_t -EnumStoreDict<Dictionary>::deserialize(const void *src, - size_t available, - IndexVector &idx) -{ - return _enumStore.deserialize(src, available, idx, this->_dict); -} - -template <typename Dictionary> -void -EnumStoreDict<Dictionary>::fixupRefCounts(const EnumVector & hist) -{ - _enumStore.fixupRefCounts(hist, this->_dict); -} - -template <typename Dictionary> -void -EnumStoreDict<Dictionary>::removeUnusedEnums(const IndexSet &unused, - const datastore::EntryComparator &cmp, - const datastore::EntryComparator *fcmp) -{ - typedef typename Dictionary::Iterator Iterator; - if (unused.empty()) - return; - Iterator it(BTreeNode::Ref(), this->_dict.getAllocator()); - for (const auto& idx : unused) { - it.lower_bound(this->_dict.getRoot(), idx, cmp); - assert(it.valid() && !cmp(idx, it.getKey())); - if (Iterator::hasData() && fcmp != nullptr) { - typename Dictionary::DataType pidx(it.getData()); - this->_dict.remove(it); - if (!it.valid() || (*fcmp)(idx, it.getKey())) { - continue; // Next entry does not use same posting list - } - --it; - if (it.valid() && !(*fcmp)(it.getKey(), idx)) { - continue; // Previous entry uses same posting list - } - if (it.valid()) { - ++it; - } else { - it.begin(); - } - this->_dict.thaw(it); - it.writeData(pidx); - } else { - this->_dict.remove(it); - } - } -} - -template <typename Dictionary> -void -EnumStoreDict<Dictionary>::freeUnusedEnums(const datastore::EntryComparator &cmp, - const datastore::EntryComparator *fcmp) -{ - IndexSet unused; - - // find unused enums - for (auto iter = this->_dict.begin(); iter.valid(); ++iter) { - _enumStore.freeUnusedEnum(iter.getKey(), unused); - } - removeUnusedEnums(unused, cmp, fcmp); -} - -template <typename Dictionary> -void -EnumStoreDict<Dictionary>::freeUnusedEnums(const IndexSet& toRemove, - const datastore::EntryComparator& cmp, - const datastore::EntryComparator* fcmp) -{ - IndexSet unused; - for (const auto& index : toRemove) { - _enumStore.freeUnusedEnum(index, unused); - } - removeUnusedEnums(unused, cmp, fcmp); -} - -template <typename Dictionary> -bool -EnumStoreDict<Dictionary>::findIndex(const datastore::EntryComparator &cmp, - Index &idx) const -{ - auto itr = this->_dict.find(Index(), cmp); - if (!itr.valid()) { - return false; - } - idx = itr.getKey(); - return true; -} - -template <typename Dictionary> -bool -EnumStoreDict<Dictionary>::findFrozenIndex(const datastore::EntryComparator &cmp, - Index &idx) const -{ - auto itr = this->_dict.getFrozenView().find(Index(), cmp); - if (!itr.valid()) { - return false; - } - idx = itr.getKey(); - return true; -} - -template <typename Dictionary> -std::vector<EnumStoreBase::EnumHandle> -EnumStoreDict<Dictionary>::findMatchingEnums(const datastore::EntryComparator &cmp) const -{ - std::vector<EnumStoreBase::EnumHandle> result; - auto itr = this->_dict.getFrozenView().find(Index(), cmp); - while (itr.valid() && !cmp(Index(), itr.getKey())) { - result.push_back(itr.getKey().ref()); - ++itr; - } - return result; -} - -template <typename Dictionary> -void -EnumStoreDict<Dictionary>::onReset() -{ - this->_dict.clear(); -} - - -template <> -EnumPostingTree & -EnumStoreDict<EnumTree>::getPostingDictionary() -{ - LOG_ABORT("should not be reached"); -} - - -template <> -EnumPostingTree & -EnumStoreDict<EnumPostingTree>::getPostingDictionary() -{ - return _dict; -} - - -template <> -const EnumPostingTree & -EnumStoreDict<EnumTree>::getPostingDictionary() const -{ - LOG_ABORT("should not be reached"); -} - - -template <> -const EnumPostingTree & -EnumStoreDict<EnumPostingTree>::getPostingDictionary() const -{ - return _dict; -} - - -template <typename Dictionary> -bool -EnumStoreDict<Dictionary>::hasData() const -{ - return Dictionary::LeafNodeType::hasData(); -} - - -template class datastore::DataStoreT<datastore::AlignedEntryRefT<31, 4> >; +template class datastore::DataStoreT<EnumStoreIndex>; template ssize_t @@ -480,103 +268,6 @@ template void EnumStoreBase::fixupRefCounts<EnumPostingTree>(const EnumVector &hist, EnumPostingTree &tree); -template class EnumStoreDict<EnumTree>; - -template class EnumStoreDict<EnumPostingTree>; - -template -class btree::BTreeNodeT<EnumStoreBase::Index, EnumTreeTraits::INTERNAL_SLOTS>; - -template -class btree::BTreeNodeTT<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, EnumTreeTraits::INTERNAL_SLOTS>; - -template -class btree::BTreeNodeTT<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; - -template -class btree::BTreeInternalNode<EnumStoreBase::Index, btree::NoAggregated, EnumTreeTraits::INTERNAL_SLOTS>; - -template -class btree::BTreeLeafNode<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; - -template -class btree::BTreeLeafNode<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; - -template -class btree::BTreeLeafNodeTemp<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; - -template -class btree::BTreeLeafNodeTemp<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; - -template -class btree::BTreeNodeStore<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; - -template -class btree::BTreeNodeStore<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; - - -template -class btree::BTreeRoot<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -template -class btree::BTreeRoot<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -template -class btree::BTreeRootT<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -template -class btree::BTreeRootT<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -template -class btree::BTreeRootBase<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; - -template -class btree::BTreeRootBase<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; - -template -class btree::BTreeNodeAllocator<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; - -template -class btree::BTreeNodeAllocator<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; - -template -class btree::BTreeIteratorBase<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS, EnumTreeTraits::PATH_SIZE>; -template -class btree::BTreeIteratorBase<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS, EnumTreeTraits::PATH_SIZE>; - -template class btree::BTreeConstIterator<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -template class btree::BTreeConstIterator<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -template -class btree::BTreeIterator<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; -template -class btree::BTreeIterator<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -template -class btree::BTree<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; -template -class btree::BTree<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - - } namespace vespalib { diff --git a/searchlib/src/vespa/searchlib/attribute/enumstorebase.h b/searchlib/src/vespa/searchlib/attribute/enumstorebase.h index 8d9ff28669d..32e7e5134ca 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstorebase.h +++ b/searchlib/src/vespa/searchlib/attribute/enumstorebase.h @@ -2,12 +2,10 @@ #pragma once +#include "enum_store_dictionary.h" #include <vespa/searchcommon/attribute/iattributevector.h> -#include <vespa/vespalib/btree/btree.h> #include <vespa/vespalib/datastore/datastore.h> -#include <vespa/vespalib/datastore/entry_comparator_wrapper.h> #include <vespa/vespalib/datastore/entryref.h> -#include <vespa/vespalib/datastore/unique_store_dictionary.h> #include <vespa/vespalib/stllike/hash_map.h> #include <vespa/vespalib/util/address_space.h> #include <vespa/vespalib/util/array.h> @@ -22,127 +20,13 @@ class BufferWriter; namespace attribute { class Status; } -class EnumStoreBase; - using EnumStoreComparator = datastore::EntryComparator; -using EnumStoreDataStoreType = datastore::DataStoreT<datastore::AlignedEntryRefT<31, 4> >; -using EnumStoreIndex = EnumStoreDataStoreType::RefType; -using EnumStoreIndexVector = vespalib::Array<EnumStoreIndex>; -using EnumStoreEnumVector = vespalib::Array<uint32_t>; - -using EnumTreeTraits = btree::BTreeTraits<16, 16, 10, true>; - -using EnumTree = btree::BTree<EnumStoreIndex, btree::BTreeNoLeafData, - btree::NoAggregated, - const datastore::EntryComparatorWrapper, - EnumTreeTraits>; - -using EnumPostingTree = btree::BTree<EnumStoreIndex, datastore::EntryRef, - btree::NoAggregated, - const datastore::EntryComparatorWrapper, - EnumTreeTraits>; - -struct CompareEnumIndex -{ - using Index = EnumStoreIndex; - - bool operator()(const Index &lhs, const Index &rhs) const { - return lhs.ref() < rhs.ref(); - } -}; - -class EnumStoreDictBase : public datastore::UniqueStoreDictionaryBase { -public: - using EnumVector = EnumStoreEnumVector; - using Index = EnumStoreIndex; - using IndexSet = std::set<Index, CompareEnumIndex>; - using IndexVector = EnumStoreIndexVector; - using generation_t = vespalib::GenerationHandler::generation_t; - -public: - EnumStoreDictBase(); - virtual ~EnumStoreDictBase(); - - virtual uint32_t getNumUniques() const = 0; - virtual void writeAllValues(BufferWriter &writer, btree::BTreeNode::Ref rootRef) const = 0; - virtual ssize_t deserialize(const void *src, size_t available, IndexVector &idx) = 0; - - virtual void fixupRefCounts(const EnumVector &hist) = 0; - virtual void freeUnusedEnums(const datastore::EntryComparator &cmp, - const datastore::EntryComparator *fcmp) = 0; - virtual void freeUnusedEnums(const IndexSet& toRemove, - const datastore::EntryComparator& cmp, - const datastore::EntryComparator* fcmp) = 0; - virtual bool findIndex(const datastore::EntryComparator &cmp, Index &idx) const = 0; - virtual bool findFrozenIndex(const datastore::EntryComparator &cmp, Index &idx) const = 0; - virtual std::vector<attribute::IAttributeVector::EnumHandle> - findMatchingEnums(const datastore::EntryComparator &cmp) const = 0; - - virtual void onReset() = 0; - virtual btree::BTreeNode::Ref getFrozenRootRef() const = 0; - - virtual EnumPostingTree &getPostingDictionary() = 0; - virtual const EnumPostingTree &getPostingDictionary() const = 0; - virtual bool hasData() const = 0; -}; - - -template <typename Dictionary> -class EnumStoreDict : public datastore::UniqueStoreDictionary<Dictionary, EnumStoreDictBase> -{ -private: - using EnumVector = EnumStoreDictBase::EnumVector; - using Index = EnumStoreDictBase::Index; - using IndexSet = EnumStoreDictBase::IndexSet; - using IndexVector = EnumStoreDictBase::IndexVector; - using ParentUniqueStoreDictionary = datastore::UniqueStoreDictionary<Dictionary, EnumStoreDictBase>; - using generation_t = EnumStoreDictBase::generation_t; - - EnumStoreBase& _enumStore; - -public: - EnumStoreDict(EnumStoreBase &enumStore); - - ~EnumStoreDict() override; - - const Dictionary &getDictionary() const { return this->_dict; } - Dictionary &getDictionary() { return this->_dict; } - - uint32_t getNumUniques() const override; - void writeAllValues(BufferWriter &writer, btree::BTreeNode::Ref rootRef) const override; - ssize_t deserialize(const void *src, size_t available, IndexVector &idx) override; - void fixupRefCounts(const EnumVector &hist) override; - - void removeUnusedEnums(const IndexSet &unused, - const datastore::EntryComparator &cmp, - const datastore::EntryComparator *fcmp); - - void freeUnusedEnums(const datastore::EntryComparator &cmp, - const datastore::EntryComparator *fcmp) override; - - void freeUnusedEnums(const IndexSet& toRemove, - const datastore::EntryComparator& cmp, - const datastore::EntryComparator* fcmp) override; - - bool findIndex(const datastore::EntryComparator &cmp, Index &idx) const override; - bool findFrozenIndex(const datastore::EntryComparator &cmp, Index &idx) const override; - std::vector<attribute::IAttributeVector::EnumHandle> - findMatchingEnums(const datastore::EntryComparator &cmp) const override; - - void onReset() override; - btree::BTreeNode::Ref getFrozenRootRef() const override { return this->get_frozen_root(); } - - EnumPostingTree & getPostingDictionary() override; - const EnumPostingTree & getPostingDictionary() const override; - - bool hasData() const override; -}; class EnumStoreBase { public: - using DataStoreType = EnumStoreDataStoreType; + using DataStoreType = datastore::DataStoreT<EnumStoreIndex>; using EnumHandle = attribute::IAttributeVector::EnumHandle; using EnumVector = EnumStoreEnumVector; using Index = EnumStoreIndex; @@ -211,7 +95,7 @@ protected: void clearPendingCompact() { _pendingCompact = false; } }; - EnumStoreDictBase *_enumDict; + IEnumStoreDictionary *_enumDict; DataStoreType _store; EnumBufferType _type; std::vector<uint32_t> _toHoldBuffers; // used during compaction @@ -304,8 +188,8 @@ public: virtual bool performCompaction(uint64_t bytesNeeded, EnumIndexMap & old2New) = 0; - EnumStoreDictBase &getEnumStoreDict() { return *_enumDict; } - const EnumStoreDictBase &getEnumStoreDict() const { return *_enumDict; } + IEnumStoreDictionary &getEnumStoreDict() { return *_enumDict; } + const IEnumStoreDictionary &getEnumStoreDict() const { return *_enumDict; } EnumPostingTree &getPostingDictionary() { return _enumDict->getPostingDictionary(); } const EnumPostingTree &getPostingDictionary() const { @@ -316,100 +200,8 @@ public: vespalib::asciistream & operator << (vespalib::asciistream & os, const EnumStoreBase::Index & idx); - extern template -class datastore::DataStoreT<datastore::AlignedEntryRefT<31, 4> >; +class datastore::DataStoreT<EnumStoreIndex>; -extern template -class btree::BTreeNodeT<EnumStoreBase::Index, EnumTreeTraits::INTERNAL_SLOTS>; - -extern template -class btree::BTreeNodeTT<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, EnumTreeTraits::INTERNAL_SLOTS>; - -extern template -class btree::BTreeNodeTT<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; - -extern template -class btree::BTreeInternalNode<EnumStoreBase::Index, btree::NoAggregated, EnumTreeTraits::INTERNAL_SLOTS>; - -extern template -class btree::BTreeLeafNode<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; - -extern template -class btree::BTreeLeafNode<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; - -extern template -class btree::BTreeLeafNodeTemp<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; - -extern template -class btree::BTreeLeafNodeTemp<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, EnumTreeTraits::LEAF_SLOTS>; - -extern template -class btree::BTreeNodeStore<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; - -extern template -class btree::BTreeNodeStore<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; - -extern template -class btree::BTreeRoot<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -extern template -class btree::BTreeRoot<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -extern template -class btree::BTreeRootT<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -extern template -class btree::BTreeRootT<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -extern template -class btree::BTreeRootBase<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; - -extern template -class btree::BTreeRootBase<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; - -extern template -class btree::BTreeNodeAllocator<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; - -extern template -class btree::BTreeNodeAllocator<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS>; - - -extern template -class btree::BTreeIteratorBase<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS, EnumTreeTraits::PATH_SIZE>; -extern template -class btree::BTreeIteratorBase<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - EnumTreeTraits::INTERNAL_SLOTS, EnumTreeTraits::LEAF_SLOTS, EnumTreeTraits::PATH_SIZE>; - -extern template class btree::BTreeConstIterator<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -extern template class btree::BTreeConstIterator<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -extern template -class btree::BTreeIterator<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; -extern template -class btree::BTreeIterator<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; - -extern template -class btree::BTree<EnumStoreBase::Index, btree::BTreeNoLeafData, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; -extern template -class btree::BTree<EnumStoreBase::Index, datastore::EntryRef, btree::NoAggregated, - const datastore::EntryComparatorWrapper, EnumTreeTraits>; } diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h new file mode 100644 index 00000000000..619b69ed8a6 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h @@ -0,0 +1,75 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/searchcommon/attribute/iattributevector.h> +#include <vespa/vespalib/datastore/entry_comparator_wrapper.h> +#include <vespa/vespalib/datastore/unique_store_dictionary.h> +#include <set> + +namespace search { + +class BufferWriter; + +using EnumStoreIndex = datastore::AlignedEntryRefT<31, 4>; +using EnumStoreIndexVector = vespalib::Array<EnumStoreIndex>; +using EnumStoreEnumVector = vespalib::Array<uint32_t>; + +using EnumTreeTraits = btree::BTreeTraits<16, 16, 10, true>; + +using EnumTree = btree::BTree<EnumStoreIndex, btree::BTreeNoLeafData, + btree::NoAggregated, + const datastore::EntryComparatorWrapper, + EnumTreeTraits>; + +using EnumPostingTree = btree::BTree<EnumStoreIndex, datastore::EntryRef, + btree::NoAggregated, + const datastore::EntryComparatorWrapper, + EnumTreeTraits>; + +struct CompareEnumIndex { + using Index = EnumStoreIndex; + + bool operator()(const Index &lhs, const Index &rhs) const { + return lhs.ref() < rhs.ref(); + } +}; + +/** + * Interface for the dictionary used by an enum store. + */ +class IEnumStoreDictionary : public datastore::UniqueStoreDictionaryBase { +public: + using EnumVector = EnumStoreEnumVector; + using Index = EnumStoreIndex; + using IndexSet = std::set<Index, CompareEnumIndex>; + using IndexVector = EnumStoreIndexVector; + using generation_t = vespalib::GenerationHandler::generation_t; + +public: + virtual ~IEnumStoreDictionary() = default; + + virtual uint32_t getNumUniques() const = 0; + virtual void writeAllValues(BufferWriter& writer, btree::BTreeNode::Ref rootRef) const = 0; + virtual ssize_t deserialize(const void* src, size_t available, IndexVector& idx) = 0; + + virtual void fixupRefCounts(const EnumVector& hist) = 0; + virtual void freeUnusedEnums(const datastore::EntryComparator& cmp, + const datastore::EntryComparator* fcmp) = 0; + virtual void freeUnusedEnums(const IndexSet& toRemove, + const datastore::EntryComparator& cmp, + const datastore::EntryComparator* fcmp) = 0; + virtual bool findIndex(const datastore::EntryComparator& cmp, Index& idx) const = 0; + virtual bool findFrozenIndex(const datastore::EntryComparator& cmp, Index& idx) const = 0; + virtual std::vector<attribute::IAttributeVector::EnumHandle> + findMatchingEnums(const datastore::EntryComparator& cmp) const = 0; + + virtual void onReset() = 0; + virtual btree::BTreeNode::Ref getFrozenRootRef() const = 0; + + virtual EnumPostingTree& getPostingDictionary() = 0; + virtual const EnumPostingTree& getPostingDictionary() const = 0; + virtual bool hasData() const = 0; +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/ipostinglistattributebase.h b/searchlib/src/vespa/searchlib/attribute/ipostinglistattributebase.h index 4652d26f7a3..4876f43cd5d 100644 --- a/searchlib/src/vespa/searchlib/attribute/ipostinglistattributebase.h +++ b/searchlib/src/vespa/searchlib/attribute/ipostinglistattributebase.h @@ -2,6 +2,8 @@ #pragma once +#include <vespa/searchcommon/attribute/iattributevector.h> + namespace vespalib { class MemoryUsage; } namespace search::attribute { diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h index 539b362534b..b9346daefa2 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h @@ -2,16 +2,17 @@ #pragma once +#include "dociditerator.h" +#include "ipostinglistattributebase.h" +#include "postingchange.h" +#include "postinglistsearchcontext.h" +#include <vespa/searchlib/attribute/enumattribute.h> #include <vespa/searchlib/attribute/numericbase.h> #include <vespa/searchlib/attribute/stringbase.h> -#include <vespa/searchlib/attribute/enumattribute.h> #include <vespa/searchlib/queryeval/searchiterator.h> -#include <vespa/vespalib/datastore/entryref.h> #include <vespa/vespalib/btree/btreestore.h> -#include "dociditerator.h" -#include "postinglistsearchcontext.h" -#include "postingchange.h" -#include "ipostinglistattributebase.h" +#include <vespa/vespalib/datastore/entry_comparator.h> +#include <vespa/vespalib/datastore/entryref.h> namespace search { @@ -19,9 +20,9 @@ class EnumPostingPair { private: EnumStoreBase::Index _idx; - const EnumStoreComparator *_cmp; + const datastore::EntryComparator *_cmp; public: - EnumPostingPair(EnumStoreBase::Index idx, const EnumStoreComparator *cmp) + EnumPostingPair(EnumStoreBase::Index idx, const datastore::EntryComparator *cmp) : _idx(idx), _cmp(cmp) { } |