// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "enum_store_dictionary.h" #include #include #include #include #include #include LOG_SETUP(".searchlib.attribute.enum_store_dictionary"); using vespalib::datastore::AtomicEntryRef; using vespalib::datastore::EntryRef; using vespalib::datastore::UniqueStoreAddResult; namespace search { using vespalib::btree::BTreeNode; template void EnumStoreDictionary::remove_unused_values(const IndexList & unused,const EntryComparator& cmp) { for (const auto& ref : unused) { this->remove(cmp, ref); } } template EnumStoreDictionary::EnumStoreDictionary(IEnumStore& enumStore, std::unique_ptr compare) : ParentUniqueStoreDictionary(std::move(compare)), _enumStore(enumStore) { } template EnumStoreDictionary::~EnumStoreDictionary() = default; template void EnumStoreDictionary::free_unused_values(const EntryComparator& cmp) { IndexList unused; // find unused enums if constexpr (has_btree_dictionary) { for (auto iter = this->_btree_dict.begin(); iter.valid(); ++iter) { _enumStore.free_value_if_unused(iter.getKey().load_relaxed(), unused); } } else { this->_hash_dict.foreach_key([this, &unused](EntryRef ref) { _enumStore.free_value_if_unused(ref, unused); }); } remove_unused_values(unused, cmp); } template void EnumStoreDictionary::free_unused_values(const IndexList& to_remove, const EntryComparator& cmp) { IndexList unused; EntryRef prev; for (const auto& index : to_remove) { assert(prev <= index); if (index != prev) { _enumStore.free_value_if_unused(index, unused); prev = index; } } remove_unused_values(unused, cmp); } template void EnumStoreDictionary::remove(const EntryComparator &comp, EntryRef ref) { assert(ref.valid()); if constexpr (has_btree_dictionary) { auto itr = this->_btree_dict.lowerBound(AtomicEntryRef(ref), comp); assert(itr.valid() && itr.getKey().load_relaxed() == ref); if constexpr (std::is_same_v) { assert(!itr.getData().load_relaxed().valid()); } this->_btree_dict.remove(itr); } if constexpr (has_hash_dictionary) { auto *result = this->_hash_dict.remove(comp, ref); assert(result != nullptr && result->first.load_relaxed() == ref); } } template bool EnumStoreDictionary::find_index(const EntryComparator& cmp, Index& idx) const { if constexpr (has_hash_dictionary) { auto find_result = this->_hash_dict.find(cmp, EntryRef()); if (find_result != nullptr) { idx = find_result->first.load_relaxed(); return true; } return false; } else { auto itr = this->_btree_dict.find(AtomicEntryRef(), cmp); if (!itr.valid()) { return false; } idx = itr.getKey().load_relaxed(); return true; } } template bool EnumStoreDictionary::find_frozen_index(const EntryComparator& cmp, Index& idx) const { if constexpr (has_hash_dictionary) { auto find_result = this->_hash_dict.find(cmp, EntryRef()); if (find_result != nullptr) { idx = find_result->first.load_acquire(); return true; } return false; } else { auto itr = this->_btree_dict.getFrozenView().find(AtomicEntryRef(), cmp); if (!itr.valid()) { return false; } idx = itr.getKey().load_acquire(); return true; } } template std::vector EnumStoreDictionary::find_matching_enums(const EntryComparator& cmp) const { std::vector result; if constexpr (has_btree_dictionary) { auto itr = this->_btree_dict.getFrozenView().find(AtomicEntryRef(), cmp); while (itr.valid() && !cmp.less(Index(), itr.getKey().load_acquire())) { result.push_back(itr.getKey().load_acquire().ref()); ++itr; } } else { auto find_result = this->_hash_dict.find(cmp, EntryRef()); if (find_result != nullptr) { result.push_back(find_result->first.load_acquire().ref()); } } return result; } template EntryRef EnumStoreDictionary::get_frozen_root() const { if constexpr (has_btree_dictionary) { return this->_btree_dict.getFrozenView().getRoot(); } else { return EntryRef(); } } template <> std::pair EnumStoreDictionary::find_posting_list(const EntryComparator&, EntryRef) const { LOG_ABORT("should not be reached"); } template std::pair EnumStoreDictionary::find_posting_list(const EntryComparator& cmp, EntryRef root) const { if constexpr (has_hash_dictionary) { (void) root; auto find_result = this->_hash_dict.find(cmp, EntryRef()); if (find_result != nullptr) { return std::make_pair(find_result->first.load_acquire(), find_result->second.load_acquire()); } return std::make_pair(Index(), EntryRef()); } else { typename BTreeDictionaryType::ConstIterator itr(vespalib::btree::BTreeNode::Ref(), this->_btree_dict.getAllocator()); itr.lower_bound(root, AtomicEntryRef(), cmp); if (itr.valid() && !cmp.less(Index(), itr.getKey().load_acquire())) { return std::make_pair(itr.getKey().load_acquire(), itr.getData().load_acquire()); } return std::make_pair(Index(), EntryRef()); } } template void EnumStoreDictionary::collect_folded(Index idx, EntryRef, const std::function& callback) const { callback(idx); } template IEnumStore::Index EnumStoreDictionary::remap_index(Index idx) { return idx; } template <> void EnumStoreDictionary::clear_all_posting_lists(std::function) { LOG_ABORT("should not be reached"); } template void EnumStoreDictionary::clear_all_posting_lists(std::function clearer) { if constexpr (has_btree_dictionary) { auto& dict = this->_btree_dict; auto itr = dict.begin(); EntryRef prev; while (itr.valid()) { EntryRef ref(itr.getData().load_relaxed()); if (ref.ref() != prev.ref()) { if (ref.valid()) { clearer(ref); } prev = ref; } itr.getWData().store_release(EntryRef()); ++itr; } } else { this->_hash_dict.normalize_values([&clearer](EntryRef ref) { clearer(ref); return EntryRef(); }); } } template <> void EnumStoreDictionary::update_posting_list(Index, const EntryComparator&, std::function) { LOG_ABORT("should not be reached"); } template void EnumStoreDictionary::update_posting_list(Index idx, const EntryComparator& cmp, std::function updater) { if constexpr (has_btree_dictionary) { auto& dict = this->_btree_dict; auto itr = dict.lowerBound(AtomicEntryRef(idx), cmp); assert(itr.valid() && itr.getKey().load_relaxed() == idx); EntryRef old_posting_idx(itr.getData().load_relaxed()); EntryRef new_posting_idx = updater(old_posting_idx); itr.getWData().store_release(new_posting_idx); if constexpr (has_hash_dictionary) { auto find_result = this->_hash_dict.find(this->_hash_dict.get_default_comparator(), idx); assert(find_result != nullptr && find_result->first.load_relaxed() == idx); assert(find_result->second.load_relaxed() == old_posting_idx); find_result->second.store_release(new_posting_idx); } } else { auto find_result = this->_hash_dict.find(this->_hash_dict.get_default_comparator(), idx); assert(find_result != nullptr && find_result->first.load_relaxed() == idx); EntryRef old_posting_idx = find_result->second.load_relaxed(); EntryRef new_posting_idx = updater(old_posting_idx); find_result->second.store_release(new_posting_idx); } } template <> bool EnumStoreDictionary::normalize_posting_lists(std::function) { LOG_ABORT("should not be reached"); } template bool EnumStoreDictionary::normalize_posting_lists(std::function normalize) { if constexpr (has_btree_dictionary) { bool changed = false; auto& dict = this->_btree_dict; for (auto itr = dict.begin(); itr.valid(); ++itr) { EntryRef old_posting_idx(itr.getData().load_relaxed()); EntryRef new_posting_idx = normalize(old_posting_idx); if (new_posting_idx != old_posting_idx) { changed = true; itr.getWData().store_release(new_posting_idx); if constexpr (has_hash_dictionary) { auto find_result = this->_hash_dict.find(this->_hash_dict.get_default_comparator(), itr.getKey().load_relaxed()); assert(find_result != nullptr && find_result->first.load_relaxed() == itr.getKey().load_relaxed()); assert(find_result->second.load_relaxed() == old_posting_idx); find_result->second.store_release(new_posting_idx); } } } return changed; } else { return this->_hash_dict.normalize_values(normalize); } } template <> bool EnumStoreDictionary::normalize_posting_lists(std::function&)>, const EntryRefFilter&) { LOG_ABORT("should not be reached"); } namespace { template class ChangeWriterBase { protected: HashDictionaryT* _hash_dict; static constexpr bool has_hash_dictionary = true; ChangeWriterBase() : _hash_dict(nullptr) { } public: void set_hash_dict(HashDictionaryT &hash_dict) { _hash_dict = &hash_dict; } }; template <> class ChangeWriterBase { protected: static constexpr bool has_hash_dictionary = false; ChangeWriterBase() = default; }; template class ChangeWriter : public ChangeWriterBase { using Parent = ChangeWriterBase; using Parent::has_hash_dictionary; std::vector> _tree_refs; public: ChangeWriter(uint32_t capacity); ~ChangeWriter(); bool write(const std::vector& refs); void emplace_back(EntryRef key, AtomicEntryRef& tree_ref) { _tree_refs.emplace_back(std::make_pair(key, &tree_ref)); } }; template ChangeWriter::ChangeWriter(uint32_t capacity) : ChangeWriterBase(), _tree_refs() { _tree_refs.reserve(capacity); } template ChangeWriter::~ChangeWriter() = default; template bool ChangeWriter::write(const std::vector &refs) { bool changed = false; assert(refs.size() == _tree_refs.size()); auto tree_ref = _tree_refs.begin(); for (auto ref : refs) { EntryRef old_ref(tree_ref->second->load_relaxed()); if (ref != old_ref) { if (!changed) { changed = true; } tree_ref->second->store_release(ref); if constexpr (has_hash_dictionary) { auto find_result = this->_hash_dict->find(this->_hash_dict->get_default_comparator(), tree_ref->first); assert(find_result != nullptr && find_result->first.load_relaxed() == tree_ref->first); assert(find_result->second.load_relaxed() == old_ref); find_result->second.store_release(ref); } } ++tree_ref; } assert(tree_ref == _tree_refs.end()); _tree_refs.clear(); return changed; } } template bool EnumStoreDictionary::normalize_posting_lists(std::function&)> normalize, const EntryRefFilter& filter) { if constexpr (has_btree_dictionary) { std::vector refs; refs.reserve(1024); bool changed = false; ChangeWriter change_writer(refs.capacity()); if constexpr (has_hash_dictionary) { change_writer.set_hash_dict(this->_hash_dict); } auto& dict = this->_btree_dict; for (auto itr = dict.begin(); itr.valid(); ++itr) { EntryRef ref(itr.getData().load_relaxed()); if (ref.valid()) { if (filter.has(ref)) { refs.emplace_back(ref); change_writer.emplace_back(itr.getKey().load_relaxed(), itr.getWData()); if (refs.size() >= refs.capacity()) { normalize(refs); changed |= change_writer.write(refs); refs.clear(); } } } } if (!refs.empty()) { normalize(refs); changed |= change_writer.write(refs); } return changed; } else { return this->_hash_dict.normalize_values(normalize, filter); } } template <> void EnumStoreDictionary::foreach_posting_list(std::function&)>, const EntryRefFilter&) { LOG_ABORT("should not be reached"); } template void EnumStoreDictionary::foreach_posting_list(std::function&)> callback, const EntryRefFilter& filter) { if constexpr (has_btree_dictionary) { std::vector refs; refs.reserve(1024); auto& dict = this->_btree_dict; for (auto itr = dict.begin(); itr.valid(); ++itr) { EntryRef ref(itr.getData().load_relaxed()); if (ref.valid()) { if (filter.has(ref)) { refs.emplace_back(ref); if (refs.size() >= refs.capacity()) { callback(refs); refs.clear(); } } } } if (!refs.empty()) { callback(refs); } } else { this->_hash_dict.foreach_value(callback, filter); } } template <> const EnumPostingTree & EnumStoreDictionary::get_posting_dictionary() const { LOG_ABORT("should not be reached"); } template <> const EnumPostingTree & EnumStoreDictionary::get_posting_dictionary() const { LOG_ABORT("should not be reached"); } template const EnumPostingTree & EnumStoreDictionary::get_posting_dictionary() const { return this->_btree_dict; } EnumStoreFoldedDictionary::EnumStoreFoldedDictionary(IEnumStore& enumStore, std::unique_ptr compare, std::unique_ptr folded_compare) : EnumStoreDictionary(enumStore, std::move(compare)), _folded_compare(std::move(folded_compare)) { } EnumStoreFoldedDictionary::~EnumStoreFoldedDictionary() = default; UniqueStoreAddResult EnumStoreFoldedDictionary::add(const EntryComparator& comp, std::function insertEntry) { static_assert(!has_hash_dictionary, "Folded Dictionary does not support hash dictionary"); auto it = _btree_dict.lowerBound(AtomicEntryRef(), comp); if (it.valid() && !comp.less(EntryRef(), it.getKey().load_relaxed())) { // Entry already exists return UniqueStoreAddResult(it.getKey().load_relaxed(), false); } EntryRef newRef = insertEntry(); _btree_dict.insert(it, AtomicEntryRef(newRef), AtomicEntryRef()); // Maybe move posting list reference from next entry ++it; if (it.valid() && it.getData().load_relaxed().valid() && !_folded_compare->less(newRef, it.getKey().load_relaxed())) { EntryRef posting_list_ref(it.getData().load_relaxed()); _btree_dict.thaw(it); it.writeData(AtomicEntryRef()); --it; assert(it.valid() && it.getKey().load_relaxed() == newRef); it.writeData(AtomicEntryRef(posting_list_ref)); } return UniqueStoreAddResult(newRef, true); } void EnumStoreFoldedDictionary::remove(const EntryComparator& comp, EntryRef ref) { static_assert(!has_hash_dictionary, "Folded Dictionary does not support hash dictionary"); assert(ref.valid()); auto it = _btree_dict.lowerBound(AtomicEntryRef(ref), comp); assert(it.valid() && it.getKey().load_relaxed() == ref); EntryRef posting_list_ref(it.getData().load_relaxed()); _btree_dict.remove(it); // Maybe copy posting list reference to next entry if (posting_list_ref.valid()) { if (it.valid() && !it.getData().load_relaxed().valid() && !_folded_compare->less(ref, it.getKey().load_relaxed())) { this->_btree_dict.thaw(it); it.writeData(AtomicEntryRef(posting_list_ref)); } else { LOG_ABORT("Posting list not cleared for removed unique value"); } } } void EnumStoreFoldedDictionary::collect_folded(Index idx, EntryRef root, const std::function& callback) const { BTreeDictionaryType::ConstIterator itr(vespalib::btree::BTreeNode::Ref(), _btree_dict.getAllocator()); itr.lower_bound(root, AtomicEntryRef(idx), *_folded_compare); while (itr.valid() && !_folded_compare->less(idx, itr.getKey().load_acquire())) { callback(itr.getKey().load_acquire()); ++itr; } } IEnumStore::Index EnumStoreFoldedDictionary::remap_index(Index idx) { auto itr = _btree_dict.find(AtomicEntryRef(idx), *_folded_compare); assert(itr.valid()); return itr.getKey().load_acquire(); } template class EnumStoreDictionary; template class EnumStoreDictionary; template class EnumStoreDictionary; template class EnumStoreDictionary; } namespace vespalib::btree { using search::IEnumStore; using search::EnumTreeTraits; using datastore::EntryComparatorWrapper; template class BTreeNodeT; template class BTreeNodeTT; template class BTreeNodeTT; template class BTreeInternalNode; template class BTreeLeafNode; template class BTreeLeafNode; template class BTreeLeafNodeTemp; template class BTreeLeafNodeTemp; template class BTreeNodeStore; template class BTreeNodeStore; template class BTreeRoot; template class BTreeRoot; template class BTreeRootT; template class BTreeRootT; template class BTreeRootBase; template class BTreeRootBase; template class BTreeNodeAllocator; template class BTreeNodeAllocator; template class BTreeIteratorBase; template class BTreeIteratorBase; template class BTreeConstIterator; template class BTreeConstIterator; template class BTreeIterator; template class BTreeIterator; template class BTree; template class BTree; }