diff options
Diffstat (limited to 'searchlib')
6 files changed, 36 insertions, 38 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp index d867ae9f211..1b21f3a7b6e 100644 --- a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp @@ -1,13 +1,8 @@ // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "enum_store_dictionary.h" -#include "enumstore.h" #include <vespa/vespalib/btree/btree.hpp> -#include <vespa/vespalib/btree/btreeiterator.hpp> #include <vespa/vespalib/btree/btreenode.hpp> -#include <vespa/vespalib/btree/btreenodeallocator.hpp> -#include <vespa/vespalib/btree/btreeroot.hpp> -#include <vespa/vespalib/datastore/datastore.hpp> #include <vespa/vespalib/datastore/sharded_hash_map.h> #include <vespa/vespalib/datastore/unique_store_dictionary.hpp> #include <vespa/searchlib/util/bufferwriter.h> @@ -28,9 +23,6 @@ void EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::remove_unused_values(const IndexSet& unused, const vespalib::datastore::EntryComparator& cmp) { - if (unused.empty()) { - return; - } for (const auto& ref : unused) { this->remove(cmp, ref); } @@ -58,7 +50,9 @@ EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::free_unused_values(const _enumStore.free_value_if_unused(iter.getKey(), unused); } } else { - this->_hash_dict.foreach_key([this, &unused](EntryRef ref) { _enumStore.free_value_if_unused(ref, unused); }); + this->_hash_dict.foreach_key([this, &unused](EntryRef ref) { + _enumStore.free_value_if_unused(ref, unused); + }); } remove_unused_values(unused, cmp); } @@ -66,11 +60,17 @@ EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::free_unused_values(const template <typename BTreeDictionaryT, typename HashDictionaryT> void EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::free_unused_values(const IndexSet& to_remove, - const vespalib::datastore::EntryComparator& cmp) + const vespalib::datastore::EntryComparator& cmp) { IndexSet unused; + + EntryRef prev; for (const auto& index : to_remove) { - _enumStore.free_value_if_unused(index, unused); + assert(prev <= index); + if (index != prev) { + _enumStore.free_value_if_unused(index, unused); + prev = index; + } } remove_unused_values(unused, cmp); } @@ -96,8 +96,7 @@ EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::remove(const EntryCompar template <typename BTreeDictionaryT, typename HashDictionaryT> bool -EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::find_index(const vespalib::datastore::EntryComparator& cmp, - Index& idx) const +EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::find_index(const vespalib::datastore::EntryComparator& cmp, Index& idx) const { if constexpr (has_hash_dictionary) { auto find_result = this->_hash_dict.find(cmp, EntryRef()); @@ -118,8 +117,7 @@ EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::find_index(const vespali template <typename BTreeDictionaryT, typename HashDictionaryT> bool -EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::find_frozen_index(const vespalib::datastore::EntryComparator& cmp, - Index& idx) const +EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::find_frozen_index(const vespalib::datastore::EntryComparator& cmp, Index& idx) const { if constexpr (has_hash_dictionary) { auto find_result = this->_hash_dict.find(cmp, EntryRef()); diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h index 326e0916039..59d77ea0558 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.h +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h @@ -63,7 +63,7 @@ private: EnumStoreT(const EnumStoreT & rhs) = delete; EnumStoreT & operator=(const EnumStoreT & rhs) = delete; - void free_value_if_unused(Index idx, IndexSet &unused) override; + void free_value_if_unused(Index idx, IndexList &unused) override; const vespalib::datastore::UniqueStoreEntryBase& get_entry_base(Index idx) const { return _store.get_allocator().get_wrapped(idx); @@ -153,7 +153,7 @@ public: class BatchUpdater { private: EnumStoreType& _store; - IndexSet _possibly_unused; + IndexList _possibly_unused; public: BatchUpdater(EnumStoreType& store) @@ -168,11 +168,11 @@ public: auto& entry = _store.get_entry_base(idx); entry.dec_ref_count(); if (entry.get_ref_count() == 0) { - _possibly_unused.insert(idx); + _possibly_unused.push_back(idx); } } void commit() { - _store.free_unused_values(_possibly_unused); + _store.free_unused_values(std::move(_possibly_unused)); } }; @@ -198,7 +198,7 @@ public: Index insert(EntryType value); bool find_index(EntryType value, Index& idx) const; void free_unused_values() override; - void free_unused_values(const IndexSet& to_remove); + void free_unused_values(IndexList to_remove); vespalib::MemoryUsage update_stat() override; std::unique_ptr<EnumIndexRemapper> consider_compact_values(const CompactionStrategy& compaction_strategy) override; std::unique_ptr<EnumIndexRemapper> compact_worst_values(bool compact_memory, bool compact_address_space) override; diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp index 9885613f4e3..771da8ffa01 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp @@ -30,11 +30,11 @@ make_enum_store_dictionary(IEnumStore &store, bool has_postings, const search::D std::unique_ptr<EntryComparator> folded_compare); template <typename EntryT> -void EnumStoreT<EntryT>::free_value_if_unused(Index idx, IndexSet& unused) +void EnumStoreT<EntryT>::free_value_if_unused(Index idx, IndexList& unused) { const auto& entry = get_entry_base(idx); if (entry.get_ref_count() == 0) { - unused.insert(idx); + unused.push_back(idx); _store.get_allocator().hold(idx); } } @@ -140,7 +140,7 @@ EnumStoreT<EntryT>::BatchUpdater::insert(EntryType value) auto cmp = _store.make_comparator(value); auto result = _store._dict->add(cmp, [this, &value]() -> EntryRef { return _store._store.get_allocator().allocate(value); }); if (result.inserted()) { - _possibly_unused.insert(result.ref()); + _possibly_unused.push_back(result.ref()); } return result.ref(); } @@ -191,8 +191,16 @@ EnumStoreT<EntryT>::free_unused_values() template <typename EntryT> void -EnumStoreT<EntryT>::free_unused_values(const IndexSet& to_remove) +EnumStoreT<EntryT>::free_unused_values(IndexList to_remove) { + struct CompareEnumIndex { + using Index = IEnumStore::Index; + + bool operator()(const Index &lhs, const Index &rhs) const { + return lhs.ref() < rhs.ref(); + } + }; + std::sort(to_remove.begin(), to_remove.end(), CompareEnumIndex()); _dict->free_unused_values(to_remove, get_comparator()); } diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h index 6d714ec25ba..716609764f4 100644 --- a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h +++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h @@ -40,22 +40,14 @@ public: using EnumIndexRemapper = vespalib::datastore::UniqueStoreRemapper<InternalIndex>; using Enumerator = vespalib::datastore::UniqueStoreEnumerator<IEnumStore::InternalIndex>; - struct CompareEnumIndex { - using Index = IEnumStore::Index; - - bool operator()(const Index &lhs, const Index &rhs) const { - return lhs.ref() < rhs.ref(); - } - }; - - using IndexSet = std::set<Index, CompareEnumIndex>; + using IndexList = std::vector<Index>; virtual ~IEnumStore() = default; virtual void write_value(BufferWriter& writer, Index idx) const = 0; virtual ssize_t load_unique_values(const void* src, size_t available, IndexVector& idx) = 0; virtual void set_ref_count(Index idx, uint32_t ref_count) = 0; - virtual void free_value_if_unused(Index idx, IndexSet& unused) = 0; + virtual void free_value_if_unused(Index idx, IndexList& unused) = 0; virtual void free_unused_values() = 0; virtual bool is_folded_change(Index idx1, Index idx2) const = 0; virtual IEnumStoreDictionary& get_dictionary() = 0; diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h index f816177b06c..9d72369f245 100644 --- a/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h +++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h @@ -31,7 +31,7 @@ public: using EntryRef = vespalib::datastore::EntryRef; using EnumVector = IEnumStore::EnumVector; using Index = IEnumStore::Index; - using IndexSet = IEnumStore::IndexSet; + using IndexSet = IEnumStore::IndexList; using IndexVector = IEnumStore::IndexVector; using generation_t = vespalib::GenerationHandler::generation_t; diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp index bf75400b157..a9a94afb763 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp @@ -312,9 +312,9 @@ SingleValueEnumAttribute<B>::onShrinkLidSpace() uint32_t default_value_ref_count = this->_enumStore.get_ref_count(default_value_ref); assert(default_value_ref_count >= shrink_docs); this->_enumStore.set_ref_count(default_value_ref, default_value_ref_count - shrink_docs); - IEnumStore::IndexSet possibly_unused; - possibly_unused.insert(default_value_ref); - this->_enumStore.free_unused_values(possibly_unused); + IEnumStore::IndexList possibly_unused; + possibly_unused.push_back(default_value_ref); + this->_enumStore.free_unused_values(std::move(possibly_unused)); } _enumIndices.shrink(committedDocIdLimit); this->setNumDocs(committedDocIdLimit); |