From 8fdcba59a3a588ed40b9ff98daac52f08dfc01d5 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Mon, 14 Jun 2021 11:38:43 +0000 Subject: Use a list instead of a set to make building faster. Then sort and uniq before applying the list. --- .../searchlib/attribute/enum_store_dictionary.cpp | 28 +++++++++--------- .../src/vespa/searchlib/attribute/enumstore.h | 10 +++---- .../src/vespa/searchlib/attribute/enumstore.hpp | 16 ++++++++--- .../src/vespa/searchlib/attribute/i_enum_store.h | 12 ++------ .../searchlib/attribute/i_enum_store_dictionary.h | 2 +- .../searchlib/attribute/singleenumattribute.hpp | 6 ++-- .../tests/datastore/datastore/datastore_test.cpp | 33 ++++++++++++++++++++++ vespalib/src/vespa/vespalib/datastore/entryref.h | 1 + 8 files changed, 70 insertions(+), 38 deletions(-) diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp index d867ae9f211..1b21f3a7b6e 100644 --- a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp @@ -1,13 +1,8 @@ // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "enum_store_dictionary.h" -#include "enumstore.h" #include -#include #include -#include -#include -#include #include #include #include @@ -28,9 +23,6 @@ void EnumStoreDictionary::remove_unused_values(const IndexSet& unused, const vespalib::datastore::EntryComparator& cmp) { - if (unused.empty()) { - return; - } for (const auto& ref : unused) { this->remove(cmp, ref); } @@ -58,7 +50,9 @@ EnumStoreDictionary::free_unused_values(const _enumStore.free_value_if_unused(iter.getKey(), unused); } } else { - this->_hash_dict.foreach_key([this, &unused](EntryRef ref) { _enumStore.free_value_if_unused(ref, unused); }); + this->_hash_dict.foreach_key([this, &unused](EntryRef ref) { + _enumStore.free_value_if_unused(ref, unused); + }); } remove_unused_values(unused, cmp); } @@ -66,11 +60,17 @@ EnumStoreDictionary::free_unused_values(const template void EnumStoreDictionary::free_unused_values(const IndexSet& to_remove, - const vespalib::datastore::EntryComparator& cmp) + const vespalib::datastore::EntryComparator& cmp) { IndexSet unused; + + EntryRef prev; for (const auto& index : to_remove) { - _enumStore.free_value_if_unused(index, unused); + assert(prev <= index); + if (index != prev) { + _enumStore.free_value_if_unused(index, unused); + prev = index; + } } remove_unused_values(unused, cmp); } @@ -96,8 +96,7 @@ EnumStoreDictionary::remove(const EntryCompar template bool -EnumStoreDictionary::find_index(const vespalib::datastore::EntryComparator& cmp, - Index& idx) const +EnumStoreDictionary::find_index(const vespalib::datastore::EntryComparator& cmp, Index& idx) const { if constexpr (has_hash_dictionary) { auto find_result = this->_hash_dict.find(cmp, EntryRef()); @@ -118,8 +117,7 @@ EnumStoreDictionary::find_index(const vespali template bool -EnumStoreDictionary::find_frozen_index(const vespalib::datastore::EntryComparator& cmp, - Index& idx) const +EnumStoreDictionary::find_frozen_index(const vespalib::datastore::EntryComparator& cmp, Index& idx) const { if constexpr (has_hash_dictionary) { auto find_result = this->_hash_dict.find(cmp, EntryRef()); diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h index 326e0916039..59d77ea0558 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.h +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h @@ -63,7 +63,7 @@ private: EnumStoreT(const EnumStoreT & rhs) = delete; EnumStoreT & operator=(const EnumStoreT & rhs) = delete; - void free_value_if_unused(Index idx, IndexSet &unused) override; + void free_value_if_unused(Index idx, IndexList &unused) override; const vespalib::datastore::UniqueStoreEntryBase& get_entry_base(Index idx) const { return _store.get_allocator().get_wrapped(idx); @@ -153,7 +153,7 @@ public: class BatchUpdater { private: EnumStoreType& _store; - IndexSet _possibly_unused; + IndexList _possibly_unused; public: BatchUpdater(EnumStoreType& store) @@ -168,11 +168,11 @@ public: auto& entry = _store.get_entry_base(idx); entry.dec_ref_count(); if (entry.get_ref_count() == 0) { - _possibly_unused.insert(idx); + _possibly_unused.push_back(idx); } } void commit() { - _store.free_unused_values(_possibly_unused); + _store.free_unused_values(std::move(_possibly_unused)); } }; @@ -198,7 +198,7 @@ public: Index insert(EntryType value); bool find_index(EntryType value, Index& idx) const; void free_unused_values() override; - void free_unused_values(const IndexSet& to_remove); + void free_unused_values(IndexList to_remove); vespalib::MemoryUsage update_stat() override; std::unique_ptr consider_compact_values(const CompactionStrategy& compaction_strategy) override; std::unique_ptr compact_worst_values(bool compact_memory, bool compact_address_space) override; diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp index 9885613f4e3..771da8ffa01 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp @@ -30,11 +30,11 @@ make_enum_store_dictionary(IEnumStore &store, bool has_postings, const search::D std::unique_ptr folded_compare); template -void EnumStoreT::free_value_if_unused(Index idx, IndexSet& unused) +void EnumStoreT::free_value_if_unused(Index idx, IndexList& unused) { const auto& entry = get_entry_base(idx); if (entry.get_ref_count() == 0) { - unused.insert(idx); + unused.push_back(idx); _store.get_allocator().hold(idx); } } @@ -140,7 +140,7 @@ EnumStoreT::BatchUpdater::insert(EntryType value) auto cmp = _store.make_comparator(value); auto result = _store._dict->add(cmp, [this, &value]() -> EntryRef { return _store._store.get_allocator().allocate(value); }); if (result.inserted()) { - _possibly_unused.insert(result.ref()); + _possibly_unused.push_back(result.ref()); } return result.ref(); } @@ -191,8 +191,16 @@ EnumStoreT::free_unused_values() template void -EnumStoreT::free_unused_values(const IndexSet& to_remove) +EnumStoreT::free_unused_values(IndexList to_remove) { + struct CompareEnumIndex { + using Index = IEnumStore::Index; + + bool operator()(const Index &lhs, const Index &rhs) const { + return lhs.ref() < rhs.ref(); + } + }; + std::sort(to_remove.begin(), to_remove.end(), CompareEnumIndex()); _dict->free_unused_values(to_remove, get_comparator()); } diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h index 6d714ec25ba..716609764f4 100644 --- a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h +++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h @@ -40,22 +40,14 @@ public: using EnumIndexRemapper = vespalib::datastore::UniqueStoreRemapper; using Enumerator = vespalib::datastore::UniqueStoreEnumerator; - struct CompareEnumIndex { - using Index = IEnumStore::Index; - - bool operator()(const Index &lhs, const Index &rhs) const { - return lhs.ref() < rhs.ref(); - } - }; - - using IndexSet = std::set; + using IndexList = std::vector; virtual ~IEnumStore() = default; virtual void write_value(BufferWriter& writer, Index idx) const = 0; virtual ssize_t load_unique_values(const void* src, size_t available, IndexVector& idx) = 0; virtual void set_ref_count(Index idx, uint32_t ref_count) = 0; - virtual void free_value_if_unused(Index idx, IndexSet& unused) = 0; + virtual void free_value_if_unused(Index idx, IndexList& unused) = 0; virtual void free_unused_values() = 0; virtual bool is_folded_change(Index idx1, Index idx2) const = 0; virtual IEnumStoreDictionary& get_dictionary() = 0; diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h index f816177b06c..9d72369f245 100644 --- a/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h +++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h @@ -31,7 +31,7 @@ public: using EntryRef = vespalib::datastore::EntryRef; using EnumVector = IEnumStore::EnumVector; using Index = IEnumStore::Index; - using IndexSet = IEnumStore::IndexSet; + using IndexSet = IEnumStore::IndexList; using IndexVector = IEnumStore::IndexVector; using generation_t = vespalib::GenerationHandler::generation_t; diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp index bf75400b157..a9a94afb763 100644 --- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp @@ -312,9 +312,9 @@ SingleValueEnumAttribute::onShrinkLidSpace() uint32_t default_value_ref_count = this->_enumStore.get_ref_count(default_value_ref); assert(default_value_ref_count >= shrink_docs); this->_enumStore.set_ref_count(default_value_ref, default_value_ref_count - shrink_docs); - IEnumStore::IndexSet possibly_unused; - possibly_unused.insert(default_value_ref); - this->_enumStore.free_unused_values(possibly_unused); + IEnumStore::IndexList possibly_unused; + possibly_unused.push_back(default_value_ref); + this->_enumStore.free_unused_values(std::move(possibly_unused)); } _enumIndices.shrink(committedDocIdLimit); this->setNumDocs(committedDocIdLimit); diff --git a/vespalib/src/tests/datastore/datastore/datastore_test.cpp b/vespalib/src/tests/datastore/datastore/datastore_test.cpp index 11b4f5e6631..90281acb0d3 100644 --- a/vespalib/src/tests/datastore/datastore/datastore_test.cpp +++ b/vespalib/src/tests/datastore/datastore/datastore_test.cpp @@ -141,6 +141,38 @@ assertMemStats(const DataStoreBase::MemStats &exp, EXPECT_EQ(exp._holdBuffers, act._holdBuffers); } +TEST(DataStoreTest, require_that_invalid_entry_ref_can_be_ordered) { + EntryRef inValid; + EntryRef a(1); + EXPECT_EQ(inValid, inValid); + EXPECT_EQ(a, a); + EXPECT_NE(inValid, a); + EXPECT_NE(a, inValid); + EXPECT_LT(inValid, a); + EXPECT_LE(inValid, a); +} + +TEST(DataStoreTest, require_that_entry_ref_can_be_ordered) { + EntryRef a(1); + EntryRef b(2); + EntryRef c(3); + EXPECT_EQ(a, a); + EXPECT_EQ(b, b); + EXPECT_EQ(c, c); + EXPECT_NE(a, b); + EXPECT_NE(a, c); + EXPECT_NE(b, c); + EXPECT_LT(a, b); + EXPECT_LT(b, c); + EXPECT_LT(a, c); + EXPECT_LE(a, a); + EXPECT_LE(b, b); + EXPECT_LE(c, c); + EXPECT_LE(a, b); + EXPECT_LE(b, c); + EXPECT_LE(a, c); +} + TEST(DataStoreTest, require_that_entry_ref_is_working) { using MyRefType = EntryRefT<22>; @@ -643,6 +675,7 @@ TEST(DataStoreTest, control_static_sizes) { EXPECT_EQ(0, bs.size()); } + } GTEST_MAIN_RUN_ALL_TESTS() diff --git a/vespalib/src/vespa/vespalib/datastore/entryref.h b/vespalib/src/vespa/vespalib/datastore/entryref.h index 046d9089580..01f473fcf17 100644 --- a/vespalib/src/vespa/vespalib/datastore/entryref.h +++ b/vespalib/src/vespa/vespalib/datastore/entryref.h @@ -21,6 +21,7 @@ public: bool operator==(const EntryRef &rhs) const noexcept { return _ref == rhs._ref; } bool operator!=(const EntryRef &rhs) const noexcept { return _ref != rhs._ref; } bool operator <(const EntryRef &rhs) const noexcept { return _ref < rhs._ref; } + bool operator <=(const EntryRef &rhs) const noexcept { return _ref <= rhs._ref; } }; /** -- cgit v1.2.3