summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2021-06-14 11:38:43 +0000
committer Henning Baldersheim <balder@yahoo-inc.com> 2021-06-14 11:53:56 +0000
commit 8fdcba59a3a588ed40b9ff98daac52f08dfc01d5 (patch)
tree 83568f2f83ffb232ec527501a4110b691f2dd743 /searchlib
parent 291e527fb70897742f16caa4b3c7062cc6e47708 (diff)
Use a list (vector) instead of a set to make building faster.
Then sort the list and skip duplicates when applying it.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp 28
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/enumstore.h 10
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/enumstore.hpp 16
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/i_enum_store.h 12
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp 6
6 files changed, 36 insertions, 38 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp
index d867ae9f211..1b21f3a7b6e 100644
--- a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp
@@ -1,13 +1,8 @@
// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "enum_store_dictionary.h"
-#include "enumstore.h"
#include <vespa/vespalib/btree/btree.hpp>
-#include <vespa/vespalib/btree/btreeiterator.hpp>
#include <vespa/vespalib/btree/btreenode.hpp>
-#include <vespa/vespalib/btree/btreenodeallocator.hpp>
-#include <vespa/vespalib/btree/btreeroot.hpp>
-#include <vespa/vespalib/datastore/datastore.hpp>
#include <vespa/vespalib/datastore/sharded_hash_map.h>
#include <vespa/vespalib/datastore/unique_store_dictionary.hpp>
#include <vespa/searchlib/util/bufferwriter.h>
@@ -28,9 +23,6 @@ void
EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::remove_unused_values(const IndexSet& unused,
const vespalib::datastore::EntryComparator& cmp)
{
- if (unused.empty()) {
- return;
- }
for (const auto& ref : unused) {
this->remove(cmp, ref);
}
@@ -58,7 +50,9 @@ EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::free_unused_values(const
_enumStore.free_value_if_unused(iter.getKey(), unused);
}
} else {
- this->_hash_dict.foreach_key([this, &unused](EntryRef ref) { _enumStore.free_value_if_unused(ref, unused); });
+ this->_hash_dict.foreach_key([this, &unused](EntryRef ref) {
+ _enumStore.free_value_if_unused(ref, unused);
+ });
}
remove_unused_values(unused, cmp);
}
@@ -66,11 +60,17 @@ EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::free_unused_values(const
template <typename BTreeDictionaryT, typename HashDictionaryT>
void
EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::free_unused_values(const IndexSet& to_remove,
- const vespalib::datastore::EntryComparator& cmp)
+ const vespalib::datastore::EntryComparator& cmp)
{
IndexSet unused;
+
+ EntryRef prev;
for (const auto& index : to_remove) {
- _enumStore.free_value_if_unused(index, unused);
+ assert(prev <= index);
+ if (index != prev) {
+ _enumStore.free_value_if_unused(index, unused);
+ prev = index;
+ }
}
remove_unused_values(unused, cmp);
}
@@ -96,8 +96,7 @@ EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::remove(const EntryCompar
template <typename BTreeDictionaryT, typename HashDictionaryT>
bool
-EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::find_index(const vespalib::datastore::EntryComparator& cmp,
- Index& idx) const
+EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::find_index(const vespalib::datastore::EntryComparator& cmp, Index& idx) const
{
if constexpr (has_hash_dictionary) {
auto find_result = this->_hash_dict.find(cmp, EntryRef());
@@ -118,8 +117,7 @@ EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::find_index(const vespali
template <typename BTreeDictionaryT, typename HashDictionaryT>
bool
-EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::find_frozen_index(const vespalib::datastore::EntryComparator& cmp,
- Index& idx) const
+EnumStoreDictionary<BTreeDictionaryT, HashDictionaryT>::find_frozen_index(const vespalib::datastore::EntryComparator& cmp, Index& idx) const
{
if constexpr (has_hash_dictionary) {
auto find_result = this->_hash_dict.find(cmp, EntryRef());
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h
index 326e0916039..59d77ea0558 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumstore.h
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h
@@ -63,7 +63,7 @@ private:
EnumStoreT(const EnumStoreT & rhs) = delete;
EnumStoreT & operator=(const EnumStoreT & rhs) = delete;
- void free_value_if_unused(Index idx, IndexSet &unused) override;
+ void free_value_if_unused(Index idx, IndexList &unused) override;
const vespalib::datastore::UniqueStoreEntryBase& get_entry_base(Index idx) const {
return _store.get_allocator().get_wrapped(idx);
@@ -153,7 +153,7 @@ public:
class BatchUpdater {
private:
EnumStoreType& _store;
- IndexSet _possibly_unused;
+ IndexList _possibly_unused;
public:
BatchUpdater(EnumStoreType& store)
@@ -168,11 +168,11 @@ public:
auto& entry = _store.get_entry_base(idx);
entry.dec_ref_count();
if (entry.get_ref_count() == 0) {
- _possibly_unused.insert(idx);
+ _possibly_unused.push_back(idx);
}
}
void commit() {
- _store.free_unused_values(_possibly_unused);
+ _store.free_unused_values(std::move(_possibly_unused));
}
};
@@ -198,7 +198,7 @@ public:
Index insert(EntryType value);
bool find_index(EntryType value, Index& idx) const;
void free_unused_values() override;
- void free_unused_values(const IndexSet& to_remove);
+ void free_unused_values(IndexList to_remove);
vespalib::MemoryUsage update_stat() override;
std::unique_ptr<EnumIndexRemapper> consider_compact_values(const CompactionStrategy& compaction_strategy) override;
std::unique_ptr<EnumIndexRemapper> compact_worst_values(bool compact_memory, bool compact_address_space) override;
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
index 9885613f4e3..771da8ffa01 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
@@ -30,11 +30,11 @@ make_enum_store_dictionary(IEnumStore &store, bool has_postings, const search::D
std::unique_ptr<EntryComparator> folded_compare);
template <typename EntryT>
-void EnumStoreT<EntryT>::free_value_if_unused(Index idx, IndexSet& unused)
+void EnumStoreT<EntryT>::free_value_if_unused(Index idx, IndexList& unused)
{
const auto& entry = get_entry_base(idx);
if (entry.get_ref_count() == 0) {
- unused.insert(idx);
+ unused.push_back(idx);
_store.get_allocator().hold(idx);
}
}
@@ -140,7 +140,7 @@ EnumStoreT<EntryT>::BatchUpdater::insert(EntryType value)
auto cmp = _store.make_comparator(value);
auto result = _store._dict->add(cmp, [this, &value]() -> EntryRef { return _store._store.get_allocator().allocate(value); });
if (result.inserted()) {
- _possibly_unused.insert(result.ref());
+ _possibly_unused.push_back(result.ref());
}
return result.ref();
}
@@ -191,8 +191,16 @@ EnumStoreT<EntryT>::free_unused_values()
template <typename EntryT>
void
-EnumStoreT<EntryT>::free_unused_values(const IndexSet& to_remove)
+EnumStoreT<EntryT>::free_unused_values(IndexList to_remove)
{
+ struct CompareEnumIndex {
+ using Index = IEnumStore::Index;
+
+ bool operator()(const Index &lhs, const Index &rhs) const {
+ return lhs.ref() < rhs.ref();
+ }
+ };
+ std::sort(to_remove.begin(), to_remove.end(), CompareEnumIndex());
_dict->free_unused_values(to_remove, get_comparator());
}
diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h
index 6d714ec25ba..716609764f4 100644
--- a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h
+++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h
@@ -40,22 +40,14 @@ public:
using EnumIndexRemapper = vespalib::datastore::UniqueStoreRemapper<InternalIndex>;
using Enumerator = vespalib::datastore::UniqueStoreEnumerator<IEnumStore::InternalIndex>;
- struct CompareEnumIndex {
- using Index = IEnumStore::Index;
-
- bool operator()(const Index &lhs, const Index &rhs) const {
- return lhs.ref() < rhs.ref();
- }
- };
-
- using IndexSet = std::set<Index, CompareEnumIndex>;
+ using IndexList = std::vector<Index>;
virtual ~IEnumStore() = default;
virtual void write_value(BufferWriter& writer, Index idx) const = 0;
virtual ssize_t load_unique_values(const void* src, size_t available, IndexVector& idx) = 0;
virtual void set_ref_count(Index idx, uint32_t ref_count) = 0;
- virtual void free_value_if_unused(Index idx, IndexSet& unused) = 0;
+ virtual void free_value_if_unused(Index idx, IndexList& unused) = 0;
virtual void free_unused_values() = 0;
virtual bool is_folded_change(Index idx1, Index idx2) const = 0;
virtual IEnumStoreDictionary& get_dictionary() = 0;
diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h
index f816177b06c..9d72369f245 100644
--- a/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h
+++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h
@@ -31,7 +31,7 @@ public:
using EntryRef = vespalib::datastore::EntryRef;
using EnumVector = IEnumStore::EnumVector;
using Index = IEnumStore::Index;
- using IndexSet = IEnumStore::IndexSet;
+ using IndexSet = IEnumStore::IndexList;
using IndexVector = IEnumStore::IndexVector;
using generation_t = vespalib::GenerationHandler::generation_t;
diff --git a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
index bf75400b157..a9a94afb763 100644
--- a/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singleenumattribute.hpp
@@ -312,9 +312,9 @@ SingleValueEnumAttribute<B>::onShrinkLidSpace()
uint32_t default_value_ref_count = this->_enumStore.get_ref_count(default_value_ref);
assert(default_value_ref_count >= shrink_docs);
this->_enumStore.set_ref_count(default_value_ref, default_value_ref_count - shrink_docs);
- IEnumStore::IndexSet possibly_unused;
- possibly_unused.insert(default_value_ref);
- this->_enumStore.free_unused_values(possibly_unused);
+ IEnumStore::IndexList possibly_unused;
+ possibly_unused.push_back(default_value_ref);
+ this->_enumStore.free_unused_values(std::move(possibly_unused));
}
_enumIndices.shrink(committedDocIdLimit);
this->setNumDocs(committedDocIdLimit);