summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@broadpark.no>2019-09-03 11:26:16 +0200
committerTor Egge <Tor.Egge@broadpark.no>2019-09-03 11:30:55 +0200
commit6bbbbd15f93adb6d8f640c8f9d1adaf1cff4fa77 (patch)
treeb793aeb085db0c59b9b75b587dc663669f49213c /searchlib
parenta41c6717c20cd547b539bd2c387efe54c8bb8ef4 (diff)
Add EnumStoreFoldedDictionary class, used when multiple entries
in the dictionary might share a posting list. Ensure that valid posting list reference is kept at the first of these entries.
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/attribute/enumstore/enumstore_test.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp91
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h30
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumstore.cpp8
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumstore.h9
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumstore.hpp78
-rw-r--r--searchlib/src/vespa/searchlib/attribute/i_enum_store.h4
-rw-r--r--searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h6
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp2
9 files changed, 103 insertions, 129 deletions
diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
index 60cad3e9c48..4d1d0b6fff5 100644
--- a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
+++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
@@ -280,7 +280,7 @@ EnumStoreTest::testHoldListAndGeneration()
ses.decRefCount(idx);
EXPECT_EQUAL(0u, ses.getRefCount(idx));
}
- ses.freeUnusedEnums(true);
+ ses.freeUnusedEnums();
// check readers again
checkReaders(ses, sesGen, readers);
@@ -304,7 +304,7 @@ void
decRefCount(NumericEnumStore& store, NumericEnumStore::Index idx)
{
store.decRefCount(idx);
- store.freeUnusedEnums(false);
+ store.freeUnusedEnums();
generation_t gen = 5;
store.transferHoldLists(gen);
store.trimHoldLists(gen + 1);
diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp
index 24c0eeabd2c..a080c91cb34 100644
--- a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.cpp
@@ -14,6 +14,10 @@
#include <vespa/log/log.h>
LOG_SETUP(".searchlib.attribute.enum_store_dictionary");
+using search::datastore::EntryComparator;
+using search::datastore::EntryRef;
+using search::datastore::UniqueStoreAddResult;
+
namespace search {
using btree::BTreeNode;
@@ -76,44 +80,19 @@ EnumStoreDictionary<DictionaryT>::fixupRefCounts(const EnumVector& hist)
template <typename DictionaryT>
void
EnumStoreDictionary<DictionaryT>::removeUnusedEnums(const IndexSet& unused,
- const datastore::EntryComparator& cmp,
- const datastore::EntryComparator* fcmp)
+ const datastore::EntryComparator& cmp)
{
- using Iterator = typename DictionaryT::Iterator;
if (unused.empty()) {
return;
}
- Iterator it(BTreeNode::Ref(), this->_dict.getAllocator());
- for (const auto& idx : unused) {
- it.lower_bound(this->_dict.getRoot(), idx, cmp);
- assert(it.valid() && !cmp(idx, it.getKey()));
- if (Iterator::hasData() && fcmp != nullptr) {
- typename DictionaryT::DataType pidx(it.getData());
- this->_dict.remove(it);
- if (!it.valid() || (*fcmp)(idx, it.getKey())) {
- continue; // Next entry does not use same posting list
- }
- --it;
- if (it.valid() && !(*fcmp)(it.getKey(), idx)) {
- continue; // Previous entry uses same posting list
- }
- if (it.valid()) {
- ++it;
- } else {
- it.begin();
- }
- this->_dict.thaw(it);
- it.writeData(pidx);
- } else {
- this->_dict.remove(it);
- }
- }
+ for (const auto& ref : unused) {
+ this->remove(cmp, ref);
+ }
}
template <typename DictionaryT>
void
-EnumStoreDictionary<DictionaryT>::freeUnusedEnums(const datastore::EntryComparator& cmp,
- const datastore::EntryComparator* fcmp)
+EnumStoreDictionary<DictionaryT>::freeUnusedEnums(const datastore::EntryComparator& cmp)
{
IndexSet unused;
@@ -121,20 +100,19 @@ EnumStoreDictionary<DictionaryT>::freeUnusedEnums(const datastore::EntryComparat
for (auto iter = this->_dict.begin(); iter.valid(); ++iter) {
_enumStore.freeUnusedEnum(iter.getKey(), unused);
}
- removeUnusedEnums(unused, cmp, fcmp);
+ removeUnusedEnums(unused, cmp);
}
template <typename DictionaryT>
void
EnumStoreDictionary<DictionaryT>::freeUnusedEnums(const IndexSet& toRemove,
- const datastore::EntryComparator& cmp,
- const datastore::EntryComparator* fcmp)
+ const datastore::EntryComparator& cmp)
{
IndexSet unused;
for (const auto& index : toRemove) {
_enumStore.freeUnusedEnum(index, unused);
}
- removeUnusedEnums(unused, cmp, fcmp);
+ removeUnusedEnums(unused, cmp);
}
template <typename DictionaryT>
@@ -218,6 +196,51 @@ EnumStoreDictionary<DictionaryT>::hasData() const
return DictionaryT::LeafNodeType::hasData();
}
+EnumStoreFoldedDictionary::EnumStoreFoldedDictionary(IEnumStore& enumStore, std::unique_ptr<EntryComparator> folded_compare)
+ : EnumStoreDictionary<EnumPostingTree>(enumStore),
+ _folded_compare(std::move(folded_compare))
+{
+}
+
+EnumStoreFoldedDictionary::~EnumStoreFoldedDictionary() = default;
+
+UniqueStoreAddResult
+EnumStoreFoldedDictionary::add(const EntryComparator &comp, std::function<EntryRef(void)> insertEntry)
+{
+ auto it = _dict.lowerBound(EntryRef(), comp);
+ if (it.valid() && !comp(EntryRef(), it.getKey())) {
+ // Entry already exists
+ return UniqueStoreAddResult(it.getKey(), false);
+ }
+ EntryRef newRef = insertEntry();
+ _dict.insert(it, newRef, EntryRef());
+ // Maybe move posting list reference from next entry
+ ++it;
+ if (it.valid() && it.getData().valid() && !(*_folded_compare)(newRef, it.getKey())) {
+ EntryRef posting_list_ref(it.getData());
+ _dict.thaw(it);
+ it.writeData(EntryRef());
+ --it;
+ assert(it.valid() && it.getKey() == newRef);
+ it.writeData(posting_list_ref);
+ }
+ return UniqueStoreAddResult(newRef, true);
+}
+
+void
+EnumStoreFoldedDictionary::remove(const EntryComparator &comp, EntryRef ref)
+{
+ assert(ref.valid());
+ auto it = _dict.lowerBound(ref, comp);
+ assert(it.valid() && it.getKey() == ref);
+ EntryRef posting_list_ref(it.getData());
+ _dict.remove(it);
+ // Maybe copy posting list reference to next entry
+ if (posting_list_ref.valid() && it.valid() && !it.getData().valid() && !(*_folded_compare)(ref, it.getKey())) {
+ this->_dict.thaw(it);
+ it.writeData(posting_list_ref);
+ }
+}
template class EnumStoreDictionary<EnumTree>;
diff --git a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h
index 31f52e4e965..38a7d6b1985 100644
--- a/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h
+++ b/searchlib/src/vespa/searchlib/attribute/enum_store_dictionary.h
@@ -38,15 +38,12 @@ public:
void fixupRefCounts(const EnumVector& hist) override;
void removeUnusedEnums(const IndexSet& unused,
- const datastore::EntryComparator& cmp,
- const datastore::EntryComparator* fcmp);
+ const datastore::EntryComparator& cmp);
- void freeUnusedEnums(const datastore::EntryComparator& cmp,
- const datastore::EntryComparator* fcmp) override;
+ void freeUnusedEnums(const datastore::EntryComparator& cmp) override;
void freeUnusedEnums(const IndexSet& toRemove,
- const datastore::EntryComparator& cmp,
- const datastore::EntryComparator* fcmp) override;
+ const datastore::EntryComparator& cmp) override;
bool findIndex(const datastore::EntryComparator& cmp, Index& idx) const override;
bool findFrozenIndex(const datastore::EntryComparator& cmp, Index& idx) const override;
@@ -62,6 +59,27 @@ public:
bool hasData() const override;
};
+/**
+ * Concrete dictionary for an enum store that extends the
+ * functionality of a unique store dictionary.
+ *
+ * Special handling of value (posting list reference) is added to
+ * ensure that entries with same folded key share a posting list
+ * (e.g. case insensitive search) and posting list reference is found
+ * for the first of these entries.
+ */
+class EnumStoreFoldedDictionary : public EnumStoreDictionary<EnumPostingTree>
+{
+private:
+ std::unique_ptr<datastore::EntryComparator> _folded_compare;
+
+public:
+ EnumStoreFoldedDictionary(IEnumStore& enumStore, std::unique_ptr<datastore::EntryComparator> folded_compare);
+ ~EnumStoreFoldedDictionary() override;
+ datastore::UniqueStoreAddResult add(const datastore::EntryComparator& comp, std::function<datastore::EntryRef(void)> insertEntry) override;
+ void remove(const datastore::EntryComparator& comp, datastore::EntryRef ref) override;
+};
+
extern template
class btree::BTreeNodeT<IEnumStore::Index, EnumTreeTraits::INTERNAL_SLOTS>;
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.cpp b/searchlib/src/vespa/searchlib/attribute/enumstore.cpp
index 7ce65193c40..1d5b4348af8 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumstore.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.cpp
@@ -47,10 +47,14 @@ EnumStoreT<StringEntryType>::deserialize(const void* src,
}
std::unique_ptr<datastore::IUniqueStoreDictionary>
-make_enum_store_dictionary(IEnumStore &store, bool has_postings)
+make_enum_store_dictionary(IEnumStore &store, bool has_postings, std::unique_ptr<datastore::EntryComparator> folded_compare)
{
if (has_postings) {
- return std::make_unique<EnumStoreDictionary<EnumPostingTree>>(store);
+ if (folded_compare) {
+ return std::make_unique<EnumStoreFoldedDictionary>(store, std::move(folded_compare));
+ } else {
+ return std::make_unique<EnumStoreDictionary<EnumPostingTree>>(store);
+ }
} else {
return std::make_unique<EnumStoreDictionary<EnumTree>>(store);
}
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h
index 94252239975..c422cf14cda 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumstore.h
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h
@@ -243,20 +243,15 @@ public:
std::vector<IEnumStore::EnumHandle> findFoldedEnums(DataType value) const;
void addEnum(DataType value, Index &newIdx);
bool findIndex(DataType value, Index &idx) const;
- void freeUnusedEnums(bool movePostingidx) override;
+ void freeUnusedEnums() override;
void freeUnusedEnums(const IndexSet& toRemove);
vespalib::MemoryUsage update_stat() override;
std::unique_ptr<EnumIndexRemapper> consider_compact(const CompactionStrategy& compaction_strategy) override;
std::unique_ptr<EnumIndexRemapper> compact_worst(bool compact_memory, bool compact_address_space) override;
-
-private:
- template <typename Dictionary>
- void addEnum(DataType value, Index& newIdx, Dictionary& dict);
-
};
std::unique_ptr<datastore::IUniqueStoreDictionary>
-make_enum_store_dictionary(IEnumStore &store, bool has_postings);
+make_enum_store_dictionary(IEnumStore &store, bool has_postings, std::unique_ptr<datastore::EntryComparator> folded_compare);
vespalib::asciistream & operator << (vespalib::asciistream & os, const IEnumStore::Index & idx);
diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
index 574712798c2..e107b6da787 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp
@@ -34,7 +34,7 @@ void EnumStoreT<EntryType>::freeUnusedEnum(Index idx, IndexSet& unused)
template <typename EntryType>
EnumStoreT<EntryType>::EnumStoreT(bool has_postings)
- : _store(make_enum_store_dictionary(*this, has_postings)),
+ : _store(make_enum_store_dictionary(*this, has_postings, EntryType::hasFold() ? std::make_unique<FoldedComparatorType>(*this) : std::unique_ptr<datastore::EntryComparator>())),
_dict(static_cast<IEnumStoreDictionary&>(_store.get_dictionary())),
_cached_values_memory_usage(),
_cached_values_address_space_usage(0, 0, (1ull << 32))
@@ -169,15 +169,10 @@ EnumStoreT<EntryType>::findIndex(DataType value, Index &idx) const
template <typename EntryType>
void
-EnumStoreT<EntryType>::freeUnusedEnums(bool movePostingIdx)
+EnumStoreT<EntryType>::freeUnusedEnums()
{
ComparatorType cmp(*this);
- if (EntryType::hasFold() && movePostingIdx) {
- FoldedComparatorType fcmp(*this);
- _dict.freeUnusedEnums(cmp, &fcmp);
- } else {
- _dict.freeUnusedEnums(cmp, nullptr);
- }
+ _dict.freeUnusedEnums(cmp);
}
template <typename EntryType>
@@ -185,75 +180,16 @@ void
EnumStoreT<EntryType>::freeUnusedEnums(const IndexSet& toRemove)
{
ComparatorType cmp(*this);
- if (EntryType::hasFold()) {
- FoldedComparatorType fcmp(*this);
- _dict.freeUnusedEnums(toRemove, cmp, &fcmp);
- } else {
- _dict.freeUnusedEnums(toRemove, cmp, nullptr);
- }
-}
-
-template <typename EntryType>
-template <typename Dictionary>
-void
-EnumStoreT<EntryType>::addEnum(DataType value, Index& newIdx, Dictionary& dict)
-{
- typedef typename Dictionary::Iterator DictionaryIterator;
-
- // check if already present
- ComparatorType cmp(*this, value);
- DictionaryIterator it(btree::BTreeNode::Ref(), dict.getAllocator());
- it.lower_bound(dict.getRoot(), Index(), cmp);
- if (it.valid() && !cmp(Index(), it.getKey())) {
- newIdx = it.getKey();
- return;
- }
-
- newIdx = _store.get_allocator().allocate(value);
-
- // TODO: Move this logic to "add/insert" on the dictionary
- // update tree with new index
- dict.insert(it, newIdx, typename Dictionary::DataType());
-
- // Copy posting list idx from next entry if same folded value.
- // Only for string posting list attributes, i.e. dictionary has
- // data and entry type has folded compare.
- if (DictionaryIterator::hasData() && EntryType::hasFold()) {
- FoldedComparatorType foldCmp(*this);
- ++it;
- if (!it.valid() || foldCmp(newIdx, it.getKey())) {
- return; // Next entry does not use same posting list
- }
- --it;
- --it;
- if (it.valid() && !foldCmp(it.getKey(), newIdx)) {
- return; // Previous entry uses same posting list
- }
- if (it.valid()) {
- ++it;
- } else {
- it.begin();
- }
- assert(it.valid() && it.getKey() == newIdx);
- ++it;
- typename Dictionary::DataType pidx(it.getData());
- dict.thaw(it);
- it.writeData(typename Dictionary::DataType());
- --it;
- assert(it.valid() && it.getKey() == newIdx);
- it.writeData(pidx);
- }
+ _dict.freeUnusedEnums(toRemove, cmp);
}
template <typename EntryType>
void
EnumStoreT<EntryType>::addEnum(DataType value, Index& newIdx)
{
- if (_dict.hasData()) {
- addEnum(value, newIdx, static_cast<EnumStoreDictionary<EnumPostingTree> &>(_dict).getDictionary());
- } else {
- addEnum(value, newIdx, static_cast<EnumStoreDictionary<EnumTree> &>(_dict).getDictionary());
- }
+ ComparatorType cmp(*this, value);
+ auto add_result = _dict.add(cmp, [this, &value]() -> EntryRef { return _store.get_allocator().allocate(value); });
+ newIdx = add_result.ref();
}
template <typename EntryType>
diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h
index 2a9842075e6..8a9058d8766 100644
--- a/searchlib/src/vespa/searchlib/attribute/i_enum_store.h
+++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store.h
@@ -51,7 +51,7 @@ public:
virtual ssize_t deserialize0(const void* src, size_t available, IndexVector& idx) = 0;
virtual void fixupRefCount(Index idx, uint32_t refCount) = 0;
virtual void freeUnusedEnum(Index idx, IndexSet& unused) = 0;
- virtual void freeUnusedEnums(bool movePostingIdx) = 0;
+ virtual void freeUnusedEnums() = 0;
virtual bool foldedChange(const Index& idx1, const Index& idx2) = 0;
virtual IEnumStoreDictionary& getEnumStoreDict() = 0;
virtual const IEnumStoreDictionary& getEnumStoreDict() const = 0;
@@ -94,7 +94,7 @@ public:
fixupRefCount(ti.getKey(), *hi);
}
assert(!ti.valid());
- freeUnusedEnums(false);
+ freeUnusedEnums();
}
};
diff --git a/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h b/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h
index a4c14256835..5c2510346c8 100644
--- a/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h
+++ b/searchlib/src/vespa/searchlib/attribute/i_enum_store_dictionary.h
@@ -42,11 +42,9 @@ public:
virtual ssize_t deserialize(const void* src, size_t available, IndexVector& idx) = 0;
virtual void fixupRefCounts(const EnumVector& hist) = 0;
- virtual void freeUnusedEnums(const datastore::EntryComparator& cmp,
- const datastore::EntryComparator* fcmp) = 0;
+ virtual void freeUnusedEnums(const datastore::EntryComparator& cmp) = 0;
virtual void freeUnusedEnums(const IndexSet& toRemove,
- const datastore::EntryComparator& cmp,
- const datastore::EntryComparator* fcmp) = 0;
+ const datastore::EntryComparator& cmp) = 0;
virtual bool findIndex(const datastore::EntryComparator& cmp, Index& idx) const = 0;
virtual bool findFrozenIndex(const datastore::EntryComparator& cmp, Index& idx) const = 0;
virtual std::vector<attribute::IAttributeVector::EnumHandle>
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp
index c8ee6ab50b2..e6e390ef3a9 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp
@@ -108,7 +108,7 @@ PostingListAttributeBase<P>::fillPostingsFixupEnumBase(const LoadedEnumAttribute
&postings._removals[0],
&postings._removals[0] + postings._removals.size());
posting_itr.writeData(newIndex);
- enumStore.freeUnusedEnums(false);
+ enumStore.freeUnusedEnums();
}
template <typename P>