diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-04-20 07:34:50 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2021-04-20 07:34:50 +0000 |
commit | d5203746895c91c0597e76cb9a8d92957cdedd2d (patch) | |
tree | 527ea9b6e509bfa1d50a705046a7edb4ace0e1bd /searchlib | |
parent | 8ed4e70600e1c773f97a237c77697d61bbdcf409 (diff) |
Make folding a runtime argument rather than a compile-time decision
as it can no longer be inferred from the type.
Diffstat (limited to 'searchlib')
6 files changed, 63 insertions, 119 deletions
diff --git a/searchlib/src/vespa/searchlib/attribute/enumcomparator.cpp b/searchlib/src/vespa/searchlib/attribute/enumcomparator.cpp index 3e601e823c5..368de359c85 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumcomparator.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enumcomparator.cpp @@ -30,50 +30,48 @@ EnumStoreComparator<EntryT>::equal_helper(const EntryT& lhs, const EntryT& rhs) return vespalib::datastore::UniqueStoreComparatorHelper<EntryT>::equal(lhs, rhs); } -EnumStoreStringComparator::EnumStoreStringComparator(const DataStoreType& data_store) - : ParentType(data_store, nullptr) -{ -} - -EnumStoreStringComparator::EnumStoreStringComparator(const DataStoreType& data_store, const char* fallback_value) - : ParentType(data_store, fallback_value) +EnumStoreStringComparator::EnumStoreStringComparator(const DataStoreType& data_store, bool fold) + : ParentType(data_store, nullptr), + _fold(fold), + _prefix(false), + _prefix_len(0) { } -EnumStoreFoldedStringComparator::EnumStoreFoldedStringComparator(const DataStoreType& data_store) - : ParentType(data_store, nullptr), +EnumStoreStringComparator::EnumStoreStringComparator(const DataStoreType& data_store, bool fold, const char* fallback_value) + : ParentType(data_store, fallback_value), + _fold(fold), _prefix(false), - _prefix_len(0u) + _prefix_len(0) { } -EnumStoreFoldedStringComparator::EnumStoreFoldedStringComparator(const DataStoreType& data_store, - const char* fallback_value, bool prefix) +EnumStoreStringComparator::EnumStoreStringComparator(const DataStoreType& data_store, const char* fallback_value, bool prefix) : ParentType(data_store, fallback_value), + _fold(true), _prefix(prefix), - _prefix_len(0u) + _prefix_len(0) { if (use_prefix()) { _prefix_len = _strCmp.size(fallback_value); } } -int -EnumStoreStringComparator::compare(const char* lhs, const char* rhs) -{ - return _strCmp.compare(lhs, rhs); -} +bool +EnumStoreStringComparator::less(const vespalib::datastore::EntryRef lhs, const 
vespalib::datastore::EntryRef rhs) const { + return _fold + ? (use_prefix() + ? (_strCmp.compareFoldedPrefix(get(lhs), get(rhs), _prefix_len) < 0) + : (_strCmp.compareFolded(get(lhs), get(rhs)) < 0)) + : (_strCmp.compare(get(lhs), get(rhs)) < 0); -int -EnumStoreFoldedStringComparator::compare_folded(const char* lhs, const char* rhs) -{ - return _strCmp.compareFolded(lhs, rhs); } -int -EnumStoreFoldedStringComparator::compare_folded_prefix(const char* lhs, const char* rhs, size_t prefix_len) -{ - return _strCmp.compareFoldedPrefix(lhs, rhs, prefix_len); +bool +EnumStoreStringComparator::equal(const vespalib::datastore::EntryRef lhs, const vespalib::datastore::EntryRef rhs) const { + return _fold + ? (_strCmp.compareFolded(get(lhs), get(rhs)) == 0) + : (_strCmp.compare(get(lhs), get(rhs)) == 0); } template class EnumStoreComparator<int8_t>; diff --git a/searchlib/src/vespa/searchlib/attribute/enumcomparator.h b/searchlib/src/vespa/searchlib/attribute/enumcomparator.h index 1e1b28d3881..018ec1b4ff4 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumcomparator.h +++ b/searchlib/src/vespa/searchlib/attribute/enumcomparator.h @@ -34,82 +34,32 @@ class EnumStoreStringComparator : public vespalib::datastore::UniqueStoreStringC protected: using ParentType = vespalib::datastore::UniqueStoreStringComparator<IEnumStore::InternalIndex>; using DataStoreType = ParentType::DataStoreType; +private: using ParentType::get; - static int compare(const char* lhs, const char* rhs); - public: - EnumStoreStringComparator(const DataStoreType& data_store); + EnumStoreStringComparator(const DataStoreType& data_store) + : EnumStoreStringComparator(data_store, false) + {} + EnumStoreStringComparator(const DataStoreType& data_store, bool fold); /** * Creates a comparator using the given low-level data store and that uses the * given value during compare if the enum index is invalid. 
*/ - EnumStoreStringComparator(const DataStoreType& data_store, const char* fallback_value); - - static bool equal(const char* lhs, const char* rhs) { - return compare(lhs, rhs) == 0; - } - - bool less(const vespalib::datastore::EntryRef lhs, const vespalib::datastore::EntryRef rhs) const override { - return compare(get(lhs), get(rhs)) < 0; - } - bool equal(const vespalib::datastore::EntryRef lhs, const vespalib::datastore::EntryRef rhs) const override { - return compare(get(lhs), get(rhs)) == 0; - } -}; - + EnumStoreStringComparator(const DataStoreType& data_store, const char* fallback_value) + : EnumStoreStringComparator(data_store, false, fallback_value) + {} + EnumStoreStringComparator(const DataStoreType& data_store, bool fold, const char* fallback_value); + EnumStoreStringComparator(const DataStoreType& data_store, const char* fallback_value, bool prefix); -/** - * Less-than comparator used for folded-only comparing strings stored in an enum store. - * - * The input string values are first folded, then compared. - * There is NO fallback if they are equal. - */ -class EnumStoreFoldedStringComparator : public EnumStoreStringComparator { + bool less(const vespalib::datastore::EntryRef lhs, const vespalib::datastore::EntryRef rhs) const override; + bool equal(const vespalib::datastore::EntryRef lhs, const vespalib::datastore::EntryRef rhs) const override; private: - using ParentType = EnumStoreStringComparator; - - bool _prefix; - size_t _prefix_len; - inline bool use_prefix() const { return _prefix; } - static int compare_folded(const char* lhs, const char* rhs); - static int compare_folded_prefix(const char* lhs, const char* rhs, size_t prefix_len); - -public: - /** - * Creates a comparator using the given low-level data store. - * - * @param prefix whether we should perform prefix compare. 
- */ - EnumStoreFoldedStringComparator(const DataStoreType& data_store); - - /** - * Creates a comparator using the given low-level data store and that uses the - * given value during compare if the enum index is invalid. - * - * @param prefix whether we should perform prefix compare. - */ - EnumStoreFoldedStringComparator(const DataStoreType& data_store, - const char* fallback_value, bool prefix); - EnumStoreFoldedStringComparator(const DataStoreType& data_store, const char* fallback_value) - : EnumStoreFoldedStringComparator(data_store, fallback_value, false) - {} - - static bool equal(const char* lhs, const char* rhs) { - return compare_folded(lhs, rhs) == 0; - } - - bool less(const vespalib::datastore::EntryRef lhs, const vespalib::datastore::EntryRef rhs) const override { - if (use_prefix()) { - return compare_folded_prefix(get(lhs), get(rhs), _prefix_len) < 0; - } - return compare_folded(get(lhs), get(rhs)) < 0; - } - bool equal(const vespalib::datastore::EntryRef lhs, const vespalib::datastore::EntryRef rhs) const override { - return compare_folded(get(lhs), get(rhs)) == 0; - } + const bool _fold; + const bool _prefix; + uint32_t _prefix_len; }; extern template class EnumStoreComparator<int8_t>; diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.h b/searchlib/src/vespa/searchlib/attribute/enumstore.h index b95237fe176..ae674181824 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.h +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.h @@ -43,9 +43,7 @@ public: vespalib::datastore::UniqueStoreStringAllocator<InternalIndex>, vespalib::datastore::UniqueStoreAllocator<EntryT, InternalIndex>>; using UniqueStoreType = vespalib::datastore::UniqueStore<EntryT, InternalIndex, ComparatorType, AllocatorType>; - using FoldedComparatorType = std::conditional_t<std::is_same_v<EntryT, const char *>, - EnumStoreFoldedStringComparator, - ComparatorType>; + using EntryType = EntryT; using EnumStoreType = EnumStoreT<EntryT>; using EntryRef = 
vespalib::datastore::EntryRef; @@ -184,25 +182,13 @@ public: return ComparatorType(_store.get_data_store(), fallback_value); } - FoldedComparatorType make_folded_comparator() const { - return FoldedComparatorType(_store.get_data_store()); - } - - FoldedComparatorType - make_folded_comparator(const EntryType& fallback_value) const { - return FoldedComparatorType(_store.get_data_store(), fallback_value); - } - - template<bool prefix=true> - FoldedComparatorType - make_folded_comparator_prefix(const EntryType& fallback_value) const { - return FoldedComparatorType(_store.get_data_store(), fallback_value, prefix); + ComparatorType make_folded_comparator() const { + return ComparatorType(_store.get_data_store(), true); } void write_value(BufferWriter& writer, Index idx) const override; bool is_folded_change(Index idx1, Index idx2) const override; bool find_enum(EntryType value, IEnumStore::EnumHandle& e) const; - std::vector<IEnumStore::EnumHandle> find_folded_enums(EntryType value) const; Index insert(EntryType value); bool find_index(EntryType value, Index& idx) const; void free_unused_values() override; @@ -219,6 +205,24 @@ public: } std::unique_ptr<Enumerator> make_enumerator() const override; std::unique_ptr<vespalib::datastore::EntryComparator> allocate_comparator() const override; + + // Methods below are only relevant for strings, and are templated to only be instantiated on demand. 
+ template <typename Type> + ComparatorType + make_folded_comparator(const Type& fallback_value) const { + return ComparatorType(_store.get_data_store(), true, fallback_value); + } + template<typename Type> + ComparatorType + make_folded_comparator_prefix(const Type& fallback_value) const { + return ComparatorType(_store.get_data_store(), fallback_value, true); + } + template<typename Type> + std::vector<IEnumStore::EnumHandle> + find_folded_enums(Type value) const { + auto cmp = make_folded_comparator(value); + return _dict->find_matching_enums(cmp); + } }; std::unique_ptr<vespalib::datastore::IUniqueStoreDictionary> diff --git a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp index 80209c99c4d..de5973dd4a1 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumstore.hpp +++ b/searchlib/src/vespa/searchlib/attribute/enumstore.hpp @@ -75,7 +75,7 @@ EnumStoreT<EntryT>::EnumStoreT(bool has_postings, const search::DictionaryConfig _store.set_dictionary(make_enum_store_dictionary(*this, has_postings, dict_cfg, std::make_unique<ComparatorType>(_store.get_data_store()), (has_string_type() ? 
- std::make_unique<FoldedComparatorType>(_store.get_data_store()) : + std::make_unique<ComparatorType>(_store.get_data_store(), true) : std::unique_ptr<vespalib::datastore::EntryComparator>()))); _dict = static_cast<IEnumStoreDictionary*>(&_store.get_dictionary()); } @@ -169,14 +169,6 @@ EnumStoreT<EntryT>::find_enum(EntryType value, IEnumStore::EnumHandle& e) const } template <typename EntryT> -std::vector<IEnumStore::EnumHandle> -EnumStoreT<EntryT>::find_folded_enums(EntryType value) const -{ - auto cmp = make_folded_comparator(value); - return _dict->find_matching_enums(cmp); -} - -template <typename EntryT> bool EnumStoreT<EntryT>::find_index(EntryType value, Index& idx) const { diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp index 9eaa33355e9..5b57b577926 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.cpp @@ -213,7 +213,7 @@ handle_load_posting_lists(LoadedVector& loaded) LoadedValueType prev = value.getValue(); for (size_t i(0), m(loaded.size()); i < m; i++, loaded.next()) { value = loaded.read(); - if (FoldedComparatorType::equal_helper(prev, value.getValue())) { + if (ComparatorType::equal_helper(prev, value.getValue())) { // for single value attributes loaded[numDocs] is used // for default value but we don't want to add an // invalid docId to the posting list. 
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h index 8fb6d74319e..29c4846edd4 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/postinglistattribute.h @@ -84,7 +84,7 @@ public: using EntryRef = vespalib::datastore::EntryRef; using EnumIndex = IEnumStore::Index; using EnumStore = EnumStoreType; - using FoldedComparatorType = typename EnumStore::FoldedComparatorType; + using ComparatorType = typename EnumStore::ComparatorType; using LoadedEnumAttributeVector = attribute::LoadedEnumAttributeVector; using PostingList = typename Parent::PostingList; using PostingMap = typename Parent::PostingMap; |