diff options
4 files changed, 35 insertions, 10 deletions
diff --git a/searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp b/searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp index 1c7b8b2b695..975a2918026 100644 --- a/searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp +++ b/searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp @@ -4,12 +4,16 @@ #include <vespa/searchlib/attribute/dfa_string_comparator.h> #include <vespa/vespalib/btree/btreeroot.h> #include <vespa/vespalib/gtest/gtest.h> +#include <vespa/vespalib/text/lowercase.h> +#include <vespa/vespalib/text/utf8.h> #include <vespa/searchlib/attribute/enumstore.hpp> using namespace vespalib::btree; using vespalib::datastore::AtomicEntryRef; +using vespalib::LowerCase; +using vespalib::Utf8ReaderForZTS; namespace vespalib::datastore { @@ -18,6 +22,22 @@ std::ostream & operator << (std::ostream& os, const EntryRef& ref) { } } + +namespace { + +std::vector<uint32_t> as_utf32(const char* key) +{ + std::vector<uint32_t> result; + Utf8ReaderForZTS reader(key); + while (reader.hasMore()) { + uint32_t code_point = reader.getChar(); + result.push_back(code_point); + } + return result; +} + +} + namespace search { using NumericEnumStore = EnumStoreT<int32_t>; @@ -253,14 +273,16 @@ TEST(DfaStringComparatorTest, require_that_less_is_working) EnumIndex e1 = es.insert("Aa"); EnumIndex e2 = es.insert("aa"); EnumIndex e3 = es.insert("aB"); - DfaStringComparator cmp1(es.get_data_store(), "aa"); + auto aa_utf32 = as_utf32("aa"); + DfaStringComparator cmp1(es.get_data_store(), aa_utf32); EXPECT_FALSE(cmp1.less(EnumIndex(), e1)); EXPECT_FALSE(cmp1.less(EnumIndex(), e2)); EXPECT_TRUE(cmp1.less(EnumIndex(), e3)); EXPECT_FALSE(cmp1.less(e1, EnumIndex())); EXPECT_FALSE(cmp1.less(e2, EnumIndex())); EXPECT_FALSE(cmp1.less(e3, EnumIndex())); - DfaStringComparator cmp2(es.get_data_store(), "Aa"); + auto Aa_utf32 = as_utf32("Aa"); + DfaStringComparator cmp2(es.get_data_store(), Aa_utf32); EXPECT_TRUE(cmp2.less(EnumIndex(), e1)); EXPECT_TRUE(cmp2.less(EnumIndex(), e2)); EXPECT_TRUE(cmp2.less(EnumIndex(), e3)); diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h index 6b873020994..fcba13f85a4 100644 --- a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h +++ b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h @@ -17,7 +17,7 @@ namespace search::attribute { class DfaFuzzyMatcher { private: vespalib::fuzzy::LevenshteinDfa _dfa; - std::string _successor; + std::vector<uint32_t> _successor; public: DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, bool cased, vespalib::fuzzy::LevenshteinDfa::DfaType dfa_type); @@ -29,7 +29,7 @@ public: if (match.matches()) { return true; } else { - DfaStringComparator cmp(data_store, _successor.c_str()); + DfaStringComparator cmp(data_store, _successor); itr.seek(vespalib::datastore::AtomicEntryRef(), cmp); return false; } diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.cpp b/searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.cpp index ddbe4fd110f..e9710553ef1 100644 --- a/searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.cpp +++ b/searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.cpp @@ -5,8 +5,9 @@ namespace search::attribute { -DfaStringComparator::DfaStringComparator(const DataStoreType& data_store, const char* candidate) - : ParentType(data_store, candidate) +DfaStringComparator::DfaStringComparator(const DataStoreType& data_store, const std::vector<uint32_t>& candidate) + : ParentType(data_store), + _candidate(std::cref(candidate)) { } @@ -17,13 +18,13 @@ DfaStringComparator::less(const vespalib::datastore::EntryRef lhs, const vespali if (rhs.valid()) { return FoldedStringCompare::compareFolded<true, true>(get(lhs), get(rhs)) < 0; } else { - return FoldedStringCompare::compareFolded<true, false>(get(lhs), get(rhs)) < 0; + return FoldedStringCompare::compareFolded<true, false>(get(lhs), _candidate) < 0; } } else { if (rhs.valid()) { - return FoldedStringCompare::compareFolded<false, true>(get(lhs), get(rhs)) < 0; + return FoldedStringCompare::compareFolded<false, true>(_candidate, get(rhs)) < 0; } else { - return FoldedStringCompare::compareFolded<false, false>(get(lhs), get(rhs)) < 0; + return false; } } } diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.h b/searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.h index 7ef14aa1719..8c80035c8fb 100644 --- a/searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.h +++ b/searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.h @@ -4,6 +4,7 @@ #include "i_enum_store.h" #include <vespa/vespalib/datastore/unique_store_string_comparator.h> +#include <functional> namespace search::attribute { @@ -24,9 +25,10 @@ public: using DataStoreType = ParentType::DataStoreType; private: using ParentType::get; + std::reference_wrapper<const std::vector<uint32_t>> _candidate; public: - DfaStringComparator(const DataStoreType& data_store, const char* candidate); + DfaStringComparator(const DataStoreType& data_store, const std::vector<uint32_t>& candidate); bool less(const vespalib::datastore::EntryRef lhs, const vespalib::datastore::EntryRef rhs) const override; }; |