diff options
author | Geir Storli <geirst@yahooinc.com> | 2023-11-08 15:54:51 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-08 15:54:51 +0100 |
commit | 48115cfd8f41001d78621efc6bd5ff552307b03d (patch) | |
tree | 5735a9c51ffd9b78b01ae0089dac6ac84ee6c219 | |
parent | e7b517e5705538cd90a72920c3edb0d36abb274e (diff) | |
parent | d4554fcd2eadd6d38492b0f7ecb65c0f627ebf8b (diff) |
Merge pull request #29279 from vespa-engine/toregge/fix-dfa-string-comparator-for-cased-attributes
Fix DfaStringComparator for cased attributes.
4 files changed, 57 insertions, 10 deletions
```diff
diff --git a/searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp b/searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp
index f99a4c4b3f9..c578f823dfd 100644
--- a/searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp
+++ b/searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp
@@ -267,14 +267,14 @@ TEST(EnumComparatorTest, require_that_cased_less_is_working)
     EXPECT_EQ((EnumIndexVector{e1, e4, e3, e2}), vec);
 }
 
-TEST(DfaStringComparatorTest, require_that_less_is_working)
+TEST(DfaStringComparatorTest, require_that_folded_less_is_working)
 {
     StringEnumStore es(false, DictionaryConfig::Type::BTREE);
     EnumIndex e1 = es.insert("Aa");
     EnumIndex e2 = es.insert("aa");
     EnumIndex e3 = es.insert("aB");
     auto aa_utf32 = as_utf32("aa");
-    DfaStringComparator cmp1(es.get_data_store(), aa_utf32);
+    DfaStringComparator cmp1(es.get_data_store(), aa_utf32, false);
     EXPECT_FALSE(cmp1.less(EnumIndex(), e1));
     EXPECT_FALSE(cmp1.less(EnumIndex(), e2));
     EXPECT_TRUE(cmp1.less(EnumIndex(), e3));
@@ -282,7 +282,7 @@ TEST(DfaStringComparatorTest, require_that_less_is_working)
     EXPECT_FALSE(cmp1.less(e2, EnumIndex()));
     EXPECT_FALSE(cmp1.less(e3, EnumIndex()));
     auto Aa_utf32 = as_utf32("Aa");
-    DfaStringComparator cmp2(es.get_data_store(), Aa_utf32);
+    DfaStringComparator cmp2(es.get_data_store(), Aa_utf32, false);
     EXPECT_TRUE(cmp2.less(EnumIndex(), e1));
     EXPECT_TRUE(cmp2.less(EnumIndex(), e2));
     EXPECT_TRUE(cmp2.less(EnumIndex(), e3));
@@ -291,6 +291,39 @@ TEST(DfaStringComparatorTest, require_that_less_is_working)
     EXPECT_FALSE(cmp2.less(e3, EnumIndex()));
 }
 
+TEST(DfaStringComparatorTest, require_that_cased_less_is_working)
+{
+    StringEnumStore es(false, DictionaryConfig(DictionaryConfig::Type::BTREE, DictionaryConfig::Match::CASED));
+    auto e1 = es.insert("Aa");
+    auto e2 = es.insert("aa");
+    auto e3 = es.insert("aB");
+    auto uaa_utf32 = as_utf32("Aa");
+    auto aa_utf32 = as_utf32("aa");
+    DfaStringComparator cmp1(es.get_data_store(), uaa_utf32, true);
+    DfaStringComparator cmp2(es.get_data_store(), aa_utf32, true);
+    EXPECT_FALSE(cmp1.less(e1, e1));
+    EXPECT_TRUE(cmp1.less(e1, e2));
+    EXPECT_TRUE(cmp1.less(e1, e3));
+    EXPECT_FALSE(cmp1.less(e2, e1));
+    EXPECT_FALSE(cmp1.less(e2, e2));
+    EXPECT_FALSE(cmp1.less(e2, e3));
+    EXPECT_FALSE(cmp1.less(e3, e1));
+    EXPECT_TRUE(cmp1.less(e3, e2));
+    EXPECT_FALSE(cmp1.less(e3, e3));
+    EXPECT_FALSE(cmp1.less(EnumIndex(), e1));
+    EXPECT_TRUE(cmp1.less(EnumIndex(), e2));
+    EXPECT_TRUE(cmp1.less(EnumIndex(), e3));
+    EXPECT_FALSE(cmp2.less(EnumIndex(), e1));
+    EXPECT_FALSE(cmp2.less(EnumIndex(), e2));
+    EXPECT_FALSE(cmp2.less(EnumIndex(), e3));
+    EXPECT_FALSE(cmp1.less(e1, EnumIndex()));
+    EXPECT_FALSE(cmp1.less(e2, EnumIndex()));
+    EXPECT_FALSE(cmp1.less(e3, EnumIndex()));
+    EXPECT_TRUE(cmp2.less(e1, EnumIndex()));
+    EXPECT_FALSE(cmp2.less(e2, EnumIndex()));
+    EXPECT_TRUE(cmp2.less(e3, EnumIndex()));
+}
+
 }
 
 GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
index a6467cfc91d..9de77085fae 100644
--- a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
+++ b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
@@ -62,7 +62,7 @@ public:
                 return true;
             }
         }
-        DfaStringComparator cmp(data_store, _successor);
+        DfaStringComparator cmp(data_store, _successor, _cased);
         itr.seek(vespalib::datastore::AtomicEntryRef(), cmp);
         return false;
     }
diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.cpp b/searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.cpp
index 762ad1e9042..9204510c0d2 100644
--- a/searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.cpp
@@ -5,9 +5,10 @@
 
 namespace search::attribute {
 
-DfaStringComparator::DfaStringComparator(const DataStoreType& data_store, const std::vector<uint32_t>& candidate)
+DfaStringComparator::DfaStringComparator(const DataStoreType& data_store, const std::vector<uint32_t>& candidate, bool cased)
     : ParentType(data_store),
-      _candidate(std::cref(candidate))
+      _candidate(std::cref(candidate)),
+      _cased(cased)
 {
 }
 
@@ -16,13 +17,25 @@ DfaStringComparator::less(const vespalib::datastore::EntryRef lhs, const vespali
 {
     if (lhs.valid()) {
         if (rhs.valid()) {
-            return FoldedStringCompare::compareFolded<true, true>(get(lhs), get(rhs)) < 0;
+            if (_cased) {
+                return FoldedStringCompare::compareFolded<false, false>(get(lhs), get(rhs)) < 0;
+            } else {
+                return FoldedStringCompare::compareFolded<true, true>(get(lhs), get(rhs)) < 0;
+            }
         } else {
-            return FoldedStringCompare::compareFolded<true, false>(get(lhs), _candidate) < 0;
+            if (_cased) {
+                return FoldedStringCompare::compareFolded<false, false>(get(lhs), _candidate) < 0;
+            } else {
+                return FoldedStringCompare::compareFolded<true, false>(get(lhs), _candidate) < 0;
+            }
         }
     } else {
         if (rhs.valid()) {
-            return FoldedStringCompare::compareFolded<false, true>(_candidate, get(rhs)) < 0;
+            if (_cased) {
+                return FoldedStringCompare::compareFolded<false, false>(_candidate, get(rhs)) < 0;
+            } else {
+                return FoldedStringCompare::compareFolded<false, true>(_candidate, get(rhs)) < 0;
+            }
         } else {
             return false;
         }
diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.h b/searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.h
index f9eaf281ff7..13a337f565c 100644
--- a/searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.h
+++ b/searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.h
@@ -26,9 +26,10 @@ public:
 private:
     using ParentType::get;
     std::reference_wrapper<const std::vector<uint32_t>> _candidate;
+    bool _cased;
 
 public:
-    DfaStringComparator(const DataStoreType& data_store, const std::vector<uint32_t>& candidate);
+    DfaStringComparator(const DataStoreType& data_store, const std::vector<uint32_t>& candidate, bool cased);
     bool less(const vespalib::datastore::EntryRef lhs, const vespalib::datastore::EntryRef rhs) const override;
 };
 
```