about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Geir Storli <geirst@yahooinc.com> 2023-11-08 15:54:51 +0100
committer GitHub <noreply@github.com> 2023-11-08 15:54:51 +0100
commit 48115cfd8f41001d78621efc6bd5ff552307b03d (patch)
tree 5735a9c51ffd9b78b01ae0089dac6ac84ee6c219
parent e7b517e5705538cd90a72920c3edb0d36abb274e (diff)
parent d4554fcd2eadd6d38492b0f7ecb65c0f627ebf8b (diff)
Merge pull request #29279 from vespa-engine/toregge/fix-dfa-string-comparator-for-cased-attributes
Fix DfaStringComparator for cased attributes.
-rw-r--r-- searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp 39
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.cpp 23
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.h 3
4 files changed, 57 insertions, 10 deletions
diff --git a/searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp b/searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp
index f99a4c4b3f9..c578f823dfd 100644
--- a/searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp
+++ b/searchlib/src/tests/attribute/enum_comparator/enum_comparator_test.cpp
@@ -267,14 +267,14 @@ TEST(EnumComparatorTest, require_that_cased_less_is_working)
EXPECT_EQ((EnumIndexVector{e1, e4, e3, e2}), vec);
}
-TEST(DfaStringComparatorTest, require_that_less_is_working)
+TEST(DfaStringComparatorTest, require_that_folded_less_is_working)
{
StringEnumStore es(false, DictionaryConfig::Type::BTREE);
EnumIndex e1 = es.insert("Aa");
EnumIndex e2 = es.insert("aa");
EnumIndex e3 = es.insert("aB");
auto aa_utf32 = as_utf32("aa");
- DfaStringComparator cmp1(es.get_data_store(), aa_utf32);
+ DfaStringComparator cmp1(es.get_data_store(), aa_utf32, false);
EXPECT_FALSE(cmp1.less(EnumIndex(), e1));
EXPECT_FALSE(cmp1.less(EnumIndex(), e2));
EXPECT_TRUE(cmp1.less(EnumIndex(), e3));
@@ -282,7 +282,7 @@ TEST(DfaStringComparatorTest, require_that_less_is_working)
EXPECT_FALSE(cmp1.less(e2, EnumIndex()));
EXPECT_FALSE(cmp1.less(e3, EnumIndex()));
auto Aa_utf32 = as_utf32("Aa");
- DfaStringComparator cmp2(es.get_data_store(), Aa_utf32);
+ DfaStringComparator cmp2(es.get_data_store(), Aa_utf32, false);
EXPECT_TRUE(cmp2.less(EnumIndex(), e1));
EXPECT_TRUE(cmp2.less(EnumIndex(), e2));
EXPECT_TRUE(cmp2.less(EnumIndex(), e3));
@@ -291,6 +291,39 @@ TEST(DfaStringComparatorTest, require_that_less_is_working)
EXPECT_FALSE(cmp2.less(e3, EnumIndex()));
}
+TEST(DfaStringComparatorTest, require_that_cased_less_is_working)
+{
+ StringEnumStore es(false, DictionaryConfig(DictionaryConfig::Type::BTREE, DictionaryConfig::Match::CASED));
+ auto e1 = es.insert("Aa");
+ auto e2 = es.insert("aa");
+ auto e3 = es.insert("aB");
+ auto uaa_utf32 = as_utf32("Aa");
+ auto aa_utf32 = as_utf32("aa");
+ DfaStringComparator cmp1(es.get_data_store(), uaa_utf32, true);
+ DfaStringComparator cmp2(es.get_data_store(), aa_utf32, true);
+ EXPECT_FALSE(cmp1.less(e1, e1));
+ EXPECT_TRUE(cmp1.less(e1, e2));
+ EXPECT_TRUE(cmp1.less(e1, e3));
+ EXPECT_FALSE(cmp1.less(e2, e1));
+ EXPECT_FALSE(cmp1.less(e2, e2));
+ EXPECT_FALSE(cmp1.less(e2, e3));
+ EXPECT_FALSE(cmp1.less(e3, e1));
+ EXPECT_TRUE(cmp1.less(e3, e2));
+ EXPECT_FALSE(cmp1.less(e3, e3));
+ EXPECT_FALSE(cmp1.less(EnumIndex(), e1));
+ EXPECT_TRUE(cmp1.less(EnumIndex(), e2));
+ EXPECT_TRUE(cmp1.less(EnumIndex(), e3));
+ EXPECT_FALSE(cmp2.less(EnumIndex(), e1));
+ EXPECT_FALSE(cmp2.less(EnumIndex(), e2));
+ EXPECT_FALSE(cmp2.less(EnumIndex(), e3));
+ EXPECT_FALSE(cmp1.less(e1, EnumIndex()));
+ EXPECT_FALSE(cmp1.less(e2, EnumIndex()));
+ EXPECT_FALSE(cmp1.less(e3, EnumIndex()));
+ EXPECT_TRUE(cmp2.less(e1, EnumIndex()));
+ EXPECT_FALSE(cmp2.less(e2, EnumIndex()));
+ EXPECT_TRUE(cmp2.less(e3, EnumIndex()));
+}
+
}
GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
index a6467cfc91d..9de77085fae 100644
--- a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
+++ b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
@@ -62,7 +62,7 @@ public:
return true;
}
}
- DfaStringComparator cmp(data_store, _successor);
+ DfaStringComparator cmp(data_store, _successor, _cased);
itr.seek(vespalib::datastore::AtomicEntryRef(), cmp);
return false;
}
diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.cpp b/searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.cpp
index 762ad1e9042..9204510c0d2 100644
--- a/searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.cpp
@@ -5,9 +5,10 @@
namespace search::attribute {
-DfaStringComparator::DfaStringComparator(const DataStoreType& data_store, const std::vector<uint32_t>& candidate)
+DfaStringComparator::DfaStringComparator(const DataStoreType& data_store, const std::vector<uint32_t>& candidate, bool cased)
: ParentType(data_store),
- _candidate(std::cref(candidate))
+ _candidate(std::cref(candidate)),
+ _cased(cased)
{
}
@@ -16,13 +17,25 @@ DfaStringComparator::less(const vespalib::datastore::EntryRef lhs, const vespali
{
if (lhs.valid()) {
if (rhs.valid()) {
- return FoldedStringCompare::compareFolded<true, true>(get(lhs), get(rhs)) < 0;
+ if (_cased) {
+ return FoldedStringCompare::compareFolded<false, false>(get(lhs), get(rhs)) < 0;
+ } else {
+ return FoldedStringCompare::compareFolded<true, true>(get(lhs), get(rhs)) < 0;
+ }
} else {
- return FoldedStringCompare::compareFolded<true, false>(get(lhs), _candidate) < 0;
+ if (_cased) {
+ return FoldedStringCompare::compareFolded<false, false>(get(lhs), _candidate) < 0;
+ } else {
+ return FoldedStringCompare::compareFolded<true, false>(get(lhs), _candidate) < 0;
+ }
}
} else {
if (rhs.valid()) {
- return FoldedStringCompare::compareFolded<false, true>(_candidate, get(rhs)) < 0;
+ if (_cased) {
+ return FoldedStringCompare::compareFolded<false, false>(_candidate, get(rhs)) < 0;
+ } else {
+ return FoldedStringCompare::compareFolded<false, true>(_candidate, get(rhs)) < 0;
+ }
} else {
return false;
}
diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.h b/searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.h
index f9eaf281ff7..13a337f565c 100644
--- a/searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.h
+++ b/searchlib/src/vespa/searchlib/attribute/dfa_string_comparator.h
@@ -26,9 +26,10 @@ public:
private:
using ParentType::get;
std::reference_wrapper<const std::vector<uint32_t>> _candidate;
+ bool _cased;
public:
- DfaStringComparator(const DataStoreType& data_store, const std::vector<uint32_t>& candidate);
+ DfaStringComparator(const DataStoreType& data_store, const std::vector<uint32_t>& candidate, bool cased);
bool less(const vespalib::datastore::EntryRef lhs, const vespalib::datastore::EntryRef rhs) const override;
};