summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@online.no>2023-09-15 11:44:42 +0200
committerTor Egge <Tor.Egge@online.no>2023-09-15 11:44:42 +0200
commit335116ff44844c2130f1e026a9f36631176e107b (patch)
tree7be16f0dd2dbb0da54e139805db598e93d6003f0 /searchlib
parenta4360988d590db4a568b71bd6d2bf7f5c81a5a54 (diff)
Control folding for FoldedStringCompare::comareFolded using template
arguments.
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/CMakeLists.txt1
-rw-r--r--searchlib/src/tests/util/folded_string_compare/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/util/folded_string_compare/folded_string_compare_test.cpp130
-rw-r--r--searchlib/src/vespa/searchlib/attribute/enumcomparator.cpp6
-rw-r--r--searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp38
-rw-r--r--searchlib/src/vespa/searchlib/util/foldedstringcompare.h34
6 files changed, 190 insertions, 28 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index c0fdf03d262..b83c5912680 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -240,6 +240,7 @@ vespa_define_module(
src/tests/url
src/tests/util
src/tests/util/bufferwriter
+ src/tests/util/folded_string_compare
src/tests/util/searchable_stats
src/tests/util/slime_output_raw_buf_adapter
src/tests/vespa-fileheader-inspect
diff --git a/searchlib/src/tests/util/folded_string_compare/CMakeLists.txt b/searchlib/src/tests/util/folded_string_compare/CMakeLists.txt
new file mode 100644
index 00000000000..54058941c3a
--- /dev/null
+++ b/searchlib/src/tests/util/folded_string_compare/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_folded_string_compare_test_app TEST
+ SOURCES
+ folded_string_compare_test.cpp
+ DEPENDS
+ searchlib
+ GTest::GTest
+)
+vespa_add_test(NAME searchlib_folded_string_compare_test_app COMMAND searchlib_folded_string_compare_test_app)
diff --git a/searchlib/src/tests/util/folded_string_compare/folded_string_compare_test.cpp b/searchlib/src/tests/util/folded_string_compare/folded_string_compare_test.cpp
new file mode 100644
index 00000000000..06f687330a3
--- /dev/null
+++ b/searchlib/src/tests/util/folded_string_compare/folded_string_compare_test.cpp
@@ -0,0 +1,130 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchlib/util/foldedstringcompare.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <vespa/vespalib/stllike/string.h>
+
+using search::FoldedStringCompare;
+
+using IntVec = std::vector<int>;
+using StringVec = std::vector<vespalib::string>;
+
+class FoldedStringCompareTest : public ::testing::Test
+{
+protected:
+ FoldedStringCompareTest();
+ ~FoldedStringCompareTest() override;
+
+ static int normalize_ret(int ret) {
+ return (ret == 0) ? 0 : ((ret < 0) ? -1 : 1);
+ }
+
+ template <bool fold_lhs, bool fold_rhs>
+ int
+ compare_folded_helper(const vespalib::string& lhs, const vespalib::string& rhs)
+ {
+ int ret = FoldedStringCompare::compareFolded<fold_lhs, fold_rhs>(lhs.c_str(), rhs.c_str());
+ EXPECT_EQ(-ret, (FoldedStringCompare::compareFolded<fold_rhs, fold_lhs>(rhs.c_str(), lhs.c_str())));
+ return ret;
+ }
+
+ IntVec
+ compare_folded(const vespalib::string& lhs, const vespalib::string& rhs)
+ {
+ IntVec result;
+ result.emplace_back(compare_folded_helper<false, false>(lhs, rhs));
+ result.emplace_back(compare_folded_helper<false, true>(lhs, rhs));
+ result.emplace_back(compare_folded_helper<true, false>(lhs, rhs));
+ result.emplace_back(compare_folded_helper<true, true>(lhs, rhs));
+ return result;
+ }
+
+ template <bool fold_lhs, bool fold_rhs>
+ int
+ compare_folded_prefix_helper(const vespalib::string& lhs, const vespalib::string& rhs, size_t prefix_len)
+ {
+ int ret = FoldedStringCompare::compareFoldedPrefix<fold_lhs, fold_rhs>(lhs.c_str(), rhs.c_str(), prefix_len);
+ EXPECT_EQ(-ret, (FoldedStringCompare::compareFoldedPrefix<fold_rhs, fold_lhs>(rhs.c_str(), lhs.c_str(), prefix_len)));
+ return ret;
+ }
+
+ IntVec
+ compare_folded_prefix(const vespalib::string& lhs, const vespalib::string& rhs, size_t prefix_len)
+ {
+ IntVec result;
+ result.emplace_back(compare_folded_prefix_helper<false, false>(lhs, rhs, prefix_len));
+ result.emplace_back(compare_folded_prefix_helper<false, true>(lhs, rhs, prefix_len));
+ result.emplace_back(compare_folded_prefix_helper<true, false>(lhs, rhs, prefix_len));
+ result.emplace_back(compare_folded_prefix_helper<true, true>(lhs, rhs, prefix_len));
+ return result;
+ }
+
+ int
+ compare(const vespalib::string& lhs, const vespalib::string& rhs) {
+ int ret = normalize_ret(FoldedStringCompare::compare(lhs.c_str(), rhs.c_str()));
+ EXPECT_EQ(-ret, normalize_ret(FoldedStringCompare::compare(rhs.c_str(), lhs.c_str())));
+ return ret;
+ }
+
+ int
+ compare_prefix(const vespalib::string& lhs, const vespalib::string& rhs, size_t prefix_len) {
+ int ret = normalize_ret(FoldedStringCompare::comparePrefix(lhs.c_str(), rhs.c_str(), prefix_len));
+ EXPECT_EQ(-ret, normalize_ret(FoldedStringCompare::comparePrefix(rhs.c_str(), lhs.c_str(), prefix_len)));
+ return ret;
+ }
+};
+
+FoldedStringCompareTest::FoldedStringCompareTest()
+ : ::testing::Test()
+{
+}
+
+FoldedStringCompareTest::~FoldedStringCompareTest() = default;
+
+TEST_F(FoldedStringCompareTest, compare_folded)
+{
+ EXPECT_EQ((IntVec{0, 0, 0, 0}), compare_folded("bar", "bar"));
+ EXPECT_EQ((IntVec{1, 0, 1, 0}), compare_folded("bar", "BAR"));
+ EXPECT_EQ((IntVec{-1, -1, 0, 0}), compare_folded("BAR", "bar"));
+ EXPECT_EQ((IntVec{0, -1, 1, 0}), compare_folded("BAR", "BAR"));
+ EXPECT_EQ((IntVec{1, -1, 1, -1}), compare_folded("bar", "FOO"));
+ EXPECT_EQ((IntVec{-1, -1, -1, -1}), compare_folded("BAR", "foo"));
+}
+
+TEST_F(FoldedStringCompareTest, compare_folded_prefix)
+{
+ EXPECT_EQ((IntVec{0, 0, 0, 0}), compare_folded_prefix("bar", "bar", 100));
+ EXPECT_EQ((IntVec{1, 0, 1, 0}), compare_folded_prefix("bar", "BAR", 100));
+ EXPECT_EQ((IntVec{-1, -1, 0, 0}), compare_folded_prefix("BAR", "bar", 100));
+ EXPECT_EQ((IntVec{0, -1, 1, 0}), compare_folded_prefix("BAR", "BAR", 100));
+ EXPECT_EQ((IntVec{1, -1, 1, -1}), compare_folded_prefix("bar", "FOO", 100));
+ EXPECT_EQ((IntVec{-1, -1, -1, -1}), compare_folded_prefix("BAR", "foo", 100));
+ EXPECT_EQ((IntVec{1, 0, 1, 0}), compare_folded_prefix("ba", "BAR", 2));
+ EXPECT_EQ((IntVec{-1, -1, 0, 0}), compare_folded_prefix("BA", "bar", 2));
+ EXPECT_EQ((IntVec{1, -1, 1, -1}), compare_folded_prefix("ba", "FOO", 2));
+ EXPECT_EQ((IntVec{-1, -1, -1, -1}), compare_folded_prefix("BA", "foo", 2));
+}
+
+TEST_F(FoldedStringCompareTest, compare)
+{
+ EXPECT_EQ(0, compare("bar", "bar"));
+ EXPECT_EQ(1, compare("bar", "BAR"));
+ EXPECT_EQ(0, compare("BAR", "BAR"));
+ EXPECT_EQ(1, compare("FOO", "bar"));
+ EXPECT_EQ(-1, compare("BAR", "foo"));
+ StringVec words{"foo", "FOO", "bar", "BAR"};
+ std::sort(words.begin(), words.end(), [this](auto& lhs, auto& rhs) { return compare(lhs, rhs) < 0; });
+ EXPECT_EQ((StringVec{"BAR", "bar", "FOO", "foo"}), words);
+}
+
+TEST_F(FoldedStringCompareTest, compare_prefix)
+{
+ EXPECT_EQ(1, compare_prefix("ba", "BAR", 2));
+ EXPECT_EQ(-1, compare_prefix("BA", "bar", 2));
+ EXPECT_EQ(-1, compare_prefix("ba", "FOO", 2));
+ EXPECT_EQ(-1, compare_prefix("BA", "foo", 2));
+ // Verify that we don't mix number of bytes versus number of code points
+ EXPECT_EQ(1, compare_prefix("å", "Å", 1));
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/attribute/enumcomparator.cpp b/searchlib/src/vespa/searchlib/attribute/enumcomparator.cpp
index 5a5702f50e8..24b562c5ec4 100644
--- a/searchlib/src/vespa/searchlib/attribute/enumcomparator.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/enumcomparator.cpp
@@ -43,8 +43,8 @@ bool
EnumStoreStringComparator::less(const vespalib::datastore::EntryRef lhs, const vespalib::datastore::EntryRef rhs) const {
return _fold
? (use_prefix()
- ? (FoldedStringCompare::compareFoldedPrefix(get(lhs), get(rhs), _prefix_len) < 0)
- : (FoldedStringCompare::compareFolded(get(lhs), get(rhs)) < 0))
+ ? (FoldedStringCompare::compareFoldedPrefix<true, true>(get(lhs), get(rhs), _prefix_len) < 0)
+ : (FoldedStringCompare::compareFolded<true, true>(get(lhs), get(rhs)) < 0))
: (use_prefix()
? (FoldedStringCompare::comparePrefix(get(lhs), get(rhs), _prefix_len) < 0)
: (FoldedStringCompare::compare(get(lhs), get(rhs)) < 0));
@@ -54,7 +54,7 @@ EnumStoreStringComparator::less(const vespalib::datastore::EntryRef lhs, const v
bool
EnumStoreStringComparator::equal(const vespalib::datastore::EntryRef lhs, const vespalib::datastore::EntryRef rhs) const {
return _fold
- ? (FoldedStringCompare::compareFolded(get(lhs), get(rhs)) == 0)
+ ? (FoldedStringCompare::compareFolded<true, true>(get(lhs), get(rhs)) == 0)
: (FoldedStringCompare::compare(get(lhs), get(rhs)) == 0);
}
diff --git a/searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp b/searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp
index 86da51fc9b6..a61a12cebf6 100644
--- a/searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp
+++ b/searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp
@@ -15,6 +15,7 @@ size(const char *key)
return Utf8ReaderForZTS::countChars(key);
}
+template <bool fold_lhs, bool fold_rhs>
int
FoldedStringCompare::
compareFolded(const char *key, const char *okey)
@@ -23,8 +24,8 @@ compareFolded(const char *key, const char *okey)
Utf8ReaderForZTS oreader(okey);
for (;;) {
- uint32_t kval = LowerCase::convert(kreader.getChar());
- uint32_t oval = LowerCase::convert(oreader.getChar());
+ uint32_t kval = fold_lhs ? LowerCase::convert(kreader.getChar()) : kreader.getChar();
+ uint32_t oval = fold_rhs ? LowerCase::convert(oreader.getChar()) : oreader.getChar();
if (kval != oval) {
if (kval < oval) {
@@ -40,6 +41,7 @@ compareFolded(const char *key, const char *okey)
}
+template <bool fold_lhs, bool fold_rhs>
int
FoldedStringCompare::
compareFoldedPrefix(const char *key, const char *okey, size_t prefixLen)
@@ -48,8 +50,8 @@ compareFoldedPrefix(const char *key, const char *okey, size_t prefixLen)
Utf8ReaderForZTS oreader(okey);
for (size_t j = 0; j < prefixLen; ++j ) {
- uint32_t kval = LowerCase::convert(kreader.getChar());
- uint32_t oval = LowerCase::convert(oreader.getChar());
+ uint32_t kval = fold_lhs ? LowerCase::convert(kreader.getChar()) : kreader.getChar();
+ uint32_t oval = fold_rhs ? LowerCase::convert(oreader.getChar()) : oreader.getChar();
if (kval != oval) {
if (kval < oval) {
@@ -58,7 +60,9 @@ compareFoldedPrefix(const char *key, const char *okey, size_t prefixLen)
return 1;
}
}
- if (kval == 0) return 0;
+ if (kval == 0) {
+ return 0;
+ }
}
// reached end of prefix
return 0;
@@ -68,9 +72,11 @@ int
FoldedStringCompare::
comparePrefix(const char *key, const char *okey, size_t prefixLen)
{
- int res = compareFoldedPrefix(key, okey, prefixLen);
- if (res != 0) return res;
- return strncmp(key, okey, prefixLen);
+ int res = compareFoldedPrefix<true, true>(key, okey, prefixLen);
+ if (res != 0) {
+ return res;
+ }
+ return compareFoldedPrefix<false, false>(key, okey, prefixLen);
}
@@ -78,10 +84,22 @@ int
FoldedStringCompare::
compare(const char *key, const char *okey)
{
- int res = compareFolded(key, okey);
- if (res != 0) return res;
+ int res = compareFolded<true, true>(key, okey);
+ if (res != 0) {
+ return res;
+ }
return strcmp(key, okey);
}
+template int FoldedStringCompare::compareFolded<false, false>(const char* key, const char* okey);
+template int FoldedStringCompare::compareFolded<false, true>(const char* key, const char* okey);
+template int FoldedStringCompare::compareFolded<true, false>(const char* key, const char* okey);
+template int FoldedStringCompare::compareFolded<true, true>(const char* key, const char* okey);
+
+template int FoldedStringCompare::compareFoldedPrefix<false, false>(const char* key, const char* okey, size_t prefixLen);
+template int FoldedStringCompare::compareFoldedPrefix<false, true>(const char* key, const char* okey, size_t prefixLen);
+template int FoldedStringCompare::compareFoldedPrefix<true, false>(const char* key, const char* okey, size_t prefixLen);
+template int FoldedStringCompare::compareFoldedPrefix<true, true>(const char* key, const char* okey, size_t prefixLen);
+
} // namespace search
diff --git a/searchlib/src/vespa/searchlib/util/foldedstringcompare.h b/searchlib/src/vespa/searchlib/util/foldedstringcompare.h
index 30e7ec93345..fb842e190e2 100644
--- a/searchlib/src/vespa/searchlib/util/foldedstringcompare.h
+++ b/searchlib/src/vespa/searchlib/util/foldedstringcompare.h
@@ -10,50 +10,54 @@ class FoldedStringCompare
{
public:
/**
- * count number of UCS-4 characters in utf8 string
+ * count number of UCS-4 characters in UTF-8 string
*
- * @param key NUL terminated utf8 string
- * @return integer number of symbols in utf8 string before NUL
+ * @param key NUL terminated UTF-8 string
+ * @return integer number of symbols in UTF-8 string before NUL
*/
static size_t size(const char *key);
/**
- * Compare utf8 key with utf8 other key after folding both
+ * Compare UTF-8 key with UTF-8 other key after folding both
*
- * @param key NUL terminated utf8 string
- * @param okey NUL terminated utf8 string
+ * @param key NUL terminated UTF-8 string
+ * @param okey NUL terminated UTF-8 string
* @return integer -1 if key < okey, 0 if key == okey, 1 if key > okey
**/
+ template <bool fold_lhs, bool fold_rhs>
static int compareFolded(const char *key, const char *okey);
/**
- * Compare utf8 key with utf8 other key after folding both.
+ * Compare UTF-8 key with UTF-8 other key after folding both.
*
- * @param key NUL terminated utf8 string
- * @param okey NUL terminated utf8 string
+ * @param key NUL terminated UTF-8 string
+ * @param okey NUL terminated UTF-8 string
* @param prefixLen max number of symbols to compare before
* considering keys identical.
*
* @return integer -1 if key < okey, 0 if key == okey, 1 if key > okey
*/
+ template <bool fold_lhs, bool fold_rhs>
static int compareFoldedPrefix(const char *key, const char *okey, size_t prefixLen);
/*
- * Compare utf8 key with utf8 other key after folding both, if
+ * Compare UTF-8 key with UTF-8 other key after folding both, if
* they seem equal then fall back to comparing without folding.
*
- * @param key NUL terminated utf8 string
- * @param okey NUL terminated utf8 string
+ * @param key NUL terminated UTF-8 string
+ * @param okey NUL terminated UTF-8 string
* @return integer -1 if key < okey, 0 if key == okey, 1 if key > okey
*/
static int compare(const char *key, const char *okey);
/*
- * Compare utf8 key with utf8 other key after folding both for prefix, if
+ * Compare UTF-8 key with UTF-8 other key after folding both for prefix, if
* they seem equal then fall back to comparing without folding.
*
- * @param key NUL terminated utf8 string
- * @param okey NUL terminated utf8 string
+ * @param key NUL terminated UTF-8 string
+ * @param okey NUL terminated UTF-8 string
+ * @param prefixLen max number of symbols to compare before
+ * considering keys identical.
* @return integer -1 if key < okey, 0 if key == okey, 1 if key > okey
*/
static int comparePrefix(const char *key, const char *okey, size_t prefixLen);