diff options
Diffstat (limited to 'searchlib')
6 files changed, 190 insertions, 28 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index c0fdf03d262..b83c5912680 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -240,6 +240,7 @@ vespa_define_module( src/tests/url src/tests/util src/tests/util/bufferwriter + src/tests/util/folded_string_compare src/tests/util/searchable_stats src/tests/util/slime_output_raw_buf_adapter src/tests/vespa-fileheader-inspect diff --git a/searchlib/src/tests/util/folded_string_compare/CMakeLists.txt b/searchlib/src/tests/util/folded_string_compare/CMakeLists.txt new file mode 100644 index 00000000000..54058941c3a --- /dev/null +++ b/searchlib/src/tests/util/folded_string_compare/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_folded_string_compare_test_app TEST + SOURCES + folded_string_compare_test.cpp + DEPENDS + searchlib + GTest::GTest +) +vespa_add_test(NAME searchlib_folded_string_compare_test_app COMMAND searchlib_folded_string_compare_test_app) diff --git a/searchlib/src/tests/util/folded_string_compare/folded_string_compare_test.cpp b/searchlib/src/tests/util/folded_string_compare/folded_string_compare_test.cpp new file mode 100644 index 00000000000..06f687330a3 --- /dev/null +++ b/searchlib/src/tests/util/folded_string_compare/folded_string_compare_test.cpp @@ -0,0 +1,130 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchlib/util/foldedstringcompare.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <vespa/vespalib/stllike/string.h> + +using search::FoldedStringCompare; + +using IntVec = std::vector<int>; +using StringVec = std::vector<vespalib::string>; + +class FoldedStringCompareTest : public ::testing::Test +{ +protected: + FoldedStringCompareTest(); + ~FoldedStringCompareTest() override; + + static int normalize_ret(int ret) { + return (ret == 0) ? 0 : ((ret < 0) ? -1 : 1); + } + + template <bool fold_lhs, bool fold_rhs> + int + compare_folded_helper(const vespalib::string& lhs, const vespalib::string& rhs) + { + int ret = FoldedStringCompare::compareFolded<fold_lhs, fold_rhs>(lhs.c_str(), rhs.c_str()); + EXPECT_EQ(-ret, (FoldedStringCompare::compareFolded<fold_rhs, fold_lhs>(rhs.c_str(), lhs.c_str()))); + return ret; + } + + IntVec + compare_folded(const vespalib::string& lhs, const vespalib::string& rhs) + { + IntVec result; + result.emplace_back(compare_folded_helper<false, false>(lhs, rhs)); + result.emplace_back(compare_folded_helper<false, true>(lhs, rhs)); + result.emplace_back(compare_folded_helper<true, false>(lhs, rhs)); + result.emplace_back(compare_folded_helper<true, true>(lhs, rhs)); + return result; + } + + template <bool fold_lhs, bool fold_rhs> + int + compare_folded_prefix_helper(const vespalib::string& lhs, const vespalib::string& rhs, size_t prefix_len) + { + int ret = FoldedStringCompare::compareFoldedPrefix<fold_lhs, fold_rhs>(lhs.c_str(), rhs.c_str(), prefix_len); + EXPECT_EQ(-ret, (FoldedStringCompare::compareFoldedPrefix<fold_rhs, fold_lhs>(rhs.c_str(), lhs.c_str(), prefix_len))); + return ret; + } + + IntVec + compare_folded_prefix(const vespalib::string& lhs, const vespalib::string& rhs, size_t prefix_len) + { + IntVec result; + result.emplace_back(compare_folded_prefix_helper<false, false>(lhs, rhs, prefix_len)); + result.emplace_back(compare_folded_prefix_helper<false, true>(lhs, rhs, prefix_len)); + result.emplace_back(compare_folded_prefix_helper<true, false>(lhs, rhs, prefix_len)); + result.emplace_back(compare_folded_prefix_helper<true, true>(lhs, rhs, prefix_len)); + return result; + } + + int + compare(const vespalib::string& lhs, const vespalib::string& rhs) { + int ret = normalize_ret(FoldedStringCompare::compare(lhs.c_str(), rhs.c_str())); + EXPECT_EQ(-ret, normalize_ret(FoldedStringCompare::compare(rhs.c_str(), lhs.c_str()))); + return ret; + } + + int + compare_prefix(const vespalib::string& lhs, const vespalib::string& rhs, size_t prefix_len) { + int ret = normalize_ret(FoldedStringCompare::comparePrefix(lhs.c_str(), rhs.c_str(), prefix_len)); + EXPECT_EQ(-ret, normalize_ret(FoldedStringCompare::comparePrefix(rhs.c_str(), lhs.c_str(), prefix_len))); + return ret; + } +}; + +FoldedStringCompareTest::FoldedStringCompareTest() + : ::testing::Test() +{ +} + +FoldedStringCompareTest::~FoldedStringCompareTest() = default; + +TEST_F(FoldedStringCompareTest, compare_folded) +{ + EXPECT_EQ((IntVec{0, 0, 0, 0}), compare_folded("bar", "bar")); + EXPECT_EQ((IntVec{1, 0, 1, 0}), compare_folded("bar", "BAR")); + EXPECT_EQ((IntVec{-1, -1, 0, 0}), compare_folded("BAR", "bar")); + EXPECT_EQ((IntVec{0, -1, 1, 0}), compare_folded("BAR", "BAR")); + EXPECT_EQ((IntVec{1, -1, 1, -1}), compare_folded("bar", "FOO")); + EXPECT_EQ((IntVec{-1, -1, -1, -1}), compare_folded("BAR", "foo")); +} + +TEST_F(FoldedStringCompareTest, compare_folded_prefix) +{ + EXPECT_EQ((IntVec{0, 0, 0, 0}), compare_folded_prefix("bar", "bar", 100)); + EXPECT_EQ((IntVec{1, 0, 1, 0}), compare_folded_prefix("bar", "BAR", 100)); + EXPECT_EQ((IntVec{-1, -1, 0, 0}), compare_folded_prefix("BAR", "bar", 100)); + EXPECT_EQ((IntVec{0, -1, 1, 0}), compare_folded_prefix("BAR", "BAR", 100)); + EXPECT_EQ((IntVec{1, -1, 1, -1}), compare_folded_prefix("bar", "FOO", 100)); + EXPECT_EQ((IntVec{-1, -1, -1, -1}), compare_folded_prefix("BAR", "foo", 100)); + EXPECT_EQ((IntVec{1, 0, 1, 0}), compare_folded_prefix("ba", "BAR", 2)); + EXPECT_EQ((IntVec{-1, -1, 0, 0}), compare_folded_prefix("BA", "bar", 2)); + EXPECT_EQ((IntVec{1, -1, 1, -1}), compare_folded_prefix("ba", "FOO", 2)); + EXPECT_EQ((IntVec{-1, -1, -1, -1}), compare_folded_prefix("BA", "foo", 2)); +} + +TEST_F(FoldedStringCompareTest, compare) +{ + EXPECT_EQ(0, compare("bar", "bar")); + EXPECT_EQ(1, compare("bar", "BAR")); + EXPECT_EQ(0, compare("BAR", "BAR")); + EXPECT_EQ(1, compare("FOO", "bar")); + EXPECT_EQ(-1, compare("BAR", "foo")); + StringVec words{"foo", "FOO", "bar", "BAR"}; + std::sort(words.begin(), words.end(), [this](auto& lhs, auto& rhs) { return compare(lhs, rhs) < 0; }); + EXPECT_EQ((StringVec{"BAR", "bar", "FOO", "foo"}), words); +} + +TEST_F(FoldedStringCompareTest, compare_prefix) +{ + EXPECT_EQ(1, compare_prefix("ba", "BAR", 2)); + EXPECT_EQ(-1, compare_prefix("BA", "bar", 2)); + EXPECT_EQ(-1, compare_prefix("ba", "FOO", 2)); + EXPECT_EQ(-1, compare_prefix("BA", "foo", 2)); + // Verify that we don't mix number of bytes versus number of code points + EXPECT_EQ(1, compare_prefix("å", "Å", 1)); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/vespa/searchlib/attribute/enumcomparator.cpp b/searchlib/src/vespa/searchlib/attribute/enumcomparator.cpp index 5a5702f50e8..24b562c5ec4 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumcomparator.cpp +++ b/searchlib/src/vespa/searchlib/attribute/enumcomparator.cpp @@ -43,8 +43,8 @@ bool EnumStoreStringComparator::less(const vespalib::datastore::EntryRef lhs, const vespalib::datastore::EntryRef rhs) const { return _fold ? (use_prefix() - ? (FoldedStringCompare::compareFoldedPrefix(get(lhs), get(rhs), _prefix_len) < 0) - : (FoldedStringCompare::compareFolded(get(lhs), get(rhs)) < 0)) + ? (FoldedStringCompare::compareFoldedPrefix<true, true>(get(lhs), get(rhs), _prefix_len) < 0) + : (FoldedStringCompare::compareFolded<true, true>(get(lhs), get(rhs)) < 0)) : (use_prefix() ? (FoldedStringCompare::comparePrefix(get(lhs), get(rhs), _prefix_len) < 0) : (FoldedStringCompare::compare(get(lhs), get(rhs)) < 0)); @@ -54,7 +54,7 @@ EnumStoreStringComparator::less(const vespalib::datastore::EntryRef lhs, const v bool EnumStoreStringComparator::equal(const vespalib::datastore::EntryRef lhs, const vespalib::datastore::EntryRef rhs) const { return _fold - ? (FoldedStringCompare::compareFolded(get(lhs), get(rhs)) == 0) + ? (FoldedStringCompare::compareFolded<true, true>(get(lhs), get(rhs)) == 0) : (FoldedStringCompare::compare(get(lhs), get(rhs)) == 0); } diff --git a/searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp b/searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp index 86da51fc9b6..a61a12cebf6 100644 --- a/searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp +++ b/searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp @@ -15,6 +15,7 @@ size(const char *key) return Utf8ReaderForZTS::countChars(key); } +template <bool fold_lhs, bool fold_rhs> int FoldedStringCompare:: compareFolded(const char *key, const char *okey) @@ -23,8 +24,8 @@ compareFolded(const char *key, const char *okey) Utf8ReaderForZTS oreader(okey); for (;;) { - uint32_t kval = LowerCase::convert(kreader.getChar()); - uint32_t oval = LowerCase::convert(oreader.getChar()); + uint32_t kval = fold_lhs ? LowerCase::convert(kreader.getChar()) : kreader.getChar(); + uint32_t oval = fold_rhs ? LowerCase::convert(oreader.getChar()) : oreader.getChar(); if (kval != oval) { if (kval < oval) { @@ -40,6 +41,7 @@ compareFolded(const char *key, const char *okey) } +template <bool fold_lhs, bool fold_rhs> int FoldedStringCompare:: compareFoldedPrefix(const char *key, const char *okey, size_t prefixLen) @@ -48,8 +50,8 @@ compareFoldedPrefix(const char *key, const char *okey, size_t prefixLen) Utf8ReaderForZTS oreader(okey); for (size_t j = 0; j < prefixLen; ++j ) { - uint32_t kval = LowerCase::convert(kreader.getChar()); - uint32_t oval = LowerCase::convert(oreader.getChar()); + uint32_t kval = fold_lhs ? LowerCase::convert(kreader.getChar()) : kreader.getChar(); + uint32_t oval = fold_rhs ? LowerCase::convert(oreader.getChar()) : oreader.getChar(); if (kval != oval) { if (kval < oval) { @@ -58,7 +60,9 @@ compareFoldedPrefix(const char *key, const char *okey, size_t prefixLen) return 1; } } - if (kval == 0) return 0; + if (kval == 0) { + return 0; + } } // reached end of prefix return 0; @@ -68,9 +72,11 @@ int FoldedStringCompare:: comparePrefix(const char *key, const char *okey, size_t prefixLen) { - int res = compareFoldedPrefix(key, okey, prefixLen); - if (res != 0) return res; - return strncmp(key, okey, prefixLen); + int res = compareFoldedPrefix<true, true>(key, okey, prefixLen); + if (res != 0) { + return res; + } + return compareFoldedPrefix<false, false>(key, okey, prefixLen); } @@ -78,10 +84,22 @@ int FoldedStringCompare:: compare(const char *key, const char *okey) { - int res = compareFolded(key, okey); - if (res != 0) return res; + int res = compareFolded<true, true>(key, okey); + if (res != 0) { + return res; + } return strcmp(key, okey); } +template int FoldedStringCompare::compareFolded<false, false>(const char* key, const char* okey); +template int FoldedStringCompare::compareFolded<false, true>(const char* key, const char* okey); +template int FoldedStringCompare::compareFolded<true, false>(const char* key, const char* okey); +template int FoldedStringCompare::compareFolded<true, true>(const char* key, const char* okey); + +template int FoldedStringCompare::compareFoldedPrefix<false, false>(const char* key, const char* okey, size_t prefixLen); +template int FoldedStringCompare::compareFoldedPrefix<false, true>(const char* key, const char* okey, size_t prefixLen); +template int FoldedStringCompare::compareFoldedPrefix<true, false>(const char* key, const char* okey, size_t prefixLen); +template int FoldedStringCompare::compareFoldedPrefix<true, true>(const char* key, const char* okey, size_t prefixLen); + } // namespace search diff --git a/searchlib/src/vespa/searchlib/util/foldedstringcompare.h b/searchlib/src/vespa/searchlib/util/foldedstringcompare.h index 30e7ec93345..fb842e190e2 100644 --- a/searchlib/src/vespa/searchlib/util/foldedstringcompare.h +++ b/searchlib/src/vespa/searchlib/util/foldedstringcompare.h @@ -10,50 +10,54 @@ class FoldedStringCompare { public: /** - * count number of UCS-4 characters in utf8 string + * count number of UCS-4 characters in UTF-8 string * - * @param key NUL terminated utf8 string - * @return integer number of symbols in utf8 string before NUL + * @param key NUL terminated UTF-8 string + * @return integer number of symbols in UTF-8 string before NUL */ static size_t size(const char *key); /** - * Compare utf8 key with utf8 other key after folding both + * Compare UTF-8 key with UTF-8 other key after folding both * - * @param key NUL terminated utf8 string - * @param okey NUL terminated utf8 string + * @param key NUL terminated UTF-8 string + * @param okey NUL terminated UTF-8 string * @return integer -1 if key < okey, 0 if key == okey, 1 if key > okey **/ + template <bool fold_lhs, bool fold_rhs> static int compareFolded(const char *key, const char *okey); /** - * Compare utf8 key with utf8 other key after folding both. + * Compare UTF-8 key with UTF-8 other key after folding both. * - * @param key NUL terminated utf8 string - * @param okey NUL terminated utf8 string + * @param key NUL terminated UTF-8 string + * @param okey NUL terminated UTF-8 string * @param prefixLen max number of symbols to compare before * considering keys identical. * * @return integer -1 if key < okey, 0 if key == okey, 1 if key > okey */ + template <bool fold_lhs, bool fold_rhs> static int compareFoldedPrefix(const char *key, const char *okey, size_t prefixLen); /* - * Compare utf8 key with utf8 other key after folding both, if + * Compare UTF-8 key with UTF-8 other key after folding both, if * they seem equal then fall back to comparing without folding. * - * @param key NUL terminated utf8 string - * @param okey NUL terminated utf8 string + * @param key NUL terminated UTF-8 string + * @param okey NUL terminated UTF-8 string * @return integer -1 if key < okey, 0 if key == okey, 1 if key > okey */ static int compare(const char *key, const char *okey); /* - * Compare utf8 key with utf8 other key after folding both for prefix, if + * Compare UTF-8 key with UTF-8 other key after folding both for prefix, if * they seem equal then fall back to comparing without folding. * - * @param key NUL terminated utf8 string - * @param okey NUL terminated utf8 string + * @param key NUL terminated UTF-8 string + * @param okey NUL terminated UTF-8 string + * @param prefixLen max number of symbols to compare before + * considering keys identical. * @return integer -1 if key < okey, 0 if key == okey, 1 if key > okey */ static int comparePrefix(const char *key, const char *okey, size_t prefixLen); |