diff options
author | Tor Egge <Tor.Egge@online.no> | 2023-09-21 15:09:02 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2023-09-21 15:09:02 +0200 |
commit | b00282a4994832cd1af87d1c599ce05679ada4f5 (patch) | |
tree | dc20cc231fda723f4d7bcd9b4d68eab1d6ba7fe4 /searchlib/src | |
parent | ad67c63f8d7e7e475ebfbb44a9e988521fc186a4 (diff) | |
Extend allowed key types for FoldedStringCompare::compareFolded.
Diffstat (limited to 'searchlib/src')
3 files changed, 101 insertions, 14 deletions
diff --git a/searchlib/src/tests/util/folded_string_compare/folded_string_compare_test.cpp b/searchlib/src/tests/util/folded_string_compare/folded_string_compare_test.cpp index e00cf109f8e..c0353e53bd1 100644 --- a/searchlib/src/tests/util/folded_string_compare/folded_string_compare_test.cpp +++ b/searchlib/src/tests/util/folded_string_compare/folded_string_compare_test.cpp @@ -3,12 +3,32 @@ #include <vespa/searchlib/util/foldedstringcompare.h> #include <vespa/vespalib/gtest/gtest.h> #include <vespa/vespalib/stllike/string.h> +#include <vespa/vespalib/text/lowercase.h> +#include <vespa/vespalib/text/utf8.h> using search::FoldedStringCompare; +using vespalib::LowerCase; +using vespalib::Utf8ReaderForZTS; using IntVec = std::vector<int>; using StringVec = std::vector<vespalib::string>; +namespace { + +template <bool fold> +std::vector<uint32_t> as_utf32(const char* key) +{ + std::vector<uint32_t> result; + Utf8ReaderForZTS reader(key); + while (reader.hasMore()) { + uint32_t code_point = fold ? 
LowerCase::convert(reader.getChar()) : reader.getChar(); + result.push_back(code_point); + } + return result; +} + +} + class FoldedStringCompareTest : public ::testing::Test { protected: @@ -21,10 +41,22 @@ protected: template <bool fold_lhs, bool fold_rhs> int - compare_folded_helper(const vespalib::string& lhs, const vespalib::string& rhs) + compare_folded_helper2(const vespalib::string& lhs, const vespalib::string& rhs) { int ret = FoldedStringCompare::compareFolded<fold_lhs, fold_rhs>(lhs.c_str(), rhs.c_str()); - EXPECT_EQ(-ret, (FoldedStringCompare::compareFolded<fold_rhs, fold_lhs>(rhs.c_str(), lhs.c_str()))); + auto folded_lhs_utf32 = as_utf32<fold_lhs>(lhs.c_str()); + EXPECT_EQ(ret, (FoldedStringCompare::compareFolded<false, fold_rhs>(std::cref(folded_lhs_utf32), rhs.c_str()))); + auto folded_rhs_utf32 = as_utf32<fold_rhs>(rhs.c_str()); + EXPECT_EQ(ret, (FoldedStringCompare::compareFolded<fold_lhs, false>(lhs.c_str(), std::cref(folded_rhs_utf32)))); + return ret; + } + + template <bool fold_lhs, bool fold_rhs> + int + compare_folded_helper(const vespalib::string& lhs, const vespalib::string& rhs) + { + int ret = compare_folded_helper2<fold_lhs, fold_rhs>(lhs, rhs); + EXPECT_EQ(-ret, (compare_folded_helper2<fold_rhs, fold_lhs>(rhs, lhs))); return ret; } diff --git a/searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp b/searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp index a61a12cebf6..53b9a2db31d 100644 --- a/searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp +++ b/searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp @@ -8,6 +8,45 @@ using vespalib::LowerCase; using vespalib::Utf8ReaderForZTS; namespace search { +using Utf32VectorRef = std::reference_wrapper<const std::vector<uint32_t>>; + +namespace foldedstringcompare { + +class Utf32Reader { + using Iterator = typename std::vector<uint32_t>::const_iterator; + + Iterator _cur; + Iterator _end; +public: + Utf32Reader(const std::vector<uint32_t>& key) + : _cur(key.begin()), + 
_end(key.end()) + { + } + + bool hasMore() const noexcept { return _cur != _end; } + uint32_t getChar() noexcept { return *_cur++; } +}; + +template <typename T> class FoldableStringHelper; + +template <> class FoldableStringHelper<const char*> +{ +public: + using Reader = Utf8ReaderForZTS; +}; + +template <> class FoldableStringHelper<Utf32VectorRef> +{ +public: + using Reader = Utf32Reader; +}; + +} + +template <typename KeyType> +using Reader = typename foldedstringcompare::FoldableStringHelper<KeyType>::Reader; + size_t FoldedStringCompare:: size(const char *key) @@ -15,15 +54,20 @@ size(const char *key) return Utf8ReaderForZTS::countChars(key); } -template <bool fold_lhs, bool fold_rhs> +template <bool fold_lhs, bool fold_rhs, detail::FoldableString KeyType, detail::FoldableString OKeyType> int FoldedStringCompare:: -compareFolded(const char *key, const char *okey) +compareFolded(KeyType key, OKeyType okey) { - Utf8ReaderForZTS kreader(key); - Utf8ReaderForZTS oreader(okey); + Reader<KeyType> kreader(key); + Reader<OKeyType> oreader(okey); for (;;) { + if (!kreader.hasMore()) { + return oreader.hasMore() ? -1 : 0; + } else if (!oreader.hasMore()) { + return 1; + } uint32_t kval = fold_lhs ? LowerCase::convert(kreader.getChar()) : kreader.getChar(); uint32_t oval = fold_rhs ? 
LowerCase::convert(oreader.getChar()) : oreader.getChar(); @@ -34,13 +78,9 @@ compareFolded(const char *key, const char *okey) return 1; } } - if (kval == 0) { - return 0; - } } } - template <bool fold_lhs, bool fold_rhs> int FoldedStringCompare:: @@ -91,6 +131,11 @@ compare(const char *key, const char *okey) return strcmp(key, okey); } +template int FoldedStringCompare::compareFolded<false, false>(const char* key, Utf32VectorRef okey); +template int FoldedStringCompare::compareFolded<true, false>(const char* key, Utf32VectorRef okey); +template int FoldedStringCompare::compareFolded<false, false>(Utf32VectorRef key, const char* okey); +template int FoldedStringCompare::compareFolded<false, true>(Utf32VectorRef key, const char* okey); + template int FoldedStringCompare::compareFolded<false, false>(const char* key, const char* okey); template int FoldedStringCompare::compareFolded<false, true>(const char* key, const char* okey); template int FoldedStringCompare::compareFolded<true, false>(const char* key, const char* okey); diff --git a/searchlib/src/vespa/searchlib/util/foldedstringcompare.h b/searchlib/src/vespa/searchlib/util/foldedstringcompare.h index fb842e190e2..cd7cd325667 100644 --- a/searchlib/src/vespa/searchlib/util/foldedstringcompare.h +++ b/searchlib/src/vespa/searchlib/util/foldedstringcompare.h @@ -3,9 +3,19 @@ #pragma once #include <cstddef> +#include <cstdint> +#include <functional> +#include <vector> namespace search { +namespace detail { + +template <typename T> +concept FoldableString = std::same_as<const char*,T> || std::same_as<std::reference_wrapper<const std::vector<uint32_t>>, T>; + +} + class FoldedStringCompare { public: @@ -20,12 +30,12 @@ public: /** * Compare UTF-8 key with UTF-8 other key after folding both * - * @param key NUL terminated UTF-8 string - * @param okey NUL terminated UTF-8 string + * @param key NUL terminated UTF-8 string or vector<uint32_t> + * @param okey NUL terminated UTF-8 string or vector<uint32_t> * @return 
integer -1 if key < okey, 0 if key == okey, 1 if key > okey **/ - template <bool fold_lhs, bool fold_rhs> - static int compareFolded(const char *key, const char *okey); + template <bool fold_lhs, bool fold_rhs, detail::FoldableString KeyType, detail::FoldableString OKeyType> + static int compareFolded(KeyType key, OKeyType okey); /** * Compare UTF-8 key with UTF-8 other key after folding both. |