aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@online.no>2023-09-21 15:09:02 +0200
committerTor Egge <Tor.Egge@online.no>2023-09-21 15:09:02 +0200
commitb00282a4994832cd1af87d1c599ce05679ada4f5 (patch)
treedc20cc231fda723f4d7bcd9b4d68eab1d6ba7fe4 /searchlib/src
parentad67c63f8d7e7e475ebfbb44a9e988521fc186a4 (diff)
Extend allowed key types for FoldedStringCompare::compareFolded.
Diffstat (limited to 'searchlib/src')
-rw-r--r--searchlib/src/tests/util/folded_string_compare/folded_string_compare_test.cpp36
-rw-r--r--searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp61
-rw-r--r--searchlib/src/vespa/searchlib/util/foldedstringcompare.h18
3 files changed, 101 insertions, 14 deletions
diff --git a/searchlib/src/tests/util/folded_string_compare/folded_string_compare_test.cpp b/searchlib/src/tests/util/folded_string_compare/folded_string_compare_test.cpp
index e00cf109f8e..c0353e53bd1 100644
--- a/searchlib/src/tests/util/folded_string_compare/folded_string_compare_test.cpp
+++ b/searchlib/src/tests/util/folded_string_compare/folded_string_compare_test.cpp
@@ -3,12 +3,32 @@
#include <vespa/searchlib/util/foldedstringcompare.h>
#include <vespa/vespalib/gtest/gtest.h>
#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/text/lowercase.h>
+#include <vespa/vespalib/text/utf8.h>
using search::FoldedStringCompare;
+using vespalib::LowerCase;
+using vespalib::Utf8ReaderForZTS;
using IntVec = std::vector<int>;
using StringVec = std::vector<vespalib::string>;
+namespace {
+
+template <bool fold> // fold: when true, every decoded code point is passed through LowerCase::convert
+std::vector<uint32_t> as_utf32(const char* key) // Test helper: decodes key with Utf8ReaderForZTS into a vector of code points
+{
+ std::vector<uint32_t> result;
+ Utf8ReaderForZTS reader(key);
+ while (reader.hasMore()) { // one code point is consumed per iteration until the reader reports no more input
+ uint32_t code_point = fold ? LowerCase::convert(reader.getChar()) : reader.getChar(); // only one branch is evaluated, so getChar() runs exactly once here
+ result.push_back(code_point);
+ }
+ return result;
+}
+
+}
+
class FoldedStringCompareTest : public ::testing::Test
{
protected:
@@ -21,10 +41,22 @@ protected:
template <bool fold_lhs, bool fold_rhs>
int
- compare_folded_helper(const vespalib::string& lhs, const vespalib::string& rhs)
+ compare_folded_helper2(const vespalib::string& lhs, const vespalib::string& rhs) // Compares the UTF-8 inputs, then expects the same result when either side is replaced by a UTF-32 vector key
{
int ret = FoldedStringCompare::compareFolded<fold_lhs, fold_rhs>(lhs.c_str(), rhs.c_str());
- EXPECT_EQ(-ret, (FoldedStringCompare::compareFolded<fold_rhs, fold_lhs>(rhs.c_str(), lhs.c_str())));
+ auto folded_lhs_utf32 = as_utf32<fold_lhs>(lhs.c_str()); // lhs as code points, already folded here when fold_lhs is set
+ EXPECT_EQ(ret, (FoldedStringCompare::compareFolded<false, fold_rhs>(std::cref(folded_lhs_utf32), rhs.c_str()))); // lhs folding was applied above, hence <false, fold_rhs>
+ auto folded_rhs_utf32 = as_utf32<fold_rhs>(rhs.c_str()); // rhs as code points, already folded here when fold_rhs is set
+ EXPECT_EQ(ret, (FoldedStringCompare::compareFolded<fold_lhs, false>(lhs.c_str(), std::cref(folded_rhs_utf32)))); // rhs folding was applied above, hence <fold_lhs, false>
+ return ret; // result of the plain UTF-8 vs UTF-8 comparison
+ }
+
+ template <bool fold_lhs, bool fold_rhs>
+ int
+ compare_folded_helper(const vespalib::string& lhs, const vespalib::string& rhs) // Runs compare_folded_helper2 in both operand orders and returns the lhs-vs-rhs result
+ {
+ int ret = compare_folded_helper2<fold_lhs, fold_rhs>(lhs, rhs);
+ EXPECT_EQ(-ret, (compare_folded_helper2<fold_rhs, fold_lhs>(rhs, lhs))); // swapping operands together with their fold flags must negate the result
return ret;
}
diff --git a/searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp b/searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp
index a61a12cebf6..53b9a2db31d 100644
--- a/searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp
+++ b/searchlib/src/vespa/searchlib/util/foldedstringcompare.cpp
@@ -8,6 +8,45 @@ using vespalib::LowerCase;
using vespalib::Utf8ReaderForZTS;
namespace search {
+using Utf32VectorRef = std::reference_wrapper<const std::vector<uint32_t>>; // non-owning key type for a vector of 32-bit code points; same type the FoldableString concept in the header spells out
+
+namespace foldedstringcompare {
+
+class Utf32Reader { // Sequential reader over a std::vector<uint32_t>, exposing the hasMore()/getChar() pair that compareFolded calls on its readers
+ using Iterator = typename std::vector<uint32_t>::const_iterator;
+
+ Iterator _cur; // next element to hand out
+ Iterator _end; // one past the last element
+public:
+ Utf32Reader(const std::vector<uint32_t>& key) // keeps iterators only, so key must outlive the reader; NOTE(review): single-argument constructor is not explicit
+ : _cur(key.begin()),
+ _end(key.end())
+ {
+ }
+
+ bool hasMore() const noexcept { return _cur != _end; } // true while unread elements remain
+ uint32_t getChar() noexcept { return *_cur++; } // returns the current element and advances; no bounds check, callers must test hasMore() first
+};
+
+template <typename T> class FoldableStringHelper; // primary template is declared but not defined here; only the specializations below provide a Reader
+
+template <> class FoldableStringHelper<const char*>
+{
+public:
+ using Reader = Utf8ReaderForZTS; // const char* keys are read with Utf8ReaderForZTS
+};
+
+template <> class FoldableStringHelper<Utf32VectorRef>
+{
+public:
+ using Reader = Utf32Reader; // Utf32VectorRef keys are read with Utf32Reader
+};
+
+}
+
+template <typename KeyType>
+using Reader = typename foldedstringcompare::FoldableStringHelper<KeyType>::Reader; // shorthand for the reader class that FoldableStringHelper maps KeyType to
+
size_t
FoldedStringCompare::
size(const char *key)
@@ -15,15 +54,20 @@ size(const char *key)
return Utf8ReaderForZTS::countChars(key);
}
-template <bool fold_lhs, bool fold_rhs>
+template <bool fold_lhs, bool fold_rhs, detail::FoldableString KeyType, detail::FoldableString OKeyType>
int
FoldedStringCompare::
-compareFolded(const char *key, const char *okey)
+compareFolded(KeyType key, OKeyType okey)
{
- Utf8ReaderForZTS kreader(key);
- Utf8ReaderForZTS oreader(okey);
+ Reader<KeyType> kreader(key);
+ Reader<OKeyType> oreader(okey);
for (;;) {
+ if (!kreader.hasMore()) {
+ return oreader.hasMore() ? -1 : 0;
+ } else if (!oreader.hasMore()) {
+ return 1;
+ }
uint32_t kval = fold_lhs ? LowerCase::convert(kreader.getChar()) : kreader.getChar();
uint32_t oval = fold_rhs ? LowerCase::convert(oreader.getChar()) : oreader.getChar();
@@ -34,13 +78,9 @@ compareFolded(const char *key, const char *okey)
return 1;
}
}
- if (kval == 0) {
- return 0;
- }
}
}
-
template <bool fold_lhs, bool fold_rhs>
int
FoldedStringCompare::
@@ -91,6 +131,11 @@ compare(const char *key, const char *okey)
return strcmp(key, okey);
}
+template int FoldedStringCompare::compareFolded<false, false>(const char* key, Utf32VectorRef okey); // explicit instantiations for mixed UTF-8 / Utf32VectorRef keys start here
+template int FoldedStringCompare::compareFolded<true, false>(const char* key, Utf32VectorRef okey); // in all four, the fold flag belonging to the Utf32VectorRef side is false
+template int FoldedStringCompare::compareFolded<false, false>(Utf32VectorRef key, const char* okey);
+template int FoldedStringCompare::compareFolded<false, true>(Utf32VectorRef key, const char* okey); // no (Utf32VectorRef, Utf32VectorRef) instantiation is added in this hunk
+
template int FoldedStringCompare::compareFolded<false, false>(const char* key, const char* okey);
template int FoldedStringCompare::compareFolded<false, true>(const char* key, const char* okey);
template int FoldedStringCompare::compareFolded<true, false>(const char* key, const char* okey);
diff --git a/searchlib/src/vespa/searchlib/util/foldedstringcompare.h b/searchlib/src/vespa/searchlib/util/foldedstringcompare.h
index fb842e190e2..cd7cd325667 100644
--- a/searchlib/src/vespa/searchlib/util/foldedstringcompare.h
+++ b/searchlib/src/vespa/searchlib/util/foldedstringcompare.h
@@ -3,9 +3,19 @@
#pragma once
#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <vector>
namespace search {
+namespace detail {
+
+template <typename T>
+concept FoldableString = std::same_as<const char*,T> || std::same_as<std::reference_wrapper<const std::vector<uint32_t>>, T>; // satisfied by exactly these two key types; NOTE(review): std::same_as is declared in <concepts>, which is not among the includes visible in this hunk
+
+}
+
class FoldedStringCompare
{
public:
@@ -20,12 +30,12 @@ public:
/**
* Compare UTF-8 key with UTF-8 other key after folding both
*
- * @param key NUL terminated UTF-8 string
- * @param okey NUL terminated UTF-8 string
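+ * @param key NUL terminated UTF-8 string or std::reference_wrapper<const std::vector<uint32_t>> (UTF-32 code points)
+ * @param okey NUL terminated UTF-8 string or std::reference_wrapper<const std::vector<uint32_t>> (UTF-32 code points)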
* @return integer -1 if key < okey, 0 if key == okey, 1 if key > okey
**/
- template <bool fold_lhs, bool fold_rhs>
- static int compareFolded(const char *key, const char *okey);
+ template <bool fold_lhs, bool fold_rhs, detail::FoldableString KeyType, detail::FoldableString OKeyType>
+ static int compareFolded(KeyType key, OKeyType okey);
/**
* Compare UTF-8 key with UTF-8 other key after folding both.