- Separate methods for lowercasing only, and for lowercasing combined with folding.

- Hide implementations and use accessors.
- Minor code cleanup.
author: Henning Baldersheim <balder@yahoo-inc.com> 2023-12-21 10:56:49 +0000
committer: Henning Baldersheim <balder@yahoo-inc.com> 2023-12-21 10:56:49 +0000
commit: 543366294b1e8ae8a01186c25b74e36ed4c3ae35 (patch)
tree: f62fb82c65a6152fabe944caa4e719051f4ab032 /streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp
parent: ef3db955e75e6df68a2a358feb5b95e44979377f (diff)
1 files changed, 13 insertions, 13 deletions
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp
index 4daea693e95..c31102ec0ab 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp
@@ -25,8 +25,8 @@ UTF8StringFieldSearcherBase::tokenize(const byte * p, size_t maxSz, cmptype_t *
         if (c < 128) {
             if (!c) { break; }
             p++;
-            if (__builtin_expect(Fast_NormalizeWordFolder::_isWord[c], false)) {
-                *q++ = Fast_NormalizeWordFolder::_foldCase[c];
+            if (__builtin_expect(Fast_NormalizeWordFolder::is_wordchar_ascii7bit(c), false)) {
+                *q++ = Fast_NormalizeWordFolder::lowercase_and_fold_ascii(c);
                 c = 0;
             } else {
                 c = *p;
@@ -37,13 +37,13 @@ UTF8StringFieldSearcherBase::tokenize(const byte * p, size_t maxSz, cmptype_t *
             if (Fast_UnicodeUtil::IsWordChar(c)) {
                 _utf8Count[p-oldP-1]++;
                 const char *repl = Fast_NormalizeWordFolder::ReplacementString(c);
-                if (repl != NULL) {
+                if (repl != nullptr) {
                     size_t repllen = strlen(repl);
                     if (repllen > 0) {
                         q = Fast_UnicodeUtil::ucs4copy(q,repl);
                     }
                 } else {
-                    c = Fast_NormalizeWordFolder::ToFold(c);
+                    c = Fast_NormalizeWordFolder::lowercase_and_fold(c);
                     *q++ = c;
                 }
                 break;
@@ -63,10 +63,10 @@ UTF8StringFieldSearcherBase::tokenize(const byte * p, size_t maxSz, cmptype_t *
         if (c < 128) {             // Common case, ASCII
             if (!c) { break; }
             p++;
-            if (__builtin_expect(!Fast_NormalizeWordFolder::_isWord[c], false)) {
+            if (__builtin_expect(!Fast_NormalizeWordFolder::is_wordchar_ascii7bit(c), false)) {
                 c = 0;
             } else {
-                *q++ = Fast_NormalizeWordFolder::_foldCase[c];
+                *q++ = Fast_NormalizeWordFolder::lowercase_and_fold_ascii(c);
                 c = *p;
             }
         } else {
@@ -75,13 +75,13 @@ UTF8StringFieldSearcherBase::tokenize(const byte * p, size_t maxSz, cmptype_t *
             if (__builtin_expect(Fast_UnicodeUtil::IsWordChar(c), false)) {
                 _utf8Count[p-oldP-1]++;
                 const char *repl = Fast_NormalizeWordFolder::ReplacementString(c);
-                if (repl != NULL) {
+                if (repl != nullptr) {
                     size_t repllen = strlen(repl);
                     if (repllen > 0) {
                         q = Fast_UnicodeUtil::ucs4copy(q,repl);
                     }
                 } else {
-                    c = Fast_NormalizeWordFolder::ToFold(c);
+                    c = Fast_NormalizeWordFolder::lowercase_and_fold(c);
                     *q++ = c;
                 }
 
@@ -144,9 +144,9 @@ UTF8StringFieldSearcherBase::matchTermExact(const FieldRef & f, QueryTerm & qt)
         bool equal(true);
         for (; equal && (n < e) && (term < eterm); term++) {
             if (*term < 0x80) {
-                equal = (*term == Fast_NormalizeWordFolder::_foldCase[*n++]);
+                equal = (*term == Fast_NormalizeWordFolder::lowercase_ascii(*n++));
             } else {
-                cmptype_t c = Fast_NormalizeWordFolder::ToFold(Fast_UnicodeUtil::GetUTF8CharNonAscii(n));
+                cmptype_t c = Fast_NormalizeWordFolder::lowercase(Fast_UnicodeUtil::GetUTF8CharNonAscii(n));
                 equal = (*term == c);
             }
         }
@@ -280,12 +280,12 @@ UTF8StringFieldSearcherBase::skipSeparators(const search::byte * p, size_t sz, T
         if (c < 128) {
             p++;
             if (!isSeparatorCharacter(c)) {
-                dstbuf.onCharacter(Fast_NormalizeWordFolder::_foldCase[c], (oldP - b));
+                dstbuf.onCharacter(Fast_NormalizeWordFolder::lowercase_and_fold_ascii(c), (oldP - b));
             }
         } else {
             c = Fast_UnicodeUtil::GetUTF8CharNonAscii(p);
             const char *repl = Fast_NormalizeWordFolder::ReplacementString(c);
-            if (repl != NULL) {
+            if (repl != nullptr) {
                 size_t repllen = strlen(repl);
                 if (repllen > 0) {
                     ucs4_t * buf = dstbuf.getBuf();
@@ -300,7 +300,7 @@ UTF8StringFieldSearcherBase::skipSeparators(const search::byte * p, size_t sz, T
                     }
                 }
             } else {
-                c = Fast_NormalizeWordFolder::ToFold(c);
+                c = Fast_NormalizeWordFolder::lowercase_and_fold(c);
                 dstbuf.onCharacter(c, (oldP - b));
             }
             if (c == Fast_UnicodeUtil::_BadUTF8Char) {
author	Henning Baldersheim <balder@yahoo-inc.com>	2023-12-21 10:56:49 +0000
committer	Henning Baldersheim <balder@yahoo-inc.com>	2023-12-21 10:56:49 +0000
commit	543366294b1e8ae8a01186c25b74e36ed4c3ae35 (patch)
tree	f62fb82c65a6152fabe944caa4e719051f4ab032 /streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp
parent	ef3db955e75e6df68a2a358feb5b95e44979377f (diff)