diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-12-21 10:56:49 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2023-12-21 10:56:49 +0000 |
commit | 543366294b1e8ae8a01186c25b74e36ed4c3ae35 (patch) | |
tree | f62fb82c65a6152fabe944caa4e719051f4ab032 /streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp | |
parent | ef3db955e75e6df68a2a358feb5b95e44979377f (diff) |
- Provide separate methods for lowercasing only and for lowercasing combined with folding.
- Hide the lookup-table implementations (e.g. `_isWord`, `_foldCase`) and use accessor methods instead.
- Minor code cleanup.
Diffstat (limited to 'streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp')
-rw-r--r-- | streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp | 26 |
1 file changed, 13 insertions, 13 deletions
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp index 4daea693e95..c31102ec0ab 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp @@ -25,8 +25,8 @@ UTF8StringFieldSearcherBase::tokenize(const byte * p, size_t maxSz, cmptype_t * if (c < 128) { if (!c) { break; } p++; - if (__builtin_expect(Fast_NormalizeWordFolder::_isWord[c], false)) { - *q++ = Fast_NormalizeWordFolder::_foldCase[c]; + if (__builtin_expect(Fast_NormalizeWordFolder::is_wordchar_ascii7bit(c), false)) { + *q++ = Fast_NormalizeWordFolder::lowercase_and_fold_ascii(c); c = 0; } else { c = *p; @@ -37,13 +37,13 @@ UTF8StringFieldSearcherBase::tokenize(const byte * p, size_t maxSz, cmptype_t * if (Fast_UnicodeUtil::IsWordChar(c)) { _utf8Count[p-oldP-1]++; const char *repl = Fast_NormalizeWordFolder::ReplacementString(c); - if (repl != NULL) { + if (repl != nullptr) { size_t repllen = strlen(repl); if (repllen > 0) { q = Fast_UnicodeUtil::ucs4copy(q,repl); } } else { - c = Fast_NormalizeWordFolder::ToFold(c); + c = Fast_NormalizeWordFolder::lowercase_and_fold(c); *q++ = c; } break; @@ -63,10 +63,10 @@ UTF8StringFieldSearcherBase::tokenize(const byte * p, size_t maxSz, cmptype_t * if (c < 128) { // Common case, ASCII if (!c) { break; } p++; - if (__builtin_expect(!Fast_NormalizeWordFolder::_isWord[c], false)) { + if (__builtin_expect(!Fast_NormalizeWordFolder::is_wordchar_ascii7bit(c), false)) { c = 0; } else { - *q++ = Fast_NormalizeWordFolder::_foldCase[c]; + *q++ = Fast_NormalizeWordFolder::lowercase_and_fold_ascii(c); c = *p; } } else { @@ -75,13 +75,13 @@ UTF8StringFieldSearcherBase::tokenize(const byte * p, size_t maxSz, cmptype_t * if (__builtin_expect(Fast_UnicodeUtil::IsWordChar(c), false)) { _utf8Count[p-oldP-1]++; const char *repl = 
Fast_NormalizeWordFolder::ReplacementString(c); - if (repl != NULL) { + if (repl != nullptr) { size_t repllen = strlen(repl); if (repllen > 0) { q = Fast_UnicodeUtil::ucs4copy(q,repl); } } else { - c = Fast_NormalizeWordFolder::ToFold(c); + c = Fast_NormalizeWordFolder::lowercase_and_fold(c); *q++ = c; } @@ -144,9 +144,9 @@ UTF8StringFieldSearcherBase::matchTermExact(const FieldRef & f, QueryTerm & qt) bool equal(true); for (; equal && (n < e) && (term < eterm); term++) { if (*term < 0x80) { - equal = (*term == Fast_NormalizeWordFolder::_foldCase[*n++]); + equal = (*term == Fast_NormalizeWordFolder::lowercase_ascii(*n++)); } else { - cmptype_t c = Fast_NormalizeWordFolder::ToFold(Fast_UnicodeUtil::GetUTF8CharNonAscii(n)); + cmptype_t c = Fast_NormalizeWordFolder::lowercase(Fast_UnicodeUtil::GetUTF8CharNonAscii(n)); equal = (*term == c); } } @@ -280,12 +280,12 @@ UTF8StringFieldSearcherBase::skipSeparators(const search::byte * p, size_t sz, T if (c < 128) { p++; if (!isSeparatorCharacter(c)) { - dstbuf.onCharacter(Fast_NormalizeWordFolder::_foldCase[c], (oldP - b)); + dstbuf.onCharacter(Fast_NormalizeWordFolder::lowercase_and_fold_ascii(c), (oldP - b)); } } else { c = Fast_UnicodeUtil::GetUTF8CharNonAscii(p); const char *repl = Fast_NormalizeWordFolder::ReplacementString(c); - if (repl != NULL) { + if (repl != nullptr) { size_t repllen = strlen(repl); if (repllen > 0) { ucs4_t * buf = dstbuf.getBuf(); @@ -300,7 +300,7 @@ UTF8StringFieldSearcherBase::skipSeparators(const search::byte * p, size_t sz, T } } } else { - c = Fast_NormalizeWordFolder::ToFold(c); + c = Fast_NormalizeWordFolder::lowercase_and_fold(c); dstbuf.onCharacter(c, (oldP - b)); } if (c == Fast_UnicodeUtil::_BadUTF8Char) { |