diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2024-01-03 10:03:12 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2024-01-05 08:29:15 +0000 |
commit | 192af4443cb572791c8f11520e8ebec4ee4e5a8e (patch) | |
tree | 755a603c0fe1b28116a24749f4f919ffee756c84 /vespalib/src | |
parent | d8b50e4eaea708fed984c7c6ccdd06ac48b358bf (diff) |
- Fold query for streaming search based on either query item type, or field definition.
- This ensures that query processing and document processing are symmetric for streaming search.
We no longer rely on Java query processing being symmetric with the backend C++ variant.
- Indexed search does no normalization in the backend and uses the query as is.
Diffstat (limited to 'vespalib/src')
-rw-r--r-- | vespalib/src/vespa/fastlib/text/normwordfolder.cpp | 13 | ||||
-rw-r--r-- | vespalib/src/vespa/fastlib/text/normwordfolder.h | 4 |
2 files changed, 12 insertions, 5 deletions
diff --git a/vespalib/src/vespa/fastlib/text/normwordfolder.cpp b/vespalib/src/vespa/fastlib/text/normwordfolder.cpp index 8d3ccad9900..97b4b5aabb7 100644 --- a/vespalib/src/vespa/fastlib/text/normwordfolder.cpp +++ b/vespalib/src/vespa/fastlib/text/normwordfolder.cpp @@ -5,7 +5,7 @@ #include <cstring> bool Fast_NormalizeWordFolder::_isInitialized = false; -std::mutex _initMutex; + bool Fast_NormalizeWordFolder::_doAccentRemoval = false; bool Fast_NormalizeWordFolder::_doSharpSSubstitution = false; bool Fast_NormalizeWordFolder::_doLigatureSubstitution = false; @@ -19,12 +19,19 @@ ucs4_t Fast_NormalizeWordFolder::_lowerCaseHighAscii[256]; ucs4_t Fast_NormalizeWordFolder::_kanaMap[192]; ucs4_t Fast_NormalizeWordFolder::_halfwidth_fullwidthMap[240]; +namespace { + +std::mutex G_initMutex; +Fast_NormalizeWordFolder G_forceWorldFolderInit; +} + + void Fast_NormalizeWordFolder::Setup(uint32_t flags) { // Only allow setting these when not initialized or initializing... { - std::lock_guard<std::mutex> initGuard(_initMutex); + std::lock_guard<std::mutex> initGuard(G_initMutex); _doAccentRemoval = (DO_ACCENT_REMOVAL & flags) != 0; _doSharpSSubstitution = (DO_SHARP_S_SUBSTITUTION & flags) != 0; _doLigatureSubstitution = (DO_LIGATURE_SUBSTITUTION & flags) != 0; @@ -39,7 +46,7 @@ Fast_NormalizeWordFolder::Initialize() { unsigned int i; if (!_isInitialized) { - std::lock_guard<std::mutex> initGuard(_initMutex); + std::lock_guard<std::mutex> initGuard(G_initMutex); if (!_isInitialized) { for (i = 0; i < 128; i++) diff --git a/vespalib/src/vespa/fastlib/text/normwordfolder.h b/vespalib/src/vespa/fastlib/text/normwordfolder.h index 121a83e260d..5a77fe73e01 100644 --- a/vespalib/src/vespa/fastlib/text/normwordfolder.h +++ b/vespalib/src/vespa/fastlib/text/normwordfolder.h @@ -35,8 +35,8 @@ public: * added together. 
*/ static void Setup(uint32_t flags); - static ucs4_t lowercase_and_fold_ascii(ucs4_t c) noexcept { return _lowerCase[c]; } - static ucs4_t lowercase_ascii(ucs4_t c) noexcept { return _foldCase[c]; } + static ucs4_t lowercase_and_fold_ascii(ucs4_t c) noexcept { return _foldCase[c]; } + static ucs4_t lowercase_ascii(ucs4_t c) noexcept { return _lowerCase[c]; } static bool is_wordchar_ascii7bit(ucs4_t c) noexcept { return _isWord[c]; } static ucs4_t lowercase(ucs4_t c) { if (c < 767) |