From 192af4443cb572791c8f11520e8ebec4ee4e5a8e Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Wed, 3 Jan 2024 10:03:12 +0000 Subject: - Fold query for streaming search based on either query item type or field definition. - This ensures that query processing and document processing are symmetric for streaming search. No longer rely on Java query processing being symmetric with the backend C++ variant. - Indexed search does no normalization in the backend and uses the query as is. --- vespalib/src/vespa/fastlib/text/normwordfolder.cpp | 13 ++++++++++--- vespalib/src/vespa/fastlib/text/normwordfolder.h | 4 ++-- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'vespalib/src') diff --git a/vespalib/src/vespa/fastlib/text/normwordfolder.cpp b/vespalib/src/vespa/fastlib/text/normwordfolder.cpp index 8d3ccad9900..97b4b5aabb7 100644 --- a/vespalib/src/vespa/fastlib/text/normwordfolder.cpp +++ b/vespalib/src/vespa/fastlib/text/normwordfolder.cpp @@ -5,7 +5,7 @@ #include bool Fast_NormalizeWordFolder::_isInitialized = false; -std::mutex _initMutex; + bool Fast_NormalizeWordFolder::_doAccentRemoval = false; bool Fast_NormalizeWordFolder::_doSharpSSubstitution = false; bool Fast_NormalizeWordFolder::_doLigatureSubstitution = false; @@ -19,12 +19,19 @@ ucs4_t Fast_NormalizeWordFolder::_lowerCaseHighAscii[256]; ucs4_t Fast_NormalizeWordFolder::_kanaMap[192]; ucs4_t Fast_NormalizeWordFolder::_halfwidth_fullwidthMap[240]; +namespace { + +std::mutex G_initMutex; +Fast_NormalizeWordFolder G_forceWorldFolderInit; +} + + void Fast_NormalizeWordFolder::Setup(uint32_t flags) { // Only allow setting these when not initialized or initializing... 
{ - std::lock_guard initGuard(_initMutex); + std::lock_guard initGuard(G_initMutex); _doAccentRemoval = (DO_ACCENT_REMOVAL & flags) != 0; _doSharpSSubstitution = (DO_SHARP_S_SUBSTITUTION & flags) != 0; _doLigatureSubstitution = (DO_LIGATURE_SUBSTITUTION & flags) != 0; @@ -39,7 +46,7 @@ Fast_NormalizeWordFolder::Initialize() { unsigned int i; if (!_isInitialized) { - std::lock_guard initGuard(_initMutex); + std::lock_guard initGuard(G_initMutex); if (!_isInitialized) { for (i = 0; i < 128; i++) diff --git a/vespalib/src/vespa/fastlib/text/normwordfolder.h b/vespalib/src/vespa/fastlib/text/normwordfolder.h index 121a83e260d..5a77fe73e01 100644 --- a/vespalib/src/vespa/fastlib/text/normwordfolder.h +++ b/vespalib/src/vespa/fastlib/text/normwordfolder.h @@ -35,8 +35,8 @@ public: * added together. */ static void Setup(uint32_t flags); - static ucs4_t lowercase_and_fold_ascii(ucs4_t c) noexcept { return _lowerCase[c]; } - static ucs4_t lowercase_ascii(ucs4_t c) noexcept { return _foldCase[c]; } + static ucs4_t lowercase_and_fold_ascii(ucs4_t c) noexcept { return _foldCase[c]; } + static ucs4_t lowercase_ascii(ucs4_t c) noexcept { return _lowerCase[c]; } static bool is_wordchar_ascii7bit(ucs4_t c) noexcept { return _isWord[c]; } static ucs4_t lowercase(ucs4_t c) { if (c < 767) -- cgit v1.2.3