aboutsummaryrefslogtreecommitdiffstats
path: root/vespalib
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2024-01-03 10:03:12 +0000
committer Henning Baldersheim <balder@yahoo-inc.com> 2024-01-05 08:29:15 +0000
commit 192af4443cb572791c8f11520e8ebec4ee4e5a8e (patch)
tree 755a603c0fe1b28116a24749f4f919ffee756c84 /vespalib
parent d8b50e4eaea708fed984c7c6ccdd06ac48b358bf (diff)
- Fold query for streaming search based on either query item type, or field definition.
- This ensures that query processing and document processing are symmetric for streaming search. We no longer rely on the Java query processing being symmetric with the backend C++ variant.
- Indexed search does no normalization in the backend and uses the query as is.
Diffstat (limited to 'vespalib')
-rw-r--r-- vespalib/src/vespa/fastlib/text/normwordfolder.cpp 13
-rw-r--r-- vespalib/src/vespa/fastlib/text/normwordfolder.h 4
2 files changed, 12 insertions, 5 deletions
diff --git a/vespalib/src/vespa/fastlib/text/normwordfolder.cpp b/vespalib/src/vespa/fastlib/text/normwordfolder.cpp
index 8d3ccad9900..97b4b5aabb7 100644
--- a/vespalib/src/vespa/fastlib/text/normwordfolder.cpp
+++ b/vespalib/src/vespa/fastlib/text/normwordfolder.cpp
@@ -5,7 +5,7 @@
#include <cstring>
bool Fast_NormalizeWordFolder::_isInitialized = false;
-std::mutex _initMutex;
+
bool Fast_NormalizeWordFolder::_doAccentRemoval = false;
bool Fast_NormalizeWordFolder::_doSharpSSubstitution = false;
bool Fast_NormalizeWordFolder::_doLigatureSubstitution = false;
@@ -19,12 +19,19 @@ ucs4_t Fast_NormalizeWordFolder::_lowerCaseHighAscii[256];
ucs4_t Fast_NormalizeWordFolder::_kanaMap[192];
ucs4_t Fast_NormalizeWordFolder::_halfwidth_fullwidthMap[240];
+namespace {
+
+std::mutex G_initMutex;
+Fast_NormalizeWordFolder G_forceWorldFolderInit;
+}
+
+
void
Fast_NormalizeWordFolder::Setup(uint32_t flags)
{
// Only allow setting these when not initialized or initializing...
{
- std::lock_guard<std::mutex> initGuard(_initMutex);
+ std::lock_guard<std::mutex> initGuard(G_initMutex);
_doAccentRemoval = (DO_ACCENT_REMOVAL & flags) != 0;
_doSharpSSubstitution = (DO_SHARP_S_SUBSTITUTION & flags) != 0;
_doLigatureSubstitution = (DO_LIGATURE_SUBSTITUTION & flags) != 0;
@@ -39,7 +46,7 @@ Fast_NormalizeWordFolder::Initialize()
{
unsigned int i;
if (!_isInitialized) {
- std::lock_guard<std::mutex> initGuard(_initMutex);
+ std::lock_guard<std::mutex> initGuard(G_initMutex);
if (!_isInitialized) {
for (i = 0; i < 128; i++)
diff --git a/vespalib/src/vespa/fastlib/text/normwordfolder.h b/vespalib/src/vespa/fastlib/text/normwordfolder.h
index 121a83e260d..5a77fe73e01 100644
--- a/vespalib/src/vespa/fastlib/text/normwordfolder.h
+++ b/vespalib/src/vespa/fastlib/text/normwordfolder.h
@@ -35,8 +35,8 @@ public:
* added together.
*/
static void Setup(uint32_t flags);
- static ucs4_t lowercase_and_fold_ascii(ucs4_t c) noexcept { return _lowerCase[c]; }
- static ucs4_t lowercase_ascii(ucs4_t c) noexcept { return _foldCase[c]; }
+ static ucs4_t lowercase_and_fold_ascii(ucs4_t c) noexcept { return _foldCase[c]; }
+ static ucs4_t lowercase_ascii(ucs4_t c) noexcept { return _lowerCase[c]; }
static bool is_wordchar_ascii7bit(ucs4_t c) noexcept { return _isWord[c]; }
static ucs4_t lowercase(ucs4_t c) {
if (c < 767)