diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-07-23 05:29:32 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2023-07-25 07:56:57 +0000 |
commit | c703043e1d0ff1501ecd5c19c490a4911240744a (patch) | |
tree | db84e9461bce0f766658afb03c8f27de99f2b897 /streamingvisitors/src | |
parent | 78a211072a21ec5f368b99bce19c1b703d98152d (diff) |
Use Fast_NormalizeWordFolder (and Fast_UnicodeUtil) as helpers via qualified static calls instead of inheriting from them just to reach their static members.
Diffstat (limited to 'streamingvisitors/src')
9 files changed, 31 insertions, 29 deletions
diff --git a/streamingvisitors/src/tests/textutil/textutil.cpp b/streamingvisitors/src/tests/textutil/textutil.cpp index 160c734ee20..aeff4600781 100644 --- a/streamingvisitors/src/tests/textutil/textutil.cpp +++ b/streamingvisitors/src/tests/textutil/textutil.cpp @@ -1,7 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/vespalib/testkit/testapp.h> -#include <vespa/fastlib/text/unicodeutil.h> +#include <vespa/fastlib/text/normwordfolder.h> #include <vespa/searchlib/query/base.h> #include <vespa/vsm/searcher/fold.h> #include <vespa/vsm/searcher/futf8strchrfieldsearcher.h> diff --git a/streamingvisitors/src/vespa/searchvisitor/searchenvironment.cpp b/streamingvisitors/src/vespa/searchvisitor/searchenvironment.cpp index 2119364c2bc..148ad7daaed 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchenvironment.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/searchenvironment.cpp @@ -11,6 +11,7 @@ #include <vespa/searchlib/fef/ranking_assets_repo.h> #include <vespa/vespalib/stllike/hash_map.hpp> #include <vespa/searchsummary/config/config-juniperrc.h> +#include <vespa/fastlib/text/normwordfolder.h> #include <cassert> #include <vespa/log/log.h> @@ -108,6 +109,7 @@ SearchEnvironment::Env::~Env() SearchEnvironment::SearchEnvironment(const config::ConfigUri & configUri, FNET_Transport* transport, const vespalib::string& file_distributor_connection_spec) : VisitorEnvironment(), _envMap(), + _wordFolder(std::make_unique<Fast_NormalizeWordFolder>()), _configUri(configUri), _transport(transport), _file_distributor_connection_spec(file_distributor_connection_spec) @@ -137,7 +139,7 @@ SearchEnvironment::getEnv(const vespalib::string & searchCluster) auto found = _envMap.find(searchCluster); if (found == _envMap.end()) { LOG(debug, "Init VSMAdapter with config id = '%s'", searchCluster.c_str()); - Env::SP env = std::make_shared<Env>(searchClusterUri, _wordFolder, _transport, 
_file_distributor_connection_spec); + Env::SP env = std::make_shared<Env>(searchClusterUri, *_wordFolder, _transport, _file_distributor_connection_spec); _envMap[searchCluster] = std::move(env); found = _envMap.find(searchCluster); } diff --git a/streamingvisitors/src/vespa/searchvisitor/searchenvironment.h b/streamingvisitors/src/vespa/searchvisitor/searchenvironment.h index f3bbfddd76c..05909c71ccb 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchenvironment.h +++ b/streamingvisitors/src/vespa/searchvisitor/searchenvironment.h @@ -10,10 +10,10 @@ #include <vespa/config/retriever/simpleconfigurer.h> #include <vespa/config/subscription/configuri.h> #include <vespa/vsm/vsm/vsm-adapter.h> -#include <vespa/fastlib/text/normwordfolder.h> #include <mutex> class FNET_Transport; +class Fast_NormalizeWordFolder; namespace search::fef { @@ -70,7 +70,7 @@ private: EnvMap _envMap; ThreadLocals _threadLocals; std::mutex _lock; - Fast_NormalizeWordFolder _wordFolder; + std::unique_ptr<Fast_NormalizeWordFolder> _wordFolder; config::ConfigUri _configUri; FNET_Transport* const _transport; vespalib::string _file_distributor_connection_spec; diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp index 95cd4788d7f..1cfa0224b69 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp @@ -26,6 +26,7 @@ #include <vespa/vespalib/data/slime/slime.h> #include <vespa/vespalib/text/stringtokenizer.h> #include <vespa/fnet/databuffer.h> +#include <vespa/fastlib/text/normwordfolder.h> #include <optional> #include <vespa/log/log.h> diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h index abc2bc9d870..dedf20021e9 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h +++ b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h @@ 
-20,6 +20,7 @@ using termsize_t = size_t; #define NEED_CHAR_STAT(a) #endif +using ucs4_t = unsigned int; using cmptype_t = ucs4_t; using SearcherBuf = vespalib::Array<cmptype_t>; using SharedSearcherBuf = std::shared_ptr<SearcherBuf>; diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp index f991722d623..a7f17cb9006 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "utf8stringfieldsearcherbase.h" +#include <vespa/fastlib/text/normwordfolder.h> #include <cassert> using search::streaming::QueryTerm; @@ -24,8 +25,8 @@ UTF8StringFieldSearcherBase::tokenize(const byte * p, size_t maxSz, cmptype_t * if (c < 128) { if (!c) { break; } p++; - if (__builtin_expect(_isWord[c], false)) { - *q++ = _foldCase[c]; + if (__builtin_expect(Fast_NormalizeWordFolder::_isWord[c], false)) { + *q++ = Fast_NormalizeWordFolder::_foldCase[c]; c = 0; } else { c = *p; @@ -35,19 +36,19 @@ UTF8StringFieldSearcherBase::tokenize(const byte * p, size_t maxSz, cmptype_t * c = Fast_UnicodeUtil::GetUTF8CharNonAscii(p); if (Fast_UnicodeUtil::IsWordChar(c)) { _utf8Count[p-oldP-1]++; - const char *repl = ReplacementString(c); + const char *repl = Fast_NormalizeWordFolder::ReplacementString(c); if (repl != NULL) { size_t repllen = strlen(repl); if (repllen > 0) { q = Fast_UnicodeUtil::ucs4copy(q,repl); } } else { - c = ToFold(c); + c = Fast_NormalizeWordFolder::ToFold(c); *q++ = c; } break; } else { - if (c == _BadUTF8Char) { + if (c == Fast_UnicodeUtil::_BadUTF8Char) { _badUtf8Count++; } else { _utf8Count[p-oldP-1]++; @@ -62,10 +63,10 @@ UTF8StringFieldSearcherBase::tokenize(const byte * p, size_t maxSz, cmptype_t * if (c < 128) { // Common case, ASCII if 
(!c) { break; } p++; - if (__builtin_expect(!_isWord[c], false)) { + if (__builtin_expect(!Fast_NormalizeWordFolder::_isWord[c], false)) { c = 0; } else { - *q++ = _foldCase[c]; + *q++ = Fast_NormalizeWordFolder::_foldCase[c]; c = *p; } } else { @@ -73,20 +74,20 @@ UTF8StringFieldSearcherBase::tokenize(const byte * p, size_t maxSz, cmptype_t * c = Fast_UnicodeUtil::GetUTF8CharNonAscii(p); if (__builtin_expect(Fast_UnicodeUtil::IsWordChar(c), false)) { _utf8Count[p-oldP-1]++; - const char *repl = ReplacementString(c); + const char *repl = Fast_NormalizeWordFolder::ReplacementString(c); if (repl != NULL) { size_t repllen = strlen(repl); if (repllen > 0) { q = Fast_UnicodeUtil::ucs4copy(q,repl); } } else { - c = ToFold(c); + c = Fast_NormalizeWordFolder::ToFold(c); *q++ = c; } c = *p; } else { - if (c == _BadUTF8Char) { + if (c == Fast_UnicodeUtil::_BadUTF8Char) { _badUtf8Count++; } else { _utf8Count[p-oldP-1]++; @@ -143,9 +144,9 @@ UTF8StringFieldSearcherBase::matchTermExact(const FieldRef & f, QueryTerm & qt) bool equal(true); for (; equal && (n < e) && (term < eterm); term++) { if (*term < 0x80) { - equal = (*term == _foldCase[*n++]); + equal = (*term == Fast_NormalizeWordFolder::_foldCase[*n++]); } else { - cmptype_t c = ToFold(Fast_UnicodeUtil::GetUTF8CharNonAscii(n)); + cmptype_t c = Fast_NormalizeWordFolder::ToFold(Fast_UnicodeUtil::GetUTF8CharNonAscii(n)); equal = (*term == c); } } @@ -220,20 +221,16 @@ UTF8StringFieldSearcherBase::matchTermSuffix(const FieldRef & f, QueryTerm & qt) } UTF8StringFieldSearcherBase::UTF8StringFieldSearcherBase() : - StrChrFieldSearcher(), - Fast_NormalizeWordFolder(), - Fast_UnicodeUtil() + StrChrFieldSearcher() { } UTF8StringFieldSearcherBase::UTF8StringFieldSearcherBase(FieldIdT fId) : - StrChrFieldSearcher(fId), - Fast_NormalizeWordFolder(), - Fast_UnicodeUtil() + StrChrFieldSearcher(fId) { } -UTF8StringFieldSearcherBase::~UTF8StringFieldSearcherBase() {} +UTF8StringFieldSearcherBase::~UTF8StringFieldSearcherBase() = default; 
void UTF8StringFieldSearcherBase::prepare(search::streaming::QueryTermList& qtl, @@ -283,11 +280,11 @@ UTF8StringFieldSearcherBase::skipSeparators(const search::byte * p, size_t sz, T if (c < 128) { p++; if (!isSeparatorCharacter(c)) { - dstbuf.onCharacter(_foldCase[c], (oldP - b)); + dstbuf.onCharacter(Fast_NormalizeWordFolder::_foldCase[c], (oldP - b)); } } else { c = Fast_UnicodeUtil::GetUTF8CharNonAscii(p); - const char *repl = ReplacementString(c); + const char *repl = Fast_NormalizeWordFolder::ReplacementString(c); if (repl != NULL) { size_t repllen = strlen(repl); if (repllen > 0) { @@ -303,10 +300,10 @@ UTF8StringFieldSearcherBase::skipSeparators(const search::byte * p, size_t sz, T } } } else { - c = ToFold(c); + c = Fast_NormalizeWordFolder::ToFold(c); dstbuf.onCharacter(c, (oldP - b)); } - if (c == _BadUTF8Char) { + if (c == Fast_UnicodeUtil::_BadUTF8Char) { _badUtf8Count++; } else { _utf8Count[p-oldP-1]++; diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h index a017b501660..f4da5960fd3 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h +++ b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.h @@ -2,7 +2,6 @@ #pragma once #include "strchrfieldsearcher.h" -#include <vespa/fastlib/text/normwordfolder.h> namespace vsm { @@ -15,7 +14,7 @@ namespace vsm { * Reuse of this buffer ensures better cache hit ratio because this is just a * scratchpad for tokenizing. It will grow till the max size and stay there. 
**/ -class UTF8StringFieldSearcherBase : public StrChrFieldSearcher, protected Fast_NormalizeWordFolder, public Fast_UnicodeUtil +class UTF8StringFieldSearcherBase : public StrChrFieldSearcher { public: /** diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp index adcf7a937c1..046341b069f 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <vespa/vsm/searcher/utf8substringsearcher.h> +#include <vespa/fastlib/text/unicodeutil.h> using search::byte; using search::streaming::QueryTerm; diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsnippetmodifier.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8substringsnippetmodifier.cpp index 89388c01354..ce14d2bf8e2 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsnippetmodifier.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/utf8substringsnippetmodifier.cpp @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "utf8substringsnippetmodifier.h" #include <vespa/juniper/juniper_separators.h> +#include <vespa/fastlib/text/unicodeutil.h> #include <cassert> using search::byte; |