diff options
author | Tor Egge <Tor.Egge@online.no> | 2024-03-27 12:29:36 +0100 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2024-03-27 12:29:36 +0100 |
commit | 796b4c88c5b990b9446e3166394d8248080bcb05 (patch) | |
tree | dcc0bfcdecaea82af2f50e2fb29177341cf4daf3 /streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp | |
parent | 98f6fe0150b96f38cf09fa19eb892f2ba51555a2 (diff) |
Move UTF8StringFieldSearcherBase tokenize member function to TokenizeReader.
Move anonymous normalize_mode function to a public static
FieldSearchSpecMap::convert_normalize_mode member function.
Diffstat (limited to 'streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp')
-rw-r--r-- | streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp | 21 |
1 files changed, 2 insertions, 19 deletions
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp index 5036e9bedb1..f016d08ece8 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp @@ -10,21 +10,6 @@ using search::byte; namespace vsm { -template<typename Reader> -void -UTF8StringFieldSearcherBase::tokenize(Reader & reader) { - ucs4_t c(0); - Normalizing norm_mode = normalize_mode(); - while (reader.hasNext() && ! Fast_UnicodeUtil::IsWordChar(c = reader.next())); - - if (Fast_UnicodeUtil::IsWordChar(c)) { - reader.normalize(c, norm_mode); - while (reader.hasNext() && Fast_UnicodeUtil::IsWordChar(c = reader.next())) { - reader.normalize(c, norm_mode); - } - } -} - size_t UTF8StringFieldSearcherBase::matchTermRegular(const FieldRef & f, QueryTerm & qt) { @@ -38,8 +23,7 @@ UTF8StringFieldSearcherBase::matchTermRegular(const FieldRef & f, QueryTerm & qt TokenizeReader reader(reinterpret_cast<const byte *> (f.data()), f.size(), fn); while ( reader.hasNext() ) { - tokenize(reader); - size_t fl = reader.complete(); + size_t fl = reader.tokenize(normalize_mode()); if ((tsz <= fl) && (prefix() || qt.isPrefix() || (tsz == fl))) { const cmptype_t *tt=term, *et=term+tsz; for (const cmptype_t *fnt=fn; (tt < et) && (*tt == *fnt); tt++, fnt++); @@ -127,8 +111,7 @@ UTF8StringFieldSearcherBase::matchTermSuffix(const FieldRef & f, QueryTerm & qt) TokenizeReader reader(reinterpret_cast<const byte *> (f.data()), f.size(), dstbuf); while ( reader.hasNext() ) { - tokenize(reader); - size_t tokenlen = reader.complete(); + size_t tokenlen = reader.tokenize(normalize_mode()); if (matchTermSuffix(term, tsz, dstbuf, tokenlen)) { addHit(qt, words); } |