diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2024-01-12 08:29:43 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2024-01-12 08:29:43 +0000 |
commit | 679087c481fbb1aa02b21162f5a96e3c9ce56abc (patch) | |
tree | 28bf2f0f6b7b89dd4c8f4c3fd94fa4d65e2319ce /streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp | |
parent | 13d6a727e8c2c23d04e8bca980588abbd0424d69 (diff) |
Also handle different normalization during query time.
Diffstat (limited to 'streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp')
-rw-r--r-- | streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp | 22 |
1 file changed, 13 insertions, 9 deletions
```diff
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp
index d9ac47a3431..5036e9bedb1 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp
@@ -55,22 +55,26 @@ UTF8StringFieldSearcherBase::matchTermRegular(const FieldRef & f, QueryTerm & qt
 size_t
 UTF8StringFieldSearcherBase::matchTermExact(const FieldRef & f, QueryTerm & qt)
 {
-    const byte * n = reinterpret_cast<const byte *> (f.data());
     const cmptype_t * term;
     termsize_t tsz = qt.term(term);
     const cmptype_t * eterm = term+tsz;
-    const byte * e = n + f.size();
+    if ( f.size() >= _buf->size()) {
+        _buf->reserve(f.size() + 1);
+    }
+    cmptype_t * fn = _buf->data();
     if (tsz <= f.size()) {
         bool equal(true);
-        for (; equal && (n < e) && (term < eterm); term++) {
-            if (*term < 0x80) {
-                equal = (*term == Fast_NormalizeWordFolder::lowercase_ascii(*n++));
-            } else {
-                cmptype_t c = Fast_NormalizeWordFolder::lowercase(Fast_UnicodeUtil::GetUTF8CharNonAscii(n));
-                equal = (*term == c);
+        Normalizing norm_mode = normalize_mode();
+        TokenizeReader reader(reinterpret_cast<const byte *> (f.data()), f.size(), fn);
+        while (equal && reader.hasNext() && (term < eterm)) {
+            reader.normalize(reader.next(), norm_mode);
+            size_t len = reader.complete();
+            for (size_t i(0); i < len; i++) {
+                equal = (term[i] == fn[i]);
             }
+            term += len;
         }
-        if (equal && (term == eterm) && (qt.isPrefix() || (n == e))) {
+        if (equal && (term == eterm) && (qt.isPrefix() || ! reader.hasNext())) {
             addHit(qt,0);
         }
     }
```