aboutsummaryrefslogtreecommitdiffstats
path: root/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp
diff options
context:
space:
mode:
author: Henning Baldersheim <balder@yahoo-inc.com> 2024-01-12 08:29:43 +0000
committer: Henning Baldersheim <balder@yahoo-inc.com> 2024-01-12 08:29:43 +0000
commit: 679087c481fbb1aa02b21162f5a96e3c9ce56abc (patch)
tree: 28bf2f0f6b7b89dd4c8f4c3fd94fa4d65e2319ce /streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp
parent: 13d6a727e8c2c23d04e8bca980588abbd0424d69 (diff)
Also handle different normalization during query time.
Diffstat (limited to 'streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp')
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp 22
1 files changed, 13 insertions, 9 deletions
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp
index d9ac47a3431..5036e9bedb1 100644
--- a/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp
+++ b/streamingvisitors/src/vespa/vsm/searcher/utf8stringfieldsearcherbase.cpp
@@ -55,22 +55,26 @@ UTF8StringFieldSearcherBase::matchTermRegular(const FieldRef & f, QueryTerm & qt
size_t
UTF8StringFieldSearcherBase::matchTermExact(const FieldRef & f, QueryTerm & qt)
{
- const byte * n = reinterpret_cast<const byte *> (f.data());
const cmptype_t * term;
termsize_t tsz = qt.term(term);
const cmptype_t * eterm = term+tsz;
- const byte * e = n + f.size();
+ if ( f.size() >= _buf->size()) {
+ _buf->reserve(f.size() + 1);
+ }
+ cmptype_t * fn = _buf->data();
if (tsz <= f.size()) {
bool equal(true);
- for (; equal && (n < e) && (term < eterm); term++) {
- if (*term < 0x80) {
- equal = (*term == Fast_NormalizeWordFolder::lowercase_ascii(*n++));
- } else {
- cmptype_t c = Fast_NormalizeWordFolder::lowercase(Fast_UnicodeUtil::GetUTF8CharNonAscii(n));
- equal = (*term == c);
+ Normalizing norm_mode = normalize_mode();
+ TokenizeReader reader(reinterpret_cast<const byte *> (f.data()), f.size(), fn);
+ while (equal && reader.hasNext() && (term < eterm)) {
+ reader.normalize(reader.next(), norm_mode);
+ size_t len = reader.complete();
+ for (size_t i(0); i < len; i++) {
+ equal = (term[i] == fn[i]);
}
+ term += len;
}
- if (equal && (term == eterm) && (qt.isPrefix() || (n == e))) {
+ if (equal && (term == eterm) && (qt.isPrefix() || ! reader.hasNext())) {
addHit(qt,0);
}
}