summary | refs | log | tree | commit | diff | stats
path: root/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp')
-rw-r--r-- streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp 59
1 files changed, 59 insertions, 0 deletions
diff --git a/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp
new file mode 100644
index 00000000000..fd327d3a3df
--- /dev/null
+++ b/streamingvisitors/src/vespa/vsm/searcher/utf8substringsearcher.cpp
@@ -0,0 +1,59 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vsm/searcher/utf8substringsearcher.h>
+
+using search::byte;
+using search::streaming::QueryTerm;
+using search::streaming::QueryTermList;
+
+namespace vsm {
+
+// Returns a new UTF8SubStringFieldSearcher copy-constructed from this one, owned by the result.
+std::unique_ptr<FieldSearcher> UTF8SubStringFieldSearcher::duplicate() const {
+    auto copy = std::make_unique<UTF8SubStringFieldSearcher>(*this);
+    return copy;
+}
+
+// At every start position of the scanned buffer, calls addHit() for each query term that matches there.
+size_t UTF8SubStringFieldSearcher::matchTerms(const FieldRef & f, const size_t mintsz) {
+    const byte * raw = reinterpret_cast<const byte *>(f.data());
+    if (_buf->size() <= f.size()) {
+        _buf->reserve(f.size() + 1);
+    }
+    cmptype_t * folded = &(*_buf.get())[0];
+    BufferWrapper wrapper(folded);
+    // presumably skipSeparators() fills the buffer through the wrapper and returns its length -- TODO confirm
+    const size_t foldedLen = skipSeparators(raw, f.size(), wrapper);
+    const cmptype_t * pos = folded;
+    // Last position at which a match attempt is still started: mintsz units before the end.
+    const cmptype_t * const lastStart = (folded + foldedLen) - mintsz;
+    termcount_t words(0);
+    while (pos <= lastStart) {
+        for (auto && entry : _qtl) {
+            QueryTerm & qt = *entry;
+            const cmptype_t * term;
+            const termsize_t termLen = qt.term(term);
+            const cmptype_t * const termEnd = term + termLen;
+            const cmptype_t * t = term;
+            const cmptype_t * p = pos;
+            while ((t < termEnd) && (*t == *p)) { ++t; ++p; }
+            if (t == termEnd) { addHit(qt, words); }  // whole term matched at pos
+        }
+        const bool onWordChar = Fast_UnicodeUtil::IsWordChar(*pos);
+        ++pos;
+        if (!onWordChar) {
+            ++words;  // a non-word character advances the word index
+            while ((pos < lastStart) && !Fast_UnicodeUtil::IsWordChar(*pos)) { ++pos; }
+        }
+    }
+    NEED_CHAR_STAT(addAnyUtf8Field(f.size()));
+    return words + 1; // we must also count the last word
+}
+
+// Handles a single query term by delegating to matchTermSubstring() and returning its result.
+size_t UTF8SubStringFieldSearcher::matchTerm(const FieldRef & f, QueryTerm & qt) {
+    const size_t result = matchTermSubstring(f, qt);
+    return result;
+}
+
+}