diff options
author | Tor Brede Vekterli <vekterli@vespa.ai> | 2024-01-18 13:04:53 +0000 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@vespa.ai> | 2024-01-18 15:08:07 +0000 |
commit | dc973997098c239d71a57b1c692cb79b868ea8b8 (patch) | |
tree | a5613e6d687d6968924a845545d0f39cf238824c /searchlib/src/vespa/searchlib/query/streaming/querynode.cpp | |
parent | ab54f9c7cbd1bc3c1434717b875e1dfeb7b27dc4 (diff) |
Support fuzzy term matching in streaming search
Uses a DFA-based matcher for max edits in {1, 2} and falls back
to the legacy non-DFA matcher for all other values (including 0).
Currently only supports fuzzy matching across the full field
string, i.e. there's no implicit tokenization or whitespace
removal. This matches the semantics we currently have for fuzzy
search over attributes in a non-streaming case
Diffstat (limited to 'searchlib/src/vespa/searchlib/query/streaming/querynode.cpp')
-rw-r--r-- | searchlib/src/vespa/searchlib/query/streaming/querynode.cpp | 16 |
1 files changed, 8 insertions, 8 deletions
diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp index 2ee515f062a..e71529a8aca 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp @@ -1,7 +1,8 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "query.h" +#include "fuzzy_term.h" #include "nearest_neighbor_query_node.h" +#include "query.h" #include "regexp_term.h" #include <vespa/searchlib/parsequery/stackdumpiterator.h> #include <vespa/searchlib/query/streaming/dot_product_term.h> @@ -147,17 +148,16 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor } else { Normalizing normalize_mode = factory.normalizing_mode(ssIndex); std::unique_ptr<QueryTerm> qt; - if (sTerm != TermType::REGEXP) { - qt = std::make_unique<QueryTerm>(factory.create(), ssTerm, ssIndex, sTerm, normalize_mode); - } else { + if (sTerm == TermType::REGEXP) { qt = std::make_unique<RegexpTerm>(factory.create(), ssTerm, ssIndex, TermType::REGEXP, normalize_mode); + } else if (sTerm == TermType::FUZZYTERM) { + qt = std::make_unique<FuzzyTerm>(factory.create(), ssTerm, ssIndex, TermType::FUZZYTERM, normalize_mode, + queryRep.getFuzzyMaxEditDistance(), queryRep.getFuzzyPrefixLength()); + } else [[likely]] { + qt = std::make_unique<QueryTerm>(factory.create(), ssTerm, ssIndex, sTerm, normalize_mode); } qt->setWeight(queryRep.GetWeight()); qt->setUniqueId(queryRep.getUniqueId()); - if (qt->isFuzzy()) { - qt->setFuzzyMaxEditDistance(queryRep.getFuzzyMaxEditDistance()); - qt->setFuzzyPrefixLength(queryRep.getFuzzyPrefixLength()); - } if (allowRewrite && possibleFloat(*qt, ssTerm) && factory.allow_float_terms_rewrite(ssIndex)) { auto phrase = std::make_unique<PhraseQueryNode>(); auto dotPos = ssTerm.find('.'); |