diff options
author | Alexey Chernyshev <aleksei@spotify.com> | 2022-03-10 16:33:07 +0100 |
---|---|---|
committer | Alexey Chernyshev <aleksei@spotify.com> | 2022-03-23 16:20:59 +0100 |
commit | d9805209e3b0e33be3c0cc454c4604043663c1c4 (patch) | |
tree | 7446c79f68acd8775233ace4d5a70058f90c8406 /searchlib | |
parent | a2b1e6654cabc90ddf7422e58adf641876e5201c (diff) |
Introducing fuzzy search
Diffstat (limited to 'searchlib')
31 files changed, 211 insertions, 9 deletions
diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp index 4f037415b35..65de302ae04 100644 --- a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp +++ b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp @@ -242,6 +242,12 @@ private: void testPrefixSearch(const AttributePtr & ptr); void testPrefixSearch(); + // test fuzzy search + void performFuzzySearch(const StringAttribute & vec, const vespalib::string & term, + const DocSet & expected, TermType termType); + void testFuzzySearch(const AttributePtr & ptr); + void testFuzzySearch(); + // test that search is working after clear doc template <typename VectorType, typename ValueType> void requireThatSearchIsWorkingAfterClearDoc(const vespalib::string & name, const Config & cfg, @@ -402,6 +408,7 @@ SearchContextTest::buildTermQuery(std::vector<char> & buffer, const vespalib::st switch (termType) { case TermType::PREFIXTERM: buffer[p++] = ParseItem::ITEM_PREFIXTERM; break; case TermType::REGEXP: buffer[p++] = ParseItem::ITEM_REGEXP; break; + case TermType::FUZZYTERM: buffer[p++] = ParseItem::ITEM_FUZZY; break; default: buffer[p++] = ParseItem::ITEM_TERM; break; @@ -1498,6 +1505,70 @@ SearchContextTest::testPrefixSearch() } } +//----------------------------------------------------------------------------- +// Test fuzzy search +//----------------------------------------------------------------------------- + +void +SearchContextTest::performFuzzySearch(const StringAttribute & vec, const vespalib::string & term, + const DocSet & expected, TermType termType) +{ + performSearch(vec, term, expected, termType); +} + +void +SearchContextTest::testFuzzySearch(const AttributePtr & ptr) +{ + LOG(info, "testFuzzySearch: vector '%s'", ptr->getName().c_str()); + + auto & vec = dynamic_cast<StringAttribute &>(*ptr.get()); + + uint32_t numDocs = 2; + addDocs(*ptr.get(), numDocs); + + const char * strings [] = {"fuzzysearch", "FUZZYSEARCH"}; + const char * terms[][2] = { + {"fuzzysearch", "FUZZYSEARCH"}, + {"fuzzysearck", "FUZZYSEARCK"}, + {"fuzzysekkkk", "FUZZYSEKKKK"} + }; + + for (uint32_t doc = 1; doc < numDocs + 1; ++doc) { + ASSERT_TRUE(doc < vec.getNumDocs()); + EXPECT_TRUE(vec.update(doc, strings[doc - 1])); + } + + ptr->commit(true); + + std::vector<DocSet> expected; + DocSet empty; + { + uint32_t docs[] = {1, 2}; + expected.emplace_back(docs, docs + 2); // normal search + } + { + uint32_t docs[] = {1, 2}; + expected.emplace_back(docs, docs + 2); // fuzzy search + } + + expected.emplace_back(); // results + + for (uint32_t i = 0; i < 3; ++i) { + for (uint32_t j = 0; j < 2; ++j) { + performFuzzySearch(vec, terms[i][j], expected[i], TermType::FUZZYTERM); + } + } +} + +void +SearchContextTest::testFuzzySearch() +{ + for (const auto & cfg : _stringCfg) { + testFuzzySearch(AttributeFactory::createAttribute(cfg.first, cfg.second)); + } +} + + template <typename VectorType, typename ValueType> void SearchContextTest::requireThatSearchIsWorkingAfterClearDoc(const vespalib::string & name, @@ -2028,6 +2099,7 @@ SearchContextTest::Main() testPrefixSearch(); testSearchIteratorConformance(); testSearchIteratorUnpacking(); + testFuzzySearch(); TEST_DO(requireThatSearchIsWorkingAfterClearDoc()); TEST_DO(requireThatSearchIsWorkingAfterLoadAndClearDoc()); TEST_DO(requireThatSearchIsWorkingAfterUpdates()); diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp index 8a17114057c..2f0f0d5a6ae 100644 --- a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp +++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp @@ -386,8 +386,8 @@ testSingleValue(Attribute & svsa, Config &cfg) TEST("testSingleValue") { EXPECT_EQUAL(24u, sizeof(AttributeVector::SearchContext)); - EXPECT_EQUAL(24u, sizeof(StringSearchHelper)); - EXPECT_EQUAL(56u, sizeof(SingleValueStringAttribute::StringSingleImplSearchContext)); + EXPECT_EQUAL(32u, sizeof(StringSearchHelper)); + EXPECT_EQUAL(64u, sizeof(SingleValueStringAttribute::StringSingleImplSearchContext)); { Config cfg(BasicType::STRING, CollectionType::SINGLE); SingleValueStringAttribute svsa("svsa", cfg); @@ -494,4 +494,20 @@ TEST("test cased regex match") { EXPECT_FALSE(helper.isMatch("xY")); } +TEST("test fuzzy match") { + QueryTermUCS4 xyz("xyz", QueryTermSimple::Type::FUZZYTERM); + StringSearchHelper helper(xyz, false); + EXPECT_FALSE(helper.isCased()); + EXPECT_FALSE(helper.isPrefix()); + EXPECT_FALSE(helper.isRegex()); + EXPECT_TRUE(helper.isFuzzy()); + EXPECT_TRUE(helper.isMatch("xyz")); + EXPECT_TRUE(helper.isMatch("xyza")); + EXPECT_TRUE(helper.isMatch("xyv")); + EXPECT_TRUE(helper.isMatch("xy")); + EXPECT_TRUE(helper.isMatch("x")); + EXPECT_TRUE(helper.isMatch("xvv")); + EXPECT_FALSE(helper.isMatch("vvv")); +} + TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/query/customtypevisitor_test.cpp b/searchlib/src/tests/query/customtypevisitor_test.cpp index 35280fb0bd8..0e8155e23c3 100644 --- a/searchlib/src/tests/query/customtypevisitor_test.cpp +++ b/searchlib/src/tests/query/customtypevisitor_test.cpp @@ -36,6 +36,7 @@ struct MyRangeTerm : InitTerm<RangeTerm> {}; struct MyStringTerm : InitTerm<StringTerm> {}; struct MySubstrTerm : InitTerm<SubstringTerm> {}; struct MySuffixTerm : InitTerm<SuffixTerm> {}; +struct MyFuzzyTerm : InitTerm<FuzzyTerm> {}; struct MyWeakAnd : WeakAnd { MyWeakAnd() : WeakAnd(1234, "view") {} }; struct MyWeightedSetTerm : WeightedSetTerm { MyWeightedSetTerm() : WeightedSetTerm(0, "view", 0, Weight(42)) {} }; struct MyDotProduct : DotProduct { MyDotProduct() : DotProduct(0, "view", 0, Weight(42)) {} }; @@ -65,6 +66,7 @@ struct MyQueryNodeTypes { typedef MyStringTerm StringTerm; typedef MySubstrTerm SubstringTerm; typedef MySuffixTerm SuffixTerm; + typedef MyFuzzyTerm FuzzyTerm; typedef MyWeakAnd WeakAnd; typedef MyWeightedSetTerm WeightedSetTerm; typedef MyDotProduct DotProduct; @@ -112,6 +114,7 @@ public: void visit(MyNearestNeighborTerm &) override { setVisited<MyNearestNeighborTerm>(); } void visit(MyTrue &) override { setVisited<MyTrue>(); } void visit(MyFalse &) override { setVisited<MyFalse>(); } + void visit(MyFuzzyTerm &) override { setVisited<MyFuzzyTerm>(); } }; template <class T> @@ -148,6 +151,7 @@ TEST("customtypevisitor_test") { requireThatNodeIsVisited<MyNearestNeighborTerm>(); requireThatNodeIsVisited<MyTrue>(); requireThatNodeIsVisited<MyFalse>(); + requireThatNodeIsVisited<MyFuzzyTerm>(); } } // namespace diff --git a/searchlib/src/tests/query/query_visitor_test.cpp b/searchlib/src/tests/query/query_visitor_test.cpp index 9f73c1ff585..f770213e8e5 100644 --- a/searchlib/src/tests/query/query_visitor_test.cpp +++ b/searchlib/src/tests/query/query_visitor_test.cpp @@ -49,6 +49,7 @@ public: void visit(NearestNeighborTerm &) override { isVisited<NearestNeighborTerm>() = true; } void visit(TrueQueryNode &) override { isVisited<TrueQueryNode>() = true; } void visit(FalseQueryNode &) override { isVisited<FalseQueryNode>() = true; } + void visit(FuzzyTerm &) override { isVisited<FuzzyTerm>() = true; } }; template <class T> @@ -85,6 +86,7 @@ TEST("requireThatAllNodesCanBeVisited") { checkVisit<NearestNeighborTerm>(new SimpleNearestNeighborTerm("query_tensor", "doc_tensor", 0, Weight(0), 123, true, 321, 100100.25)); checkVisit<TrueQueryNode>(new SimpleTrue()); checkVisit<FalseQueryNode>(new SimpleFalse()); + checkVisit<FuzzyTerm>(new SimpleFuzzyTerm("t", "field", 0, Weight(0))); } } // namespace diff --git a/searchlib/src/tests/query/querybuilder_test.cpp b/searchlib/src/tests/query/querybuilder_test.cpp index 93cfad27742..2ea566027c4 100644 --- a/searchlib/src/tests/query/querybuilder_test.cpp +++ b/searchlib/src/tests/query/querybuilder_test.cpp @@ -47,7 +47,7 @@ PredicateQueryTerm::UP getPredicateQueryTerm() { template <class NodeTypes> Node::UP createQueryTree() { QueryBuilder<NodeTypes> builder; - builder.addAnd(12); + builder.addAnd(13); { builder.addRank(2); { @@ -115,6 +115,7 @@ Node::UP createQueryTree() { builder.add_true_node(); builder.add_false_node(); } + builder.addFuzzyTerm(str[5], view[5], id[5], weight[5]); } Node::UP node = builder.build(); ASSERT_TRUE(node.get()); @@ -179,10 +180,11 @@ void checkQueryTreeTypes(Node *node) { typedef typename NodeTypes::RegExpTerm RegExpTerm; typedef typename NodeTypes::TrueQueryNode TrueNode; typedef typename NodeTypes::FalseQueryNode FalseNode; + typedef typename NodeTypes::FuzzyTerm FuzzyTerm; ASSERT_TRUE(node); auto* and_node = as_node<And>(node); - EXPECT_EQUAL(12u, and_node->getChildren().size()); + EXPECT_EQUAL(13u, and_node->getChildren().size()); auto* rank = as_node<Rank>(and_node->getChildren()[0]); EXPECT_EQUAL(2u, rank->getChildren().size()); @@ -306,6 +308,9 @@ void checkQueryTreeTypes(Node *node) { auto* false_node = as_node<FalseNode>(and_not->getChildren()[1]); EXPECT_TRUE(true_node); EXPECT_TRUE(false_node); + + auto* fuzzy_term = as_node<FuzzyTerm>(and_node->getChildren()[12]); + EXPECT_TRUE(checkTerm(fuzzy_term, str[5], view[5], id[5], weight[5])); } struct AbstractTypes { @@ -332,6 +337,7 @@ struct AbstractTypes { typedef search::query::RegExpTerm RegExpTerm; typedef search::query::TrueQueryNode TrueQueryNode; typedef search::query::FalseQueryNode FalseQueryNode; + typedef search::query::FuzzyTerm FuzzyTerm; }; // Builds a tree with simplequery and checks that the results have the @@ -427,6 +433,11 @@ struct MyNearestNeighborTerm : NearestNeighborTerm { }; struct MyTrue : TrueQueryNode {}; struct MyFalse : FalseQueryNode {}; +struct MyFuzzyTerm : FuzzyTerm { + MyFuzzyTerm(const Type &t, const string &f, int32_t i, Weight w) + : FuzzyTerm(t, f, i, w) { + } +}; struct MyQueryNodeTypes { typedef MyAnd And; @@ -454,6 +465,7 @@ struct MyQueryNodeTypes { typedef MyNearestNeighborTerm NearestNeighborTerm; typedef MyTrue TrueQueryNode; typedef MyFalse FalseQueryNode; + typedef MyFuzzyTerm FuzzyTerm; }; TEST("require that Custom Query Trees Can Be Built") { diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 906400f50a5..f14966dbfc8 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -779,6 +779,8 @@ public: n.get_distance_threshold(), getRequestContext().get_attribute_blueprint_params().nearest_neighbor_brute_force_limit)); } + + void visit(query::FuzzyTerm &n) override { visitTerm(n); } }; template <typename WS> diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp index ca440c2a249..e28b576319f 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp @@ -125,6 +125,10 @@ StringTemplSearchContext(QueryTermSimpleUP qTerm, const AttrType & toBeSearched) vespalib::string prefix(vespalib::RegexpUtil::get_prefix(this->queryTerm()->getTerm())); auto comp = enumStore.make_folded_comparator_prefix(prefix.c_str()); lookupRange(comp, comp); + } else if (this->isFuzzy()) { + vespalib::string prefix(this->getFuzzy().getPrefix()); + auto comp = enumStore.make_folded_comparator_prefix(prefix.c_str()); + lookupRange(comp, comp); } else { auto comp = enumStore.make_folded_comparator(queryTerm()->getTerm()); lookupTerm(comp); diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h index 97c2c7d2b63..a1d79c6131b 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h @@ -282,6 +282,10 @@ StringPostingSearchContext(QueryTermSimpleUP qTerm, bool useBitVector, const Att vespalib::string prefix(RegexpUtil::get_prefix(this->queryTerm()->getTerm())); auto comp = _enumStore.make_folded_comparator_prefix(prefix.c_str()); this->lookupRange(comp, comp); + } else if (this->isFuzzy()) { + vespalib::string prefix(this->getFuzzy().getPrefix()); + auto comp = _enumStore.make_folded_comparator_prefix(prefix.c_str()); + this->lookupRange(comp, comp); } else { auto comp = _enumStore.make_folded_comparator(this->queryTerm()->getTerm()); this->lookupTerm(comp); @@ -301,6 +305,8 @@ StringPostingSearchContext<BaseSC, AttrT, DataT>::useThis(const PostingListSearc : false; } else if ( this->isCased() ) { return this->isMatch(_enumStore.get_value(it.getKey().load_acquire())); + } else if (this->isFuzzy()) { + return this->getFuzzy().isMatch(_enumStore.get_value(it.getKey().load_acquire())); } return true; } diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp index 730ad1107a7..a6feadac724 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp @@ -61,6 +61,10 @@ SingleValueStringAttributeT<B>::StringTemplSearchContext::StringTemplSearchConte vespalib::string prefix(vespalib::RegexpUtil::get_prefix(this->queryTerm()->getTerm())); auto comp = enumStore.make_folded_comparator_prefix(prefix.c_str()); lookupRange(comp, comp); + } else if (this->isFuzzy()) { + vespalib::string prefix(this->getFuzzy().getPrefix()); + auto comp = enumStore.make_folded_comparator_prefix(prefix.c_str()); + lookupRange(comp, comp); } else { auto comp = enumStore.make_folded_comparator(queryTerm()->getTerm()); lookupTerm(comp); diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp index 6062c4f2096..9cccce4b19d 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp @@ -18,11 +18,13 @@ namespace search { StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased) : _regex(), + _fuzzy(), _term(), _termLen(), _isPrefix(term.isPrefix()), _isRegex(term.isRegex()), - _isCased(cased) + _isCased(cased), + _isFuzzy(term.isFuzzy()) { if (isRegex()) { if (isCased()) { @@ -33,6 +35,8 @@ StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased) } else if (isCased()) { _term._char = term.getTerm(); _termLen = term.getTermLen(); + } else if (isFuzzy()) { + _fuzzy = vespalib::Fuzzy::from_term(term.getTerm()); } else { term.term(_term._ucs4); } @@ -54,6 +58,9 @@ StringSearchHelper::isMatch(const char *src) const { int res = strncmp(_term._char, src, _termLen); return (res == 0) && (src[_termLen] == 0 || isPrefix()); } + if (__builtin_expect(isFuzzy(), false)) { + return getFuzzy().isMatch(src); + } vespalib::Utf8ReaderForZTS u8reader(src); uint32_t j = 0; uint32_t val; diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h index 495427d3e45..175f56f8b45 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.h +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h @@ -10,6 +10,7 @@ #include <vespa/vespalib/regex/regex.h> #include <vespa/vespalib/text/lowercase.h> #include <vespa/vespalib/text/utf8.h> +#include <vespa/vespalib/fuzzy/fuzzy.h> #include <optional> namespace search { @@ -26,9 +27,12 @@ public: bool isPrefix() const { return _isPrefix; } bool isRegex() const { return _isRegex; } bool isCased() const { return _isCased; } + bool isFuzzy() const { return _isFuzzy; } const vespalib::Regex & getRegex() const { return _regex; } + const vespalib::Fuzzy & getFuzzy() const { return _fuzzy; } private: vespalib::Regex _regex; + vespalib::Fuzzy _fuzzy; union { const ucs4_t *_ucs4; const char *_char; @@ -37,6 +41,7 @@ private: bool _isPrefix; bool _isRegex; bool _isCased; + bool _isFuzzy; }; class ReaderBase; @@ -126,7 +131,9 @@ protected: bool isPrefix() const { return _helper.isPrefix(); } bool isRegex() const { return _helper.isRegex(); } bool isCased() const { return _helper.isCased(); } + bool isFuzzy() const { return _helper.isFuzzy(); } const vespalib::Regex & getRegex() const { return _helper.getRegex(); } + const vespalib::Fuzzy & getFuzzy() const { return _helper.getFuzzy(); } class CollectHitCount { public: diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp index 7cde1102bc1..eb8054317dc 100644 --- a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp @@ -409,6 +409,7 @@ public: void visit(RegExpTerm &n) override { visitTerm(n); } void visit(PredicateQuery &n) override { not_supported(n); } void visit(NearestNeighborTerm &n) override { not_supported(n); } + void visit(FuzzyTerm &n) override { visitTerm(n); } }; Blueprint::UP diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp index 330320d5047..f8ad85859fa 100644 --- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp +++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp @@ -28,6 +28,7 @@ using index::IFieldLengthInspector; using index::IndexBuilder; using index::Schema; using index::SchemaUtil; +using query::FuzzyTerm; using query::LocationTerm; using query::NearestNeighborTerm; using query::Node; @@ -168,6 +169,7 @@ public: void visit(SubstringTerm &n) override { visitTerm(n); } void visit(SuffixTerm &n) override { visitTerm(n); } void visit(RegExpTerm &n) override { visitTerm(n); } + void visit(FuzzyTerm &n) override { visitTerm(n); } void visit(PredicateQuery &n) override { not_supported(n); } void visit(NearestNeighborTerm &n) override { not_supported(n); } diff --git a/searchlib/src/vespa/searchlib/parsequery/parse.h b/searchlib/src/vespa/searchlib/parsequery/parse.h index 34ea692c370..0d665d1f04d 100644 --- a/searchlib/src/vespa/searchlib/parsequery/parse.h +++ b/searchlib/src/vespa/searchlib/parsequery/parse.h @@ -56,8 +56,9 @@ public: ITEM_GEO_LOCATION_TERM = 27, ITEM_TRUE = 28, ITEM_FALSE = 29, - ITEM_MAX = 30, // Indicates how long tables must be. - ITEM_UNDEF = 31, + ITEM_FUZZY = 30, + ITEM_MAX = 31, // Indicates how long tables must be. + ITEM_UNDEF = 32, }; /** A tag identifying the origin of this query node. diff --git a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp index aa13c93810a..85b55284b35 100644 --- a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp +++ b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp @@ -170,6 +170,7 @@ bool SimpleQueryStackDumpIterator::readNext() { case ParseItem::ITEM_EXACTSTRINGTERM: case ParseItem::ITEM_SUFFIXTERM: case ParseItem::ITEM_REGEXP: + case ParseItem::ITEM_FUZZY: _curr_index_name = read_stringref(p); _curr_term = read_stringref(p); _currArity = 0; diff --git a/searchlib/src/vespa/searchlib/query/query_term_simple.h b/searchlib/src/vespa/searchlib/query/query_term_simple.h index 433ab7d56dd..0d5dd116826 100644 --- a/searchlib/src/vespa/searchlib/query/query_term_simple.h +++ b/searchlib/src/vespa/searchlib/query/query_term_simple.h @@ -22,7 +22,8 @@ public: EXACTSTRINGTERM = 3, SUFFIXTERM = 4, REGEXP = 5, - GEO_LOCATION = 6 + GEO_LOCATION = 6, + FUZZYTERM = 7 }; template <typename N> @@ -61,6 +62,7 @@ public: bool isWord() const { return (_type == Type::WORD); } bool isRegex() const { return (_type == Type::REGEXP); } bool isGeoLoc() const { return (_type == Type::GEO_LOCATION); } + bool isFuzzy() const { return (_type == Type::FUZZYTERM); } bool empty() const { return _term.empty(); } virtual void visitMembers(vespalib::ObjectVisitor &visitor) const; vespalib::string getClassName() const; diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp index 77fc97913a4..6f126c7a3eb 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp @@ -86,6 +86,7 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor case ParseItem::ITEM_SUFFIXTERM: case ParseItem::ITEM_PURE_WEIGHTED_STRING: case ParseItem::ITEM_PURE_WEIGHTED_LONG: + case ParseItem::ITEM_FUZZY: { vespalib::string index = queryRep.getIndexName(); if (index.empty()) { @@ -116,6 +117,9 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor case ParseItem::ITEM_SUFFIXTERM: sTerm = TermType::SUFFIXTERM; break; + case ParseItem::ITEM_FUZZY: + sTerm = TermType::FUZZYTERM; + break; default: break; } diff --git a/searchlib/src/vespa/searchlib/query/tree/customtypevisitor.h b/searchlib/src/vespa/searchlib/query/tree/customtypevisitor.h index 9f29c34aa05..abc48db9d87 100644 --- a/searchlib/src/vespa/searchlib/query/tree/customtypevisitor.h +++ b/searchlib/src/vespa/searchlib/query/tree/customtypevisitor.h @@ -52,6 +52,7 @@ public: virtual void visit(typename NodeTypes::NearestNeighborTerm &) = 0; virtual void visit(typename NodeTypes::TrueQueryNode &) = 0; virtual void visit(typename NodeTypes::FalseQueryNode &) = 0; + virtual void visit(typename NodeTypes::FuzzyTerm &) = 0; private: // Route QueryVisit requests to the correct custom type. @@ -81,6 +82,7 @@ private: typedef typename NodeTypes::NearestNeighborTerm TNearestNeighborTerm; typedef typename NodeTypes::TrueQueryNode TTrueQueryNode; typedef typename NodeTypes::FalseQueryNode TFalseQueryNode; + typedef typename NodeTypes::FuzzyTerm TFuzzyTerm; void visit(And &n) override { visit(static_cast<TAnd&>(n)); } void visit(AndNot &n) override { visit(static_cast<TAndNot&>(n)); } @@ -107,6 +109,7 @@ private: void visit(NearestNeighborTerm &n) override { visit(static_cast<TNearestNeighborTerm&>(n)); } void visit(TrueQueryNode &n) override { visit(static_cast<TTrueQueryNode&>(n)); } void visit(FalseQueryNode &n) override { visit(static_cast<TFalseQueryNode&>(n)); } + void visit(FuzzyTerm &n) override { visit(static_cast<TFuzzyTerm &>(n)); } }; } diff --git a/searchlib/src/vespa/searchlib/query/tree/querybuilder.h b/searchlib/src/vespa/searchlib/query/tree/querybuilder.h index 9631e2afded..ee3a944cce1 100644 --- a/searchlib/src/vespa/searchlib/query/tree/querybuilder.h +++ b/searchlib/src/vespa/searchlib/query/tree/querybuilder.h @@ -220,6 +220,12 @@ create_nearest_neighbor_term(vespalib::stringref query_tensor_name, vespalib::st target_num_hits, allow_approximate, explore_additional_hits, distance_threshold); } +template <class NodeTypes> +typename NodeTypes::FuzzyTerm * +createFuzzyTerm(vespalib::stringref term, vespalib::stringref view, int32_t id, Weight weight) { + return new typename NodeTypes::FuzzyTerm(term, view, id, weight); +} + template <class NodeTypes> class QueryBuilder : public QueryBuilderBase { @@ -327,6 +333,10 @@ public: adjustWeight(weight); return addTerm(createRegExpTerm<NodeTypes>(term, view, id, weight)); } + typename NodeTypes::FuzzyTerm &addFuzzyTerm(stringref term, stringref view, int32_t id, Weight weight) { + adjustWeight(weight); + return addTerm(createFuzzyTerm<NodeTypes>(term, view, id, weight)); + } typename NodeTypes::NearestNeighborTerm &add_nearest_neighbor_term(stringref query_tensor_name, stringref field_name, int32_t id, Weight weight, uint32_t target_num_hits, bool allow_approximate, uint32_t explore_additional_hits, diff --git a/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h b/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h index 3fb72f93b23..ecaee350b21 100644 --- a/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h +++ b/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h @@ -193,6 +193,12 @@ private: void visit(FalseQueryNode &) override { _builder.add_false_node(); } + + void visit(FuzzyTerm &node) override { + replicate(node, _builder.addFuzzyTerm( + node.getTerm(), node.getView(), + node.getId(), node.getWeight())); + } }; } diff --git a/searchlib/src/vespa/searchlib/query/tree/queryvisitor.h b/searchlib/src/vespa/searchlib/query/tree/queryvisitor.h index 02887975085..90faa25bd99 100644 --- a/searchlib/src/vespa/searchlib/query/tree/queryvisitor.h +++ b/searchlib/src/vespa/searchlib/query/tree/queryvisitor.h @@ -29,6 +29,7 @@ class SameElement; class NearestNeighborTerm; class TrueQueryNode; class FalseQueryNode; +class FuzzyTerm; struct QueryVisitor { virtual ~QueryVisitor() {} @@ -58,6 +59,7 @@ struct QueryVisitor { virtual void visit(NearestNeighborTerm &) = 0; virtual void visit(TrueQueryNode &) = 0; virtual void visit(FalseQueryNode &) = 0; + virtual void visit(FuzzyTerm &) = 0; }; } diff --git a/searchlib/src/vespa/searchlib/query/tree/simplequery.cpp b/searchlib/src/vespa/searchlib/query/tree/simplequery.cpp index cad97279b4c..e3cad4ed33a 100644 --- a/searchlib/src/vespa/searchlib/query/tree/simplequery.cpp +++ b/searchlib/src/vespa/searchlib/query/tree/simplequery.cpp @@ -52,4 +52,6 @@ SimpleRegExpTerm::~SimpleRegExpTerm() = default; SimpleNearestNeighborTerm::~SimpleNearestNeighborTerm() = default; +SimpleFuzzyTerm::~SimpleFuzzyTerm() = default; + } diff --git a/searchlib/src/vespa/searchlib/query/tree/simplequery.h b/searchlib/src/vespa/searchlib/query/tree/simplequery.h index 5047e072cb7..00dad2597ce 100644 --- a/searchlib/src/vespa/searchlib/query/tree/simplequery.h +++ b/searchlib/src/vespa/searchlib/query/tree/simplequery.h @@ -152,7 +152,13 @@ struct SimpleNearestNeighborTerm : NearestNeighborTerm { {} ~SimpleNearestNeighborTerm() override; }; - +struct SimpleFuzzyTerm : FuzzyTerm { + SimpleFuzzyTerm(const Type &term, vespalib::stringref view, + int32_t id, Weight weight) + : FuzzyTerm(term, view, id, weight) { + } + ~SimpleFuzzyTerm() override; +}; struct SimpleQueryNodeTypes { using And = SimpleAnd; @@ -180,6 +186,7 @@ struct SimpleQueryNodeTypes { using PredicateQuery = SimplePredicateQuery; using RegExpTerm = SimpleRegExpTerm; using NearestNeighborTerm = SimpleNearestNeighborTerm; + using FuzzyTerm = SimpleFuzzyTerm; }; } diff --git a/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp index d45a72d316a..f36410d1845 100644 --- a/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp +++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp @@ -278,6 +278,10 @@ class QueryNodeConverter : public QueryVisitor { createTerm(node, ParseItem::ITEM_REGEXP); } + void visit(FuzzyTerm &node) override { + createTerm(node, ParseItem::ITEM_FUZZY); + } + void visit(NearestNeighborTerm &node) override { createTermNode(node, ParseItem::ITEM_NEAREST_NEIGHBOR); appendString(node.get_query_tensor_name()); diff --git a/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h b/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h index 5a6f315205e..a5f3be3e618 100644 --- a/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h +++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h @@ -197,6 +197,8 @@ private: t = &builder.addPredicateQuery(queryStack.getPredicateQueryTerm(), view, id, weight); } else if (type == ParseItem::ITEM_REGEXP) { t = &builder.addRegExpTerm(term, view, id, weight); + } else if (type == ParseItem::ITEM_FUZZY) { + t = &builder.addFuzzyTerm(term, view, id, weight); } else { vespalib::Issue::report("query builder: Unable to create query tree from stack dump. node type = %d.", type); } diff --git a/searchlib/src/vespa/searchlib/query/tree/templatetermvisitor.h b/searchlib/src/vespa/searchlib/query/tree/templatetermvisitor.h index fc3570f44d8..a6eae257afd 100644 --- a/searchlib/src/vespa/searchlib/query/tree/templatetermvisitor.h +++ b/searchlib/src/vespa/searchlib/query/tree/templatetermvisitor.h @@ -32,6 +32,7 @@ class TemplateTermVisitor : public CustomTypeTermVisitor<NodeTypes> { void visit(typename NodeTypes::PredicateQuery &n) override { myVisit(n); } void visit(typename NodeTypes::RegExpTerm &n) override { myVisit(n); } void visit(typename NodeTypes::NearestNeighborTerm &n) override { myVisit(n); } + void visit(typename NodeTypes::FuzzyTerm &n) override { myVisit(n); } // Phrases are terms with children. This visitor will not visit // the phrase's children, unless this member function is diff --git a/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp b/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp index dcf0533ff7a..6e889e76f21 100644 --- a/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp +++ b/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp @@ -24,6 +24,7 @@ RegExpTerm::~RegExpTerm() = default; WeightedSetTerm::~WeightedSetTerm() = default; DotProduct::~DotProduct() = default; WandTerm::~WandTerm() = default; +FuzzyTerm::~FuzzyTerm() = default; namespace { diff --git a/searchlib/src/vespa/searchlib/query/tree/termnodes.h b/searchlib/src/vespa/searchlib/query/tree/termnodes.h index a728b674999..7aa867e25ed 100644 --- a/searchlib/src/vespa/searchlib/query/tree/termnodes.h +++ b/searchlib/src/vespa/searchlib/query/tree/termnodes.h @@ -115,6 +115,18 @@ public: virtual ~RegExpTerm() = 0; }; +//----------------------------------------------------------------------------- + +class FuzzyTerm : public QueryNodeMixin<FuzzyTerm, StringBase> +{ +public: + FuzzyTerm(const Type &term, vespalib::stringref view, + int32_t id, Weight weight) + : QueryNodeMixinType(term, view, id, weight) + {} + virtual ~FuzzyTerm() = 0; +}; + /** * Term matching the K nearest neighbors in a multi-dimensional vector space. * diff --git a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h index 86cde64a197..30c7e1722fb 100644 --- a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h +++ b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h @@ -73,6 +73,7 @@ public: void visit(query::SuffixTerm &n) override = 0; void visit(query::RegExpTerm &n) override = 0; void visit(query::NearestNeighborTerm &n) override = 0; + void visit(query::FuzzyTerm &n) override = 0; void visit(query::TrueQueryNode &) final override; void visit(query::FalseQueryNode &) final override; diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_searchable.cpp b/searchlib/src/vespa/searchlib/queryeval/fake_searchable.cpp index 519f6e81774..614f219cbcb 100644 --- a/searchlib/src/vespa/searchlib/queryeval/fake_searchable.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/fake_searchable.cpp @@ -6,6 +6,7 @@ #include "create_blueprint_visitor_helper.h" #include <vespa/vespalib/objects/visit.h> +using search::query::FuzzyTerm; using search::query::LocationTerm; using search::query::NearestNeighborTerm; using search::query::Node; @@ -66,6 +67,7 @@ public: void visit(PredicateQuery &n) override { visitTerm(n); } void visit(RegExpTerm &n) override { visitTerm(n); } void visit(NearestNeighborTerm &n) override { visitTerm(n); } + void visit(FuzzyTerm &n) override { visitTerm(n); } }; template <class Map> diff --git a/searchlib/src/vespa/searchlib/queryeval/termasstring.cpp b/searchlib/src/vespa/searchlib/queryeval/termasstring.cpp index 08c0280ee68..63bf16e6016 100644 --- a/searchlib/src/vespa/searchlib/queryeval/termasstring.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/termasstring.cpp @@ -17,6 +17,7 @@ using search::query::AndNot; using search::query::DotProduct; using search::query::Equiv; using search::query::FalseQueryNode; +using search::query::FuzzyTerm; using search::query::LocationTerm; using search::query::Near; using search::query::NearestNeighborTerm; @@ -105,6 +106,7 @@ struct TermAsStringVisitor : public QueryVisitor { void visit(SubstringTerm &n) override {visitTerm(n); } void visit(SuffixTerm &n) override {visitTerm(n); } void visit(RegExpTerm &n) override {visitTerm(n); } + void visit(FuzzyTerm &n) override { visitTerm(n); } void visit(PredicateQuery &) override {illegalVisit(); } void visit(NearestNeighborTerm &) override { illegalVisit(); } void visit(TrueQueryNode &) override { illegalVisit(); } |