summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorAlexey Chernyshev <aleksei@spotify.com>2022-03-10 16:33:07 +0100
committerAlexey Chernyshev <aleksei@spotify.com>2022-03-23 16:20:59 +0100
commitd9805209e3b0e33be3c0cc454c4604043663c1c4 (patch)
tree7446c79f68acd8775233ace4d5a70058f90c8406 /searchlib
parenta2b1e6654cabc90ddf7422e58adf641876e5201c (diff)
Introducing fuzzy search
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp72
-rw-r--r--searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp20
-rw-r--r--searchlib/src/tests/query/customtypevisitor_test.cpp4
-rw-r--r--searchlib/src/tests/query/query_visitor_test.cpp2
-rw-r--r--searchlib/src/tests/query/querybuilder_test.cpp16
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp4
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h6
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp4
-rw-r--r--searchlib/src/vespa/searchlib/attribute/stringbase.cpp9
-rw-r--r--searchlib/src/vespa/searchlib/attribute/stringbase.h7
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/diskindex.cpp1
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/parsequery/parse.h5
-rw-r--r--searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp1
-rw-r--r--searchlib/src/vespa/searchlib/query/query_term_simple.h4
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/querynode.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/customtypevisitor.h3
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/querybuilder.h10
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/queryreplicator.h6
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/queryvisitor.h2
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/simplequery.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/simplequery.h9
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h2
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/templatetermvisitor.h1
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/termnodes.cpp1
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/termnodes.h12
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h1
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/fake_searchable.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/termasstring.cpp2
31 files changed, 211 insertions, 9 deletions
diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
index 4f037415b35..65de302ae04 100644
--- a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
+++ b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
@@ -242,6 +242,12 @@ private:
void testPrefixSearch(const AttributePtr & ptr);
void testPrefixSearch();
+ // test fuzzy search
+ void performFuzzySearch(const StringAttribute & vec, const vespalib::string & term,
+ const DocSet & expected, TermType termType);
+ void testFuzzySearch(const AttributePtr & ptr);
+ void testFuzzySearch();
+
// test that search is working after clear doc
template <typename VectorType, typename ValueType>
void requireThatSearchIsWorkingAfterClearDoc(const vespalib::string & name, const Config & cfg,
@@ -402,6 +408,7 @@ SearchContextTest::buildTermQuery(std::vector<char> & buffer, const vespalib::st
switch (termType) {
case TermType::PREFIXTERM: buffer[p++] = ParseItem::ITEM_PREFIXTERM; break;
case TermType::REGEXP: buffer[p++] = ParseItem::ITEM_REGEXP; break;
+ case TermType::FUZZYTERM: buffer[p++] = ParseItem::ITEM_FUZZY; break;
default:
buffer[p++] = ParseItem::ITEM_TERM;
break;
@@ -1498,6 +1505,70 @@ SearchContextTest::testPrefixSearch()
}
}
+//-----------------------------------------------------------------------------
+// Test fuzzy search: performFuzzySearch() forwards its arguments unchanged to performSearch()
+//-----------------------------------------------------------------------------
+
+void
+SearchContextTest::performFuzzySearch(const StringAttribute & vec, const vespalib::string & term,
+ const DocSet & expected, TermType termType)
+{
+ performSearch(vec, term, expected, termType);
+}
+
+void
+SearchContextTest::testFuzzySearch(const AttributePtr & ptr) // stores one word in two docs (two casings) and runs three fuzzy queries, each in two casings
+{
+ LOG(info, "testFuzzySearch: vector '%s'", ptr->getName().c_str());
+
+ auto & vec = dynamic_cast<StringAttribute &>(*ptr.get()); // reference cast: throws std::bad_cast if ptr is not a StringAttribute
+
+ uint32_t numDocs = 2;
+ addDocs(*ptr.get(), numDocs);
+
+ const char * strings [] = {"fuzzysearch", "FUZZYSEARCH"}; // values written to docs 1 and 2 (same word, different case)
+ const char * terms[][2] = { // query terms: row i pairs with expected[i]; columns are lower/upper case spellings
+ {"fuzzysearch", "FUZZYSEARCH"}, // identical to the stored word
+ {"fuzzysearck", "FUZZYSEARCK"}, // last character differs
+ {"fuzzysekkkk", "FUZZYSEKKKK"} // last four characters differ
+ };
+
+ for (uint32_t doc = 1; doc < numDocs + 1; ++doc) { // doc ids run 1..numDocs; strings[] is 0-based, hence doc - 1
+ ASSERT_TRUE(doc < vec.getNumDocs());
+ EXPECT_TRUE(vec.update(doc, strings[doc - 1]));
+ }
+
+ ptr->commit(true);
+
+ std::vector<DocSet> expected; // expected[i] is the hit set required for both spellings in terms[i]
+ DocSet empty; // NOTE(review): never referenced in this function - looks like a leftover
+ {
+ uint32_t docs[] = {1, 2};
+ expected.emplace_back(docs, docs + 2); // terms[0] (exact spelling): docs 1 and 2
+ }
+ {
+ uint32_t docs[] = {1, 2};
+ expected.emplace_back(docs, docs + 2); // terms[1] (one character off): docs 1 and 2
+ }
+
+ expected.emplace_back(); // terms[2] (four characters off): default-constructed DocSet, presumably empty, i.e. no hits expected
+
+ for (uint32_t i = 0; i < 3; ++i) { // i: row of terms[] and index into expected[]
+ for (uint32_t j = 0; j < 2; ++j) { // j: lower-case / upper-case spelling of the term
+ performFuzzySearch(vec, terms[i][j], expected[i], TermType::FUZZYTERM);
+ }
+ }
+}
+
+void
+SearchContextTest::testFuzzySearch() // runs the per-attribute fuzzy test once for every entry in _stringCfg
+{
+ for (const auto & cfg : _stringCfg) {
+ testFuzzySearch(AttributeFactory::createAttribute(cfg.first, cfg.second)); // a fresh attribute is created from each (first, second) pair
+ }
+}
+
+
template <typename VectorType, typename ValueType>
void
SearchContextTest::requireThatSearchIsWorkingAfterClearDoc(const vespalib::string & name,
@@ -2028,6 +2099,7 @@ SearchContextTest::Main()
testPrefixSearch();
testSearchIteratorConformance();
testSearchIteratorUnpacking();
+ testFuzzySearch();
TEST_DO(requireThatSearchIsWorkingAfterClearDoc());
TEST_DO(requireThatSearchIsWorkingAfterLoadAndClearDoc());
TEST_DO(requireThatSearchIsWorkingAfterUpdates());
diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
index 8a17114057c..2f0f0d5a6ae 100644
--- a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
+++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
@@ -386,8 +386,8 @@ testSingleValue(Attribute & svsa, Config &cfg)
TEST("testSingleValue")
{
EXPECT_EQUAL(24u, sizeof(AttributeVector::SearchContext));
- EXPECT_EQUAL(24u, sizeof(StringSearchHelper));
- EXPECT_EQUAL(56u, sizeof(SingleValueStringAttribute::StringSingleImplSearchContext));
+ EXPECT_EQUAL(32u, sizeof(StringSearchHelper));
+ EXPECT_EQUAL(64u, sizeof(SingleValueStringAttribute::StringSingleImplSearchContext));
{
Config cfg(BasicType::STRING, CollectionType::SINGLE);
SingleValueStringAttribute svsa("svsa", cfg);
@@ -494,4 +494,20 @@ TEST("test cased regex match") {
EXPECT_FALSE(helper.isMatch("xY"));
}
+TEST("test fuzzy match") { // fuzzy-typed, uncased term "xyz": checks the helper's flags and which candidates it accepts
+ QueryTermUCS4 xyz("xyz", QueryTermSimple::Type::FUZZYTERM);
+ StringSearchHelper helper(xyz, false); // second argument is the `cased` flag
+ EXPECT_FALSE(helper.isCased());
+ EXPECT_FALSE(helper.isPrefix());
+ EXPECT_FALSE(helper.isRegex());
+ EXPECT_TRUE(helper.isFuzzy());
+ EXPECT_TRUE(helper.isMatch("xyz")); // identical to the term
+ EXPECT_TRUE(helper.isMatch("xyza")); // one extra character
+ EXPECT_TRUE(helper.isMatch("xyv")); // one character replaced
+ EXPECT_TRUE(helper.isMatch("xy")); // one character missing
+ EXPECT_TRUE(helper.isMatch("x")); // two characters missing
+ EXPECT_TRUE(helper.isMatch("xvv")); // two characters replaced
+ EXPECT_FALSE(helper.isMatch("vvv")); // all three replaced: rejected. NOTE(review): suggests at most two edits are accepted - confirm against vespalib::Fuzzy
+}
+
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/query/customtypevisitor_test.cpp b/searchlib/src/tests/query/customtypevisitor_test.cpp
index 35280fb0bd8..0e8155e23c3 100644
--- a/searchlib/src/tests/query/customtypevisitor_test.cpp
+++ b/searchlib/src/tests/query/customtypevisitor_test.cpp
@@ -36,6 +36,7 @@ struct MyRangeTerm : InitTerm<RangeTerm> {};
struct MyStringTerm : InitTerm<StringTerm> {};
struct MySubstrTerm : InitTerm<SubstringTerm> {};
struct MySuffixTerm : InitTerm<SuffixTerm> {};
+struct MyFuzzyTerm : InitTerm<FuzzyTerm> {};
struct MyWeakAnd : WeakAnd { MyWeakAnd() : WeakAnd(1234, "view") {} };
struct MyWeightedSetTerm : WeightedSetTerm { MyWeightedSetTerm() : WeightedSetTerm(0, "view", 0, Weight(42)) {} };
struct MyDotProduct : DotProduct { MyDotProduct() : DotProduct(0, "view", 0, Weight(42)) {} };
@@ -65,6 +66,7 @@ struct MyQueryNodeTypes {
typedef MyStringTerm StringTerm;
typedef MySubstrTerm SubstringTerm;
typedef MySuffixTerm SuffixTerm;
+ typedef MyFuzzyTerm FuzzyTerm;
typedef MyWeakAnd WeakAnd;
typedef MyWeightedSetTerm WeightedSetTerm;
typedef MyDotProduct DotProduct;
@@ -112,6 +114,7 @@ public:
void visit(MyNearestNeighborTerm &) override { setVisited<MyNearestNeighborTerm>(); }
void visit(MyTrue &) override { setVisited<MyTrue>(); }
void visit(MyFalse &) override { setVisited<MyFalse>(); }
+ void visit(MyFuzzyTerm &) override { setVisited<MyFuzzyTerm>(); }
};
template <class T>
@@ -148,6 +151,7 @@ TEST("customtypevisitor_test") {
requireThatNodeIsVisited<MyNearestNeighborTerm>();
requireThatNodeIsVisited<MyTrue>();
requireThatNodeIsVisited<MyFalse>();
+ requireThatNodeIsVisited<MyFuzzyTerm>();
}
} // namespace
diff --git a/searchlib/src/tests/query/query_visitor_test.cpp b/searchlib/src/tests/query/query_visitor_test.cpp
index 9f73c1ff585..f770213e8e5 100644
--- a/searchlib/src/tests/query/query_visitor_test.cpp
+++ b/searchlib/src/tests/query/query_visitor_test.cpp
@@ -49,6 +49,7 @@ public:
void visit(NearestNeighborTerm &) override { isVisited<NearestNeighborTerm>() = true; }
void visit(TrueQueryNode &) override { isVisited<TrueQueryNode>() = true; }
void visit(FalseQueryNode &) override { isVisited<FalseQueryNode>() = true; }
+ void visit(FuzzyTerm &) override { isVisited<FuzzyTerm>() = true; }
};
template <class T>
@@ -85,6 +86,7 @@ TEST("requireThatAllNodesCanBeVisited") {
checkVisit<NearestNeighborTerm>(new SimpleNearestNeighborTerm("query_tensor", "doc_tensor", 0, Weight(0), 123, true, 321, 100100.25));
checkVisit<TrueQueryNode>(new SimpleTrue());
checkVisit<FalseQueryNode>(new SimpleFalse());
+ checkVisit<FuzzyTerm>(new SimpleFuzzyTerm("t", "field", 0, Weight(0)));
}
} // namespace
diff --git a/searchlib/src/tests/query/querybuilder_test.cpp b/searchlib/src/tests/query/querybuilder_test.cpp
index 93cfad27742..2ea566027c4 100644
--- a/searchlib/src/tests/query/querybuilder_test.cpp
+++ b/searchlib/src/tests/query/querybuilder_test.cpp
@@ -47,7 +47,7 @@ PredicateQueryTerm::UP getPredicateQueryTerm() {
template <class NodeTypes>
Node::UP createQueryTree() {
QueryBuilder<NodeTypes> builder;
- builder.addAnd(12);
+ builder.addAnd(13);
{
builder.addRank(2);
{
@@ -115,6 +115,7 @@ Node::UP createQueryTree() {
builder.add_true_node();
builder.add_false_node();
}
+ builder.addFuzzyTerm(str[5], view[5], id[5], weight[5]);
}
Node::UP node = builder.build();
ASSERT_TRUE(node.get());
@@ -179,10 +180,11 @@ void checkQueryTreeTypes(Node *node) {
typedef typename NodeTypes::RegExpTerm RegExpTerm;
typedef typename NodeTypes::TrueQueryNode TrueNode;
typedef typename NodeTypes::FalseQueryNode FalseNode;
+ typedef typename NodeTypes::FuzzyTerm FuzzyTerm;
ASSERT_TRUE(node);
auto* and_node = as_node<And>(node);
- EXPECT_EQUAL(12u, and_node->getChildren().size());
+ EXPECT_EQUAL(13u, and_node->getChildren().size());
auto* rank = as_node<Rank>(and_node->getChildren()[0]);
EXPECT_EQUAL(2u, rank->getChildren().size());
@@ -306,6 +308,9 @@ void checkQueryTreeTypes(Node *node) {
auto* false_node = as_node<FalseNode>(and_not->getChildren()[1]);
EXPECT_TRUE(true_node);
EXPECT_TRUE(false_node);
+
+ auto* fuzzy_term = as_node<FuzzyTerm>(and_node->getChildren()[12]);
+ EXPECT_TRUE(checkTerm(fuzzy_term, str[5], view[5], id[5], weight[5]));
}
struct AbstractTypes {
@@ -332,6 +337,7 @@ struct AbstractTypes {
typedef search::query::RegExpTerm RegExpTerm;
typedef search::query::TrueQueryNode TrueQueryNode;
typedef search::query::FalseQueryNode FalseQueryNode;
+ typedef search::query::FuzzyTerm FuzzyTerm;
};
// Builds a tree with simplequery and checks that the results have the
@@ -427,6 +433,11 @@ struct MyNearestNeighborTerm : NearestNeighborTerm {
};
struct MyTrue : TrueQueryNode {};
struct MyFalse : FalseQueryNode {};
+struct MyFuzzyTerm : FuzzyTerm { // custom fuzzy node for this test's MyQueryNodeTypes; adds nothing beyond the base class
+ MyFuzzyTerm(const Type &t, const string &f, int32_t i, Weight w)
+ : FuzzyTerm(t, f, i, w) { // forwards all four arguments unchanged to FuzzyTerm
+ }
+};
struct MyQueryNodeTypes {
typedef MyAnd And;
@@ -454,6 +465,7 @@ struct MyQueryNodeTypes {
typedef MyNearestNeighborTerm NearestNeighborTerm;
typedef MyTrue TrueQueryNode;
typedef MyFalse FalseQueryNode;
+ typedef MyFuzzyTerm FuzzyTerm;
};
TEST("require that Custom Query Trees Can Be Built") {
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index 906400f50a5..f14966dbfc8 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -779,6 +779,8 @@ public:
n.get_distance_threshold(),
getRequestContext().get_attribute_blueprint_params().nearest_neighbor_brute_force_limit));
}
+
+ void visit(query::FuzzyTerm &n) override { visitTerm(n); }
};
template <typename WS>
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
index ca440c2a249..e28b576319f 100644
--- a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
@@ -125,6 +125,10 @@ StringTemplSearchContext(QueryTermSimpleUP qTerm, const AttrType & toBeSearched)
vespalib::string prefix(vespalib::RegexpUtil::get_prefix(this->queryTerm()->getTerm()));
auto comp = enumStore.make_folded_comparator_prefix(prefix.c_str());
lookupRange(comp, comp);
+ } else if (this->isFuzzy()) {
+ vespalib::string prefix(this->getFuzzy().getPrefix());
+ auto comp = enumStore.make_folded_comparator_prefix(prefix.c_str());
+ lookupRange(comp, comp);
} else {
auto comp = enumStore.make_folded_comparator(queryTerm()->getTerm());
lookupTerm(comp);
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
index 97c2c7d2b63..a1d79c6131b 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
@@ -282,6 +282,10 @@ StringPostingSearchContext(QueryTermSimpleUP qTerm, bool useBitVector, const Att
vespalib::string prefix(RegexpUtil::get_prefix(this->queryTerm()->getTerm()));
auto comp = _enumStore.make_folded_comparator_prefix(prefix.c_str());
this->lookupRange(comp, comp);
+ } else if (this->isFuzzy()) {
+ vespalib::string prefix(this->getFuzzy().getPrefix());
+ auto comp = _enumStore.make_folded_comparator_prefix(prefix.c_str());
+ this->lookupRange(comp, comp);
} else {
auto comp = _enumStore.make_folded_comparator(this->queryTerm()->getTerm());
this->lookupTerm(comp);
@@ -301,6 +305,8 @@ StringPostingSearchContext<BaseSC, AttrT, DataT>::useThis(const PostingListSearc
: false;
} else if ( this->isCased() ) {
return this->isMatch(_enumStore.get_value(it.getKey().load_acquire()));
+ } else if (this->isFuzzy()) {
+ return this->getFuzzy().isMatch(_enumStore.get_value(it.getKey().load_acquire()));
}
return true;
}
diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp
index 730ad1107a7..a6feadac724 100644
--- a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp
@@ -61,6 +61,10 @@ SingleValueStringAttributeT<B>::StringTemplSearchContext::StringTemplSearchConte
vespalib::string prefix(vespalib::RegexpUtil::get_prefix(this->queryTerm()->getTerm()));
auto comp = enumStore.make_folded_comparator_prefix(prefix.c_str());
lookupRange(comp, comp);
+ } else if (this->isFuzzy()) {
+ vespalib::string prefix(this->getFuzzy().getPrefix());
+ auto comp = enumStore.make_folded_comparator_prefix(prefix.c_str());
+ lookupRange(comp, comp);
} else {
auto comp = enumStore.make_folded_comparator(queryTerm()->getTerm());
lookupTerm(comp);
diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
index 6062c4f2096..9cccce4b19d 100644
--- a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
@@ -18,11 +18,13 @@ namespace search {
StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased)
: _regex(),
+ _fuzzy(),
_term(),
_termLen(),
_isPrefix(term.isPrefix()),
_isRegex(term.isRegex()),
- _isCased(cased)
+ _isCased(cased),
+ _isFuzzy(term.isFuzzy())
{
if (isRegex()) {
if (isCased()) {
@@ -33,6 +35,8 @@ StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased)
} else if (isCased()) {
_term._char = term.getTerm();
_termLen = term.getTermLen();
+ } else if (isFuzzy()) {
+ _fuzzy = vespalib::Fuzzy::from_term(term.getTerm());
} else {
term.term(_term._ucs4);
}
@@ -54,6 +58,9 @@ StringSearchHelper::isMatch(const char *src) const {
int res = strncmp(_term._char, src, _termLen);
return (res == 0) && (src[_termLen] == 0 || isPrefix());
}
+ if (__builtin_expect(isFuzzy(), false)) {
+ return getFuzzy().isMatch(src);
+ }
vespalib::Utf8ReaderForZTS u8reader(src);
uint32_t j = 0;
uint32_t val;
diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h
index 495427d3e45..175f56f8b45 100644
--- a/searchlib/src/vespa/searchlib/attribute/stringbase.h
+++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h
@@ -10,6 +10,7 @@
#include <vespa/vespalib/regex/regex.h>
#include <vespa/vespalib/text/lowercase.h>
#include <vespa/vespalib/text/utf8.h>
+#include <vespa/vespalib/fuzzy/fuzzy.h>
#include <optional>
namespace search {
@@ -26,9 +27,12 @@ public:
bool isPrefix() const { return _isPrefix; }
bool isRegex() const { return _isRegex; }
bool isCased() const { return _isCased; }
+ bool isFuzzy() const { return _isFuzzy; }
const vespalib::Regex & getRegex() const { return _regex; }
+ const vespalib::Fuzzy & getFuzzy() const { return _fuzzy; }
private:
vespalib::Regex _regex;
+ vespalib::Fuzzy _fuzzy;
union {
const ucs4_t *_ucs4;
const char *_char;
@@ -37,6 +41,7 @@ private:
bool _isPrefix;
bool _isRegex;
bool _isCased;
+ bool _isFuzzy;
};
class ReaderBase;
@@ -126,7 +131,9 @@ protected:
bool isPrefix() const { return _helper.isPrefix(); }
bool isRegex() const { return _helper.isRegex(); }
bool isCased() const { return _helper.isCased(); }
+ bool isFuzzy() const { return _helper.isFuzzy(); }
const vespalib::Regex & getRegex() const { return _helper.getRegex(); }
+ const vespalib::Fuzzy & getFuzzy() const { return _helper.getFuzzy(); }
class CollectHitCount {
public:
diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp
index 7cde1102bc1..eb8054317dc 100644
--- a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp
@@ -409,6 +409,7 @@ public:
void visit(RegExpTerm &n) override { visitTerm(n); }
void visit(PredicateQuery &n) override { not_supported(n); }
void visit(NearestNeighborTerm &n) override { not_supported(n); }
+ void visit(FuzzyTerm &n) override { visitTerm(n); }
};
Blueprint::UP
diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp
index 330320d5047..f8ad85859fa 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp
@@ -28,6 +28,7 @@ using index::IFieldLengthInspector;
using index::IndexBuilder;
using index::Schema;
using index::SchemaUtil;
+using query::FuzzyTerm;
using query::LocationTerm;
using query::NearestNeighborTerm;
using query::Node;
@@ -168,6 +169,7 @@ public:
void visit(SubstringTerm &n) override { visitTerm(n); }
void visit(SuffixTerm &n) override { visitTerm(n); }
void visit(RegExpTerm &n) override { visitTerm(n); }
+ void visit(FuzzyTerm &n) override { visitTerm(n); }
void visit(PredicateQuery &n) override { not_supported(n); }
void visit(NearestNeighborTerm &n) override { not_supported(n); }
diff --git a/searchlib/src/vespa/searchlib/parsequery/parse.h b/searchlib/src/vespa/searchlib/parsequery/parse.h
index 34ea692c370..0d665d1f04d 100644
--- a/searchlib/src/vespa/searchlib/parsequery/parse.h
+++ b/searchlib/src/vespa/searchlib/parsequery/parse.h
@@ -56,8 +56,9 @@ public:
ITEM_GEO_LOCATION_TERM = 27,
ITEM_TRUE = 28,
ITEM_FALSE = 29,
- ITEM_MAX = 30, // Indicates how long tables must be.
- ITEM_UNDEF = 31,
+ ITEM_FUZZY = 30,
+ ITEM_MAX = 31, // Indicates how long tables must be.
+ ITEM_UNDEF = 32,
};
/** A tag identifying the origin of this query node.
diff --git a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp
index aa13c93810a..85b55284b35 100644
--- a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp
+++ b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp
@@ -170,6 +170,7 @@ bool SimpleQueryStackDumpIterator::readNext() {
case ParseItem::ITEM_EXACTSTRINGTERM:
case ParseItem::ITEM_SUFFIXTERM:
case ParseItem::ITEM_REGEXP:
+ case ParseItem::ITEM_FUZZY:
_curr_index_name = read_stringref(p);
_curr_term = read_stringref(p);
_currArity = 0;
diff --git a/searchlib/src/vespa/searchlib/query/query_term_simple.h b/searchlib/src/vespa/searchlib/query/query_term_simple.h
index 433ab7d56dd..0d5dd116826 100644
--- a/searchlib/src/vespa/searchlib/query/query_term_simple.h
+++ b/searchlib/src/vespa/searchlib/query/query_term_simple.h
@@ -22,7 +22,8 @@ public:
EXACTSTRINGTERM = 3,
SUFFIXTERM = 4,
REGEXP = 5,
- GEO_LOCATION = 6
+ GEO_LOCATION = 6,
+ FUZZYTERM = 7
};
template <typename N>
@@ -61,6 +62,7 @@ public:
bool isWord() const { return (_type == Type::WORD); }
bool isRegex() const { return (_type == Type::REGEXP); }
bool isGeoLoc() const { return (_type == Type::GEO_LOCATION); }
+ bool isFuzzy() const { return (_type == Type::FUZZYTERM); }
bool empty() const { return _term.empty(); }
virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
vespalib::string getClassName() const;
diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
index 77fc97913a4..6f126c7a3eb 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
@@ -86,6 +86,7 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor
case ParseItem::ITEM_SUFFIXTERM:
case ParseItem::ITEM_PURE_WEIGHTED_STRING:
case ParseItem::ITEM_PURE_WEIGHTED_LONG:
+ case ParseItem::ITEM_FUZZY:
{
vespalib::string index = queryRep.getIndexName();
if (index.empty()) {
@@ -116,6 +117,9 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor
case ParseItem::ITEM_SUFFIXTERM:
sTerm = TermType::SUFFIXTERM;
break;
+ case ParseItem::ITEM_FUZZY:
+ sTerm = TermType::FUZZYTERM;
+ break;
default:
break;
}
diff --git a/searchlib/src/vespa/searchlib/query/tree/customtypevisitor.h b/searchlib/src/vespa/searchlib/query/tree/customtypevisitor.h
index 9f29c34aa05..abc48db9d87 100644
--- a/searchlib/src/vespa/searchlib/query/tree/customtypevisitor.h
+++ b/searchlib/src/vespa/searchlib/query/tree/customtypevisitor.h
@@ -52,6 +52,7 @@ public:
virtual void visit(typename NodeTypes::NearestNeighborTerm &) = 0;
virtual void visit(typename NodeTypes::TrueQueryNode &) = 0;
virtual void visit(typename NodeTypes::FalseQueryNode &) = 0;
+ virtual void visit(typename NodeTypes::FuzzyTerm &) = 0;
private:
// Route QueryVisit requests to the correct custom type.
@@ -81,6 +82,7 @@ private:
typedef typename NodeTypes::NearestNeighborTerm TNearestNeighborTerm;
typedef typename NodeTypes::TrueQueryNode TTrueQueryNode;
typedef typename NodeTypes::FalseQueryNode TFalseQueryNode;
+ typedef typename NodeTypes::FuzzyTerm TFuzzyTerm;
void visit(And &n) override { visit(static_cast<TAnd&>(n)); }
void visit(AndNot &n) override { visit(static_cast<TAndNot&>(n)); }
@@ -107,6 +109,7 @@ private:
void visit(NearestNeighborTerm &n) override { visit(static_cast<TNearestNeighborTerm&>(n)); }
void visit(TrueQueryNode &n) override { visit(static_cast<TTrueQueryNode&>(n)); }
void visit(FalseQueryNode &n) override { visit(static_cast<TFalseQueryNode&>(n)); }
+ void visit(FuzzyTerm &n) override { visit(static_cast<TFuzzyTerm &>(n)); }
};
}
diff --git a/searchlib/src/vespa/searchlib/query/tree/querybuilder.h b/searchlib/src/vespa/searchlib/query/tree/querybuilder.h
index 9631e2afded..ee3a944cce1 100644
--- a/searchlib/src/vespa/searchlib/query/tree/querybuilder.h
+++ b/searchlib/src/vespa/searchlib/query/tree/querybuilder.h
@@ -220,6 +220,12 @@ create_nearest_neighbor_term(vespalib::stringref query_tensor_name, vespalib::st
target_num_hits, allow_approximate, explore_additional_hits,
distance_threshold);
}
+template <class NodeTypes> // NodeTypes supplies the concrete FuzzyTerm node class to instantiate
+typename NodeTypes::FuzzyTerm *
+createFuzzyTerm(vespalib::stringref term, vespalib::stringref view, int32_t id, Weight weight) { // factory for a fuzzy term node
+ return new typename NodeTypes::FuzzyTerm(term, view, id, weight); // raw new: NOTE(review): the caller presumably takes ownership - confirm at call sites
+}
+
template <class NodeTypes>
class QueryBuilder : public QueryBuilderBase {
@@ -327,6 +333,10 @@ public:
adjustWeight(weight);
return addTerm(createRegExpTerm<NodeTypes>(term, view, id, weight));
}
+ typename NodeTypes::FuzzyTerm &addFuzzyTerm(stringref term, stringref view, int32_t id, Weight weight) { // adds a fuzzy term node to the query under construction and returns a reference to it
+ adjustWeight(weight); // called before the node is created, so the node is built with whatever weight holds afterwards
+ return addTerm(createFuzzyTerm<NodeTypes>(term, view, id, weight));
+ }
typename NodeTypes::NearestNeighborTerm &add_nearest_neighbor_term(stringref query_tensor_name, stringref field_name,
int32_t id, Weight weight, uint32_t target_num_hits,
bool allow_approximate, uint32_t explore_additional_hits,
diff --git a/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h b/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h
index 3fb72f93b23..ecaee350b21 100644
--- a/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h
+++ b/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h
@@ -193,6 +193,12 @@ private:
void visit(FalseQueryNode &) override {
_builder.add_false_node();
}
+
+ void visit(FuzzyTerm &node) override { // replicates a fuzzy term: adds a node with the same term, view, id and weight to _builder
+ replicate(node, _builder.addFuzzyTerm( // the original node and the newly added node are both handed to replicate()
+ node.getTerm(), node.getView(),
+ node.getId(), node.getWeight()));
+ }
};
}
diff --git a/searchlib/src/vespa/searchlib/query/tree/queryvisitor.h b/searchlib/src/vespa/searchlib/query/tree/queryvisitor.h
index 02887975085..90faa25bd99 100644
--- a/searchlib/src/vespa/searchlib/query/tree/queryvisitor.h
+++ b/searchlib/src/vespa/searchlib/query/tree/queryvisitor.h
@@ -29,6 +29,7 @@ class SameElement;
class NearestNeighborTerm;
class TrueQueryNode;
class FalseQueryNode;
+class FuzzyTerm;
struct QueryVisitor {
virtual ~QueryVisitor() {}
@@ -58,6 +59,7 @@ struct QueryVisitor {
virtual void visit(NearestNeighborTerm &) = 0;
virtual void visit(TrueQueryNode &) = 0;
virtual void visit(FalseQueryNode &) = 0;
+ virtual void visit(FuzzyTerm &) = 0;
};
}
diff --git a/searchlib/src/vespa/searchlib/query/tree/simplequery.cpp b/searchlib/src/vespa/searchlib/query/tree/simplequery.cpp
index cad97279b4c..e3cad4ed33a 100644
--- a/searchlib/src/vespa/searchlib/query/tree/simplequery.cpp
+++ b/searchlib/src/vespa/searchlib/query/tree/simplequery.cpp
@@ -52,4 +52,6 @@ SimpleRegExpTerm::~SimpleRegExpTerm() = default;
SimpleNearestNeighborTerm::~SimpleNearestNeighborTerm() = default;
+SimpleFuzzyTerm::~SimpleFuzzyTerm() = default;
+
}
diff --git a/searchlib/src/vespa/searchlib/query/tree/simplequery.h b/searchlib/src/vespa/searchlib/query/tree/simplequery.h
index 5047e072cb7..00dad2597ce 100644
--- a/searchlib/src/vespa/searchlib/query/tree/simplequery.h
+++ b/searchlib/src/vespa/searchlib/query/tree/simplequery.h
@@ -152,7 +152,13 @@ struct SimpleNearestNeighborTerm : NearestNeighborTerm {
{}
~SimpleNearestNeighborTerm() override;
};
-
+struct SimpleFuzzyTerm : FuzzyTerm { // concrete FuzzyTerm node; exposed as SimpleQueryNodeTypes::FuzzyTerm
+ SimpleFuzzyTerm(const Type &term, vespalib::stringref view,
+ int32_t id, Weight weight)
+ : FuzzyTerm(term, view, id, weight) { // forwards all arguments unchanged to the base class
+ }
+ ~SimpleFuzzyTerm() override; // declared here only; defined out of line (defaulted in simplequery.cpp)
+};
struct SimpleQueryNodeTypes {
using And = SimpleAnd;
@@ -180,6 +186,7 @@ struct SimpleQueryNodeTypes {
using PredicateQuery = SimplePredicateQuery;
using RegExpTerm = SimpleRegExpTerm;
using NearestNeighborTerm = SimpleNearestNeighborTerm;
+ using FuzzyTerm = SimpleFuzzyTerm;
};
}
diff --git a/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp
index d45a72d316a..f36410d1845 100644
--- a/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp
+++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp
@@ -278,6 +278,10 @@ class QueryNodeConverter : public QueryVisitor {
createTerm(node, ParseItem::ITEM_REGEXP);
}
+ void visit(FuzzyTerm &node) override {
+ createTerm(node, ParseItem::ITEM_FUZZY);
+ }
+
void visit(NearestNeighborTerm &node) override {
createTermNode(node, ParseItem::ITEM_NEAREST_NEIGHBOR);
appendString(node.get_query_tensor_name());
diff --git a/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h b/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h
index 5a6f315205e..a5f3be3e618 100644
--- a/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h
+++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h
@@ -197,6 +197,8 @@ private:
t = &builder.addPredicateQuery(queryStack.getPredicateQueryTerm(), view, id, weight);
} else if (type == ParseItem::ITEM_REGEXP) {
t = &builder.addRegExpTerm(term, view, id, weight);
+ } else if (type == ParseItem::ITEM_FUZZY) {
+ t = &builder.addFuzzyTerm(term, view, id, weight);
} else {
vespalib::Issue::report("query builder: Unable to create query tree from stack dump. node type = %d.", type);
}
diff --git a/searchlib/src/vespa/searchlib/query/tree/templatetermvisitor.h b/searchlib/src/vespa/searchlib/query/tree/templatetermvisitor.h
index fc3570f44d8..a6eae257afd 100644
--- a/searchlib/src/vespa/searchlib/query/tree/templatetermvisitor.h
+++ b/searchlib/src/vespa/searchlib/query/tree/templatetermvisitor.h
@@ -32,6 +32,7 @@ class TemplateTermVisitor : public CustomTypeTermVisitor<NodeTypes> {
void visit(typename NodeTypes::PredicateQuery &n) override { myVisit(n); }
void visit(typename NodeTypes::RegExpTerm &n) override { myVisit(n); }
void visit(typename NodeTypes::NearestNeighborTerm &n) override { myVisit(n); }
+ void visit(typename NodeTypes::FuzzyTerm &n) override { myVisit(n); }
// Phrases are terms with children. This visitor will not visit
// the phrase's children, unless this member function is
diff --git a/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp b/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp
index dcf0533ff7a..6e889e76f21 100644
--- a/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp
+++ b/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp
@@ -24,6 +24,7 @@ RegExpTerm::~RegExpTerm() = default;
WeightedSetTerm::~WeightedSetTerm() = default;
DotProduct::~DotProduct() = default;
WandTerm::~WandTerm() = default;
+FuzzyTerm::~FuzzyTerm() = default;
namespace {
diff --git a/searchlib/src/vespa/searchlib/query/tree/termnodes.h b/searchlib/src/vespa/searchlib/query/tree/termnodes.h
index a728b674999..7aa867e25ed 100644
--- a/searchlib/src/vespa/searchlib/query/tree/termnodes.h
+++ b/searchlib/src/vespa/searchlib/query/tree/termnodes.h
@@ -115,6 +115,18 @@ public:
virtual ~RegExpTerm() = 0;
};
+//-----------------------------------------------------------------------------
+
+class FuzzyTerm : public QueryNodeMixin<FuzzyTerm, StringBase> // query tree node for a fuzzy term; built on StringBase via QueryNodeMixin
+{
+public:
+ FuzzyTerm(const Type &term, vespalib::stringref view,
+ int32_t id, Weight weight)
+ : QueryNodeMixinType(term, view, id, weight) // all arguments are passed straight to the mixin base
+ {}
+ virtual ~FuzzyTerm() = 0; // pure virtual destructor makes the class abstract; its definition is provided in termnodes.cpp
+};
+
/**
* Term matching the K nearest neighbors in a multi-dimensional vector space.
*
diff --git a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h
index 86cde64a197..30c7e1722fb 100644
--- a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h
+++ b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h
@@ -73,6 +73,7 @@ public:
void visit(query::SuffixTerm &n) override = 0;
void visit(query::RegExpTerm &n) override = 0;
void visit(query::NearestNeighborTerm &n) override = 0;
+ void visit(query::FuzzyTerm &n) override = 0;
void visit(query::TrueQueryNode &) final override;
void visit(query::FalseQueryNode &) final override;
diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_searchable.cpp b/searchlib/src/vespa/searchlib/queryeval/fake_searchable.cpp
index 519f6e81774..614f219cbcb 100644
--- a/searchlib/src/vespa/searchlib/queryeval/fake_searchable.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/fake_searchable.cpp
@@ -6,6 +6,7 @@
#include "create_blueprint_visitor_helper.h"
#include <vespa/vespalib/objects/visit.h>
+using search::query::FuzzyTerm;
using search::query::LocationTerm;
using search::query::NearestNeighborTerm;
using search::query::Node;
@@ -66,6 +67,7 @@ public:
void visit(PredicateQuery &n) override { visitTerm(n); }
void visit(RegExpTerm &n) override { visitTerm(n); }
void visit(NearestNeighborTerm &n) override { visitTerm(n); }
+ void visit(FuzzyTerm &n) override { visitTerm(n); }
};
template <class Map>
diff --git a/searchlib/src/vespa/searchlib/queryeval/termasstring.cpp b/searchlib/src/vespa/searchlib/queryeval/termasstring.cpp
index 08c0280ee68..63bf16e6016 100644
--- a/searchlib/src/vespa/searchlib/queryeval/termasstring.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/termasstring.cpp
@@ -17,6 +17,7 @@ using search::query::AndNot;
using search::query::DotProduct;
using search::query::Equiv;
using search::query::FalseQueryNode;
+using search::query::FuzzyTerm;
using search::query::LocationTerm;
using search::query::Near;
using search::query::NearestNeighborTerm;
@@ -105,6 +106,7 @@ struct TermAsStringVisitor : public QueryVisitor {
void visit(SubstringTerm &n) override {visitTerm(n); }
void visit(SuffixTerm &n) override {visitTerm(n); }
void visit(RegExpTerm &n) override {visitTerm(n); }
+ void visit(FuzzyTerm &n) override { visitTerm(n); }
void visit(PredicateQuery &) override {illegalVisit(); }
void visit(NearestNeighborTerm &) override { illegalVisit(); }
void visit(TrueQueryNode &) override { illegalVisit(); }