summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--container-search/abi-spec.json25
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java106
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/query/Item.java3
-rw-r--r--container-search/src/main/java/com/yahoo/search/query/SelectParser.java98
-rw-r--r--container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java118
-rw-r--r--container-search/src/main/java/com/yahoo/search/yql/YqlParser.java59
-rw-r--r--container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java6
-rw-r--r--container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java25
-rw-r--r--container-search/src/test/java/com/yahoo/select/SelectTestCase.java6
-rw-r--r--docproc/abi-spec.json4
-rw-r--r--documentapi/src/vespa/documentapi/messagebus/policies/mirror_with_all.cpp3
-rw-r--r--searchcore/src/tests/proton/matching/query_test.cpp2
-rw-r--r--searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp1
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp1
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/querynodes.h2
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/same_element_builder.cpp1
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/termdatafromnode.cpp1
-rw-r--r--searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp1
-rw-r--r--searchcorespi/src/vespa/searchcorespi/index/indexcollection.cpp1
-rw-r--r--searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp72
-rw-r--r--searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp20
-rw-r--r--searchlib/src/tests/query/customtypevisitor_test.cpp4
-rw-r--r--searchlib/src/tests/query/query_visitor_test.cpp2
-rw-r--r--searchlib/src/tests/query/querybuilder_test.cpp16
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp4
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h6
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp4
-rw-r--r--searchlib/src/vespa/searchlib/attribute/stringbase.cpp9
-rw-r--r--searchlib/src/vespa/searchlib/attribute/stringbase.h7
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/diskindex.cpp1
-rw-r--r--searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/parsequery/parse.h5
-rw-r--r--searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp1
-rw-r--r--searchlib/src/vespa/searchlib/query/query_term_simple.h4
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/querynode.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/customtypevisitor.h3
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/querybuilder.h10
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/queryreplicator.h6
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/queryvisitor.h2
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/simplequery.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/simplequery.h9
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h2
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/templatetermvisitor.h1
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/termnodes.cpp1
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/termnodes.h12
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h1
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/fake_searchable.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/termasstring.cpp2
-rw-r--r--vespalib/CMakeLists.txt2
-rw-r--r--vespalib/src/tests/fuzzy/.gitignore4
-rw-r--r--vespalib/src/tests/fuzzy/CMakeLists.txt8
-rw-r--r--vespalib/src/tests/fuzzy/fuzzy.cpp33
-rw-r--r--vespalib/src/vespa/vespalib/CMakeLists.txt1
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/CMakeLists.txt7
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/fuzzy.cpp144
-rw-r--r--vespalib/src/vespa/vespalib/fuzzy/fuzzy.h60
58 files changed, 695 insertions, 247 deletions
diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json
index 73d4b99b382..d634fe7795f 100644
--- a/container-search/abi-spec.json
+++ b/container-search/abi-spec.json
@@ -541,6 +541,30 @@
],
"fields": []
},
+ "com.yahoo.prelude.query.FuzzyItem": {
+ "superClass": "com.yahoo.prelude.query.TermItem",
+ "interfaces": [],
+ "attributes": [
+ "public"
+ ],
+ "methods": [
+ "public void <init>(java.lang.String, boolean, java.lang.String)",
+ "public void setValue(java.lang.String)",
+ "public java.lang.String getRawWord()",
+ "public boolean isWords()",
+ "public com.yahoo.prelude.query.Item$ItemType getItemType()",
+ "public java.lang.String getName()",
+ "public java.lang.String stringValue()",
+ "public boolean isStemmed()",
+ "public java.lang.String getIndexedString()",
+ "public int getNumWords()",
+ "public boolean equals(java.lang.Object)",
+ "public int hashCode()",
+ "public java.lang.String toString()",
+ "protected void encodeThis(java.nio.ByteBuffer)"
+ ],
+ "fields": []
+ },
"com.yahoo.prelude.query.GeoLocationItem": {
"superClass": "com.yahoo.prelude.query.TermItem",
"interfaces": [],
@@ -742,6 +766,7 @@
"public static final enum com.yahoo.prelude.query.Item$ItemType GEO_LOCATION_TERM",
"public static final enum com.yahoo.prelude.query.Item$ItemType TRUE",
"public static final enum com.yahoo.prelude.query.Item$ItemType FALSE",
+ "public static final enum com.yahoo.prelude.query.Item$ItemType FUZZY",
"public final int code"
]
},
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java b/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java
new file mode 100644
index 00000000000..9a710deca67
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java
@@ -0,0 +1,106 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.prelude.query;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Fuzzy search term
+ *
+ * @author alexeyche
+ */
+public class FuzzyItem extends TermItem {
+ private String fuzzyQuery;
+
+ public FuzzyItem(String indexName, boolean isFromQuery, String fuzzyQuery) {
+ super(indexName, isFromQuery, null);
+ setValue(fuzzyQuery);
+ }
+
+ @Override
+ public void setValue(String value) {
+ this.fuzzyQuery = value;
+ }
+
+ @Override
+ public String getRawWord() {
+ return stringValue();
+ }
+
+ @Override
+ public boolean isWords() {
+ return false;
+ }
+
+ @Override
+ public ItemType getItemType() {
+ return ItemType.FUZZY;
+ }
+
+ @Override
+ public String getName() {
+ return "FUZZY";
+ }
+
+ @Override
+ public String stringValue() {
+ return fuzzyQuery;
+ }
+
+ @Override
+ public boolean isStemmed() {
+ return false;
+ }
+
+ @Override
+ public String getIndexedString() {
+ return stringValue();
+ }
+
+ @Override
+ public int getNumWords() {
+ return 1;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
+ if (!super.equals(obj)) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
+ FuzzyItem other = (FuzzyItem) obj;
+ if (fuzzyQuery == null) {
+ if (other.fuzzyQuery != null) {
+ return false;
+ }
+ } else if (!fuzzyQuery.equals(other.fuzzyQuery)) {
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = super.hashCode();
+ result = prime * result + ((fuzzyQuery == null) ? 0 : fuzzyQuery.hashCode());
+ return result;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder builder = new StringBuilder();
+ builder.append("FuzzyItem [fuzzyQuery=").append(fuzzyQuery).append("]");
+ return builder.toString();
+ }
+
+ protected void encodeThis(ByteBuffer buffer) {
+ super.encodeThis(buffer);
+ putString(getIndexedString(), buffer);
+ }
+}
+
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/Item.java b/container-search/src/main/java/com/yahoo/prelude/query/Item.java
index 2e0c3cf8593..02b208b6ce1 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/Item.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/Item.java
@@ -56,7 +56,8 @@ public abstract class Item implements Cloneable {
NEAREST_NEIGHBOR(26),
GEO_LOCATION_TERM(27),
TRUE(28),
- FALSE(29);
+ FALSE(29),
+ FUZZY(30);
public final int code;
diff --git a/container-search/src/main/java/com/yahoo/search/query/SelectParser.java b/container-search/src/main/java/com/yahoo/search/query/SelectParser.java
index 1805a11ff5e..6f7aff798a5 100644
--- a/container-search/src/main/java/com/yahoo/search/query/SelectParser.java
+++ b/container-search/src/main/java/com/yahoo/search/query/SelectParser.java
@@ -2,6 +2,7 @@
package com.yahoo.search.query;
import com.google.common.base.Preconditions;
+import com.yahoo.prelude.query.*;
import com.yahoo.processing.IllegalInputException;
import com.yahoo.collections.LazyMap;
import com.yahoo.geo.DistanceParser;
@@ -10,38 +11,6 @@ import com.yahoo.language.Language;
import com.yahoo.language.process.Normalizer;
import com.yahoo.prelude.IndexFacts;
import com.yahoo.prelude.Location;
-import com.yahoo.prelude.query.AndItem;
-import com.yahoo.prelude.query.BoolItem;
-import com.yahoo.prelude.query.CompositeItem;
-import com.yahoo.prelude.query.DotProductItem;
-import com.yahoo.prelude.query.EquivItem;
-import com.yahoo.prelude.query.ExactStringItem;
-import com.yahoo.prelude.query.IntItem;
-import com.yahoo.prelude.query.Item;
-import com.yahoo.prelude.query.Limit;
-import com.yahoo.prelude.query.GeoLocationItem;
-import com.yahoo.prelude.query.NearItem;
-import com.yahoo.prelude.query.NearestNeighborItem;
-import com.yahoo.prelude.query.NotItem;
-import com.yahoo.prelude.query.ONearItem;
-import com.yahoo.prelude.query.OrItem;
-import com.yahoo.prelude.query.PhraseItem;
-import com.yahoo.prelude.query.PredicateQueryItem;
-import com.yahoo.prelude.query.PrefixItem;
-import com.yahoo.prelude.query.RangeItem;
-import com.yahoo.prelude.query.RankItem;
-import com.yahoo.prelude.query.RegExpItem;
-import com.yahoo.prelude.query.SameElementItem;
-import com.yahoo.prelude.query.SegmentingRule;
-import com.yahoo.prelude.query.Substring;
-import com.yahoo.prelude.query.SubstringItem;
-import com.yahoo.prelude.query.SuffixItem;
-import com.yahoo.prelude.query.TaggableItem;
-import com.yahoo.prelude.query.WandItem;
-import com.yahoo.prelude.query.WeakAndItem;
-import com.yahoo.prelude.query.WeightedSetItem;
-import com.yahoo.prelude.query.WordAlternativesItem;
-import com.yahoo.prelude.query.WordItem;
import com.yahoo.search.grouping.request.GroupingOperation;
import com.yahoo.search.query.parser.Parsable;
import com.yahoo.search.query.parser.Parser;
@@ -60,65 +29,13 @@ import java.util.List;
import java.util.Map;
import java.util.Optional;
+import static com.yahoo.search.yql.YqlParser.*;
import static com.yahoo.slime.Type.ARRAY;
import static com.yahoo.slime.Type.DOUBLE;
import static com.yahoo.slime.Type.LONG;
import static com.yahoo.slime.Type.OBJECT;
import static com.yahoo.slime.Type.STRING;
-import static com.yahoo.search.yql.YqlParser.ACCENT_DROP;
-import static com.yahoo.search.yql.YqlParser.ALTERNATIVES;
-import static com.yahoo.search.yql.YqlParser.AND_SEGMENTING;
-import static com.yahoo.search.yql.YqlParser.ANNOTATIONS;
-import static com.yahoo.search.yql.YqlParser.APPROXIMATE;
-import static com.yahoo.search.yql.YqlParser.ASCENDING_HITS_ORDER;
-import static com.yahoo.search.yql.YqlParser.CONNECTION_ID;
-import static com.yahoo.search.yql.YqlParser.CONNECTION_WEIGHT;
-import static com.yahoo.search.yql.YqlParser.CONNECTIVITY;
-import static com.yahoo.search.yql.YqlParser.DEFAULT_TARGET_NUM_HITS;
-import static com.yahoo.search.yql.YqlParser.DESCENDING_HITS_ORDER;
-import static com.yahoo.search.yql.YqlParser.DISTANCE;
-import static com.yahoo.search.yql.YqlParser.DISTANCE_THRESHOLD;
-import static com.yahoo.search.yql.YqlParser.DOT_PRODUCT;
-import static com.yahoo.search.yql.YqlParser.EQUIV;
-import static com.yahoo.search.yql.YqlParser.FILTER;
-import static com.yahoo.search.yql.YqlParser.GEO_LOCATION;
-import static com.yahoo.search.yql.YqlParser.HIT_LIMIT;
-import static com.yahoo.search.yql.YqlParser.HNSW_EXPLORE_ADDITIONAL_HITS;
-import static com.yahoo.search.yql.YqlParser.IMPLICIT_TRANSFORMS;
-import static com.yahoo.search.yql.YqlParser.LABEL;
-import static com.yahoo.search.yql.YqlParser.NEAR;
-import static com.yahoo.search.yql.YqlParser.NEAREST_NEIGHBOR;
-import static com.yahoo.search.yql.YqlParser.NFKC;
-import static com.yahoo.search.yql.YqlParser.NORMALIZE_CASE;
-import static com.yahoo.search.yql.YqlParser.ONEAR;
-import static com.yahoo.search.yql.YqlParser.ORIGIN;
-import static com.yahoo.search.yql.YqlParser.ORIGIN_LENGTH;
-import static com.yahoo.search.yql.YqlParser.ORIGIN_OFFSET;
-import static com.yahoo.search.yql.YqlParser.ORIGIN_ORIGINAL;
-import static com.yahoo.search.yql.YqlParser.PHRASE;
-import static com.yahoo.search.yql.YqlParser.PREDICATE;
-import static com.yahoo.search.yql.YqlParser.PREFIX;
-import static com.yahoo.search.yql.YqlParser.RANGE;
-import static com.yahoo.search.yql.YqlParser.RANK;
-import static com.yahoo.search.yql.YqlParser.RANKED;
-import static com.yahoo.search.yql.YqlParser.SAME_ELEMENT;
-import static com.yahoo.search.yql.YqlParser.SCORE_THRESHOLD;
-import static com.yahoo.search.yql.YqlParser.SIGNIFICANCE;
-import static com.yahoo.search.yql.YqlParser.STEM;
-import static com.yahoo.search.yql.YqlParser.SUBSTRING;
-import static com.yahoo.search.yql.YqlParser.SUFFIX;
-import static com.yahoo.search.yql.YqlParser.TARGET_HITS;
-import static com.yahoo.search.yql.YqlParser.TARGET_NUM_HITS;
-import static com.yahoo.search.yql.YqlParser.THRESHOLD_BOOST_FACTOR;
-import static com.yahoo.search.yql.YqlParser.UNIQUE_ID;
-import static com.yahoo.search.yql.YqlParser.USE_POSITION_DATA;
-import static com.yahoo.search.yql.YqlParser.USER_INPUT_LANGUAGE;
-import static com.yahoo.search.yql.YqlParser.WAND;
-import static com.yahoo.search.yql.YqlParser.WEAK_AND;
-import static com.yahoo.search.yql.YqlParser.WEIGHT;
-import static com.yahoo.search.yql.YqlParser.WEIGHTED_SET;
-
/**
* The Select query language.
*
@@ -926,6 +843,8 @@ public class SelectParser implements Parser {
return instantiateONearItem(field, key, value);
case EQUIV:
return instantiateEquivItem(field, key, value);
+ case FUZZY:
+ return instantiateFuzzyItem(field, key, value);
case ALTERNATIVES:
return instantiateWordAlternativesItem(field, key, value);
default:
@@ -1155,6 +1074,15 @@ public class SelectParser implements Parser {
return leafStyleSettings(getAnnotations(value), equiv);
}
+ private Item instantiateFuzzyItem(String field, String key, Inspector value) {
+ HashMap<Integer, Inspector> children = childMap(value);
+ Preconditions.checkArgument(children.size() == 1, "Expected 1 argument, got %s.", children.size());
+ String wordData = children.get(0).asString();
+ FuzzyItem fuzzy = new FuzzyItem(field, true, wordData);
+
+ return leafStyleSettings(getAnnotations(value), fuzzy);
+ }
+
private Item instantiateWordAlternativesItem(String field, String key, Inspector value) {
HashMap<Integer, Inspector> children = childMap(value);
Preconditions.checkArgument(children.size() >= 1, "Expected 1 or more arguments, got %s.", children.size());
diff --git a/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java b/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java
index cc441eb0c3d..afc903ba7cf 100644
--- a/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java
+++ b/container-search/src/main/java/com/yahoo/search/yql/VespaSerializer.java
@@ -1,55 +1,6 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.yql;
-import static com.yahoo.search.yql.YqlParser.ACCENT_DROP;
-import static com.yahoo.search.yql.YqlParser.ALTERNATIVES;
-import static com.yahoo.search.yql.YqlParser.AND_SEGMENTING;
-import static com.yahoo.search.yql.YqlParser.BOUNDS;
-import static com.yahoo.search.yql.YqlParser.BOUNDS_LEFT_OPEN;
-import static com.yahoo.search.yql.YqlParser.BOUNDS_OPEN;
-import static com.yahoo.search.yql.YqlParser.BOUNDS_RIGHT_OPEN;
-import static com.yahoo.search.yql.YqlParser.CONNECTION_ID;
-import static com.yahoo.search.yql.YqlParser.CONNECTION_WEIGHT;
-import static com.yahoo.search.yql.YqlParser.CONNECTIVITY;
-import static com.yahoo.search.yql.YqlParser.DISTANCE;
-import static com.yahoo.search.yql.YqlParser.DOT_PRODUCT;
-import static com.yahoo.search.yql.YqlParser.END_ANCHOR;
-import static com.yahoo.search.yql.YqlParser.EQUIV;
-import static com.yahoo.search.yql.YqlParser.FILTER;
-import static com.yahoo.search.yql.YqlParser.GEO_LOCATION;
-import static com.yahoo.search.yql.YqlParser.HIT_LIMIT;
-import static com.yahoo.search.yql.YqlParser.IMPLICIT_TRANSFORMS;
-import static com.yahoo.search.yql.YqlParser.LABEL;
-import static com.yahoo.search.yql.YqlParser.NEAR;
-import static com.yahoo.search.yql.YqlParser.NEAREST_NEIGHBOR;
-import static com.yahoo.search.yql.YqlParser.NORMALIZE_CASE;
-import static com.yahoo.search.yql.YqlParser.ONEAR;
-import static com.yahoo.search.yql.YqlParser.ORIGIN;
-import static com.yahoo.search.yql.YqlParser.ORIGIN_LENGTH;
-import static com.yahoo.search.yql.YqlParser.ORIGIN_OFFSET;
-import static com.yahoo.search.yql.YqlParser.ORIGIN_ORIGINAL;
-import static com.yahoo.search.yql.YqlParser.PHRASE;
-import static com.yahoo.search.yql.YqlParser.PREFIX;
-import static com.yahoo.search.yql.YqlParser.RANGE;
-import static com.yahoo.search.yql.YqlParser.RANK;
-import static com.yahoo.search.yql.YqlParser.RANKED;
-import static com.yahoo.search.yql.YqlParser.SAME_ELEMENT;
-import static com.yahoo.search.yql.YqlParser.SCORE_THRESHOLD;
-import static com.yahoo.search.yql.YqlParser.SIGNIFICANCE;
-import static com.yahoo.search.yql.YqlParser.START_ANCHOR;
-import static com.yahoo.search.yql.YqlParser.STEM;
-import static com.yahoo.search.yql.YqlParser.SUBSTRING;
-import static com.yahoo.search.yql.YqlParser.SUFFIX;
-import static com.yahoo.search.yql.YqlParser.TARGET_NUM_HITS;
-import static com.yahoo.search.yql.YqlParser.THRESHOLD_BOOST_FACTOR;
-import static com.yahoo.search.yql.YqlParser.UNIQUE_ID;
-import static com.yahoo.search.yql.YqlParser.URI;
-import static com.yahoo.search.yql.YqlParser.USE_POSITION_DATA;
-import static com.yahoo.search.yql.YqlParser.WAND;
-import static com.yahoo.search.yql.YqlParser.WEAK_AND;
-import static com.yahoo.search.yql.YqlParser.WEIGHT;
-import static com.yahoo.search.yql.YqlParser.WEIGHTED_SET;
-
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
@@ -64,51 +15,15 @@ import java.util.Map;
import java.util.Map.Entry;
import com.google.common.collect.ImmutableMap;
-import com.yahoo.prelude.query.AndItem;
-import com.yahoo.prelude.query.AndSegmentItem;
-import com.yahoo.prelude.query.BoolItem;
-import com.yahoo.prelude.query.DotProductItem;
-import com.yahoo.prelude.query.EquivItem;
-import com.yahoo.prelude.query.FalseItem;
-import com.yahoo.prelude.query.ExactStringItem;
-import com.yahoo.prelude.query.IndexedItem;
-import com.yahoo.prelude.query.IntItem;
-import com.yahoo.prelude.query.Item;
-import com.yahoo.prelude.query.GeoLocationItem;
-import com.yahoo.prelude.query.MarkerWordItem;
-import com.yahoo.prelude.query.NearItem;
-import com.yahoo.prelude.query.NearestNeighborItem;
-import com.yahoo.prelude.query.NotItem;
-import com.yahoo.prelude.query.NullItem;
-import com.yahoo.prelude.query.ONearItem;
-import com.yahoo.prelude.query.OrItem;
-import com.yahoo.prelude.query.PhraseItem;
-import com.yahoo.prelude.query.PhraseSegmentItem;
-import com.yahoo.prelude.query.PredicateQueryItem;
-import com.yahoo.prelude.query.PrefixItem;
-import com.yahoo.prelude.query.RangeItem;
-import com.yahoo.prelude.query.RankItem;
-import com.yahoo.prelude.query.RegExpItem;
-import com.yahoo.prelude.query.SameElementItem;
-import com.yahoo.prelude.query.SegmentingRule;
-import com.yahoo.prelude.query.Substring;
-import com.yahoo.prelude.query.SubstringItem;
-import com.yahoo.prelude.query.SuffixItem;
-import com.yahoo.prelude.query.TaggableItem;
-import com.yahoo.prelude.query.ToolBox;
+import com.yahoo.prelude.query.*;
import com.yahoo.prelude.query.ToolBox.QueryVisitor;
-import com.yahoo.prelude.query.TrueItem;
-import com.yahoo.prelude.query.UriItem;
-import com.yahoo.prelude.query.WandItem;
-import com.yahoo.prelude.query.WeakAndItem;
-import com.yahoo.prelude.query.WeightedSetItem;
-import com.yahoo.prelude.query.WordAlternativesItem;
-import com.yahoo.prelude.query.WordItem;
import com.yahoo.search.Query;
import com.yahoo.search.grouping.Continuation;
import com.yahoo.search.grouping.GroupingRequest;
import com.yahoo.search.query.QueryTree;
+import static com.yahoo.search.yql.YqlParser.*;
+
/**
* Serialize Vespa query trees to YQL+ strings.
*
@@ -517,6 +432,32 @@ public class VespaSerializer {
}
}
+ private static class FuzzySerializer extends Serializer<FuzzyItem> {
+
+ @Override
+ void onExit(StringBuilder destination, FuzzyItem item) { }
+
+ @Override
+ boolean serialize(StringBuilder destination, FuzzyItem fuzzy) {
+ String annotations = leafAnnotations(fuzzy);
+ destination.append(normalizeIndexName(fuzzy.getIndexName())).append(" contains ");
+
+ if (annotations.length() > 0) {
+ destination.append('(').append(annotations);
+ }
+
+ destination.append(FUZZY).append('(');
+ destination.append('"');
+ escape(fuzzy.getIndexedString(), destination).append('"');
+ destination.append(')');
+
+ if (annotations.length() > 0) {
+ destination.append(')');
+ }
+ return false;
+ }
+ }
+
private static class ONearSerializer extends Serializer<ONearItem> {
@Override
@@ -1239,6 +1180,7 @@ public class VespaSerializer {
dispatchBuilder.put(WordItem.class, new WordSerializer());
dispatchBuilder.put(RegExpItem.class, new RegExpSerializer());
dispatchBuilder.put(UriItem.class, new UriSerializer());
+ dispatchBuilder.put(FuzzyItem.class, new FuzzySerializer());
dispatch = ImmutableMap.copyOf(dispatchBuilder);
}
diff --git a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
index 26508fec3c4..99a449026d8 100644
--- a/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
+++ b/container-search/src/main/java/com/yahoo/search/yql/YqlParser.java
@@ -27,48 +27,8 @@ import com.yahoo.language.process.Normalizer;
import com.yahoo.language.process.Segmenter;
import com.yahoo.prelude.IndexFacts;
import com.yahoo.prelude.Location;
-import com.yahoo.prelude.query.AndItem;
-import com.yahoo.prelude.query.AndSegmentItem;
-import com.yahoo.prelude.query.BoolItem;
-import com.yahoo.prelude.query.CompositeItem;
-import com.yahoo.prelude.query.DotProductItem;
-import com.yahoo.prelude.query.EquivItem;
-import com.yahoo.prelude.query.FalseItem;
-import com.yahoo.prelude.query.ExactStringItem;
-import com.yahoo.prelude.query.IntItem;
-import com.yahoo.prelude.query.Item;
-import com.yahoo.prelude.query.Limit;
-import com.yahoo.prelude.query.GeoLocationItem;
-import com.yahoo.prelude.query.NearItem;
-import com.yahoo.prelude.query.NearestNeighborItem;
-import com.yahoo.prelude.query.NotItem;
-import com.yahoo.prelude.query.NullItem;
-import com.yahoo.prelude.query.ONearItem;
-import com.yahoo.prelude.query.OrItem;
-import com.yahoo.prelude.query.PhraseItem;
-import com.yahoo.prelude.query.PhraseSegmentItem;
-import com.yahoo.prelude.query.PredicateQueryItem;
-import com.yahoo.prelude.query.PrefixItem;
-import com.yahoo.prelude.query.RangeItem;
-import com.yahoo.prelude.query.RankItem;
-import com.yahoo.prelude.query.RegExpItem;
-import com.yahoo.prelude.query.SameElementItem;
-import com.yahoo.prelude.query.SegmentItem;
-import com.yahoo.prelude.query.SegmentingRule;
-import com.yahoo.prelude.query.Substring;
-import com.yahoo.prelude.query.SubstringItem;
-import com.yahoo.prelude.query.SuffixItem;
-import com.yahoo.prelude.query.TaggableItem;
-import com.yahoo.prelude.query.TermItem;
-import com.yahoo.prelude.query.ToolBox;
+import com.yahoo.prelude.query.*;
import com.yahoo.prelude.query.ToolBox.QueryVisitor;
-import com.yahoo.prelude.query.TrueItem;
-import com.yahoo.prelude.query.UriItem;
-import com.yahoo.prelude.query.WandItem;
-import com.yahoo.prelude.query.WeakAndItem;
-import com.yahoo.prelude.query.WeightedSetItem;
-import com.yahoo.prelude.query.WordAlternativesItem;
-import com.yahoo.prelude.query.WordItem;
import com.yahoo.processing.IllegalInputException;
import com.yahoo.search.Query;
import com.yahoo.search.grouping.Continuation;
@@ -192,6 +152,7 @@ public class YqlParser implements Parser {
public static final String WEAK_AND = "weakAnd";
public static final String WEIGHT = "weight";
public static final String WEIGHTED_SET = "weightedSet";
+ public static final String FUZZY = "fuzzy";
private final IndexFacts indexFacts;
private final List<ConnectedItem> connectedItems = new ArrayList<>();
@@ -1171,7 +1132,7 @@ public class YqlParser implements Parser {
assertHasOperator(ast, ExpressionOperator.CONTAINS);
String field = getIndex(ast.getArgument(0));
if (userQuery != null && indexFactsSession.getIndex(field).isAttribute()) {
- userQuery.trace("Field '" + field + "' is an attribute, 'contains' will only match exactly", 2);
+ userQuery.trace("Field '" + field + "' is an attribute, 'contains' will only match exactly (unless fuzzy query is used)", 2);
}
return instantiateLeafItem(field, ast.<OperatorNode<ExpressionOperator>> getArgument(1));
}
@@ -1298,11 +1259,23 @@ public class YqlParser implements Parser {
return instantiateWordAlternativesItem(field, ast);
case URI:
return instantiateUriItem(field, ast);
+ case FUZZY:
+ return instantiateFuzzyItem(field, ast);
default:
- throw newUnexpectedArgumentException(names.get(0), EQUIV, NEAR, ONEAR, PHRASE, SAME_ELEMENT, URI);
+ throw newUnexpectedArgumentException(names.get(0), EQUIV, NEAR, ONEAR, PHRASE, SAME_ELEMENT, URI, FUZZY);
}
}
+ private Item instantiateFuzzyItem(String field, OperatorNode<ExpressionOperator> ast) {
+ List<OperatorNode<ExpressionOperator>> args = ast.getArgument(1);
+ Preconditions.checkArgument(args.size() == 1, "Expected 1 argument, got %s.", args.size());
+
+ String wordData = getStringContents(args.get(0));
+
+ FuzzyItem fuzzy = new FuzzyItem(field, true, wordData);
+ return leafStyleSettings(ast, fuzzy);
+ }
+
private Item instantiateEquivItem(String field, OperatorNode<ExpressionOperator> ast) {
List<OperatorNode<ExpressionOperator>> args = ast.getArgument(1);
Preconditions.checkArgument(args.size() >= 2, "Expected 2 or more arguments, got %s.", args.size());
diff --git a/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java b/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java
index 7057f996041..1269c2a5aef 100644
--- a/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/yql/VespaSerializerTestCase.java
@@ -443,4 +443,10 @@ public class VespaSerializerTestCase {
+ "alternatives({\"trees\": 1.0, \"tree\": 0.7}))"
+ ")");
}
+
+ @Test
+ public void testFuzzy() {
+ parseAndConfirm("foo contains fuzzy(\"a\")");
+ }
+
}
diff --git a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java
index 15713dc1f97..a057d6f7c16 100644
--- a/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java
+++ b/container-search/src/test/java/com/yahoo/search/yql/YqlParserTestCase.java
@@ -8,24 +8,7 @@ import com.yahoo.prelude.Index;
import com.yahoo.prelude.IndexFacts;
import com.yahoo.prelude.IndexModel;
import com.yahoo.prelude.SearchDefinition;
-import com.yahoo.prelude.query.AndItem;
-import com.yahoo.prelude.query.BoolItem;
-import com.yahoo.prelude.query.ExactStringItem;
-import com.yahoo.prelude.query.IndexedItem;
-import com.yahoo.prelude.query.Item;
-import com.yahoo.prelude.query.MarkerWordItem;
-import com.yahoo.prelude.query.PhraseItem;
-import com.yahoo.prelude.query.PhraseSegmentItem;
-import com.yahoo.prelude.query.PrefixItem;
-import com.yahoo.prelude.query.QueryCanonicalizer;
-import com.yahoo.prelude.query.RegExpItem;
-import com.yahoo.prelude.query.SegmentingRule;
-import com.yahoo.prelude.query.Substring;
-import com.yahoo.prelude.query.SubstringItem;
-import com.yahoo.prelude.query.SuffixItem;
-import com.yahoo.prelude.query.WeakAndItem;
-import com.yahoo.prelude.query.WordAlternativesItem;
-import com.yahoo.prelude.query.WordItem;
+import com.yahoo.prelude.query.*;
import com.yahoo.prelude.querytransform.QueryRewrite;
import com.yahoo.processing.IllegalInputException;
import com.yahoo.search.Query;
@@ -382,6 +365,12 @@ public class YqlParserTestCase {
}
@Test
+ public void testFuzzy() {
+ assertParse("select foo from bar where baz contains fuzzy(\"a\")",
+ "FUZZY baz:a");
+ }
+
+ @Test
public void testStemming() {
assertTrue(getRootWord("select foo from bar where baz contains " +
"([ {stem: false} ]\"colors\")").isStemmed());
diff --git a/container-search/src/test/java/com/yahoo/select/SelectTestCase.java b/container-search/src/test/java/com/yahoo/select/SelectTestCase.java
index 0dcfb8392ef..1c8433541e1 100644
--- a/container-search/src/test/java/com/yahoo/select/SelectTestCase.java
+++ b/container-search/src/test/java/com/yahoo/select/SelectTestCase.java
@@ -674,6 +674,12 @@ public class SelectTestCase {
checkWordAlternativesContent(alternatives);
}
+ @Test
+ public void testFuzzy() {
+ assertParse("{ \"contains\": [\"description\", { \"fuzzy\": [\"a\"] }] }",
+ "FUZZY description:a");
+ }
+
//------------------------------------------------------------------- grouping tests
@Test
diff --git a/docproc/abi-spec.json b/docproc/abi-spec.json
index e2bbfd8ec10..5ad2c6d454d 100644
--- a/docproc/abi-spec.json
+++ b/docproc/abi-spec.json
@@ -251,10 +251,10 @@
"methods": [
"public void <init>()",
"public abstract com.yahoo.docproc.DocumentProcessor$Progress process(com.yahoo.docproc.Processing)",
- "public java.lang.String toString()",
"public void setFieldMap(java.util.Map)",
"public java.util.Map getFieldMap()",
- "public java.util.Map getDocMap(java.lang.String)"
+ "public java.util.Map getDocMap(java.lang.String)",
+ "public java.lang.String toString()"
],
"fields": []
},
diff --git a/documentapi/src/vespa/documentapi/messagebus/policies/mirror_with_all.cpp b/documentapi/src/vespa/documentapi/messagebus/policies/mirror_with_all.cpp
index 4bcf87c14a6..9c67b3b3ef7 100644
--- a/documentapi/src/vespa/documentapi/messagebus/policies/mirror_with_all.cpp
+++ b/documentapi/src/vespa/documentapi/messagebus/policies/mirror_with_all.cpp
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <mirror_with_all.h>
+#include "mirror_with_all.h"
+
#include <vespa/slobrok/sbmirror.h>
#include <vespa/fnet/frt/supervisor.h>
#include <vespa/fnet/transport.h>
diff --git a/searchcore/src/tests/proton/matching/query_test.cpp b/searchcore/src/tests/proton/matching/query_test.cpp
index a09437be46b..fd3dd0da518 100644
--- a/searchcore/src/tests/proton/matching/query_test.cpp
+++ b/searchcore/src/tests/proton/matching/query_test.cpp
@@ -278,6 +278,7 @@ public:
void visit(ProtonSubstringTerm &n) override { checkNode(n, 0, true); }
void visit(ProtonSuffixTerm &n) override { checkNode(n, 2, false); }
void visit(ProtonPhrase &n) override { checkNode(n, 0, true); }
+ void visit(ProtonFuzzyTerm &n) override { checkNode(n, 1, false); }
void visit(ProtonWeightedSetTerm &) override {}
void visit(ProtonDotProduct &) override {}
void visit(ProtonWandTerm &) override {}
@@ -435,6 +436,7 @@ public:
void visit(ProtonPredicateQuery &) override {}
void visit(ProtonRegExpTerm &) override {}
void visit(ProtonNearestNeighborTerm &) override {}
+ void visit(ProtonFuzzyTerm &) override {}
};
void Test::requireThatTermDataIsFilledIn() {
diff --git a/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp b/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp
index ffd08b3bbef..78486f54704 100644
--- a/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp
+++ b/searchcore/src/tests/proton/matching/unpacking_iterators_optimizer/unpacking_iterators_optimizer_test.cpp
@@ -69,6 +69,7 @@ struct DumpQuery : QueryVisitor {
void visit(NearestNeighborTerm &) override {}
void visit(TrueQueryNode &) override {}
void visit(FalseQueryNode &) override {}
+ void visit(FuzzyTerm &) override {}
};
std::string dump_query(Node &root) {
diff --git a/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp b/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp
index 9be5920c0ed..92d7e385293 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp
@@ -165,6 +165,7 @@ protected:
void visit(ProtonFalse &) override {
_result = std::make_unique<EmptyBlueprint>();
}
+ void visit(ProtonFuzzyTerm &n) override { buildTerm(n); }
public:
BlueprintBuilderVisitor(const IRequestContext & requestContext, ISearchContext &context) :
diff --git a/searchcore/src/vespa/searchcore/proton/matching/querynodes.h b/searchcore/src/vespa/searchcore/proton/matching/querynodes.h
index e7817dcecd2..d8060832d5d 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/querynodes.h
+++ b/searchcore/src/vespa/searchcore/proton/matching/querynodes.h
@@ -137,6 +137,7 @@ typedef ProtonTerm<search::query::WandTerm> ProtonWandTerm;
typedef ProtonTerm<search::query::PredicateQuery> ProtonPredicateQuery;
typedef ProtonTerm<search::query::RegExpTerm> ProtonRegExpTerm;
typedef ProtonTerm<search::query::NearestNeighborTerm> ProtonNearestNeighborTerm;
+typedef ProtonTerm<search::query::FuzzyTerm> ProtonFuzzyTerm;
struct ProtonNodeTypes {
typedef ProtonAnd And;
@@ -164,6 +165,7 @@ struct ProtonNodeTypes {
typedef ProtonNearestNeighborTerm NearestNeighborTerm;
typedef ProtonTrue TrueQueryNode;
typedef ProtonFalse FalseQueryNode;
+ typedef ProtonFuzzyTerm FuzzyTerm;
};
}
diff --git a/searchcore/src/vespa/searchcore/proton/matching/same_element_builder.cpp b/searchcore/src/vespa/searchcore/proton/matching/same_element_builder.cpp
index 34cb5369c1e..574df384a63 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/same_element_builder.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/same_element_builder.cpp
@@ -69,6 +69,7 @@ public:
void visit(ProtonNearestNeighborTerm &) override {}
void visit(ProtonTrue &) override {}
void visit(ProtonFalse &) override {}
+ void visit(ProtonFuzzyTerm &n) override { visitTerm(n); }
};
} // namespace proton::matching::<unnamed>
diff --git a/searchcore/src/vespa/searchcore/proton/matching/termdatafromnode.cpp b/searchcore/src/vespa/searchcore/proton/matching/termdatafromnode.cpp
index c2abeaa36ee..a32107836ca 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/termdatafromnode.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/termdatafromnode.cpp
@@ -45,6 +45,7 @@ struct TermDataFromTermVisitor
void visit(ProtonPredicateQuery &) override { }
void visit(ProtonRegExpTerm &n) override { visitTerm(n); }
void visit(ProtonNearestNeighborTerm &n) override { visitTerm(n); }
+ void visit(ProtonFuzzyTerm &n) override { visitTerm(n); }
};
} // namespace
diff --git a/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp b/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp
index 72a53a1a3d2..72e0153b5c2 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/unpacking_iterators_optimizer.cpp
@@ -59,6 +59,7 @@ struct TermExpander : QueryVisitor {
void visit(NearestNeighborTerm &) override {}
void visit(TrueQueryNode &) override {}
void visit(FalseQueryNode &) override {}
+ void visit(FuzzyTerm &) override {}
void flush(Intermediate &parent) {
for (Node::UP &term: terms) {
diff --git a/searchcorespi/src/vespa/searchcorespi/index/indexcollection.cpp b/searchcorespi/src/vespa/searchcorespi/index/indexcollection.cpp
index 5592215f9e2..d69f7d1b0a4 100644
--- a/searchcorespi/src/vespa/searchcorespi/index/indexcollection.cpp
+++ b/searchcorespi/src/vespa/searchcorespi/index/indexcollection.cpp
@@ -205,6 +205,7 @@ private:
void visit(PredicateQuery &n) override { visitTerm(n); }
void visit(RegExpTerm &n) override { visitTerm(n); }
void visit(NearestNeighborTerm &n) override { visitTerm(n); }
+ void visit(FuzzyTerm &n) override { visitTerm(n); }
public:
CreateBlueprintVisitor(const IIndexCollection &indexes,
diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
index 4f037415b35..65de302ae04 100644
--- a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
+++ b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
@@ -242,6 +242,12 @@ private:
void testPrefixSearch(const AttributePtr & ptr);
void testPrefixSearch();
+ // test fuzzy search
+ void performFuzzySearch(const StringAttribute & vec, const vespalib::string & term,
+ const DocSet & expected, TermType termType);
+ void testFuzzySearch(const AttributePtr & ptr);
+ void testFuzzySearch();
+
// test that search is working after clear doc
template <typename VectorType, typename ValueType>
void requireThatSearchIsWorkingAfterClearDoc(const vespalib::string & name, const Config & cfg,
@@ -402,6 +408,7 @@ SearchContextTest::buildTermQuery(std::vector<char> & buffer, const vespalib::st
switch (termType) {
case TermType::PREFIXTERM: buffer[p++] = ParseItem::ITEM_PREFIXTERM; break;
case TermType::REGEXP: buffer[p++] = ParseItem::ITEM_REGEXP; break;
+ case TermType::FUZZYTERM: buffer[p++] = ParseItem::ITEM_FUZZY; break;
default:
buffer[p++] = ParseItem::ITEM_TERM;
break;
@@ -1498,6 +1505,70 @@ SearchContextTest::testPrefixSearch()
}
}
+//-----------------------------------------------------------------------------
+// Test fuzzy search
+//-----------------------------------------------------------------------------
+
+void
+SearchContextTest::performFuzzySearch(const StringAttribute & vec, const vespalib::string & term,
+ const DocSet & expected, TermType termType)
+{
+ performSearch(vec, term, expected, termType);
+}
+
+void
+SearchContextTest::testFuzzySearch(const AttributePtr & ptr)
+{
+ LOG(info, "testFuzzySearch: vector '%s'", ptr->getName().c_str());
+
+ auto & vec = dynamic_cast<StringAttribute &>(*ptr.get());
+
+ uint32_t numDocs = 2;
+ addDocs(*ptr.get(), numDocs);
+
+ const char * strings [] = {"fuzzysearch", "FUZZYSEARCH"};
+ const char * terms[][2] = {
+ {"fuzzysearch", "FUZZYSEARCH"},
+ {"fuzzysearck", "FUZZYSEARCK"},
+ {"fuzzysekkkk", "FUZZYSEKKKK"}
+ };
+
+ for (uint32_t doc = 1; doc < numDocs + 1; ++doc) {
+ ASSERT_TRUE(doc < vec.getNumDocs());
+ EXPECT_TRUE(vec.update(doc, strings[doc - 1]));
+ }
+
+ ptr->commit(true);
+
+ std::vector<DocSet> expected;
+ DocSet empty;
+ {
+ uint32_t docs[] = {1, 2};
+ expected.emplace_back(docs, docs + 2); // normal search
+ }
+ {
+ uint32_t docs[] = {1, 2};
+ expected.emplace_back(docs, docs + 2); // fuzzy search
+ }
+
+ expected.emplace_back(); // results
+
+ for (uint32_t i = 0; i < 3; ++i) {
+ for (uint32_t j = 0; j < 2; ++j) {
+ performFuzzySearch(vec, terms[i][j], expected[i], TermType::FUZZYTERM);
+ }
+ }
+}
+
+void
+SearchContextTest::testFuzzySearch()
+{
+ for (const auto & cfg : _stringCfg) {
+ testFuzzySearch(AttributeFactory::createAttribute(cfg.first, cfg.second));
+ }
+}
+
+
template <typename VectorType, typename ValueType>
void
SearchContextTest::requireThatSearchIsWorkingAfterClearDoc(const vespalib::string & name,
@@ -2028,6 +2099,7 @@ SearchContextTest::Main()
testPrefixSearch();
testSearchIteratorConformance();
testSearchIteratorUnpacking();
+ testFuzzySearch();
TEST_DO(requireThatSearchIsWorkingAfterClearDoc());
TEST_DO(requireThatSearchIsWorkingAfterLoadAndClearDoc());
TEST_DO(requireThatSearchIsWorkingAfterUpdates());
diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
index 8a17114057c..2f0f0d5a6ae 100644
--- a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
+++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
@@ -386,8 +386,8 @@ testSingleValue(Attribute & svsa, Config &cfg)
TEST("testSingleValue")
{
EXPECT_EQUAL(24u, sizeof(AttributeVector::SearchContext));
- EXPECT_EQUAL(24u, sizeof(StringSearchHelper));
- EXPECT_EQUAL(56u, sizeof(SingleValueStringAttribute::StringSingleImplSearchContext));
+ EXPECT_EQUAL(32u, sizeof(StringSearchHelper));
+ EXPECT_EQUAL(64u, sizeof(SingleValueStringAttribute::StringSingleImplSearchContext));
{
Config cfg(BasicType::STRING, CollectionType::SINGLE);
SingleValueStringAttribute svsa("svsa", cfg);
@@ -494,4 +494,20 @@ TEST("test cased regex match") {
EXPECT_FALSE(helper.isMatch("xY"));
}
+TEST("test fuzzy match") {
+ QueryTermUCS4 xyz("xyz", QueryTermSimple::Type::FUZZYTERM);
+ StringSearchHelper helper(xyz, false);
+ EXPECT_FALSE(helper.isCased());
+ EXPECT_FALSE(helper.isPrefix());
+ EXPECT_FALSE(helper.isRegex());
+ EXPECT_TRUE(helper.isFuzzy());
+ EXPECT_TRUE(helper.isMatch("xyz"));
+ EXPECT_TRUE(helper.isMatch("xyza"));
+ EXPECT_TRUE(helper.isMatch("xyv"));
+ EXPECT_TRUE(helper.isMatch("xy"));
+ EXPECT_TRUE(helper.isMatch("x"));
+ EXPECT_TRUE(helper.isMatch("xvv"));
+ EXPECT_FALSE(helper.isMatch("vvv"));
+}
+
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/query/customtypevisitor_test.cpp b/searchlib/src/tests/query/customtypevisitor_test.cpp
index 35280fb0bd8..0e8155e23c3 100644
--- a/searchlib/src/tests/query/customtypevisitor_test.cpp
+++ b/searchlib/src/tests/query/customtypevisitor_test.cpp
@@ -36,6 +36,7 @@ struct MyRangeTerm : InitTerm<RangeTerm> {};
struct MyStringTerm : InitTerm<StringTerm> {};
struct MySubstrTerm : InitTerm<SubstringTerm> {};
struct MySuffixTerm : InitTerm<SuffixTerm> {};
+struct MyFuzzyTerm : InitTerm<FuzzyTerm> {};
struct MyWeakAnd : WeakAnd { MyWeakAnd() : WeakAnd(1234, "view") {} };
struct MyWeightedSetTerm : WeightedSetTerm { MyWeightedSetTerm() : WeightedSetTerm(0, "view", 0, Weight(42)) {} };
struct MyDotProduct : DotProduct { MyDotProduct() : DotProduct(0, "view", 0, Weight(42)) {} };
@@ -65,6 +66,7 @@ struct MyQueryNodeTypes {
typedef MyStringTerm StringTerm;
typedef MySubstrTerm SubstringTerm;
typedef MySuffixTerm SuffixTerm;
+ typedef MyFuzzyTerm FuzzyTerm;
typedef MyWeakAnd WeakAnd;
typedef MyWeightedSetTerm WeightedSetTerm;
typedef MyDotProduct DotProduct;
@@ -112,6 +114,7 @@ public:
void visit(MyNearestNeighborTerm &) override { setVisited<MyNearestNeighborTerm>(); }
void visit(MyTrue &) override { setVisited<MyTrue>(); }
void visit(MyFalse &) override { setVisited<MyFalse>(); }
+ void visit(MyFuzzyTerm &) override { setVisited<MyFuzzyTerm>(); }
};
template <class T>
@@ -148,6 +151,7 @@ TEST("customtypevisitor_test") {
requireThatNodeIsVisited<MyNearestNeighborTerm>();
requireThatNodeIsVisited<MyTrue>();
requireThatNodeIsVisited<MyFalse>();
+ requireThatNodeIsVisited<MyFuzzyTerm>();
}
} // namespace
diff --git a/searchlib/src/tests/query/query_visitor_test.cpp b/searchlib/src/tests/query/query_visitor_test.cpp
index 9f73c1ff585..f770213e8e5 100644
--- a/searchlib/src/tests/query/query_visitor_test.cpp
+++ b/searchlib/src/tests/query/query_visitor_test.cpp
@@ -49,6 +49,7 @@ public:
void visit(NearestNeighborTerm &) override { isVisited<NearestNeighborTerm>() = true; }
void visit(TrueQueryNode &) override { isVisited<TrueQueryNode>() = true; }
void visit(FalseQueryNode &) override { isVisited<FalseQueryNode>() = true; }
+ void visit(FuzzyTerm &) override { isVisited<FuzzyTerm>() = true; }
};
template <class T>
@@ -85,6 +86,7 @@ TEST("requireThatAllNodesCanBeVisited") {
checkVisit<NearestNeighborTerm>(new SimpleNearestNeighborTerm("query_tensor", "doc_tensor", 0, Weight(0), 123, true, 321, 100100.25));
checkVisit<TrueQueryNode>(new SimpleTrue());
checkVisit<FalseQueryNode>(new SimpleFalse());
+ checkVisit<FuzzyTerm>(new SimpleFuzzyTerm("t", "field", 0, Weight(0)));
}
} // namespace
diff --git a/searchlib/src/tests/query/querybuilder_test.cpp b/searchlib/src/tests/query/querybuilder_test.cpp
index 93cfad27742..2ea566027c4 100644
--- a/searchlib/src/tests/query/querybuilder_test.cpp
+++ b/searchlib/src/tests/query/querybuilder_test.cpp
@@ -47,7 +47,7 @@ PredicateQueryTerm::UP getPredicateQueryTerm() {
template <class NodeTypes>
Node::UP createQueryTree() {
QueryBuilder<NodeTypes> builder;
- builder.addAnd(12);
+ builder.addAnd(13);
{
builder.addRank(2);
{
@@ -115,6 +115,7 @@ Node::UP createQueryTree() {
builder.add_true_node();
builder.add_false_node();
}
+ builder.addFuzzyTerm(str[5], view[5], id[5], weight[5]);
}
Node::UP node = builder.build();
ASSERT_TRUE(node.get());
@@ -179,10 +180,11 @@ void checkQueryTreeTypes(Node *node) {
typedef typename NodeTypes::RegExpTerm RegExpTerm;
typedef typename NodeTypes::TrueQueryNode TrueNode;
typedef typename NodeTypes::FalseQueryNode FalseNode;
+ typedef typename NodeTypes::FuzzyTerm FuzzyTerm;
ASSERT_TRUE(node);
auto* and_node = as_node<And>(node);
- EXPECT_EQUAL(12u, and_node->getChildren().size());
+ EXPECT_EQUAL(13u, and_node->getChildren().size());
auto* rank = as_node<Rank>(and_node->getChildren()[0]);
EXPECT_EQUAL(2u, rank->getChildren().size());
@@ -306,6 +308,9 @@ void checkQueryTreeTypes(Node *node) {
auto* false_node = as_node<FalseNode>(and_not->getChildren()[1]);
EXPECT_TRUE(true_node);
EXPECT_TRUE(false_node);
+
+ auto* fuzzy_term = as_node<FuzzyTerm>(and_node->getChildren()[12]);
+ EXPECT_TRUE(checkTerm(fuzzy_term, str[5], view[5], id[5], weight[5]));
}
struct AbstractTypes {
@@ -332,6 +337,7 @@ struct AbstractTypes {
typedef search::query::RegExpTerm RegExpTerm;
typedef search::query::TrueQueryNode TrueQueryNode;
typedef search::query::FalseQueryNode FalseQueryNode;
+ typedef search::query::FuzzyTerm FuzzyTerm;
};
// Builds a tree with simplequery and checks that the results have the
@@ -427,6 +433,11 @@ struct MyNearestNeighborTerm : NearestNeighborTerm {
};
struct MyTrue : TrueQueryNode {};
struct MyFalse : FalseQueryNode {};
+struct MyFuzzyTerm : FuzzyTerm {
+ MyFuzzyTerm(const Type &t, const string &f, int32_t i, Weight w)
+ : FuzzyTerm(t, f, i, w) {
+ }
+};
struct MyQueryNodeTypes {
typedef MyAnd And;
@@ -454,6 +465,7 @@ struct MyQueryNodeTypes {
typedef MyNearestNeighborTerm NearestNeighborTerm;
typedef MyTrue TrueQueryNode;
typedef MyFalse FalseQueryNode;
+ typedef MyFuzzyTerm FuzzyTerm;
};
TEST("require that Custom Query Trees Can Be Built") {
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index 906400f50a5..f14966dbfc8 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -779,6 +779,8 @@ public:
n.get_distance_threshold(),
getRequestContext().get_attribute_blueprint_params().nearest_neighbor_brute_force_limit));
}
+
+ void visit(query::FuzzyTerm &n) override { visitTerm(n); }
};
template <typename WS>
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
index ca440c2a249..e28b576319f 100644
--- a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
@@ -125,6 +125,10 @@ StringTemplSearchContext(QueryTermSimpleUP qTerm, const AttrType & toBeSearched)
vespalib::string prefix(vespalib::RegexpUtil::get_prefix(this->queryTerm()->getTerm()));
auto comp = enumStore.make_folded_comparator_prefix(prefix.c_str());
lookupRange(comp, comp);
+ } else if (this->isFuzzy()) {
+ vespalib::string prefix(this->getFuzzy().getPrefix());
+ auto comp = enumStore.make_folded_comparator_prefix(prefix.c_str());
+ lookupRange(comp, comp);
} else {
auto comp = enumStore.make_folded_comparator(queryTerm()->getTerm());
lookupTerm(comp);
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
index 97c2c7d2b63..a1d79c6131b 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
@@ -282,6 +282,10 @@ StringPostingSearchContext(QueryTermSimpleUP qTerm, bool useBitVector, const Att
vespalib::string prefix(RegexpUtil::get_prefix(this->queryTerm()->getTerm()));
auto comp = _enumStore.make_folded_comparator_prefix(prefix.c_str());
this->lookupRange(comp, comp);
+ } else if (this->isFuzzy()) {
+ vespalib::string prefix(this->getFuzzy().getPrefix());
+ auto comp = _enumStore.make_folded_comparator_prefix(prefix.c_str());
+ this->lookupRange(comp, comp);
} else {
auto comp = _enumStore.make_folded_comparator(this->queryTerm()->getTerm());
this->lookupTerm(comp);
@@ -301,6 +305,8 @@ StringPostingSearchContext<BaseSC, AttrT, DataT>::useThis(const PostingListSearc
: false;
} else if ( this->isCased() ) {
return this->isMatch(_enumStore.get_value(it.getKey().load_acquire()));
+ } else if (this->isFuzzy()) {
+ return this->getFuzzy().isMatch(_enumStore.get_value(it.getKey().load_acquire()));
}
return true;
}
diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp
index 730ad1107a7..a6feadac724 100644
--- a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp
@@ -61,6 +61,10 @@ SingleValueStringAttributeT<B>::StringTemplSearchContext::StringTemplSearchConte
vespalib::string prefix(vespalib::RegexpUtil::get_prefix(this->queryTerm()->getTerm()));
auto comp = enumStore.make_folded_comparator_prefix(prefix.c_str());
lookupRange(comp, comp);
+ } else if (this->isFuzzy()) {
+ vespalib::string prefix(this->getFuzzy().getPrefix());
+ auto comp = enumStore.make_folded_comparator_prefix(prefix.c_str());
+ lookupRange(comp, comp);
} else {
auto comp = enumStore.make_folded_comparator(queryTerm()->getTerm());
lookupTerm(comp);
diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
index 6062c4f2096..9cccce4b19d 100644
--- a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
@@ -18,11 +18,13 @@ namespace search {
StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased)
: _regex(),
+ _fuzzy(),
_term(),
_termLen(),
_isPrefix(term.isPrefix()),
_isRegex(term.isRegex()),
- _isCased(cased)
+ _isCased(cased),
+ _isFuzzy(term.isFuzzy())
{
if (isRegex()) {
if (isCased()) {
@@ -33,6 +35,8 @@ StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased)
} else if (isCased()) {
_term._char = term.getTerm();
_termLen = term.getTermLen();
+ } else if (isFuzzy()) {
+ _fuzzy = vespalib::Fuzzy::from_term(term.getTerm());
} else {
term.term(_term._ucs4);
}
@@ -54,6 +58,9 @@ StringSearchHelper::isMatch(const char *src) const {
int res = strncmp(_term._char, src, _termLen);
return (res == 0) && (src[_termLen] == 0 || isPrefix());
}
+ if (__builtin_expect(isFuzzy(), false)) {
+ return getFuzzy().isMatch(src);
+ }
vespalib::Utf8ReaderForZTS u8reader(src);
uint32_t j = 0;
uint32_t val;
diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h
index 495427d3e45..175f56f8b45 100644
--- a/searchlib/src/vespa/searchlib/attribute/stringbase.h
+++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h
@@ -10,6 +10,7 @@
#include <vespa/vespalib/regex/regex.h>
#include <vespa/vespalib/text/lowercase.h>
#include <vespa/vespalib/text/utf8.h>
+#include <vespa/vespalib/fuzzy/fuzzy.h>
#include <optional>
namespace search {
@@ -26,9 +27,12 @@ public:
bool isPrefix() const { return _isPrefix; }
bool isRegex() const { return _isRegex; }
bool isCased() const { return _isCased; }
+ bool isFuzzy() const { return _isFuzzy; }
const vespalib::Regex & getRegex() const { return _regex; }
+ const vespalib::Fuzzy & getFuzzy() const { return _fuzzy; }
private:
vespalib::Regex _regex;
+ vespalib::Fuzzy _fuzzy;
union {
const ucs4_t *_ucs4;
const char *_char;
@@ -37,6 +41,7 @@ private:
bool _isPrefix;
bool _isRegex;
bool _isCased;
+ bool _isFuzzy;
};
class ReaderBase;
@@ -126,7 +131,9 @@ protected:
bool isPrefix() const { return _helper.isPrefix(); }
bool isRegex() const { return _helper.isRegex(); }
bool isCased() const { return _helper.isCased(); }
+ bool isFuzzy() const { return _helper.isFuzzy(); }
const vespalib::Regex & getRegex() const { return _helper.getRegex(); }
+ const vespalib::Fuzzy & getFuzzy() const { return _helper.getFuzzy(); }
class CollectHitCount {
public:
diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp
index 7cde1102bc1..eb8054317dc 100644
--- a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp
@@ -409,6 +409,7 @@ public:
void visit(RegExpTerm &n) override { visitTerm(n); }
void visit(PredicateQuery &n) override { not_supported(n); }
void visit(NearestNeighborTerm &n) override { not_supported(n); }
+ void visit(FuzzyTerm &n) override { visitTerm(n); }
};
Blueprint::UP
diff --git a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp
index 330320d5047..f8ad85859fa 100644
--- a/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp
+++ b/searchlib/src/vespa/searchlib/memoryindex/memory_index.cpp
@@ -28,6 +28,7 @@ using index::IFieldLengthInspector;
using index::IndexBuilder;
using index::Schema;
using index::SchemaUtil;
+using query::FuzzyTerm;
using query::LocationTerm;
using query::NearestNeighborTerm;
using query::Node;
@@ -168,6 +169,7 @@ public:
void visit(SubstringTerm &n) override { visitTerm(n); }
void visit(SuffixTerm &n) override { visitTerm(n); }
void visit(RegExpTerm &n) override { visitTerm(n); }
+ void visit(FuzzyTerm &n) override { visitTerm(n); }
void visit(PredicateQuery &n) override { not_supported(n); }
void visit(NearestNeighborTerm &n) override { not_supported(n); }
diff --git a/searchlib/src/vespa/searchlib/parsequery/parse.h b/searchlib/src/vespa/searchlib/parsequery/parse.h
index 34ea692c370..0d665d1f04d 100644
--- a/searchlib/src/vespa/searchlib/parsequery/parse.h
+++ b/searchlib/src/vespa/searchlib/parsequery/parse.h
@@ -56,8 +56,9 @@ public:
ITEM_GEO_LOCATION_TERM = 27,
ITEM_TRUE = 28,
ITEM_FALSE = 29,
- ITEM_MAX = 30, // Indicates how long tables must be.
- ITEM_UNDEF = 31,
+ ITEM_FUZZY = 30,
+ ITEM_MAX = 31, // Indicates how long tables must be.
+ ITEM_UNDEF = 32,
};
/** A tag identifying the origin of this query node.
diff --git a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp
index aa13c93810a..85b55284b35 100644
--- a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp
+++ b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.cpp
@@ -170,6 +170,7 @@ bool SimpleQueryStackDumpIterator::readNext() {
case ParseItem::ITEM_EXACTSTRINGTERM:
case ParseItem::ITEM_SUFFIXTERM:
case ParseItem::ITEM_REGEXP:
+ case ParseItem::ITEM_FUZZY:
_curr_index_name = read_stringref(p);
_curr_term = read_stringref(p);
_currArity = 0;
diff --git a/searchlib/src/vespa/searchlib/query/query_term_simple.h b/searchlib/src/vespa/searchlib/query/query_term_simple.h
index 433ab7d56dd..0d5dd116826 100644
--- a/searchlib/src/vespa/searchlib/query/query_term_simple.h
+++ b/searchlib/src/vespa/searchlib/query/query_term_simple.h
@@ -22,7 +22,8 @@ public:
EXACTSTRINGTERM = 3,
SUFFIXTERM = 4,
REGEXP = 5,
- GEO_LOCATION = 6
+ GEO_LOCATION = 6,
+ FUZZYTERM = 7
};
template <typename N>
@@ -61,6 +62,7 @@ public:
bool isWord() const { return (_type == Type::WORD); }
bool isRegex() const { return (_type == Type::REGEXP); }
bool isGeoLoc() const { return (_type == Type::GEO_LOCATION); }
+ bool isFuzzy() const { return (_type == Type::FUZZYTERM); }
bool empty() const { return _term.empty(); }
virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
vespalib::string getClassName() const;
diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
index 77fc97913a4..6f126c7a3eb 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
@@ -86,6 +86,7 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor
case ParseItem::ITEM_SUFFIXTERM:
case ParseItem::ITEM_PURE_WEIGHTED_STRING:
case ParseItem::ITEM_PURE_WEIGHTED_LONG:
+ case ParseItem::ITEM_FUZZY:
{
vespalib::string index = queryRep.getIndexName();
if (index.empty()) {
@@ -116,6 +117,9 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor
case ParseItem::ITEM_SUFFIXTERM:
sTerm = TermType::SUFFIXTERM;
break;
+ case ParseItem::ITEM_FUZZY:
+ sTerm = TermType::FUZZYTERM;
+ break;
default:
break;
}
diff --git a/searchlib/src/vespa/searchlib/query/tree/customtypevisitor.h b/searchlib/src/vespa/searchlib/query/tree/customtypevisitor.h
index 9f29c34aa05..abc48db9d87 100644
--- a/searchlib/src/vespa/searchlib/query/tree/customtypevisitor.h
+++ b/searchlib/src/vespa/searchlib/query/tree/customtypevisitor.h
@@ -52,6 +52,7 @@ public:
virtual void visit(typename NodeTypes::NearestNeighborTerm &) = 0;
virtual void visit(typename NodeTypes::TrueQueryNode &) = 0;
virtual void visit(typename NodeTypes::FalseQueryNode &) = 0;
+ virtual void visit(typename NodeTypes::FuzzyTerm &) = 0;
private:
// Route QueryVisit requests to the correct custom type.
@@ -81,6 +82,7 @@ private:
typedef typename NodeTypes::NearestNeighborTerm TNearestNeighborTerm;
typedef typename NodeTypes::TrueQueryNode TTrueQueryNode;
typedef typename NodeTypes::FalseQueryNode TFalseQueryNode;
+ typedef typename NodeTypes::FuzzyTerm TFuzzyTerm;
void visit(And &n) override { visit(static_cast<TAnd&>(n)); }
void visit(AndNot &n) override { visit(static_cast<TAndNot&>(n)); }
@@ -107,6 +109,7 @@ private:
void visit(NearestNeighborTerm &n) override { visit(static_cast<TNearestNeighborTerm&>(n)); }
void visit(TrueQueryNode &n) override { visit(static_cast<TTrueQueryNode&>(n)); }
void visit(FalseQueryNode &n) override { visit(static_cast<TFalseQueryNode&>(n)); }
+ void visit(FuzzyTerm &n) override { visit(static_cast<TFuzzyTerm &>(n)); }
};
}
diff --git a/searchlib/src/vespa/searchlib/query/tree/querybuilder.h b/searchlib/src/vespa/searchlib/query/tree/querybuilder.h
index 9631e2afded..ee3a944cce1 100644
--- a/searchlib/src/vespa/searchlib/query/tree/querybuilder.h
+++ b/searchlib/src/vespa/searchlib/query/tree/querybuilder.h
@@ -220,6 +220,12 @@ create_nearest_neighbor_term(vespalib::stringref query_tensor_name, vespalib::st
target_num_hits, allow_approximate, explore_additional_hits,
distance_threshold);
}
+template <class NodeTypes>
+typename NodeTypes::FuzzyTerm *
+createFuzzyTerm(vespalib::stringref term, vespalib::stringref view, int32_t id, Weight weight) {
+ return new typename NodeTypes::FuzzyTerm(term, view, id, weight);
+}
+
template <class NodeTypes>
class QueryBuilder : public QueryBuilderBase {
@@ -327,6 +333,10 @@ public:
adjustWeight(weight);
return addTerm(createRegExpTerm<NodeTypes>(term, view, id, weight));
}
+ typename NodeTypes::FuzzyTerm &addFuzzyTerm(stringref term, stringref view, int32_t id, Weight weight) {
+ adjustWeight(weight);
+ return addTerm(createFuzzyTerm<NodeTypes>(term, view, id, weight));
+ }
typename NodeTypes::NearestNeighborTerm &add_nearest_neighbor_term(stringref query_tensor_name, stringref field_name,
int32_t id, Weight weight, uint32_t target_num_hits,
bool allow_approximate, uint32_t explore_additional_hits,
diff --git a/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h b/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h
index 3fb72f93b23..ecaee350b21 100644
--- a/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h
+++ b/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h
@@ -193,6 +193,12 @@ private:
void visit(FalseQueryNode &) override {
_builder.add_false_node();
}
+
+ void visit(FuzzyTerm &node) override {
+ replicate(node, _builder.addFuzzyTerm(
+ node.getTerm(), node.getView(),
+ node.getId(), node.getWeight()));
+ }
};
}
diff --git a/searchlib/src/vespa/searchlib/query/tree/queryvisitor.h b/searchlib/src/vespa/searchlib/query/tree/queryvisitor.h
index 02887975085..90faa25bd99 100644
--- a/searchlib/src/vespa/searchlib/query/tree/queryvisitor.h
+++ b/searchlib/src/vespa/searchlib/query/tree/queryvisitor.h
@@ -29,6 +29,7 @@ class SameElement;
class NearestNeighborTerm;
class TrueQueryNode;
class FalseQueryNode;
+class FuzzyTerm;
struct QueryVisitor {
virtual ~QueryVisitor() {}
@@ -58,6 +59,7 @@ struct QueryVisitor {
virtual void visit(NearestNeighborTerm &) = 0;
virtual void visit(TrueQueryNode &) = 0;
virtual void visit(FalseQueryNode &) = 0;
+ virtual void visit(FuzzyTerm &) = 0;
};
}
diff --git a/searchlib/src/vespa/searchlib/query/tree/simplequery.cpp b/searchlib/src/vespa/searchlib/query/tree/simplequery.cpp
index cad97279b4c..e3cad4ed33a 100644
--- a/searchlib/src/vespa/searchlib/query/tree/simplequery.cpp
+++ b/searchlib/src/vespa/searchlib/query/tree/simplequery.cpp
@@ -52,4 +52,6 @@ SimpleRegExpTerm::~SimpleRegExpTerm() = default;
SimpleNearestNeighborTerm::~SimpleNearestNeighborTerm() = default;
+SimpleFuzzyTerm::~SimpleFuzzyTerm() = default;
+
}
diff --git a/searchlib/src/vespa/searchlib/query/tree/simplequery.h b/searchlib/src/vespa/searchlib/query/tree/simplequery.h
index 5047e072cb7..00dad2597ce 100644
--- a/searchlib/src/vespa/searchlib/query/tree/simplequery.h
+++ b/searchlib/src/vespa/searchlib/query/tree/simplequery.h
@@ -152,7 +152,13 @@ struct SimpleNearestNeighborTerm : NearestNeighborTerm {
{}
~SimpleNearestNeighborTerm() override;
};
-
+struct SimpleFuzzyTerm : FuzzyTerm {
+ SimpleFuzzyTerm(const Type &term, vespalib::stringref view,
+ int32_t id, Weight weight)
+ : FuzzyTerm(term, view, id, weight) {
+ }
+ ~SimpleFuzzyTerm() override;
+};
struct SimpleQueryNodeTypes {
using And = SimpleAnd;
@@ -180,6 +186,7 @@ struct SimpleQueryNodeTypes {
using PredicateQuery = SimplePredicateQuery;
using RegExpTerm = SimpleRegExpTerm;
using NearestNeighborTerm = SimpleNearestNeighborTerm;
+ using FuzzyTerm = SimpleFuzzyTerm;
};
}
diff --git a/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp
index d45a72d316a..f36410d1845 100644
--- a/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp
+++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp
@@ -278,6 +278,10 @@ class QueryNodeConverter : public QueryVisitor {
createTerm(node, ParseItem::ITEM_REGEXP);
}
+ void visit(FuzzyTerm &node) override {
+ createTerm(node, ParseItem::ITEM_FUZZY);
+ }
+
void visit(NearestNeighborTerm &node) override {
createTermNode(node, ParseItem::ITEM_NEAREST_NEIGHBOR);
appendString(node.get_query_tensor_name());
diff --git a/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h b/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h
index 5a6f315205e..a5f3be3e618 100644
--- a/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h
+++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h
@@ -197,6 +197,8 @@ private:
t = &builder.addPredicateQuery(queryStack.getPredicateQueryTerm(), view, id, weight);
} else if (type == ParseItem::ITEM_REGEXP) {
t = &builder.addRegExpTerm(term, view, id, weight);
+ } else if (type == ParseItem::ITEM_FUZZY) {
+ t = &builder.addFuzzyTerm(term, view, id, weight);
} else {
vespalib::Issue::report("query builder: Unable to create query tree from stack dump. node type = %d.", type);
}
diff --git a/searchlib/src/vespa/searchlib/query/tree/templatetermvisitor.h b/searchlib/src/vespa/searchlib/query/tree/templatetermvisitor.h
index fc3570f44d8..a6eae257afd 100644
--- a/searchlib/src/vespa/searchlib/query/tree/templatetermvisitor.h
+++ b/searchlib/src/vespa/searchlib/query/tree/templatetermvisitor.h
@@ -32,6 +32,7 @@ class TemplateTermVisitor : public CustomTypeTermVisitor<NodeTypes> {
void visit(typename NodeTypes::PredicateQuery &n) override { myVisit(n); }
void visit(typename NodeTypes::RegExpTerm &n) override { myVisit(n); }
void visit(typename NodeTypes::NearestNeighborTerm &n) override { myVisit(n); }
+ void visit(typename NodeTypes::FuzzyTerm &n) override { myVisit(n); }
// Phrases are terms with children. This visitor will not visit
// the phrase's children, unless this member function is
diff --git a/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp b/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp
index dcf0533ff7a..6e889e76f21 100644
--- a/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp
+++ b/searchlib/src/vespa/searchlib/query/tree/termnodes.cpp
@@ -24,6 +24,7 @@ RegExpTerm::~RegExpTerm() = default;
WeightedSetTerm::~WeightedSetTerm() = default;
DotProduct::~DotProduct() = default;
WandTerm::~WandTerm() = default;
+FuzzyTerm::~FuzzyTerm() = default;
namespace {
diff --git a/searchlib/src/vespa/searchlib/query/tree/termnodes.h b/searchlib/src/vespa/searchlib/query/tree/termnodes.h
index a728b674999..7aa867e25ed 100644
--- a/searchlib/src/vespa/searchlib/query/tree/termnodes.h
+++ b/searchlib/src/vespa/searchlib/query/tree/termnodes.h
@@ -115,6 +115,18 @@ public:
virtual ~RegExpTerm() = 0;
};
+//-----------------------------------------------------------------------------
+
+class FuzzyTerm : public QueryNodeMixin<FuzzyTerm, StringBase>
+{
+public:
+ FuzzyTerm(const Type &term, vespalib::stringref view,
+ int32_t id, Weight weight)
+ : QueryNodeMixinType(term, view, id, weight)
+ {}
+ virtual ~FuzzyTerm() = 0;
+};
+
/**
* Term matching the K nearest neighbors in a multi-dimensional vector space.
*
diff --git a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h
index 86cde64a197..30c7e1722fb 100644
--- a/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h
+++ b/searchlib/src/vespa/searchlib/queryeval/create_blueprint_visitor_helper.h
@@ -73,6 +73,7 @@ public:
void visit(query::SuffixTerm &n) override = 0;
void visit(query::RegExpTerm &n) override = 0;
void visit(query::NearestNeighborTerm &n) override = 0;
+ void visit(query::FuzzyTerm &n) override = 0;
void visit(query::TrueQueryNode &) final override;
void visit(query::FalseQueryNode &) final override;
diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_searchable.cpp b/searchlib/src/vespa/searchlib/queryeval/fake_searchable.cpp
index 519f6e81774..614f219cbcb 100644
--- a/searchlib/src/vespa/searchlib/queryeval/fake_searchable.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/fake_searchable.cpp
@@ -6,6 +6,7 @@
#include "create_blueprint_visitor_helper.h"
#include <vespa/vespalib/objects/visit.h>
+using search::query::FuzzyTerm;
using search::query::LocationTerm;
using search::query::NearestNeighborTerm;
using search::query::Node;
@@ -66,6 +67,7 @@ public:
void visit(PredicateQuery &n) override { visitTerm(n); }
void visit(RegExpTerm &n) override { visitTerm(n); }
void visit(NearestNeighborTerm &n) override { visitTerm(n); }
+ void visit(FuzzyTerm &n) override { visitTerm(n); }
};
template <class Map>
diff --git a/searchlib/src/vespa/searchlib/queryeval/termasstring.cpp b/searchlib/src/vespa/searchlib/queryeval/termasstring.cpp
index 08c0280ee68..63bf16e6016 100644
--- a/searchlib/src/vespa/searchlib/queryeval/termasstring.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/termasstring.cpp
@@ -17,6 +17,7 @@ using search::query::AndNot;
using search::query::DotProduct;
using search::query::Equiv;
using search::query::FalseQueryNode;
+using search::query::FuzzyTerm;
using search::query::LocationTerm;
using search::query::Near;
using search::query::NearestNeighborTerm;
@@ -105,6 +106,7 @@ struct TermAsStringVisitor : public QueryVisitor {
void visit(SubstringTerm &n) override {visitTerm(n); }
void visit(SuffixTerm &n) override {visitTerm(n); }
void visit(RegExpTerm &n) override {visitTerm(n); }
+ void visit(FuzzyTerm &n) override { visitTerm(n); }
void visit(PredicateQuery &) override {illegalVisit(); }
void visit(NearestNeighborTerm &) override { illegalVisit(); }
void visit(TrueQueryNode &) override { illegalVisit(); }
diff --git a/vespalib/CMakeLists.txt b/vespalib/CMakeLists.txt
index 29a0c1e7bb8..c3c7fee2cef 100644
--- a/vespalib/CMakeLists.txt
+++ b/vespalib/CMakeLists.txt
@@ -62,6 +62,7 @@ vespa_define_module(
src/tests/explore_modern_cpp
src/tests/false
src/tests/fiddle
+ src/tests/fuzzy
src/tests/gencnt
src/tests/guard
src/tests/host_name
@@ -169,6 +170,7 @@ vespa_define_module(
src/vespa/vespalib/data
src/vespa/vespalib/data/slime
src/vespa/vespalib/datastore
+ src/vespa/vespalib/fuzzy
src/vespa/vespalib/geo
src/vespa/vespalib/hwaccelrated
src/vespa/vespalib/io
diff --git a/vespalib/src/tests/fuzzy/.gitignore b/vespalib/src/tests/fuzzy/.gitignore
new file mode 100644
index 00000000000..cd5ca9b3b88
--- /dev/null
+++ b/vespalib/src/tests/fuzzy/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+fuzzy_test
+vespalib_fuzzy_test_app
diff --git a/vespalib/src/tests/fuzzy/CMakeLists.txt b/vespalib/src/tests/fuzzy/CMakeLists.txt
new file mode 100644
index 00000000000..f58602296a5
--- /dev/null
+++ b/vespalib/src/tests/fuzzy/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(vespalib_fuzzy_test_app TEST
+ SOURCES
+ fuzzy.cpp
+ DEPENDS
+ vespalib
+ )
+vespa_add_test(NAME vespalib_fuzzy_test_app COMMAND vespalib_fuzzy_test_app)
diff --git a/vespalib/src/tests/fuzzy/fuzzy.cpp b/vespalib/src/tests/fuzzy/fuzzy.cpp
new file mode 100644
index 00000000000..9ffb77b3742
--- /dev/null
+++ b/vespalib/src/tests/fuzzy/fuzzy.cpp
@@ -0,0 +1,33 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/vespalib/fuzzy/fuzzy.h>
+
+using namespace vespalib;
+
+
+TEST("require that levenstein distance works") {
+ EXPECT_EQUAL(0u, Fuzzy::levenstein_distance("abc", "abc", 2).value());
+ EXPECT_EQUAL(0u, Fuzzy::levenstein_distance("abc", "ABC", 2).value());
+ EXPECT_EQUAL(1u, Fuzzy::levenstein_distance("abc", "abd", 2).value());
+ EXPECT_EQUAL(1u, Fuzzy::levenstein_distance("ABC", "abd", 2).value());
+ EXPECT_EQUAL(2u, Fuzzy::levenstein_distance("ABC", "add", 2).value());
+ EXPECT_FALSE(Fuzzy::levenstein_distance("ABC", "ddd", 2).has_value());
+}
+
+TEST("require that extracting of a prefix works") {
+ Fuzzy fuzzy(Fuzzy::folded_codepoints("prefix"), 2, 2);
+ EXPECT_EQUAL("pr", fuzzy.getPrefix());
+}
+
+TEST("require that empty prefix works") {
+ Fuzzy fuzzy(Fuzzy::folded_codepoints("prefix"), 0, 2);
+ EXPECT_EQUAL("", fuzzy.getPrefix());
+}
+
+TEST("require that longer prefix size works") {
+ Fuzzy fuzzy(Fuzzy::folded_codepoints("prefix"), 100, 2);
+ EXPECT_EQUAL("prefix", fuzzy.getPrefix());
+}
+
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/vespalib/src/vespa/vespalib/CMakeLists.txt b/vespalib/src/vespa/vespalib/CMakeLists.txt
index dea0f509bad..1fffddf2f2f 100644
--- a/vespalib/src/vespa/vespalib/CMakeLists.txt
+++ b/vespalib/src/vespa/vespalib/CMakeLists.txt
@@ -7,6 +7,7 @@ vespa_add_library(vespalib
$<TARGET_OBJECTS:vespalib_vespalib_data>
$<TARGET_OBJECTS:vespalib_vespalib_data_slime>
$<TARGET_OBJECTS:vespalib_vespalib_datastore>
+ $<TARGET_OBJECTS:vespalib_vespalib_fuzzy>
$<TARGET_OBJECTS:vespalib_vespalib_geo>
$<TARGET_OBJECTS:vespalib_vespalib_hwaccelrated>
$<TARGET_OBJECTS:vespalib_vespalib_io>
diff --git a/vespalib/src/vespa/vespalib/fuzzy/CMakeLists.txt b/vespalib/src/vespa/vespalib/fuzzy/CMakeLists.txt
new file mode 100644
index 00000000000..0c6c8f5c446
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/fuzzy/CMakeLists.txt
@@ -0,0 +1,7 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_library(vespalib_vespalib_fuzzy OBJECT
+ SOURCES
+ fuzzy.cpp
+ DEPENDS
+ )
+
diff --git a/vespalib/src/vespa/vespalib/fuzzy/fuzzy.cpp b/vespalib/src/vespa/vespalib/fuzzy/fuzzy.cpp
new file mode 100644
index 00000000000..55d11b74123
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/fuzzy/fuzzy.cpp
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Levenstein distance algorithm is based off Java implementation from apache commons-text library
+ */
+
+#include "fuzzy.h"
+
+#include <limits>
+
+#include <vespa/vespalib/text/utf8.h>
+#include <vespa/vespalib/text/lowercase.h>
+
+vespalib::Fuzzy vespalib::Fuzzy::from_term(std::string_view term) {
+ return Fuzzy(folded_codepoints(term));
+}
+
+std::vector<uint32_t> vespalib::Fuzzy::folded_codepoints(const char* src, size_t srcSize) {
+ std::vector<uint32_t> result;
+ result.reserve(srcSize);
+ Utf8ReaderForZTS srcReader(src);
+ while (srcReader.hasMore()) {
+ result.emplace_back(LowerCase::convert(srcReader.getChar()));
+ }
+ return result;
+}
+
+std::vector<uint32_t> vespalib::Fuzzy::folded_codepoints(std::string_view src) {
+ return folded_codepoints(src.data(), src.size());
+}
+
+std::optional<uint32_t> vespalib::Fuzzy::levenstein_distance(std::string_view source, std::string_view target, uint32_t threshold) {
+ std::vector<uint32_t> sourceCodepoints = folded_codepoints(source.data(), source.size());
+ std::vector<uint32_t> targetCodepoints = folded_codepoints(target.data(), target.size());
+ return levenstein_distance(sourceCodepoints, targetCodepoints, threshold);
+}
+
+std::optional<uint32_t> vespalib::Fuzzy::levenstein_distance(const std::vector<uint32_t>& left, const std::vector<uint32_t>& right, uint32_t threshold) {
+ uint32_t n = left.size();
+ uint32_t m = right.size();
+
+ if (n > m) {
+ return levenstein_distance(right, left, threshold);
+ }
+
+ // if one string is empty, the edit distance is necessarily the length
+ // of the other
+ if (n == 0) {
+ return m <= threshold ? std::optional(m) : std::nullopt;
+ }
+ if (m == 0) {
+ return n <= threshold ? std::optional(n) : std::nullopt;
+ }
+
+
+ // the edit distance cannot be less than the length difference
+ if (m - n > threshold) {
+ return std::nullopt;
+ }
+
+ std::vector<uint32_t> p(n+1); // 'previous' cost array, horizontally
+ std::vector<uint32_t> d(n+1); // cost array, horizontally
+
+ const uint32_t boundary = std::min(n, threshold) + 1;
+
+ for (uint32_t i = 0; i < boundary; ++i) {
+ p[i] = i;
+ }
+ // these fills ensure that the value above the rightmost entry of our
+ // stripe will be ignored in following loop iterations
+ for (uint32_t i = boundary; i < p.size(); ++i) {
+ p[i] = std::numeric_limits<uint32_t>::max();
+ }
+ for (uint32_t i = 0; i < d.size(); ++i) {
+ d[i] = std::numeric_limits<uint32_t>::max();
+ }
+
+ // iterates through t
+ for (uint32_t j = 1; j <= m; ++j) {
+ uint32_t rightJ = right[j - 1]; // jth character of right
+ d[0] = j;
+
+ int32_t min = std::max(1, static_cast<int32_t>(j) - static_cast<int32_t>(threshold));
+
+ uint32_t max = j > std::numeric_limits<uint32_t>::max() - threshold ?
+ n : std::min(n, j + threshold);
+
+ // ignore entry left of leftmost
+ // printf("j = %d, min = %d, max = %d, n = %d\n", j, min, max, n);
+ if (min > 1) {
+ d[min - 1] = std::numeric_limits<uint32_t>::max();
+ }
+
+ uint32_t lowerBound = std::numeric_limits<uint32_t>::max();
+
+ for (uint32_t i = min; i <= max; ++i) {
+ if (left[i - 1] == rightJ) {
+ // diagonally left and up
+ d[i] = p[i - 1];
+ } else {
+ // 1 + minimum of cell to the left, to the top, diagonally
+ // left and up
+
+ d[i] = 1 + std::min(std::min(d[i - 1], p[i]), p[i - 1]);
+ }
+ lowerBound = std::min(lowerBound, d[i]);
+ }
+ if (lowerBound > threshold) {
+ return std::nullopt;
+ }
+ std::swap(p, d);
+ }
+ if (p[n] <= threshold) {
+ return {p[n]};
+ }
+ return std::nullopt;
+}
+
+bool vespalib::Fuzzy::isMatch(std::string_view src) const {
+ std::vector<uint32_t> srcCodepoints = folded_codepoints(src);
+ return levenstein_distance(_folded_term_codepoints, srcCodepoints, _edit_distance).has_value();
+}
+
+vespalib::string vespalib::Fuzzy::getPrefix() const {
+ vespalib::string prefix;
+ Utf8Writer writer(prefix);
+ for (uint8_t i=0; i <_prefix_size && i <_folded_term_codepoints.size(); ++i) {
+ writer.putChar(_folded_term_codepoints.at(i));
+ }
+ return prefix;
+}
diff --git a/vespalib/src/vespa/vespalib/fuzzy/fuzzy.h b/vespalib/src/vespa/vespalib/fuzzy/fuzzy.h
new file mode 100644
index 00000000000..775c25445e3
--- /dev/null
+++ b/vespalib/src/vespa/vespalib/fuzzy/fuzzy.h
@@ -0,0 +1,60 @@
+#pragma once
+
+#include <string_view>
+#include <utility>
+#include <vector>
+#include <optional>
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace vespalib {
+
+class Fuzzy {
+private:
+ static constexpr uint8_t DefaultPrefixSize = 0u;
+ static constexpr uint8_t DefaultEditDistance = 2u;
+
+ std::vector<uint32_t> _folded_term_codepoints;
+
+ uint8_t _prefix_size; // prefix of a term that is considered frozen, i.e non-fuzzy
+ uint8_t _edit_distance; // max edit distance
+
+
+public:
+ Fuzzy():
+ _folded_term_codepoints(),
+ _prefix_size(DefaultPrefixSize),
+ _edit_distance(DefaultEditDistance)
+ {}
+
+ Fuzzy(std::vector<uint32_t> codepoints):
+ _folded_term_codepoints(std::move(codepoints)),
+ _prefix_size(DefaultPrefixSize),
+ _edit_distance(DefaultEditDistance)
+ {}
+
+ Fuzzy(std::vector<uint32_t> codepoints, uint8_t prefix_size, uint8_t edit_distance):
+ _folded_term_codepoints(std::move(codepoints)),
+ _prefix_size(prefix_size),
+ _edit_distance(edit_distance)
+ {}
+
+ [[nodiscard]] bool isMatch(std::string_view src) const;
+
+ [[nodiscard]] vespalib::string getPrefix() const;
+
+ ///
+
+ static Fuzzy from_term(std::string_view term);
+
+ static std::optional<uint32_t> levenstein_distance(const std::vector<uint32_t>& source, const std::vector<uint32_t>& target, uint32_t threshold);
+
+ static std::optional<uint32_t> levenstein_distance(std::string_view source, std::string_view target, uint32_t threshold);
+
+ static std::vector<uint32_t> folded_codepoints(const char* src, size_t srcSize);
+
+ static std::vector<uint32_t> folded_codepoints(std::string_view src);
+
+};
+
+}