summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexey Chernyshev <aleksei@spotify.com>2022-04-22 15:06:31 +0200
committerAlexey Chernyshev <aleksei@spotify.com>2022-04-22 15:08:00 +0200
commit51938ec3ce101027d14b732ef44b8ac69951a800 (patch)
treed16df769c9da0c6a6140b7e1211b8fb552719bec
parent5e77ba78f83c8f9132f604614b5d7b1714f867a7 (diff)
Validating fuzzy query
-rw-r--r--container-search/abi-spec.json12
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java4
-rw-r--r--container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java1
-rw-r--r--container-search/src/main/java/com/yahoo/search/searchers/ValidateFuzzySearcher.java95
-rw-r--r--container-search/src/test/java/com/yahoo/search/searchers/ValidateFuzzySearcherTestCase.java133
5 files changed, 241 insertions, 4 deletions
diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json
index 4dcc56ed2d8..3ad0f94511d 100644
--- a/container-search/abi-spec.json
+++ b/container-search/abi-spec.json
@@ -8268,6 +8268,18 @@
"public static final com.yahoo.processing.request.CompoundName dryRunKey"
]
},
+ "com.yahoo.search.searchers.ValidateFuzzySearcher": {
+ "superClass": "com.yahoo.search.Searcher",
+ "interfaces": [],
+ "attributes": [
+ "public"
+ ],
+ "methods": [
+ "public void <init>(com.yahoo.vespa.config.search.AttributesConfig)",
+ "public com.yahoo.search.Result search(com.yahoo.search.Query, com.yahoo.search.searchchain.Execution)"
+ ],
+ "fields": []
+ },
"com.yahoo.search.searchers.ValidateMatchPhaseSearcher": {
"superClass": "com.yahoo.search.Searcher",
"interfaces": [],
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java b/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java
index b26205b74e9..ea2a7752809 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java
@@ -28,14 +28,10 @@ public class FuzzyItem extends TermItem {
}
public void setMaxEditDistance(int maxEditDistance) {
- if (maxEditDistance < 0)
- throw new IllegalArgumentException("Can not use negative maxEditDistance " + maxEditDistance);
this.maxEditDistance = maxEditDistance;
}
public void setPrefixLength(int prefixLength) {
- if (prefixLength < 0)
- throw new IllegalArgumentException("Can not use negative prefixLength " + prefixLength);
this.prefixLength = prefixLength;
}
diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java b/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java
index b8d6a050691..a12456f5354 100644
--- a/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java
+++ b/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java
@@ -44,6 +44,7 @@ public class LocalProviderSpec {
com.yahoo.prelude.searcher.ValidatePredicateSearcher.class,
com.yahoo.search.searchers.ValidateNearestNeighborSearcher.class,
com.yahoo.search.searchers.ValidateMatchPhaseSearcher.class,
+ com.yahoo.search.searchers.ValidateFuzzySearcher.class,
com.yahoo.search.yql.FieldFiller.class,
com.yahoo.search.searchers.InputCheckingSearcher.class,
com.yahoo.search.searchers.ContainerLatencySearcher.class);
diff --git a/container-search/src/main/java/com/yahoo/search/searchers/ValidateFuzzySearcher.java b/container-search/src/main/java/com/yahoo/search/searchers/ValidateFuzzySearcher.java
new file mode 100644
index 00000000000..249a6342da6
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/search/searchers/ValidateFuzzySearcher.java
@@ -0,0 +1,95 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.search.searchers;
+
+import com.yahoo.prelude.query.FuzzyItem;
+import com.yahoo.prelude.query.Item;
+import com.yahoo.prelude.query.ToolBox;
+import com.yahoo.search.Query;
+import com.yahoo.search.Result;
+import com.yahoo.search.Searcher;
+import com.yahoo.search.grouping.vespa.GroupingExecutor;
+import com.yahoo.search.query.ranking.RankProperties;
+import com.yahoo.search.result.ErrorMessage;
+import com.yahoo.search.searchchain.Execution;
+import com.yahoo.vespa.config.search.AttributesConfig;
+import com.yahoo.yolean.chain.Before;
+
+import java.util.HashSet;
+import java.util.Optional;
+import java.util.Set;
+
+/**
+ * Validates any FuzzyItem query items.
+ *
+ * @author alexeyche
+ */
+@Before(GroupingExecutor.COMPONENT_NAME) // Must happen before query.prepare()
+public class ValidateFuzzySearcher extends Searcher {
+
+ private final Set<String> validAttributes = new HashSet<>();
+
+ public ValidateFuzzySearcher(AttributesConfig attributesConfig) {
+ for (AttributesConfig.Attribute a : attributesConfig.attribute()) {
+ if (a.datatype() == AttributesConfig.Attribute.Datatype.STRING) {
+ validAttributes.add(a.name());
+ }
+ }
+ }
+
+ @Override
+ public Result search(Query query, Execution execution) {
+ Optional<ErrorMessage> e = validate(query);
+ return e.isEmpty() ? execution.search(query) : new Result(query, e.get());
+ }
+
+ private Optional<ErrorMessage> validate(Query query) {
+ FuzzyVisitor visitor = new FuzzyVisitor(query.getRanking().getProperties(), validAttributes, query);
+ ToolBox.visit(visitor, query.getModel().getQueryTree().getRoot());
+ return visitor.errorMessage;
+ }
+
+ private static class FuzzyVisitor extends ToolBox.QueryVisitor {
+
+ public Optional<ErrorMessage> errorMessage = Optional.empty();
+
+ private final Set<String> validAttributes;
+ private final Query query;
+
+ public FuzzyVisitor(RankProperties rankProperties, Set<String> validAttributes, Query query) {
+ this.validAttributes = validAttributes;
+ this.query = query;
+ }
+
+ @Override
+ public boolean visit(Item item) {
+ if (item instanceof FuzzyItem) {
+ String error = validate((FuzzyItem)item);
+ if (error != null)
+ errorMessage = Optional.of(ErrorMessage.createIllegalQuery(error));
+ }
+ return true;
+ }
+
+ /** Returns an error message if this is invalid, or null if it is valid */
+ private String validate(FuzzyItem item) {
+ if (!validAttributes.contains(item.getIndexName())) {
+ return item + " field is not a string attribute";
+ }
+ if (item.getPrefixLength() < 0) {
+ return item + " has invalid prefixLength " + item.getPrefixLength() + ": Must be >= 0";
+ }
+ if (item.getMaxEditDistance() < 0) {
+ return item + " has invalid maxEditDistance " + item.getMaxEditDistance() + ": Must be >= 0";
+ }
+ if (item.stringValue().isEmpty()) {
+ return item + " fuzzy query must be non-empty";
+ }
+ return null;
+ }
+
+ @Override
+ public void onExit() {}
+
+ }
+
+}
diff --git a/container-search/src/test/java/com/yahoo/search/searchers/ValidateFuzzySearcherTestCase.java b/container-search/src/test/java/com/yahoo/search/searchers/ValidateFuzzySearcherTestCase.java
new file mode 100644
index 00000000000..587b40dfd03
--- /dev/null
+++ b/container-search/src/test/java/com/yahoo/search/searchers/ValidateFuzzySearcherTestCase.java
@@ -0,0 +1,133 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.search.searchers;
+
+import com.yahoo.config.subscription.ConfigGetter;
+import com.yahoo.prelude.IndexFacts;
+import com.yahoo.prelude.IndexModel;
+import com.yahoo.prelude.SearchDefinition;
+import com.yahoo.search.Query;
+import com.yahoo.search.Result;
+import com.yahoo.search.query.QueryTree;
+import com.yahoo.search.query.parser.Parsable;
+import com.yahoo.search.query.parser.ParserEnvironment;
+import com.yahoo.search.result.ErrorMessage;
+import com.yahoo.search.searchchain.Execution;
+import com.yahoo.search.yql.YqlParser;
+import com.yahoo.vespa.config.search.AttributesConfig.Attribute;
+import com.yahoo.vespa.config.search.AttributesConfig;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+/**
+ * @author alexeyche
+ */
+public class ValidateFuzzySearcherTestCase {
+ ValidateFuzzySearcher searcher;
+
+ List<String> attributes;
+
+ public ValidateFuzzySearcherTestCase() {
+ int i = 0;
+ attributes = new ArrayList<>();
+ StringBuilder attributeConfig = new StringBuilder();
+ for (Attribute.Datatype.Enum attr: Attribute.Datatype.Enum.values()) {
+ for (Attribute.Collectiontype.Enum ctype: Attribute.Collectiontype.Enum.values()) {
+ String attributeName = attr.name().toLowerCase() + "_" + ctype.name().toLowerCase();
+
+ attributeConfig.append("attribute[" + i + "].name ");
+ attributeConfig.append(attributeName);
+ attributeConfig.append("\n");
+
+ attributeConfig.append("attribute[" + i + "].datatype ");
+ attributeConfig.append(attr.name());
+ attributeConfig.append("\n");
+
+ attributeConfig.append("attribute[" + i + "].collectiontype ");
+ attributeConfig.append(ctype.name());
+ attributeConfig.append("\n");
+
+ i += 1;
+ attributes.add(attributeName);
+ }
+ }
+
+ searcher = new ValidateFuzzySearcher(ConfigGetter.getConfig(
+ AttributesConfig.class,
+ "raw: " +
+ "attribute[" + attributes.size() + "]\n" +
+ attributeConfig));
+ }
+
+ private String makeQuery(String attribute, String query, int maxEditDistance, int prefixLength) {
+ return "select * from sources * where " + attribute +
+ " contains ({maxEditDistance:" + maxEditDistance + ", prefixLength:" + prefixLength +"}" +
+ "fuzzy(\"" + query + "\"))";
+ }
+
+ private String makeQuery(String attribute, String query) {
+ return makeQuery(attribute, query, 2, 0);
+ }
+
+
+ @Test
+ public void testQueriesToAllAttributes() {
+ final Set<String> validAttributes = Set.of("string_single", "string_array", "string_weightedset");
+
+ for (String attribute: attributes) {
+ String q = makeQuery(attribute, "fuzzy");
+ Result r = doSearch(searcher, q);
+ if (validAttributes.contains(attribute)) {
+ assertNull(r.hits().getError());
+ } else {
+ assertErrMsg("FUZZY(fuzzy,2,0) " + attribute + ":fuzzy field is not a string attribute", r);
+ }
+ }
+ }
+
+ @Test
+ public void testInvalidEmptyStringQuery() {
+ String q = makeQuery("string_single", "");
+ Result r = doSearch(searcher, q);
+ assertErrMsg("FUZZY(,2,0) string_single: fuzzy query must be non-empty", r);
+ }
+
+ @Test
+ public void testInvalidQueryWrongMaxEditDistance() {
+ String q = makeQuery("string_single", "fuzzy", -1, 0);
+ Result r = doSearch(searcher, q);
+ assertErrMsg("FUZZY(fuzzy,-1,0) string_single:fuzzy has invalid maxEditDistance -1: Must be >= 0", r);
+ }
+
+ @Test
+ public void testInvalidQueryWrongPrefixLength() {
+ String q = makeQuery("string_single", "fuzzy", 2, -1);
+ Result r = doSearch(searcher, q);
+ assertErrMsg("FUZZY(fuzzy,2,-1) string_single:fuzzy has invalid prefixLength -1: Must be >= 0", r);
+ }
+
+ @Test
+ public void testInvalidQueryWrongAttributeName() {
+ String q = makeQuery("wrong_name", "fuzzy");
+ Result r = doSearch(searcher, q);
+ assertErrMsg("FUZZY(fuzzy,2,0) wrong_name:fuzzy field is not a string attribute", r);
+ }
+
+ private static void assertErrMsg(String message, Result r) {
+ assertEquals(ErrorMessage.createIllegalQuery(message), r.hits().getError());
+ }
+
+ private static Result doSearch(ValidateFuzzySearcher searcher, String yqlQuery) {
+ QueryTree queryTree = new YqlParser(new ParserEnvironment()).parse(new Parsable().setQuery(yqlQuery));
+ Query query = new Query();
+ query.getModel().getQueryTree().setRoot(queryTree.getRoot());
+ SearchDefinition searchDefinition = new SearchDefinition("document");
+ IndexFacts indexFacts = new IndexFacts(new IndexModel(searchDefinition));
+ return new Execution(searcher, Execution.Context.createContextStub(indexFacts)).search(query);
+ }
+}