diff options
author | Alexey Chernyshev <aleksei@spotify.com> | 2022-04-22 15:06:31 +0200 |
---|---|---|
committer | Alexey Chernyshev <aleksei@spotify.com> | 2022-04-22 15:08:00 +0200 |
commit | 51938ec3ce101027d14b732ef44b8ac69951a800 (patch) | |
tree | d16df769c9da0c6a6140b7e1211b8fb552719bec | |
parent | 5e77ba78f83c8f9132f604614b5d7b1714f867a7 (diff) |
Validating fuzzy query
5 files changed, 241 insertions, 4 deletions
diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json index 4dcc56ed2d8..3ad0f94511d 100644 --- a/container-search/abi-spec.json +++ b/container-search/abi-spec.json @@ -8268,6 +8268,18 @@ "public static final com.yahoo.processing.request.CompoundName dryRunKey" ] }, + "com.yahoo.search.searchers.ValidateFuzzySearcher": { + "superClass": "com.yahoo.search.Searcher", + "interfaces": [], + "attributes": [ + "public" + ], + "methods": [ + "public void <init>(com.yahoo.vespa.config.search.AttributesConfig)", + "public com.yahoo.search.Result search(com.yahoo.search.Query, com.yahoo.search.searchchain.Execution)" + ], + "fields": [] + }, "com.yahoo.search.searchers.ValidateMatchPhaseSearcher": { "superClass": "com.yahoo.search.Searcher", "interfaces": [], diff --git a/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java b/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java index b26205b74e9..ea2a7752809 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java @@ -28,14 +28,10 @@ public class FuzzyItem extends TermItem { } public void setMaxEditDistance(int maxEditDistance) { - if (maxEditDistance < 0) - throw new IllegalArgumentException("Can not use negative maxEditDistance " + maxEditDistance); this.maxEditDistance = maxEditDistance; } public void setPrefixLength(int prefixLength) { - if (prefixLength < 0) - throw new IllegalArgumentException("Can not use negative prefixLength " + prefixLength); this.prefixLength = prefixLength; } diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java b/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java index b8d6a050691..a12456f5354 100644 --- a/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java +++ b/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java @@ -44,6 +44,7 @@ public class LocalProviderSpec { com.yahoo.prelude.searcher.ValidatePredicateSearcher.class, com.yahoo.search.searchers.ValidateNearestNeighborSearcher.class, com.yahoo.search.searchers.ValidateMatchPhaseSearcher.class, + com.yahoo.search.searchers.ValidateFuzzySearcher.class, com.yahoo.search.yql.FieldFiller.class, com.yahoo.search.searchers.InputCheckingSearcher.class, com.yahoo.search.searchers.ContainerLatencySearcher.class); diff --git a/container-search/src/main/java/com/yahoo/search/searchers/ValidateFuzzySearcher.java b/container-search/src/main/java/com/yahoo/search/searchers/ValidateFuzzySearcher.java new file mode 100644 index 00000000000..249a6342da6 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchers/ValidateFuzzySearcher.java @@ -0,0 +1,95 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.searchers; + +import com.yahoo.prelude.query.FuzzyItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.ToolBox; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.grouping.vespa.GroupingExecutor; +import com.yahoo.search.query.ranking.RankProperties; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.vespa.config.search.AttributesConfig; +import com.yahoo.yolean.chain.Before; + +import java.util.HashSet; +import java.util.Optional; +import java.util.Set; + +/** + * Validates any FuzzyItem query items. + * + * @author alexeyche + */ +@Before(GroupingExecutor.COMPONENT_NAME) // Must happen before query.prepare() +public class ValidateFuzzySearcher extends Searcher { + + private final Set<String> validAttributes = new HashSet<>(); + + public ValidateFuzzySearcher(AttributesConfig attributesConfig) { + for (AttributesConfig.Attribute a : attributesConfig.attribute()) { + if (a.datatype() == AttributesConfig.Attribute.Datatype.STRING) { + validAttributes.add(a.name()); + } + } + } + + @Override + public Result search(Query query, Execution execution) { + Optional<ErrorMessage> e = validate(query); + return e.isEmpty() ? execution.search(query) : new Result(query, e.get()); + } + + private Optional<ErrorMessage> validate(Query query) { + FuzzyVisitor visitor = new FuzzyVisitor(query.getRanking().getProperties(), validAttributes, query); + ToolBox.visit(visitor, query.getModel().getQueryTree().getRoot()); + return visitor.errorMessage; + } + + private static class FuzzyVisitor extends ToolBox.QueryVisitor { + + public Optional<ErrorMessage> errorMessage = Optional.empty(); + + private final Set<String> validAttributes; + private final Query query; + + public FuzzyVisitor(RankProperties rankProperties, Set<String> validAttributes, Query query) { + this.validAttributes = validAttributes; + this.query = query; + } + + @Override + public boolean visit(Item item) { + if (item instanceof FuzzyItem) { + String error = validate((FuzzyItem)item); + if (error != null) + errorMessage = Optional.of(ErrorMessage.createIllegalQuery(error)); + } + return true; + } + + /** Returns an error message if this is invalid, or null if it is valid */ + private String validate(FuzzyItem item) { + if (!validAttributes.contains(item.getIndexName())) { + return item + " field is not a string attribute"; + } + if (item.getPrefixLength() < 0) { + return item + " has invalid prefixLength " + item.getPrefixLength() + ": Must be >= 0"; + } + if (item.getMaxEditDistance() < 0) { + return item + " has invalid maxEditDistance " + item.getMaxEditDistance() + ": Must be >= 0"; + } + if (item.stringValue().isEmpty()) { + return item + " fuzzy query must be non-empty"; + } + return null; + } + + @Override + public void onExit() {} + + } + +} diff --git a/container-search/src/test/java/com/yahoo/search/searchers/ValidateFuzzySearcherTestCase.java b/container-search/src/test/java/com/yahoo/search/searchers/ValidateFuzzySearcherTestCase.java new file mode 100644 index 00000000000..587b40dfd03 --- /dev/null +++ b/container-search/src/test/java/com/yahoo/search/searchers/ValidateFuzzySearcherTestCase.java @@ -0,0 +1,133 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.searchers; + +import com.yahoo.config.subscription.ConfigGetter; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.IndexModel; +import com.yahoo.prelude.SearchDefinition; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.query.QueryTree; +import com.yahoo.search.query.parser.Parsable; +import com.yahoo.search.query.parser.ParserEnvironment; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.yql.YqlParser; +import com.yahoo.vespa.config.search.AttributesConfig.Attribute; +import com.yahoo.vespa.config.search.AttributesConfig; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +/** + * @author alexeyche + */ +public class ValidateFuzzySearcherTestCase { + ValidateFuzzySearcher searcher; + + List<String> attributes; + + public ValidateFuzzySearcherTestCase() { + int i = 0; + attributes = new ArrayList<>(); + StringBuilder attributeConfig = new StringBuilder(); + for (Attribute.Datatype.Enum attr: Attribute.Datatype.Enum.values()) { + for (Attribute.Collectiontype.Enum ctype: Attribute.Collectiontype.Enum.values()) { + String attributeName = attr.name().toLowerCase() + "_" + ctype.name().toLowerCase(); + + attributeConfig.append("attribute[" + i + "].name "); + attributeConfig.append(attributeName); + attributeConfig.append("\n"); + + attributeConfig.append("attribute[" + i + "].datatype "); + attributeConfig.append(attr.name()); + attributeConfig.append("\n"); + + attributeConfig.append("attribute[" + i + "].collectiontype "); + attributeConfig.append(ctype.name()); + attributeConfig.append("\n"); + + i += 1; + attributes.add(attributeName); + } + } + + searcher = new ValidateFuzzySearcher(ConfigGetter.getConfig( + AttributesConfig.class, + "raw: " + + "attribute[" + attributes.size() + "]\n" + + attributeConfig)); + } + + private String makeQuery(String attribute, String query, int maxEditDistance, int prefixLength) { + return "select * from sources * where " + attribute + + " contains ({maxEditDistance:" + maxEditDistance + ", prefixLength:" + prefixLength +"}" + + "fuzzy(\"" + query + "\"))"; + } + + private String makeQuery(String attribute, String query) { + return makeQuery(attribute, query, 2, 0); + } + + + @Test + public void testQueriesToAllAttributes() { + final Set<String> validAttributes = Set.of("string_single", "string_array", "string_weightedset"); + + for (String attribute: attributes) { + String q = makeQuery(attribute, "fuzzy"); + Result r = doSearch(searcher, q); + if (validAttributes.contains(attribute)) { + assertNull(r.hits().getError()); + } else { + assertErrMsg("FUZZY(fuzzy,2,0) " + attribute + ":fuzzy field is not a string attribute", r); + } + } + } + + @Test + public void testInvalidEmptyStringQuery() { + String q = makeQuery("string_single", ""); + Result r = doSearch(searcher, q); + assertErrMsg("FUZZY(,2,0) string_single: fuzzy query must be non-empty", r); + } + + @Test + public void testInvalidQueryWrongMaxEditDistance() { + String q = makeQuery("string_single", "fuzzy", -1, 0); + Result r = doSearch(searcher, q); + assertErrMsg("FUZZY(fuzzy,-1,0) string_single:fuzzy has invalid maxEditDistance -1: Must be >= 0", r); + } + + @Test + public void testInvalidQueryWrongPrefixLength() { + String q = makeQuery("string_single", "fuzzy", 2, -1); + Result r = doSearch(searcher, q); + assertErrMsg("FUZZY(fuzzy,2,-1) string_single:fuzzy has invalid prefixLength -1: Must be >= 0", r); + } + + @Test + public void testInvalidQueryWrongAttributeName() { + String q = makeQuery("wrong_name", "fuzzy"); + Result r = doSearch(searcher, q); + assertErrMsg("FUZZY(fuzzy,2,0) wrong_name:fuzzy field is not a string attribute", r); + } + + private static void assertErrMsg(String message, Result r) { + assertEquals(ErrorMessage.createIllegalQuery(message), r.hits().getError()); + } + + private static Result doSearch(ValidateFuzzySearcher searcher, String yqlQuery) { + QueryTree queryTree = new YqlParser(new ParserEnvironment()).parse(new Parsable().setQuery(yqlQuery)); + Query query = new Query(); + query.getModel().getQueryTree().setRoot(queryTree.getRoot()); + SearchDefinition searchDefinition = new SearchDefinition("document"); + IndexFacts indexFacts = new IndexFacts(new IndexModel(searchDefinition)); + return new Execution(searcher, Execution.Context.createContextStub(indexFacts)).search(query); + } +} |