diff options
Diffstat (limited to 'container-search')
10 files changed, 312 insertions, 69 deletions
diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json index 4dcc56ed2d8..67bc553c478 100644 --- a/container-search/abi-spec.json +++ b/container-search/abi-spec.json @@ -6635,11 +6635,13 @@ ], "methods": [ "public void <init>()", - "public java.lang.Object get(com.yahoo.processing.request.CompoundName, java.util.Map, com.yahoo.processing.request.Properties)" + "public java.lang.Object get(com.yahoo.processing.request.CompoundName, java.util.Map, com.yahoo.processing.request.Properties)", + "public static void requireNotPresentIn(java.util.Map)" ], "fields": [ "public static final com.yahoo.processing.request.CompoundName MAX_OFFSET", "public static final com.yahoo.processing.request.CompoundName MAX_HITS", + "public static final com.yahoo.processing.request.CompoundName MAX_QUERY_ITEMS", "public static final com.yahoo.search.query.profile.types.QueryProfileType argumentType" ] }, @@ -8268,6 +8270,18 @@ "public static final com.yahoo.processing.request.CompoundName dryRunKey" ] }, + "com.yahoo.search.searchers.ValidateFuzzySearcher": { + "superClass": "com.yahoo.search.Searcher", + "interfaces": [], + "attributes": [ + "public" + ], + "methods": [ + "public void <init>(com.yahoo.vespa.config.search.AttributesConfig)", + "public com.yahoo.search.Result search(com.yahoo.search.Query, com.yahoo.search.searchchain.Execution)" + ], + "fields": [] + }, "com.yahoo.search.searchers.ValidateMatchPhaseSearcher": { "superClass": "com.yahoo.search.Searcher", "interfaces": [], diff --git a/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java b/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java index b26205b74e9..ea2a7752809 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java @@ -28,14 +28,10 @@ public class FuzzyItem extends TermItem { } public void setMaxEditDistance(int maxEditDistance) { - if (maxEditDistance < 0) - throw new IllegalArgumentException("Can not use negative maxEditDistance " + maxEditDistance); this.maxEditDistance = maxEditDistance; } public void setPrefixLength(int prefixLength) { - if (prefixLength < 0) - throw new IllegalArgumentException("Can not use negative prefixLength " + prefixLength); this.prefixLength = prefixLength; } diff --git a/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java b/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java index a93dd1b9de4..916f23bd768 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java @@ -4,6 +4,7 @@ package com.yahoo.prelude.query; import com.yahoo.processing.request.CompoundName; import com.yahoo.search.Query; import com.yahoo.search.query.QueryTree; +import com.yahoo.search.query.properties.DefaultProperties; import java.util.HashSet; import java.util.ListIterator; @@ -20,8 +21,6 @@ public class QueryCanonicalizer { /** The name of the operation performed by this, for use in search chain ordering */ public static final String queryCanonicalization = "queryCanonicalization"; - private static final CompoundName MAX_QUERY_ITEMS = new CompoundName("maxQueryItems"); - /** * Validates this query and carries out possible operations on this query * which simplifies it without changing its semantics. @@ -29,8 +28,8 @@ public class QueryCanonicalizer { * @return null if the query is valid, an error message if it is invalid */ public static String canonicalize(Query query) { - Integer maxQueryItems = query.properties().getInteger(MAX_QUERY_ITEMS, Integer.MAX_VALUE); - return canonicalize(query.getModel().getQueryTree(), maxQueryItems); + return canonicalize(query.getModel().getQueryTree(), + query.properties().getInteger(DefaultProperties.MAX_QUERY_ITEMS)); } /** @@ -52,7 +51,8 @@ public class QueryCanonicalizer { CanonicalizationResult result = recursivelyCanonicalize(rootItemIterator.next(), rootItemIterator); if (query.isEmpty() && ! result.isError()) result = CanonicalizationResult.error("No query"); int itemCount = query.treeSize(); - if (itemCount > maxQueryItems) result = CanonicalizationResult.error(String.format("Query tree exceeds allowed item count. Configured limit: %d - Item count: %d", maxQueryItems, itemCount)); + if (itemCount > maxQueryItems) + result = CanonicalizationResult.error(String.format("Query tree exceeds allowed item count. Configured limit: %d - Item count: %d", maxQueryItems, itemCount)); return result.error().orElse(null); // preserve old API, unfortunately } diff --git a/container-search/src/main/java/com/yahoo/search/handler/SearchHandler.java b/container-search/src/main/java/com/yahoo/search/handler/SearchHandler.java index af6374ba245..54d8ac40556 100644 --- a/container-search/src/main/java/com/yahoo/search/handler/SearchHandler.java +++ b/container-search/src/main/java/com/yahoo/search/handler/SearchHandler.java @@ -580,11 +580,7 @@ public class SearchHandler extends LoggingRequestHandler { } private Result validateQuery(Query query) { - if (query.getHttpRequest().getProperty(DefaultProperties.MAX_HITS.toString()) != null) - throw new RuntimeException(DefaultProperties.MAX_HITS + " must be specified in a query profile."); - - if (query.getHttpRequest().getProperty(DefaultProperties.MAX_OFFSET.toString()) != null) - throw new RuntimeException(DefaultProperties.MAX_OFFSET + " must be specified in a query profile."); + DefaultProperties.requireNotPresentIn(query.getHttpRequest().propertyMap()); int maxHits = query.properties().getInteger(DefaultProperties.MAX_HITS); int maxOffset = query.properties().getInteger(DefaultProperties.MAX_OFFSET); diff --git a/container-search/src/main/java/com/yahoo/search/query/QueryTree.java b/container-search/src/main/java/com/yahoo/search/query/QueryTree.java index 3dac5648660..0655727b46b 100644 --- a/container-search/src/main/java/com/yahoo/search/query/QueryTree.java +++ b/container-search/src/main/java/com/yahoo/search/query/QueryTree.java @@ -185,7 +185,7 @@ public class QueryTree extends CompositeItem { */ public int treeSize() { if (isEmpty()) return 0; - return(countItemsRecursively(getItemIterator().next())); + return countItemsRecursively(getItemIterator().next()); } private int countItemsRecursively(Item item) { diff --git a/container-search/src/main/java/com/yahoo/search/query/properties/DefaultProperties.java b/container-search/src/main/java/com/yahoo/search/query/properties/DefaultProperties.java index b94ddde4733..221368afeb6 100644 --- a/container-search/src/main/java/com/yahoo/search/query/properties/DefaultProperties.java +++ b/container-search/src/main/java/com/yahoo/search/query/properties/DefaultProperties.java @@ -6,6 +6,7 @@ import com.yahoo.search.query.Properties; import com.yahoo.search.query.profile.types.FieldDescription; import com.yahoo.search.query.profile.types.QueryProfileType; +import java.util.List; import java.util.Map; /** @@ -17,26 +18,30 @@ public final class DefaultProperties extends Properties { public static final CompoundName MAX_OFFSET = new CompoundName("maxOffset"); public static final CompoundName MAX_HITS = new CompoundName("maxHits"); + public static final CompoundName MAX_QUERY_ITEMS = new CompoundName("maxQueryItems"); public static final QueryProfileType argumentType = new QueryProfileType("DefaultProperties"); + private static final List<CompoundName> properties = List.of(MAX_OFFSET, MAX_HITS, MAX_QUERY_ITEMS); + static { argumentType.setBuiltin(true); - - argumentType.addField(new FieldDescription(MAX_OFFSET.toString(), "integer")); - argumentType.addField(new FieldDescription(MAX_HITS.toString(), "integer")); - + properties.forEach(property -> argumentType.addField(new FieldDescription(property.toString(), "integer"))); argumentType.freeze(); } @Override public Object get(CompoundName name, Map<String, String> context, com.yahoo.processing.request.Properties substitution) { - if (MAX_OFFSET.equals(name)) { - return 1000; - } else if (MAX_HITS.equals(name)) { - return 400; - } else { - return super.get(name, context, substitution); + if (name.equals(MAX_OFFSET)) return 1000; + if (name.equals(MAX_HITS)) return 400; + if (name.equals(MAX_QUERY_ITEMS)) return 10000; + return super.get(name, context, substitution); + } + + public static void requireNotPresentIn(Map<String, String> map) { + for (var property : properties) { + if (map.containsKey(property.toString())) + throw new IllegalArgumentException(property + " must be specified in a query profile."); } } diff --git a/container-search/src/main/java/com/yahoo/search/query/rewrite/RewriterFeatures.java b/container-search/src/main/java/com/yahoo/search/query/rewrite/RewriterFeatures.java index bb5792d81b7..c3905b8200b 100644 --- a/container-search/src/main/java/com/yahoo/search/query/rewrite/RewriterFeatures.java +++ b/container-search/src/main/java/com/yahoo/search/query/rewrite/RewriterFeatures.java @@ -68,7 +68,7 @@ public class RewriterFeatures { oldRoot.equals(origQueryItem)) { PhraseItem phrase = convertAndToPhrase((AndItem)oldRoot); - if(!keepOriginalQuery) { + if (!keepOriginalQuery) { qTree.setRoot(phrase); } else { OrItem newRoot = new OrItem(); @@ -145,12 +145,12 @@ public class RewriterFeatures { } StringTokenizer rewrite_list = new StringTokenizer(rewrites, "\t"); - Item rI = null; + Item rI; // Convert matching string to query tree item Item matchingStrItem = convertStringToQTree(query, matchingStr); PhraseItem matchingStrPhraseItem = null; - if(matchingStrItem instanceof AndItem) { + if (matchingStrItem instanceof AndItem) { matchingStrPhraseItem = convertAndToPhrase(((AndItem)matchingStrItem)); } @@ -166,30 +166,32 @@ public class RewriterFeatures { // - matchingStr: (AND aa bb) // - for this case, should use getNonOverlappingMatches instead OrItem newRoot; - if(oldRoot instanceof OrItem) { - if(((OrItem)oldRoot).getItemIndex(matchingStrItem)==-1) { + if (oldRoot instanceof OrItem) { + if (((OrItem)oldRoot).getItemIndex(matchingStrItem)==-1) { RewriterUtils.log(logger, query, "Whole query matching is used, skipping rewrite"); return query; } newRoot = (OrItem)oldRoot; - } else if(oldRoot.equals(matchingStrItem) || oldRoot.equals(matchingStrPhraseItem)) { + } + else if(oldRoot.equals(matchingStrItem) || oldRoot.equals(matchingStrPhraseItem)) { newRoot = new OrItem(); newRoot.addItem(oldRoot); - } else { + } + else { RewriterUtils.log(logger, query, "Whole query matching is used, skipping rewrite"); return query; } int numRewrites = 0; - while(rewrite_list.hasMoreTokens() && - (maxNumRewrites==0 || numRewrites < maxNumRewrites)) { + while (rewrite_list.hasMoreTokens() && (maxNumRewrites == 0 || numRewrites < maxNumRewrites)) { rI = convertStringToQTree(query, rewrite_list.nextToken()); - if(addUnitToRewrites && rI instanceof AndItem) { + if (addUnitToRewrites && rI instanceof AndItem) { rI = convertAndToPhrase((AndItem)rI); } - if(newRoot.getItemIndex(rI)==-1) { + if(newRoot.getItemIndex(rI) == -1) { newRoot.addItem(rI); numRewrites++; - } else { + } + else { RewriterUtils.log(logger, query, "Rewrite already exist, skipping"); } } @@ -229,19 +231,19 @@ public class RewriterFeatures { Query query) throws RuntimeException { RewriterUtils.log(logger, query, "Retrieving longest non-overlapping full phrase matches"); - if(phraseMatcher==null) + if (phraseMatcher == null) return null; Item root = query.getModel().getQueryTree().getRoot(); List<PhraseMatcher.Phrase> matches = phraseMatcher.matchPhrases(root); - if (matches==null || matches.isEmpty()) + if (matches == null || matches.isEmpty()) return null; Set<PhraseMatcher.Phrase> resultMatches = new HashSet<>(); ListIterator<Phrase> matchesIter = matches.listIterator(); // Iterate through all matches - while(matchesIter.hasNext()) { + while (matchesIter.hasNext()) { PhraseMatcher.Phrase phrase = matchesIter.next(); RewriterUtils.log(logger, query, "Working on phrase: " + phrase); CompositeItem currOwner = phrase.getOwner(); @@ -250,11 +252,11 @@ public class RewriterFeatures { // If phrase is not an AND item, only keep those that are single word // in order to eliminate cases such as (new RANK york) from being treated // as match if only new york but not new or york is in the dictionary - if((currOwner!=null && + if((currOwner != null && ((phrase.isComplete() && currOwner instanceof AndItem) || - (phrase.getLength()==1 && currOwner instanceof OrItem) || - (phrase.getLength()==1 && currOwner instanceof RankItem && phrase.getStartIndex()==0))) || - (currOwner==null && phrase.getLength()==1)) { + (phrase.getLength() == 1 && currOwner instanceof OrItem) || + (phrase.getLength() == 1 && currOwner instanceof RankItem && phrase.getStartIndex() == 0))) || + (currOwner == null && phrase.getLength() == 1)) { resultMatches.add(phrase); RewriterUtils.log(logger, query, "Keeping phrase: " + phrase); } @@ -298,12 +300,12 @@ public class RewriterFeatures { Query query) throws RuntimeException { RewriterUtils.log(logger, query, "Retrieving longest non-overlapping partial phrase matches"); - if(phraseMatcher==null) + if (phraseMatcher == null) return null; Item root = query.getModel().getQueryTree().getRoot(); List<PhraseMatcher.Phrase> matches = phraseMatcher.matchPhrases(root); - if (matches==null || matches.isEmpty()) + if (matches == null || matches.isEmpty()) return null; Set<PhraseMatcher.Phrase> resultMatches = new HashSet<>(); @@ -312,14 +314,14 @@ public class RewriterFeatures { ListIterator<PhraseMatcher.Phrase> matchesIter = matches.listIterator(); // Iterate through all matches - while(matchesIter.hasNext()) { + while (matchesIter.hasNext()) { PhraseMatcher.Phrase phrase = matchesIter.next(); RewriterUtils.log(logger, query, "Working on phrase: " + phrase); CompositeItem currOwner = phrase.getOwner(); // Check if previous is AND item and this phrase is in a different item // If so, work on the previous set to eliminate overlapping matches - if(!phrasesInSubTree.isEmpty() && currOwner!=null && + if (!phrasesInSubTree.isEmpty() && currOwner!=null && prevOwner!=null && !currOwner.equals(prevOwner)) { RewriterUtils.log(logger, query, "Previous phrase is in different AND item"); List<PhraseMatcher.Phrase> subTreeMatches @@ -333,13 +335,13 @@ public class RewriterFeatures { } // Check if this is an AND item - if(currOwner!=null && currOwner instanceof AndItem) { + if (currOwner instanceof AndItem) { phrasesInSubTree.add(phrase); - // If phrase is not an AND item, only keep those that are single word - // in order to eliminate cases such as (new RANK york) from being treated - // as match if only new york but not new or york is in the dictionary - } else if (phrase.getLength()==1 && - !(currOwner!=null && currOwner instanceof RankItem && phrase.getStartIndex()!=0)) { + // If phrase is not an AND item, only keep those that are single word + // in order to eliminate cases such as (new RANK york) from being treated + // as match if only new york but not new or york is in the dictionary + } + else if (phrase.getLength() == 1 && !(currOwner instanceof RankItem && phrase.getStartIndex() != 0)) { resultMatches.add(phrase); } @@ -476,7 +478,7 @@ public class RewriterFeatures { boolean removeOriginal, boolean addUnitToRewrites) throws RuntimeException { - if(matches==null) { + if(matches == null) { RewriterUtils.log(logger, query, "No expansions to be added"); return query; } @@ -494,7 +496,7 @@ public class RewriterFeatures { // Retrieve expansion phrases String expansionStr = match.getData(); - if(expansionStr.equalsIgnoreCase("n/a") && expandIndex==null) { + if (expansionStr.equalsIgnoreCase("n/a") && expandIndex == null) { continue; } StringTokenizer expansions = new StringTokenizer(expansionStr,"\t"); @@ -509,17 +511,17 @@ public class RewriterFeatures { (maxNumRewrites==0 || numRewrites < maxNumRewrites)) { String expansion = expansions.nextToken(); RewriterUtils.log(logger, query, "Working on expansion: " + expansion); - if(expansion.equalsIgnoreCase("n/a")) { + if (expansion.equalsIgnoreCase("n/a")) { expansion = matchStr; } // (AND expansion) or "expansion" Item expansionItem = convertStringToQTree(query, expansion); - if(addUnitToRewrites && expansionItem instanceof AndItem) { + if (addUnitToRewrites && expansionItem instanceof AndItem) { expansionItem = convertAndToPhrase((AndItem)expansionItem); } expansionGrp.addItem(expansionItem); - if(expandIndex!=null) { + if (expandIndex!=null) { // indexName:expansion WordItem expansionIndexItem = new WordItem(expansion, expandIndex); expansionGrp.addItem(expansionIndexItem); @@ -528,19 +530,19 @@ public class RewriterFeatures { RewriterUtils.log(logger, query, "Adding expansion: " + expansion); } - if(!removeOriginal) { + if (!removeOriginal) { //(AND original) Item matchItem = convertStringToQTree(query, matchStr); - if(expansionGrp.getItemIndex(matchItem)==-1) { + if (expansionGrp.getItemIndex(matchItem)==-1) { expansionGrp.addItem(matchItem); } } parent = match.getOwner(); int matchIndex = match.getStartIndex(); - if(parent!=null) { + if (parent!=null) { // Remove matching phrase from original query - for(int i=0; i<match.getLength(); i++) { + for (int i=0; i<match.getLength(); i++) { parent.removeItem(matchIndex); } // Adding back expansions @@ -554,11 +556,11 @@ public class RewriterFeatures { } // Not root single item - if(parent!=null) { + if (parent != null) { // Cleaning up the query after rewrite to remove redundant tags // e.g. (AND (OR (AND a b) c)) => (OR (AND a b) c) String cleanupError = QueryCanonicalizer.canonicalize(qTree); - if(cleanupError!=null) { + if (cleanupError!=null) { RewriterUtils.error(logger, query, "Error canonicalizing query tree"); throw new RuntimeException("Error canonicalizing query tree"); } @@ -595,7 +597,7 @@ public class RewriterFeatures { */ static Item convertStringToQTree(Query query, String stringToParse) { RewriterUtils.log(logger, query, "Converting string [" + stringToParse + "] to query tree"); - if(stringToParse==null) { + if (stringToParse == null) { return new NullItem(); } Model model = query.getModel(); @@ -621,7 +623,7 @@ public class RewriterFeatures { Iterator<Item> subItems = andItem.getItemIterator(); while(subItems.hasNext()) { Item curr = (subItems.next()); - if(curr instanceof IntItem) { + if (curr instanceof IntItem) { WordItem numItem = new WordItem(((IntItem)curr).stringValue()); result.addItem(numItem); } else { @@ -639,7 +641,7 @@ public class RewriterFeatures { */ private static class PhraseLength implements Comparator<PhraseMatcher.Phrase> { public int compare(PhraseMatcher.Phrase phrase1, PhraseMatcher.Phrase phrase2) { - if((phrase2.getLength()>phrase1.getLength()) || + if ((phrase2.getLength()>phrase1.getLength()) || (phrase2.getLength()==phrase1.getLength() && phrase2.getStartIndex()<=phrase1.getStartIndex())) { return 1; @@ -648,4 +650,5 @@ public class RewriterFeatures { } } } + } diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java b/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java index b8d6a050691..a12456f5354 100644 --- a/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java +++ b/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java @@ -44,6 +44,7 @@ public class LocalProviderSpec { com.yahoo.prelude.searcher.ValidatePredicateSearcher.class, com.yahoo.search.searchers.ValidateNearestNeighborSearcher.class, com.yahoo.search.searchers.ValidateMatchPhaseSearcher.class, + com.yahoo.search.searchers.ValidateFuzzySearcher.class, com.yahoo.search.yql.FieldFiller.class, com.yahoo.search.searchers.InputCheckingSearcher.class, com.yahoo.search.searchers.ContainerLatencySearcher.class); diff --git a/container-search/src/main/java/com/yahoo/search/searchers/ValidateFuzzySearcher.java b/container-search/src/main/java/com/yahoo/search/searchers/ValidateFuzzySearcher.java new file mode 100644 index 00000000000..249a6342da6 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchers/ValidateFuzzySearcher.java @@ -0,0 +1,95 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.searchers; + +import com.yahoo.prelude.query.FuzzyItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.ToolBox; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.grouping.vespa.GroupingExecutor; +import com.yahoo.search.query.ranking.RankProperties; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.vespa.config.search.AttributesConfig; +import com.yahoo.yolean.chain.Before; + +import java.util.HashSet; +import java.util.Optional; +import java.util.Set; + +/** + * Validates any FuzzyItem query items. + * + * @author alexeyche + */ +@Before(GroupingExecutor.COMPONENT_NAME) // Must happen before query.prepare() +public class ValidateFuzzySearcher extends Searcher { + + private final Set<String> validAttributes = new HashSet<>(); + + public ValidateFuzzySearcher(AttributesConfig attributesConfig) { + for (AttributesConfig.Attribute a : attributesConfig.attribute()) { + if (a.datatype() == AttributesConfig.Attribute.Datatype.STRING) { + validAttributes.add(a.name()); + } + } + } + + @Override + public Result search(Query query, Execution execution) { + Optional<ErrorMessage> e = validate(query); + return e.isEmpty() ? execution.search(query) : new Result(query, e.get()); + } + + private Optional<ErrorMessage> validate(Query query) { + FuzzyVisitor visitor = new FuzzyVisitor(query.getRanking().getProperties(), validAttributes, query); + ToolBox.visit(visitor, query.getModel().getQueryTree().getRoot()); + return visitor.errorMessage; + } + + private static class FuzzyVisitor extends ToolBox.QueryVisitor { + + public Optional<ErrorMessage> errorMessage = Optional.empty(); + + private final Set<String> validAttributes; + private final Query query; + + public FuzzyVisitor(RankProperties rankProperties, Set<String> validAttributes, Query query) { + this.validAttributes = validAttributes; + this.query = query; + } + + @Override + public boolean visit(Item item) { + if (item instanceof FuzzyItem) { + String error = validate((FuzzyItem)item); + if (error != null) + errorMessage = Optional.of(ErrorMessage.createIllegalQuery(error)); + } + return true; + } + + /** Returns an error message if this is invalid, or null if it is valid */ + private String validate(FuzzyItem item) { + if (!validAttributes.contains(item.getIndexName())) { + return item + " field is not a string attribute"; + } + if (item.getPrefixLength() < 0) { + return item + " has invalid prefixLength " + item.getPrefixLength() + ": Must be >= 0"; + } + if (item.getMaxEditDistance() < 0) { + return item + " has invalid maxEditDistance " + item.getMaxEditDistance() + ": Must be >= 0"; + } + if (item.stringValue().isEmpty()) { + return item + " fuzzy query must be non-empty"; + } + return null; + } + + @Override + public void onExit() {} + + } + +} diff --git a/container-search/src/test/java/com/yahoo/search/searchers/ValidateFuzzySearcherTestCase.java b/container-search/src/test/java/com/yahoo/search/searchers/ValidateFuzzySearcherTestCase.java new file mode 100644 index 00000000000..587b40dfd03 --- /dev/null +++ b/container-search/src/test/java/com/yahoo/search/searchers/ValidateFuzzySearcherTestCase.java @@ -0,0 +1,133 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.searchers; + +import com.yahoo.config.subscription.ConfigGetter; +import com.yahoo.prelude.IndexFacts; +import com.yahoo.prelude.IndexModel; +import com.yahoo.prelude.SearchDefinition; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.query.QueryTree; +import com.yahoo.search.query.parser.Parsable; +import com.yahoo.search.query.parser.ParserEnvironment; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.yql.YqlParser; +import com.yahoo.vespa.config.search.AttributesConfig.Attribute; +import com.yahoo.vespa.config.search.AttributesConfig; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +/** + * @author alexeyche + */ +public class ValidateFuzzySearcherTestCase { + ValidateFuzzySearcher searcher; + + List<String> attributes; + + public ValidateFuzzySearcherTestCase() { + int i = 0; + attributes = new ArrayList<>(); + StringBuilder attributeConfig = new StringBuilder(); + for (Attribute.Datatype.Enum attr: Attribute.Datatype.Enum.values()) { + for (Attribute.Collectiontype.Enum ctype: Attribute.Collectiontype.Enum.values()) { + String attributeName = attr.name().toLowerCase() + "_" + ctype.name().toLowerCase(); + + attributeConfig.append("attribute[" + i + "].name "); + attributeConfig.append(attributeName); + attributeConfig.append("\n"); + + attributeConfig.append("attribute[" + i + "].datatype "); + attributeConfig.append(attr.name()); + attributeConfig.append("\n"); + + attributeConfig.append("attribute[" + i + "].collectiontype "); + attributeConfig.append(ctype.name()); + attributeConfig.append("\n"); + + i += 1; + attributes.add(attributeName); + } + } + + searcher = new ValidateFuzzySearcher(ConfigGetter.getConfig( + AttributesConfig.class, + "raw: " + + "attribute[" + attributes.size() + "]\n" + + attributeConfig)); + } + + private String makeQuery(String attribute, String query, int maxEditDistance, int prefixLength) { + return "select * from sources * where " + attribute + + " contains ({maxEditDistance:" + maxEditDistance + ", prefixLength:" + prefixLength +"}" + + "fuzzy(\"" + query + "\"))"; + } + + private String makeQuery(String attribute, String query) { + return makeQuery(attribute, query, 2, 0); + } + + + @Test + public void testQueriesToAllAttributes() { + final Set<String> validAttributes = Set.of("string_single", "string_array", "string_weightedset"); + + for (String attribute: attributes) { + String q = makeQuery(attribute, "fuzzy"); + Result r = doSearch(searcher, q); + if (validAttributes.contains(attribute)) { + assertNull(r.hits().getError()); + } else { + assertErrMsg("FUZZY(fuzzy,2,0) " + attribute + ":fuzzy field is not a string attribute", r); + } + } + } + + @Test + public void testInvalidEmptyStringQuery() { + String q = makeQuery("string_single", ""); + Result r = doSearch(searcher, q); + assertErrMsg("FUZZY(,2,0) string_single: fuzzy query must be non-empty", r); + } + + @Test + public void testInvalidQueryWrongMaxEditDistance() { + String q = makeQuery("string_single", "fuzzy", -1, 0); + Result r = doSearch(searcher, q); + assertErrMsg("FUZZY(fuzzy,-1,0) string_single:fuzzy has invalid maxEditDistance -1: Must be >= 0", r); + } + + @Test + public void testInvalidQueryWrongPrefixLength() { + String q = makeQuery("string_single", "fuzzy", 2, -1); + Result r = doSearch(searcher, q); + assertErrMsg("FUZZY(fuzzy,2,-1) string_single:fuzzy has invalid prefixLength -1: Must be >= 0", r); + } + + @Test + public void testInvalidQueryWrongAttributeName() { + String q = makeQuery("wrong_name", "fuzzy"); + Result r = doSearch(searcher, q); + assertErrMsg("FUZZY(fuzzy,2,0) wrong_name:fuzzy field is not a string attribute", r); + } + + private static void assertErrMsg(String message, Result r) { + assertEquals(ErrorMessage.createIllegalQuery(message), r.hits().getError()); + } + + private static Result doSearch(ValidateFuzzySearcher searcher, String yqlQuery) { + QueryTree queryTree = new YqlParser(new ParserEnvironment()).parse(new Parsable().setQuery(yqlQuery)); + Query query = new Query(); + query.getModel().getQueryTree().setRoot(queryTree.getRoot()); + SearchDefinition searchDefinition = new SearchDefinition("document"); + IndexFacts indexFacts = new IndexFacts(new IndexModel(searchDefinition)); + return new Execution(searcher, Execution.Context.createContextStub(indexFacts)).search(query); + } +} |