diff options
Diffstat (limited to 'container-search/src/main')
8 files changed, 164 insertions, 68 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java b/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java index b26205b74e9..ea2a7752809 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/FuzzyItem.java @@ -28,14 +28,10 @@ public class FuzzyItem extends TermItem { } public void setMaxEditDistance(int maxEditDistance) { - if (maxEditDistance < 0) - throw new IllegalArgumentException("Can not use negative maxEditDistance " + maxEditDistance); this.maxEditDistance = maxEditDistance; } public void setPrefixLength(int prefixLength) { - if (prefixLength < 0) - throw new IllegalArgumentException("Can not use negative prefixLength " + prefixLength); this.prefixLength = prefixLength; } diff --git a/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java b/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java index a93dd1b9de4..916f23bd768 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java @@ -4,6 +4,7 @@ package com.yahoo.prelude.query; import com.yahoo.processing.request.CompoundName; import com.yahoo.search.Query; import com.yahoo.search.query.QueryTree; +import com.yahoo.search.query.properties.DefaultProperties; import java.util.HashSet; import java.util.ListIterator; @@ -20,8 +21,6 @@ public class QueryCanonicalizer { /** The name of the operation performed by this, for use in search chain ordering */ public static final String queryCanonicalization = "queryCanonicalization"; - private static final CompoundName MAX_QUERY_ITEMS = new CompoundName("maxQueryItems"); - /** * Validates this query and carries out possible operations on this query * which simplifies it without changing its semantics. @@ -29,8 +28,8 @@ public class QueryCanonicalizer { * @return null if the query is valid, an error message if it is invalid */ public static String canonicalize(Query query) { - Integer maxQueryItems = query.properties().getInteger(MAX_QUERY_ITEMS, Integer.MAX_VALUE); - return canonicalize(query.getModel().getQueryTree(), maxQueryItems); + return canonicalize(query.getModel().getQueryTree(), + query.properties().getInteger(DefaultProperties.MAX_QUERY_ITEMS)); } /** @@ -52,7 +51,8 @@ public class QueryCanonicalizer { CanonicalizationResult result = recursivelyCanonicalize(rootItemIterator.next(), rootItemIterator); if (query.isEmpty() && ! result.isError()) result = CanonicalizationResult.error("No query"); int itemCount = query.treeSize(); - if (itemCount > maxQueryItems) result = CanonicalizationResult.error(String.format("Query tree exceeds allowed item count. Configured limit: %d - Item count: %d", maxQueryItems, itemCount)); + if (itemCount > maxQueryItems) + result = CanonicalizationResult.error(String.format("Query tree exceeds allowed item count. Configured limit: %d - Item count: %d", maxQueryItems, itemCount)); return result.error().orElse(null); // preserve old API, unfortunately } diff --git a/container-search/src/main/java/com/yahoo/search/handler/SearchHandler.java b/container-search/src/main/java/com/yahoo/search/handler/SearchHandler.java index af6374ba245..54d8ac40556 100644 --- a/container-search/src/main/java/com/yahoo/search/handler/SearchHandler.java +++ b/container-search/src/main/java/com/yahoo/search/handler/SearchHandler.java @@ -580,11 +580,7 @@ public class SearchHandler extends LoggingRequestHandler { } private Result validateQuery(Query query) { - if (query.getHttpRequest().getProperty(DefaultProperties.MAX_HITS.toString()) != null) - throw new RuntimeException(DefaultProperties.MAX_HITS + " must be specified in a query profile."); - - if (query.getHttpRequest().getProperty(DefaultProperties.MAX_OFFSET.toString()) != null) - throw new RuntimeException(DefaultProperties.MAX_OFFSET + " must be specified in a query profile."); + DefaultProperties.requireNotPresentIn(query.getHttpRequest().propertyMap()); int maxHits = query.properties().getInteger(DefaultProperties.MAX_HITS); int maxOffset = query.properties().getInteger(DefaultProperties.MAX_OFFSET); diff --git a/container-search/src/main/java/com/yahoo/search/query/QueryTree.java b/container-search/src/main/java/com/yahoo/search/query/QueryTree.java index 3dac5648660..0655727b46b 100644 --- a/container-search/src/main/java/com/yahoo/search/query/QueryTree.java +++ b/container-search/src/main/java/com/yahoo/search/query/QueryTree.java @@ -185,7 +185,7 @@ public class QueryTree extends CompositeItem { */ public int treeSize() { if (isEmpty()) return 0; - return(countItemsRecursively(getItemIterator().next())); + return countItemsRecursively(getItemIterator().next()); } private int countItemsRecursively(Item item) { diff --git a/container-search/src/main/java/com/yahoo/search/query/properties/DefaultProperties.java b/container-search/src/main/java/com/yahoo/search/query/properties/DefaultProperties.java index b94ddde4733..221368afeb6 100644 --- a/container-search/src/main/java/com/yahoo/search/query/properties/DefaultProperties.java +++ b/container-search/src/main/java/com/yahoo/search/query/properties/DefaultProperties.java @@ -6,6 +6,7 @@ import com.yahoo.search.query.Properties; import com.yahoo.search.query.profile.types.FieldDescription; import com.yahoo.search.query.profile.types.QueryProfileType; +import java.util.List; import java.util.Map; /** @@ -17,26 +18,30 @@ public final class DefaultProperties extends Properties { public static final CompoundName MAX_OFFSET = new CompoundName("maxOffset"); public static final CompoundName MAX_HITS = new CompoundName("maxHits"); + public static final CompoundName MAX_QUERY_ITEMS = new CompoundName("maxQueryItems"); public static final QueryProfileType argumentType = new QueryProfileType("DefaultProperties"); + private static final List<CompoundName> properties = List.of(MAX_OFFSET, MAX_HITS, MAX_QUERY_ITEMS); + static { argumentType.setBuiltin(true); - - argumentType.addField(new FieldDescription(MAX_OFFSET.toString(), "integer")); - argumentType.addField(new FieldDescription(MAX_HITS.toString(), "integer")); - + properties.forEach(property -> argumentType.addField(new FieldDescription(property.toString(), "integer"))); argumentType.freeze(); } @Override public Object get(CompoundName name, Map<String, String> context, com.yahoo.processing.request.Properties substitution) { - if (MAX_OFFSET.equals(name)) { - return 1000; - } else if (MAX_HITS.equals(name)) { - return 400; - } else { - return super.get(name, context, substitution); + if (name.equals(MAX_OFFSET)) return 1000; + if (name.equals(MAX_HITS)) return 400; + if (name.equals(MAX_QUERY_ITEMS)) return 10000; + return super.get(name, context, substitution); + } + + public static void requireNotPresentIn(Map<String, String> map) { + for (var property : properties) { + if (map.containsKey(property.toString())) + throw new IllegalArgumentException(property + " must be specified in a query profile."); } } diff --git a/container-search/src/main/java/com/yahoo/search/query/rewrite/RewriterFeatures.java b/container-search/src/main/java/com/yahoo/search/query/rewrite/RewriterFeatures.java index bb5792d81b7..c3905b8200b 100644 --- a/container-search/src/main/java/com/yahoo/search/query/rewrite/RewriterFeatures.java +++ b/container-search/src/main/java/com/yahoo/search/query/rewrite/RewriterFeatures.java @@ -68,7 +68,7 @@ public class RewriterFeatures { oldRoot.equals(origQueryItem)) { PhraseItem phrase = convertAndToPhrase((AndItem)oldRoot); - if(!keepOriginalQuery) { + if (!keepOriginalQuery) { qTree.setRoot(phrase); } else { OrItem newRoot = new OrItem(); @@ -145,12 +145,12 @@ public class RewriterFeatures { } StringTokenizer rewrite_list = new StringTokenizer(rewrites, "\t"); - Item rI = null; + Item rI; // Convert matching string to query tree item Item matchingStrItem = convertStringToQTree(query, matchingStr); PhraseItem matchingStrPhraseItem = null; - if(matchingStrItem instanceof AndItem) { + if (matchingStrItem instanceof AndItem) { matchingStrPhraseItem = convertAndToPhrase(((AndItem)matchingStrItem)); } @@ -166,30 +166,32 @@ public class RewriterFeatures { // - matchingStr: (AND aa bb) // - for this case, should use getNonOverlappingMatches instead OrItem newRoot; - if(oldRoot instanceof OrItem) { - if(((OrItem)oldRoot).getItemIndex(matchingStrItem)==-1) { + if (oldRoot instanceof OrItem) { + if (((OrItem)oldRoot).getItemIndex(matchingStrItem)==-1) { RewriterUtils.log(logger, query, "Whole query matching is used, skipping rewrite"); return query; } newRoot = (OrItem)oldRoot; - } else if(oldRoot.equals(matchingStrItem) || oldRoot.equals(matchingStrPhraseItem)) { + } + else if(oldRoot.equals(matchingStrItem) || oldRoot.equals(matchingStrPhraseItem)) { newRoot = new OrItem(); newRoot.addItem(oldRoot); - } else { + } + else { RewriterUtils.log(logger, query, "Whole query matching is used, skipping rewrite"); return query; } int numRewrites = 0; - while(rewrite_list.hasMoreTokens() && - (maxNumRewrites==0 || numRewrites < maxNumRewrites)) { + while (rewrite_list.hasMoreTokens() && (maxNumRewrites == 0 || numRewrites < maxNumRewrites)) { rI = convertStringToQTree(query, rewrite_list.nextToken()); - if(addUnitToRewrites && rI instanceof AndItem) { + if (addUnitToRewrites && rI instanceof AndItem) { rI = convertAndToPhrase((AndItem)rI); } - if(newRoot.getItemIndex(rI)==-1) { + if(newRoot.getItemIndex(rI) == -1) { newRoot.addItem(rI); numRewrites++; - } else { + } + else { RewriterUtils.log(logger, query, "Rewrite already exist, skipping"); } } @@ -229,19 +231,19 @@ public class RewriterFeatures { Query query) throws RuntimeException { RewriterUtils.log(logger, query, "Retrieving longest non-overlapping full phrase matches"); - if(phraseMatcher==null) + if (phraseMatcher == null) return null; Item root = query.getModel().getQueryTree().getRoot(); List<PhraseMatcher.Phrase> matches = phraseMatcher.matchPhrases(root); - if (matches==null || matches.isEmpty()) + if (matches == null || matches.isEmpty()) return null; Set<PhraseMatcher.Phrase> resultMatches = new HashSet<>(); ListIterator<Phrase> matchesIter = matches.listIterator(); // Iterate through all matches - while(matchesIter.hasNext()) { + while (matchesIter.hasNext()) { PhraseMatcher.Phrase phrase = matchesIter.next(); RewriterUtils.log(logger, query, "Working on phrase: " + phrase); CompositeItem currOwner = phrase.getOwner(); @@ -250,11 +252,11 @@ public class RewriterFeatures { // If phrase is not an AND item, only keep those that are single word // in order to eliminate cases such as (new RANK york) from being treated // as match if only new york but not new or york is in the dictionary - if((currOwner!=null && + if((currOwner != null && ((phrase.isComplete() && currOwner instanceof AndItem) || - (phrase.getLength()==1 && currOwner instanceof OrItem) || - (phrase.getLength()==1 && currOwner instanceof RankItem && phrase.getStartIndex()==0))) || - (currOwner==null && phrase.getLength()==1)) { + (phrase.getLength() == 1 && currOwner instanceof OrItem) || + (phrase.getLength() == 1 && currOwner instanceof RankItem && phrase.getStartIndex() == 0))) || + (currOwner == null && phrase.getLength() == 1)) { resultMatches.add(phrase); RewriterUtils.log(logger, query, "Keeping phrase: " + phrase); } @@ -298,12 +300,12 @@ public class RewriterFeatures { Query query) throws RuntimeException { RewriterUtils.log(logger, query, "Retrieving longest non-overlapping partial phrase matches"); - if(phraseMatcher==null) + if (phraseMatcher == null) return null; Item root = query.getModel().getQueryTree().getRoot(); List<PhraseMatcher.Phrase> matches = phraseMatcher.matchPhrases(root); - if (matches==null || matches.isEmpty()) + if (matches == null || matches.isEmpty()) return null; Set<PhraseMatcher.Phrase> resultMatches = new HashSet<>(); @@ -312,14 +314,14 @@ public class RewriterFeatures { ListIterator<PhraseMatcher.Phrase> matchesIter = matches.listIterator(); // Iterate through all matches - while(matchesIter.hasNext()) { + while (matchesIter.hasNext()) { PhraseMatcher.Phrase phrase = matchesIter.next(); RewriterUtils.log(logger, query, "Working on phrase: " + phrase); CompositeItem currOwner = phrase.getOwner(); // Check if previous is AND item and this phrase is in a different item // If so, work on the previous set to eliminate overlapping matches - if(!phrasesInSubTree.isEmpty() && currOwner!=null && + if (!phrasesInSubTree.isEmpty() && currOwner!=null && prevOwner!=null && !currOwner.equals(prevOwner)) { RewriterUtils.log(logger, query, "Previous phrase is in different AND item"); List<PhraseMatcher.Phrase> subTreeMatches @@ -333,13 +335,13 @@ public class RewriterFeatures { } // Check if this is an AND item - if(currOwner!=null && currOwner instanceof AndItem) { + if (currOwner instanceof AndItem) { phrasesInSubTree.add(phrase); - // If phrase is not an AND item, only keep those that are single word - // in order to eliminate cases such as (new RANK york) from being treated - // as match if only new york but not new or york is in the dictionary - } else if (phrase.getLength()==1 && - !(currOwner!=null && currOwner instanceof RankItem && phrase.getStartIndex()!=0)) { + // If phrase is not an AND item, only keep those that are single word + // in order to eliminate cases such as (new RANK york) from being treated + // as match if only new york but not new or york is in the dictionary + } + else if (phrase.getLength() == 1 && !(currOwner instanceof RankItem && phrase.getStartIndex() != 0)) { resultMatches.add(phrase); } @@ -476,7 +478,7 @@ public class RewriterFeatures { boolean removeOriginal, boolean addUnitToRewrites) throws RuntimeException { - if(matches==null) { + if(matches == null) { RewriterUtils.log(logger, query, "No expansions to be added"); return query; } @@ -494,7 +496,7 @@ public class RewriterFeatures { // Retrieve expansion phrases String expansionStr = match.getData(); - if(expansionStr.equalsIgnoreCase("n/a") && expandIndex==null) { + if (expansionStr.equalsIgnoreCase("n/a") && expandIndex == null) { continue; } StringTokenizer expansions = new StringTokenizer(expansionStr,"\t"); @@ -509,17 +511,17 @@ public class RewriterFeatures { (maxNumRewrites==0 || numRewrites < maxNumRewrites)) { String expansion = expansions.nextToken(); RewriterUtils.log(logger, query, "Working on expansion: " + expansion); - if(expansion.equalsIgnoreCase("n/a")) { + if (expansion.equalsIgnoreCase("n/a")) { expansion = matchStr; } // (AND expansion) or "expansion" Item expansionItem = convertStringToQTree(query, expansion); - if(addUnitToRewrites && expansionItem instanceof AndItem) { + if (addUnitToRewrites && expansionItem instanceof AndItem) { expansionItem = convertAndToPhrase((AndItem)expansionItem); } expansionGrp.addItem(expansionItem); - if(expandIndex!=null) { + if (expandIndex!=null) { // indexName:expansion WordItem expansionIndexItem = new WordItem(expansion, expandIndex); expansionGrp.addItem(expansionIndexItem); @@ -528,19 +530,19 @@ public class RewriterFeatures { RewriterUtils.log(logger, query, "Adding expansion: " + expansion); } - if(!removeOriginal) { + if (!removeOriginal) { //(AND original) Item matchItem = convertStringToQTree(query, matchStr); - if(expansionGrp.getItemIndex(matchItem)==-1) { + if (expansionGrp.getItemIndex(matchItem)==-1) { expansionGrp.addItem(matchItem); } } parent = match.getOwner(); int matchIndex = match.getStartIndex(); - if(parent!=null) { + if (parent!=null) { // Remove matching phrase from original query - for(int i=0; i<match.getLength(); i++) { + for (int i=0; i<match.getLength(); i++) { parent.removeItem(matchIndex); } // Adding back expansions @@ -554,11 +556,11 @@ public class RewriterFeatures { } // Not root single item - if(parent!=null) { + if (parent != null) { // Cleaning up the query after rewrite to remove redundant tags // e.g. (AND (OR (AND a b) c)) => (OR (AND a b) c) String cleanupError = QueryCanonicalizer.canonicalize(qTree); - if(cleanupError!=null) { + if (cleanupError!=null) { RewriterUtils.error(logger, query, "Error canonicalizing query tree"); throw new RuntimeException("Error canonicalizing query tree"); } @@ -595,7 +597,7 @@ public class RewriterFeatures { */ static Item convertStringToQTree(Query query, String stringToParse) { RewriterUtils.log(logger, query, "Converting string [" + stringToParse + "] to query tree"); - if(stringToParse==null) { + if (stringToParse == null) { return new NullItem(); } Model model = query.getModel(); @@ -621,7 +623,7 @@ public class RewriterFeatures { Iterator<Item> subItems = andItem.getItemIterator(); while(subItems.hasNext()) { Item curr = (subItems.next()); - if(curr instanceof IntItem) { + if (curr instanceof IntItem) { WordItem numItem = new WordItem(((IntItem)curr).stringValue()); result.addItem(numItem); } else { @@ -639,7 +641,7 @@ public class RewriterFeatures { */ private static class PhraseLength implements Comparator<PhraseMatcher.Phrase> { public int compare(PhraseMatcher.Phrase phrase1, PhraseMatcher.Phrase phrase2) { - if((phrase2.getLength()>phrase1.getLength()) || + if ((phrase2.getLength()>phrase1.getLength()) || (phrase2.getLength()==phrase1.getLength() && phrase2.getStartIndex()<=phrase1.getStartIndex())) { return 1; @@ -648,4 +650,5 @@ public class RewriterFeatures { } } } + } diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java b/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java index b8d6a050691..a12456f5354 100644 --- a/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java +++ b/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java @@ -44,6 +44,7 @@ public class LocalProviderSpec { com.yahoo.prelude.searcher.ValidatePredicateSearcher.class, com.yahoo.search.searchers.ValidateNearestNeighborSearcher.class, com.yahoo.search.searchers.ValidateMatchPhaseSearcher.class, + com.yahoo.search.searchers.ValidateFuzzySearcher.class, com.yahoo.search.yql.FieldFiller.class, com.yahoo.search.searchers.InputCheckingSearcher.class, com.yahoo.search.searchers.ContainerLatencySearcher.class); diff --git a/container-search/src/main/java/com/yahoo/search/searchers/ValidateFuzzySearcher.java b/container-search/src/main/java/com/yahoo/search/searchers/ValidateFuzzySearcher.java new file mode 100644 index 00000000000..249a6342da6 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/searchers/ValidateFuzzySearcher.java @@ -0,0 +1,95 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.search.searchers; + +import com.yahoo.prelude.query.FuzzyItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.ToolBox; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.grouping.vespa.GroupingExecutor; +import com.yahoo.search.query.ranking.RankProperties; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.vespa.config.search.AttributesConfig; +import com.yahoo.yolean.chain.Before; + +import java.util.HashSet; +import java.util.Optional; +import java.util.Set; + +/** + * Validates any FuzzyItem query items. + * + * @author alexeyche + */ +@Before(GroupingExecutor.COMPONENT_NAME) // Must happen before query.prepare() +public class ValidateFuzzySearcher extends Searcher { + + private final Set<String> validAttributes = new HashSet<>(); + + public ValidateFuzzySearcher(AttributesConfig attributesConfig) { + for (AttributesConfig.Attribute a : attributesConfig.attribute()) { + if (a.datatype() == AttributesConfig.Attribute.Datatype.STRING) { + validAttributes.add(a.name()); + } + } + } + + @Override + public Result search(Query query, Execution execution) { + Optional<ErrorMessage> e = validate(query); + return e.isEmpty() ? execution.search(query) : new Result(query, e.get()); + } + + private Optional<ErrorMessage> validate(Query query) { + FuzzyVisitor visitor = new FuzzyVisitor(query.getRanking().getProperties(), validAttributes, query); + ToolBox.visit(visitor, query.getModel().getQueryTree().getRoot()); + return visitor.errorMessage; + } + + private static class FuzzyVisitor extends ToolBox.QueryVisitor { + + public Optional<ErrorMessage> errorMessage = Optional.empty(); + + private final Set<String> validAttributes; + private final Query query; + + public FuzzyVisitor(RankProperties rankProperties, Set<String> validAttributes, Query query) { + this.validAttributes = validAttributes; + this.query = query; + } + + @Override + public boolean visit(Item item) { + if (item instanceof FuzzyItem) { + String error = validate((FuzzyItem)item); + if (error != null) + errorMessage = Optional.of(ErrorMessage.createIllegalQuery(error)); + } + return true; + } + + /** Returns an error message if this is invalid, or null if it is valid */ + private String validate(FuzzyItem item) { + if (!validAttributes.contains(item.getIndexName())) { + return item + " field is not a string attribute"; + } + if (item.getPrefixLength() < 0) { + return item + " has invalid prefixLength " + item.getPrefixLength() + ": Must be >= 0"; + } + if (item.getMaxEditDistance() < 0) { + return item + " has invalid maxEditDistance " + item.getMaxEditDistance() + ": Must be >= 0"; + } + if (item.stringValue().isEmpty()) { + return item + " fuzzy query must be non-empty"; + } + return null; + } + + @Override + public void onExit() {} + + } + +} |