diff options
author | Jon Bratseth <bratseth@gmail.com> | 2022-04-22 17:32:10 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-22 17:32:10 +0200 |
commit | 2b0050d8edb0df787e8c0f605e30e6707d476f0c (patch) | |
tree | aef819a695c0a8342dfaa02954c816c6eb32d6da | |
parent | 2d05e43f365a4e93ab33aaa80ea432d0b0192aef (diff) | |
parent | 34848b841173803ffef0a92da466e58a1475954c (diff) |
Merge pull request #22226 from vespa-engine/bratseth/set-default-max-query-items
Set default max query items
6 files changed, 71 insertions, 65 deletions
diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json index 3ad0f94511d..67bc553c478 100644 --- a/container-search/abi-spec.json +++ b/container-search/abi-spec.json @@ -6635,11 +6635,13 @@ ], "methods": [ "public void <init>()", - "public java.lang.Object get(com.yahoo.processing.request.CompoundName, java.util.Map, com.yahoo.processing.request.Properties)" + "public java.lang.Object get(com.yahoo.processing.request.CompoundName, java.util.Map, com.yahoo.processing.request.Properties)", + "public static void requireNotPresentIn(java.util.Map)" ], "fields": [ "public static final com.yahoo.processing.request.CompoundName MAX_OFFSET", "public static final com.yahoo.processing.request.CompoundName MAX_HITS", + "public static final com.yahoo.processing.request.CompoundName MAX_QUERY_ITEMS", "public static final com.yahoo.search.query.profile.types.QueryProfileType argumentType" ] }, diff --git a/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java b/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java index a93dd1b9de4..916f23bd768 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java @@ -4,6 +4,7 @@ package com.yahoo.prelude.query; import com.yahoo.processing.request.CompoundName; import com.yahoo.search.Query; import com.yahoo.search.query.QueryTree; +import com.yahoo.search.query.properties.DefaultProperties; import java.util.HashSet; import java.util.ListIterator; @@ -20,8 +21,6 @@ public class QueryCanonicalizer { /** The name of the operation performed by this, for use in search chain ordering */ public static final String queryCanonicalization = "queryCanonicalization"; - private static final CompoundName MAX_QUERY_ITEMS = new CompoundName("maxQueryItems"); - /** * Validates this query and carries out possible operations on this query * which simplifies it without changing its semantics. @@ -29,8 +28,8 @@ public class QueryCanonicalizer { * @return null if the query is valid, an error message if it is invalid */ public static String canonicalize(Query query) { - Integer maxQueryItems = query.properties().getInteger(MAX_QUERY_ITEMS, Integer.MAX_VALUE); - return canonicalize(query.getModel().getQueryTree(), maxQueryItems); + return canonicalize(query.getModel().getQueryTree(), + query.properties().getInteger(DefaultProperties.MAX_QUERY_ITEMS)); } /** @@ -52,7 +51,8 @@ public class QueryCanonicalizer { CanonicalizationResult result = recursivelyCanonicalize(rootItemIterator.next(), rootItemIterator); if (query.isEmpty() && ! result.isError()) result = CanonicalizationResult.error("No query"); int itemCount = query.treeSize(); - if (itemCount > maxQueryItems) result = CanonicalizationResult.error(String.format("Query tree exceeds allowed item count. Configured limit: %d - Item count: %d", maxQueryItems, itemCount)); + if (itemCount > maxQueryItems) + result = CanonicalizationResult.error(String.format("Query tree exceeds allowed item count. Configured limit: %d - Item count: %d", maxQueryItems, itemCount)); return result.error().orElse(null); // preserve old API, unfortunately } diff --git a/container-search/src/main/java/com/yahoo/search/handler/SearchHandler.java b/container-search/src/main/java/com/yahoo/search/handler/SearchHandler.java index af6374ba245..54d8ac40556 100644 --- a/container-search/src/main/java/com/yahoo/search/handler/SearchHandler.java +++ b/container-search/src/main/java/com/yahoo/search/handler/SearchHandler.java @@ -580,11 +580,7 @@ public class SearchHandler extends LoggingRequestHandler { } private Result validateQuery(Query query) { - if (query.getHttpRequest().getProperty(DefaultProperties.MAX_HITS.toString()) != null) - throw new RuntimeException(DefaultProperties.MAX_HITS + " must be specified in a query profile."); - - if (query.getHttpRequest().getProperty(DefaultProperties.MAX_OFFSET.toString()) != null) - throw new RuntimeException(DefaultProperties.MAX_OFFSET + " must be specified in a query profile."); + DefaultProperties.requireNotPresentIn(query.getHttpRequest().propertyMap()); int maxHits = query.properties().getInteger(DefaultProperties.MAX_HITS); int maxOffset = query.properties().getInteger(DefaultProperties.MAX_OFFSET); diff --git a/container-search/src/main/java/com/yahoo/search/query/QueryTree.java b/container-search/src/main/java/com/yahoo/search/query/QueryTree.java index 3dac5648660..0655727b46b 100644 --- a/container-search/src/main/java/com/yahoo/search/query/QueryTree.java +++ b/container-search/src/main/java/com/yahoo/search/query/QueryTree.java @@ -185,7 +185,7 @@ public class QueryTree extends CompositeItem { */ public int treeSize() { if (isEmpty()) return 0; - return(countItemsRecursively(getItemIterator().next())); + return countItemsRecursively(getItemIterator().next()); } private int countItemsRecursively(Item item) { diff --git a/container-search/src/main/java/com/yahoo/search/query/properties/DefaultProperties.java b/container-search/src/main/java/com/yahoo/search/query/properties/DefaultProperties.java index b94ddde4733..221368afeb6 100644 --- a/container-search/src/main/java/com/yahoo/search/query/properties/DefaultProperties.java +++ b/container-search/src/main/java/com/yahoo/search/query/properties/DefaultProperties.java @@ -6,6 +6,7 @@ import com.yahoo.search.query.Properties; import com.yahoo.search.query.profile.types.FieldDescription; import com.yahoo.search.query.profile.types.QueryProfileType; +import java.util.List; import java.util.Map; /** @@ -17,26 +18,30 @@ public final class DefaultProperties extends Properties { public static final CompoundName MAX_OFFSET = new CompoundName("maxOffset"); public static final CompoundName MAX_HITS = new CompoundName("maxHits"); + public static final CompoundName MAX_QUERY_ITEMS = new CompoundName("maxQueryItems"); public static final QueryProfileType argumentType = new QueryProfileType("DefaultProperties"); + private static final List<CompoundName> properties = List.of(MAX_OFFSET, MAX_HITS, MAX_QUERY_ITEMS); + static { argumentType.setBuiltin(true); - - argumentType.addField(new FieldDescription(MAX_OFFSET.toString(), "integer")); - argumentType.addField(new FieldDescription(MAX_HITS.toString(), "integer")); - + properties.forEach(property -> argumentType.addField(new FieldDescription(property.toString(), "integer"))); argumentType.freeze(); } @Override public Object get(CompoundName name, Map<String, String> context, com.yahoo.processing.request.Properties substitution) { - if (MAX_OFFSET.equals(name)) { - return 1000; - } else if (MAX_HITS.equals(name)) { - return 400; - } else { - return super.get(name, context, substitution); + if (name.equals(MAX_OFFSET)) return 1000; + if (name.equals(MAX_HITS)) return 400; + if (name.equals(MAX_QUERY_ITEMS)) return 10000; + return super.get(name, context, substitution); + } + + public static void requireNotPresentIn(Map<String, String> map) { + for (var property : properties) { + if (map.containsKey(property.toString())) + throw new IllegalArgumentException(property + " must be specified in a query profile."); } } diff --git a/container-search/src/main/java/com/yahoo/search/query/rewrite/RewriterFeatures.java b/container-search/src/main/java/com/yahoo/search/query/rewrite/RewriterFeatures.java index bb5792d81b7..c3905b8200b 100644 --- a/container-search/src/main/java/com/yahoo/search/query/rewrite/RewriterFeatures.java +++ b/container-search/src/main/java/com/yahoo/search/query/rewrite/RewriterFeatures.java @@ -68,7 +68,7 @@ public class RewriterFeatures { oldRoot.equals(origQueryItem)) { PhraseItem phrase = convertAndToPhrase((AndItem)oldRoot); - if(!keepOriginalQuery) { + if (!keepOriginalQuery) { qTree.setRoot(phrase); } else { OrItem newRoot = new OrItem(); @@ -145,12 +145,12 @@ public class RewriterFeatures { } StringTokenizer rewrite_list = new StringTokenizer(rewrites, "\t"); - Item rI = null; + Item rI; // Convert matching string to query tree item Item matchingStrItem = convertStringToQTree(query, matchingStr); PhraseItem matchingStrPhraseItem = null; - if(matchingStrItem instanceof AndItem) { + if (matchingStrItem instanceof AndItem) { matchingStrPhraseItem = convertAndToPhrase(((AndItem)matchingStrItem)); } @@ -166,30 +166,32 @@ public class RewriterFeatures { // - matchingStr: (AND aa bb) // - for this case, should use getNonOverlappingMatches instead OrItem newRoot; - if(oldRoot instanceof OrItem) { - if(((OrItem)oldRoot).getItemIndex(matchingStrItem)==-1) { + if (oldRoot instanceof OrItem) { + if (((OrItem)oldRoot).getItemIndex(matchingStrItem)==-1) { RewriterUtils.log(logger, query, "Whole query matching is used, skipping rewrite"); return query; } newRoot = (OrItem)oldRoot; - } else if(oldRoot.equals(matchingStrItem) || oldRoot.equals(matchingStrPhraseItem)) { + } + else if(oldRoot.equals(matchingStrItem) || oldRoot.equals(matchingStrPhraseItem)) { newRoot = new OrItem(); newRoot.addItem(oldRoot); - } else { + } + else { RewriterUtils.log(logger, query, "Whole query matching is used, skipping rewrite"); return query; } int numRewrites = 0; - while(rewrite_list.hasMoreTokens() && - (maxNumRewrites==0 || numRewrites < maxNumRewrites)) { + while (rewrite_list.hasMoreTokens() && (maxNumRewrites == 0 || numRewrites < maxNumRewrites)) { rI = convertStringToQTree(query, rewrite_list.nextToken()); - if(addUnitToRewrites && rI instanceof AndItem) { + if (addUnitToRewrites && rI instanceof AndItem) { rI = convertAndToPhrase((AndItem)rI); } - if(newRoot.getItemIndex(rI)==-1) { + if(newRoot.getItemIndex(rI) == -1) { newRoot.addItem(rI); numRewrites++; - } else { + } + else { RewriterUtils.log(logger, query, "Rewrite already exist, skipping"); } } @@ -229,19 +231,19 @@ public class RewriterFeatures { Query query) throws RuntimeException { RewriterUtils.log(logger, query, "Retrieving longest non-overlapping full phrase matches"); - if(phraseMatcher==null) + if (phraseMatcher == null) return null; Item root = query.getModel().getQueryTree().getRoot(); List<PhraseMatcher.Phrase> matches = phraseMatcher.matchPhrases(root); - if (matches==null || matches.isEmpty()) + if (matches == null || matches.isEmpty()) return null; Set<PhraseMatcher.Phrase> resultMatches = new HashSet<>(); ListIterator<Phrase> matchesIter = matches.listIterator(); // Iterate through all matches - while(matchesIter.hasNext()) { + while (matchesIter.hasNext()) { PhraseMatcher.Phrase phrase = matchesIter.next(); RewriterUtils.log(logger, query, "Working on phrase: " + phrase); CompositeItem currOwner = phrase.getOwner(); @@ -250,11 +252,11 @@ public class RewriterFeatures { // If phrase is not an AND item, only keep those that are single word // in order to eliminate cases such as (new RANK york) from being treated // as match if only new york but not new or york is in the dictionary - if((currOwner!=null && + if((currOwner != null && ((phrase.isComplete() && currOwner instanceof AndItem) || - (phrase.getLength()==1 && currOwner instanceof OrItem) || - (phrase.getLength()==1 && currOwner instanceof RankItem && phrase.getStartIndex()==0))) || - (currOwner==null && phrase.getLength()==1)) { + (phrase.getLength() == 1 && currOwner instanceof OrItem) || + (phrase.getLength() == 1 && currOwner instanceof RankItem && phrase.getStartIndex() == 0))) || + (currOwner == null && phrase.getLength() == 1)) { resultMatches.add(phrase); RewriterUtils.log(logger, query, "Keeping phrase: " + phrase); } @@ -298,12 +300,12 @@ public class RewriterFeatures { Query query) throws RuntimeException { RewriterUtils.log(logger, query, "Retrieving longest non-overlapping partial phrase matches"); - if(phraseMatcher==null) + if (phraseMatcher == null) return null; Item root = query.getModel().getQueryTree().getRoot(); List<PhraseMatcher.Phrase> matches = phraseMatcher.matchPhrases(root); - if (matches==null || matches.isEmpty()) + if (matches == null || matches.isEmpty()) return null; Set<PhraseMatcher.Phrase> resultMatches = new HashSet<>(); @@ -312,14 +314,14 @@ public class RewriterFeatures { ListIterator<PhraseMatcher.Phrase> matchesIter = matches.listIterator(); // Iterate through all matches - while(matchesIter.hasNext()) { + while (matchesIter.hasNext()) { PhraseMatcher.Phrase phrase = matchesIter.next(); RewriterUtils.log(logger, query, "Working on phrase: " + phrase); CompositeItem currOwner = phrase.getOwner(); // Check if previous is AND item and this phrase is in a different item // If so, work on the previous set to eliminate overlapping matches - if(!phrasesInSubTree.isEmpty() && currOwner!=null && + if (!phrasesInSubTree.isEmpty() && currOwner!=null && prevOwner!=null && !currOwner.equals(prevOwner)) { RewriterUtils.log(logger, query, "Previous phrase is in different AND item"); List<PhraseMatcher.Phrase> subTreeMatches @@ -333,13 +335,13 @@ public class RewriterFeatures { } // Check if this is an AND item - if(currOwner!=null && currOwner instanceof AndItem) { + if (currOwner instanceof AndItem) { phrasesInSubTree.add(phrase); - // If phrase is not an AND item, only keep those that are single word - // in order to eliminate cases such as (new RANK york) from being treated - // as match if only new york but not new or york is in the dictionary - } else if (phrase.getLength()==1 && - !(currOwner!=null && currOwner instanceof RankItem && phrase.getStartIndex()!=0)) { + // If phrase is not an AND item, only keep those that are single word + // in order to eliminate cases such as (new RANK york) from being treated + // as match if only new york but not new or york is in the dictionary + } + else if (phrase.getLength() == 1 && !(currOwner instanceof RankItem && phrase.getStartIndex() != 0)) { resultMatches.add(phrase); } @@ -476,7 +478,7 @@ public class RewriterFeatures { boolean removeOriginal, boolean addUnitToRewrites) throws RuntimeException { - if(matches==null) { + if(matches == null) { RewriterUtils.log(logger, query, "No expansions to be added"); return query; } @@ -494,7 +496,7 @@ public class RewriterFeatures { // Retrieve expansion phrases String expansionStr = match.getData(); - if(expansionStr.equalsIgnoreCase("n/a") && expandIndex==null) { + if (expansionStr.equalsIgnoreCase("n/a") && expandIndex == null) { continue; } StringTokenizer expansions = new StringTokenizer(expansionStr,"\t"); @@ -509,17 +511,17 @@ public class RewriterFeatures { (maxNumRewrites==0 || numRewrites < maxNumRewrites)) { String expansion = expansions.nextToken(); RewriterUtils.log(logger, query, "Working on expansion: " + expansion); - if(expansion.equalsIgnoreCase("n/a")) { + if (expansion.equalsIgnoreCase("n/a")) { expansion = matchStr; } // (AND expansion) or "expansion" Item expansionItem = convertStringToQTree(query, expansion); - if(addUnitToRewrites && expansionItem instanceof AndItem) { + if (addUnitToRewrites && expansionItem instanceof AndItem) { expansionItem = convertAndToPhrase((AndItem)expansionItem); } expansionGrp.addItem(expansionItem); - if(expandIndex!=null) { + if (expandIndex!=null) { // indexName:expansion WordItem expansionIndexItem = new WordItem(expansion, expandIndex); expansionGrp.addItem(expansionIndexItem); @@ -528,19 +530,19 @@ public class RewriterFeatures { RewriterUtils.log(logger, query, "Adding expansion: " + expansion); } - if(!removeOriginal) { + if (!removeOriginal) { //(AND original) Item matchItem = convertStringToQTree(query, matchStr); - if(expansionGrp.getItemIndex(matchItem)==-1) { + if (expansionGrp.getItemIndex(matchItem)==-1) { expansionGrp.addItem(matchItem); } } parent = match.getOwner(); int matchIndex = match.getStartIndex(); - if(parent!=null) { + if (parent!=null) { // Remove matching phrase from original query - for(int i=0; i<match.getLength(); i++) { + for (int i=0; i<match.getLength(); i++) { parent.removeItem(matchIndex); } // Adding back expansions @@ -554,11 +556,11 @@ public class RewriterFeatures { } // Not root single item - if(parent!=null) { + if (parent != null) { // Cleaning up the query after rewrite to remove redundant tags // e.g. (AND (OR (AND a b) c)) => (OR (AND a b) c) String cleanupError = QueryCanonicalizer.canonicalize(qTree); - if(cleanupError!=null) { + if (cleanupError!=null) { RewriterUtils.error(logger, query, "Error canonicalizing query tree"); throw new RuntimeException("Error canonicalizing query tree"); } @@ -595,7 +597,7 @@ public class RewriterFeatures { */ static Item convertStringToQTree(Query query, String stringToParse) { RewriterUtils.log(logger, query, "Converting string [" + stringToParse + "] to query tree"); - if(stringToParse==null) { + if (stringToParse == null) { return new NullItem(); } Model model = query.getModel(); @@ -621,7 +623,7 @@ public class RewriterFeatures { Iterator<Item> subItems = andItem.getItemIterator(); while(subItems.hasNext()) { Item curr = (subItems.next()); - if(curr instanceof IntItem) { + if (curr instanceof IntItem) { WordItem numItem = new WordItem(((IntItem)curr).stringValue()); result.addItem(numItem); } else { @@ -639,7 +641,7 @@ public class RewriterFeatures { */ private static class PhraseLength implements Comparator<PhraseMatcher.Phrase> { public int compare(PhraseMatcher.Phrase phrase1, PhraseMatcher.Phrase phrase2) { - if((phrase2.getLength()>phrase1.getLength()) || + if ((phrase2.getLength()>phrase1.getLength()) || (phrase2.getLength()==phrase1.getLength() && phrase2.getStartIndex()<=phrase1.getStartIndex())) { return 1; @@ -648,4 +650,5 @@ public class RewriterFeatures { } } } + } |