diff options
author | Jon Bratseth <bratseth@gmail.com> | 2023-09-12 16:21:03 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-09-12 16:21:03 +0200 |
commit | 4477e12e6334fe2ad3fa04bdf6babfd70ad9eed8 (patch) | |
tree | 53ded5140109de3327056d890e03bc692ddebb8c | |
parent | fe4e2049ccf6a0be88f2bc8d6059987d4d1ce75e (diff) | |
parent | 50cbfd667315abbc60353f1f12c15085fc220f1b (diff) |
Merge pull request #28499 from vespa-engine/balder/no-optimization-of-duplicate-terms-in-rankitem-vs-first-item
Drop optimization that should make terms present in query and rankiteā¦
-rw-r--r-- | container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java | 56 | ||||
-rw-r--r-- | container-search/src/test/java/com/yahoo/prelude/query/test/QueryCanonicalizerTestCase.java | 21 |
2 files changed, 7 insertions, 70 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java b/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java index 916f23bd768..07a9683468e 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java @@ -1,15 +1,12 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.prelude.query; -import com.yahoo.processing.request.CompoundName; import com.yahoo.search.Query; import com.yahoo.search.query.QueryTree; import com.yahoo.search.query.properties.DefaultProperties; -import java.util.HashSet; import java.util.ListIterator; import java.util.Optional; -import java.util.Set; /** * Query normalizer and sanity checker. @@ -61,12 +58,11 @@ public class QueryCanonicalizer { * * @param item the item to canonicalize * @param parentIterator iterator for the parent of this item, never null - * @return true if the given query is valid, false otherwise + * @return result of canonicalization */ private static CanonicalizationResult recursivelyCanonicalize(Item item, ListIterator<Item> parentIterator) { // children first as they may be removed - if (item instanceof CompositeItem) { - CompositeItem composite = (CompositeItem)item; + if (item instanceof CompositeItem composite) { for (ListIterator<Item> i = composite.getItemIterator(); i.hasNext(); ) { CanonicalizationResult childResult = recursivelyCanonicalize(i.next(), i); if (childResult.isError()) return childResult; @@ -78,8 +74,7 @@ public class QueryCanonicalizer { private static CanonicalizationResult canonicalizeThis(Item item, ListIterator<Item> parentIterator) { if (item instanceof NullItem) parentIterator.remove(); - if ( ! (item instanceof CompositeItem)) return CanonicalizationResult.success(); - CompositeItem composite = (CompositeItem)item; + if ( ! (item instanceof CompositeItem composite)) return CanonicalizationResult.success(); boolean replacedByFalse = collapseFalse(composite, parentIterator); if (replacedByFalse) return CanonicalizationResult.success(); @@ -89,13 +84,10 @@ public class QueryCanonicalizer { if (composite instanceof EquivItem) { removeDuplicates((EquivItem) composite); } - else if (composite instanceof RankItem) { - makeDuplicatesCheap((RankItem)composite); - } if (composite.getItemCount() == 0) parentIterator.remove(); - composite.extractSingleChild().ifPresent(extractedChild -> parentIterator.set(extractedChild)); + composite.extractSingleChild().ifPresent(parentIterator::set); return CanonicalizationResult.success(); } @@ -183,22 +175,19 @@ public class QueryCanonicalizer { for (int j = 0; j < i; ++j) { Item check = composite.getItem(j); if (deleteCandidate.getClass() == check.getClass()) { - if (deleteCandidate instanceof PhraseItem) { - PhraseItem phraseDeletionCandidate = (PhraseItem) deleteCandidate; + if (deleteCandidate instanceof PhraseItem phraseDeletionCandidate) { PhraseItem phraseToCheck = (PhraseItem) check; if (phraseDeletionCandidate.getIndexedString().equals(phraseToCheck.getIndexedString())) { composite.removeItem(i); break; } - } else if (deleteCandidate instanceof PhraseSegmentItem) { - PhraseSegmentItem phraseSegmentDeletionCandidate = (PhraseSegmentItem) deleteCandidate; + } else if (deleteCandidate instanceof PhraseSegmentItem phraseSegmentDeletionCandidate) { PhraseSegmentItem phraseSegmentToCheck = (PhraseSegmentItem) check; if (phraseSegmentDeletionCandidate.getIndexedString().equals(phraseSegmentToCheck.getIndexedString())) { composite.removeItem(i); break; } - } else if (deleteCandidate instanceof BlockItem) { - BlockItem blockDeletionCandidate = (BlockItem) deleteCandidate; + } else if (deleteCandidate instanceof BlockItem blockDeletionCandidate) { BlockItem blockToCheck = (BlockItem) check; if (blockDeletionCandidate.stringValue().equals(blockToCheck.stringValue())) { composite.removeItem(i); @@ -210,37 +199,6 @@ public class QueryCanonicalizer { } } - /** - * If a term is present as both a rank term (i.e not the first child) and in - * the match condition (first child), then turn off any rank calculation for - * the term during matching, as it will be made available anyway for matches - * by the same term in the rank part. - * - * @param rankItem an item which will be simplified in place - */ - private static void makeDuplicatesCheap(RankItem rankItem) { - // Collect terms used for ranking - Set<TermItem> rankTerms = new HashSet<>(); - for (int i = 1; i < rankItem.getItemCount(); i++) { - if (rankItem.getItem(i) instanceof TermItem) - rankTerms.add((TermItem)rankItem.getItem(i)); - } - - // Make terms used for matching cheap if they also are ranking terms - makeDuplicatesCheap(rankItem.getItem(0), rankTerms); - } - - private static void makeDuplicatesCheap(Item item, Set<TermItem> rankTerms) { - if (item instanceof CompositeItem) { - for (ListIterator<Item> i = ((CompositeItem)item).getItemIterator(); i.hasNext();) - makeDuplicatesCheap(i.next(), rankTerms); - } - else if (rankTerms.contains(item)) { - item.setRanked(false); - item.setPositionData(false); - } - } - public static class CanonicalizationResult { private final Optional<String> error; diff --git a/container-search/src/test/java/com/yahoo/prelude/query/test/QueryCanonicalizerTestCase.java b/container-search/src/test/java/com/yahoo/prelude/query/test/QueryCanonicalizerTestCase.java index 81aa28a5326..d14697da981 100644 --- a/container-search/src/test/java/com/yahoo/prelude/query/test/QueryCanonicalizerTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/query/test/QueryCanonicalizerTestCase.java @@ -467,27 +467,6 @@ public class QueryCanonicalizerTestCase { } @Test - void testRankDuplicateCheapification() { - AndItem and = new AndItem(); - WordItem shoe = new WordItem("shoe", "prod"); - and.addItem(shoe); - and.addItem(new WordItem("apparel & accessories", "tcnm")); - RankItem rank = new RankItem(); - rank.addItem(and); - - rank.addItem(new WordItem("shoe", "prod")); // rank item which also ossurs in first argument - for (int i = 0; i < 25; i++) - rank.addItem(new WordItem("word" + i, "normbrnd")); - QueryTree tree = new QueryTree(rank); - - assertTrue(shoe.isRanked()); - assertTrue(shoe.usePositionData()); - QueryCanonicalizer.canonicalize(tree); - assertFalse(shoe.isRanked()); - assertFalse(shoe.usePositionData()); - } - - @Test void queryTreeExceedsAllowedSize() { Query query = new Query(); QueryTree tree = query.getModel().getQueryTree(); |