aboutsummaryrefslogtreecommitdiffstats
path: root/container-search
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2023-09-12 14:48:38 +0200
committerHenning Baldersheim <balder@yahoo-inc.com>2023-09-12 14:48:38 +0200
commit50cbfd667315abbc60353f1f12c15085fc220f1b (patch)
tree0e0109b60578729e0fcd85c753ca6b07c2956d5c /container-search
parent0a93d81af746a81616cdb7a083379d8aabd3bfa8 (diff)
Drop optimization that should make terms present in query and rankitem cheap in matching part of tree.
This optimization is outdated as this is not a normal usecase anymore, and queries have become larger and more expensive.
Diffstat (limited to 'container-search')
-rw-r--r--container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java56
-rw-r--r--container-search/src/test/java/com/yahoo/prelude/query/test/QueryCanonicalizerTestCase.java21
2 files changed, 7 insertions, 70 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java b/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java
index 916f23bd768..07a9683468e 100644
--- a/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java
+++ b/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java
@@ -1,15 +1,12 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.query;
-import com.yahoo.processing.request.CompoundName;
import com.yahoo.search.Query;
import com.yahoo.search.query.QueryTree;
import com.yahoo.search.query.properties.DefaultProperties;
-import java.util.HashSet;
import java.util.ListIterator;
import java.util.Optional;
-import java.util.Set;
/**
* Query normalizer and sanity checker.
@@ -61,12 +58,11 @@ public class QueryCanonicalizer {
*
* @param item the item to canonicalize
* @param parentIterator iterator for the parent of this item, never null
- * @return true if the given query is valid, false otherwise
+ * @return result of canonicalization
*/
private static CanonicalizationResult recursivelyCanonicalize(Item item, ListIterator<Item> parentIterator) {
// children first as they may be removed
- if (item instanceof CompositeItem) {
- CompositeItem composite = (CompositeItem)item;
+ if (item instanceof CompositeItem composite) {
for (ListIterator<Item> i = composite.getItemIterator(); i.hasNext(); ) {
CanonicalizationResult childResult = recursivelyCanonicalize(i.next(), i);
if (childResult.isError()) return childResult;
@@ -78,8 +74,7 @@ public class QueryCanonicalizer {
private static CanonicalizationResult canonicalizeThis(Item item, ListIterator<Item> parentIterator) {
if (item instanceof NullItem) parentIterator.remove();
- if ( ! (item instanceof CompositeItem)) return CanonicalizationResult.success();
- CompositeItem composite = (CompositeItem)item;
+ if ( ! (item instanceof CompositeItem composite)) return CanonicalizationResult.success();
boolean replacedByFalse = collapseFalse(composite, parentIterator);
if (replacedByFalse) return CanonicalizationResult.success();
@@ -89,13 +84,10 @@ public class QueryCanonicalizer {
if (composite instanceof EquivItem) {
removeDuplicates((EquivItem) composite);
}
- else if (composite instanceof RankItem) {
- makeDuplicatesCheap((RankItem)composite);
- }
if (composite.getItemCount() == 0)
parentIterator.remove();
- composite.extractSingleChild().ifPresent(extractedChild -> parentIterator.set(extractedChild));
+ composite.extractSingleChild().ifPresent(parentIterator::set);
return CanonicalizationResult.success();
}
@@ -183,22 +175,19 @@ public class QueryCanonicalizer {
for (int j = 0; j < i; ++j) {
Item check = composite.getItem(j);
if (deleteCandidate.getClass() == check.getClass()) {
- if (deleteCandidate instanceof PhraseItem) {
- PhraseItem phraseDeletionCandidate = (PhraseItem) deleteCandidate;
+ if (deleteCandidate instanceof PhraseItem phraseDeletionCandidate) {
PhraseItem phraseToCheck = (PhraseItem) check;
if (phraseDeletionCandidate.getIndexedString().equals(phraseToCheck.getIndexedString())) {
composite.removeItem(i);
break;
}
- } else if (deleteCandidate instanceof PhraseSegmentItem) {
- PhraseSegmentItem phraseSegmentDeletionCandidate = (PhraseSegmentItem) deleteCandidate;
+ } else if (deleteCandidate instanceof PhraseSegmentItem phraseSegmentDeletionCandidate) {
PhraseSegmentItem phraseSegmentToCheck = (PhraseSegmentItem) check;
if (phraseSegmentDeletionCandidate.getIndexedString().equals(phraseSegmentToCheck.getIndexedString())) {
composite.removeItem(i);
break;
}
- } else if (deleteCandidate instanceof BlockItem) {
- BlockItem blockDeletionCandidate = (BlockItem) deleteCandidate;
+ } else if (deleteCandidate instanceof BlockItem blockDeletionCandidate) {
BlockItem blockToCheck = (BlockItem) check;
if (blockDeletionCandidate.stringValue().equals(blockToCheck.stringValue())) {
composite.removeItem(i);
@@ -210,37 +199,6 @@ public class QueryCanonicalizer {
}
}
- /**
- * If a term is present as both a rank term (i.e not the first child) and in
- * the match condition (first child), then turn off any rank calculation for
- * the term during matching, as it will be made available anyway for matches
- * by the same term in the rank part.
- *
- * @param rankItem an item which will be simplified in place
- */
- private static void makeDuplicatesCheap(RankItem rankItem) {
- // Collect terms used for ranking
- Set<TermItem> rankTerms = new HashSet<>();
- for (int i = 1; i < rankItem.getItemCount(); i++) {
- if (rankItem.getItem(i) instanceof TermItem)
- rankTerms.add((TermItem)rankItem.getItem(i));
- }
-
- // Make terms used for matching cheap if they also are ranking terms
- makeDuplicatesCheap(rankItem.getItem(0), rankTerms);
- }
-
- private static void makeDuplicatesCheap(Item item, Set<TermItem> rankTerms) {
- if (item instanceof CompositeItem) {
- for (ListIterator<Item> i = ((CompositeItem)item).getItemIterator(); i.hasNext();)
- makeDuplicatesCheap(i.next(), rankTerms);
- }
- else if (rankTerms.contains(item)) {
- item.setRanked(false);
- item.setPositionData(false);
- }
- }
-
public static class CanonicalizationResult {
private final Optional<String> error;
diff --git a/container-search/src/test/java/com/yahoo/prelude/query/test/QueryCanonicalizerTestCase.java b/container-search/src/test/java/com/yahoo/prelude/query/test/QueryCanonicalizerTestCase.java
index 81aa28a5326..d14697da981 100644
--- a/container-search/src/test/java/com/yahoo/prelude/query/test/QueryCanonicalizerTestCase.java
+++ b/container-search/src/test/java/com/yahoo/prelude/query/test/QueryCanonicalizerTestCase.java
@@ -467,27 +467,6 @@ public class QueryCanonicalizerTestCase {
}
@Test
- void testRankDuplicateCheapification() {
- AndItem and = new AndItem();
- WordItem shoe = new WordItem("shoe", "prod");
- and.addItem(shoe);
- and.addItem(new WordItem("apparel & accessories", "tcnm"));
- RankItem rank = new RankItem();
- rank.addItem(and);
-
- rank.addItem(new WordItem("shoe", "prod")); // rank item which also ossurs in first argument
- for (int i = 0; i < 25; i++)
- rank.addItem(new WordItem("word" + i, "normbrnd"));
- QueryTree tree = new QueryTree(rank);
-
- assertTrue(shoe.isRanked());
- assertTrue(shoe.usePositionData());
- QueryCanonicalizer.canonicalize(tree);
- assertFalse(shoe.isRanked());
- assertFalse(shoe.usePositionData());
- }
-
- @Test
void queryTreeExceedsAllowedSize() {
Query query = new Query();
QueryTree tree = query.getModel().getQueryTree();