diff options
Diffstat (limited to 'container-search/src/main/java/com/yahoo/prelude/querytransform/IndexCombinatorSearcher.java')
-rw-r--r-- | container-search/src/main/java/com/yahoo/prelude/querytransform/IndexCombinatorSearcher.java | 361 |
1 files changed, 0 insertions, 361 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/querytransform/IndexCombinatorSearcher.java b/container-search/src/main/java/com/yahoo/prelude/querytransform/IndexCombinatorSearcher.java deleted file mode 100644 index ff603a64725..00000000000 --- a/container-search/src/main/java/com/yahoo/prelude/querytransform/IndexCombinatorSearcher.java +++ /dev/null @@ -1,361 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.prelude.querytransform; - -import static com.yahoo.prelude.querytransform.PhrasingSearcher.PHRASE_REPLACEMENT; - -import com.yahoo.component.chain.dependencies.After; -import com.yahoo.component.chain.dependencies.Before; -import com.yahoo.component.chain.dependencies.Provides; -import com.yahoo.log.LogLevel; -import com.yahoo.prelude.Index; -import com.yahoo.prelude.Index.Attribute; -import com.yahoo.prelude.IndexFacts; -import com.yahoo.prelude.query.*; -import com.yahoo.search.Query; -import com.yahoo.search.Result; -import com.yahoo.search.Searcher; -import com.yahoo.search.searchchain.Execution; -import com.yahoo.search.searchchain.PhaseNames; - -import java.util.*; - -/** - * Searcher to rewrite queries to achieve mixed recall between indices and - * memory attributes. - * - * @author Steinar Knutsen - * @deprecated do not use - */ -// TODO: Remove on Vespa 7 (not necessary any more) -@After({PhaseNames.RAW_QUERY, PHRASE_REPLACEMENT}) -@Before(PhaseNames.TRANSFORMED_QUERY) -@Provides(IndexCombinatorSearcher.MIXED_RECALL_REWRITE) -@Deprecated // OK -public class IndexCombinatorSearcher extends Searcher { - - public static final String MIXED_RECALL_REWRITE = "MixedRecallRewrite"; - - private static class ArrayComparator implements Comparator<Attribute[]> { - /** - * Note, this ignores if there is a difference in whether to - * attributes have tokenized content. (If this is the case, - * we are having worse problems anyway.) - */ - public int compare(Attribute[] o1, Attribute[] o2 ) { - if (o1.length < o2.length) { - return -1; - } else if (o1.length > o2.length) { - return 1; - } - int limit = o1.length; - for (int i = 0; i < limit; ++i) { - int r = o1[i].name.compareTo(o2[i].name); - if (r != 0) { - return r; - } - } - return 0; - } - } - - private final ArrayComparator comparator = new ArrayComparator(); - - private enum RewriteStrategies { - NONE, CHEAP_AND, EXPENSIVE_AND, FLAT - } - - @Override - public Result search(Query query, Execution execution) { - Item root = query.getModel().getQueryTree().getRoot(); - IndexFacts.Session session = execution.context().getIndexFacts().newSession(query); - String oldQuery = (query.getTraceLevel() >= 2) ? root.toString() : ""; - - if (root instanceof BlockItem || root instanceof PhraseItem) { - root = convertSinglePhraseOrBlock(root, session); - } else if (root instanceof CompositeItem) { - root = rewrite((CompositeItem) root, session); - } - query.getModel().getQueryTree().setRoot(root); - - if ((query.getTraceLevel() >= 2) && !(oldQuery.equals(root.toString()))) { - query.trace("Rewriting for mixed recall between indices and attributes", true, 2); - } - return execution.search(query); - } - - private RewriteStrategies chooseRewriteStrategy(CompositeItem c, IndexFacts.Session session) { - if (c instanceof OrItem) { - return RewriteStrategies.FLAT; - } else if (!(c instanceof AndItem)) { - return RewriteStrategies.NONE; - } - Map<Attribute[], Integer> m = new TreeMap<>(comparator); - for (Iterator<Item> i = c.getItemIterator(); i.hasNext();) { - Item j = i.next(); - if (j instanceof BlockItem || j instanceof PhraseItem) { - Attribute[] attributes= getIndices((HasIndexItem) j, session); - if (attributes == null) { - continue; - } - Integer count = m.get(attributes); - if (count == null) { - count = 1; - } else { - count = count.intValue() + 1; - } - m.put(attributes, count); - } - } - - if (m.size() == 0) { - return RewriteStrategies.NONE; - } - - int singles = 0; - int pairs = 0; - int higher = 0; - // count the number of sets being associated with 1, 2 or more terms - for (Integer i : m.values()) { - switch (i.intValue()) { - case 1: - ++singles; - break; - case 2: - pairs += 2; - break; - default: - ++higher; - break; - } - } - if (higher == 0 && pairs + singles <= 2) { - return RewriteStrategies.EXPENSIVE_AND; - } else { - return RewriteStrategies.CHEAP_AND; - } - } - - private CompositeItem rewriteNot(NotItem not, IndexFacts.Session session) { - Item positive = not.getItem(0); - if (positive instanceof BlockItem || positive instanceof PhraseItem) { - positive = convertSinglePhraseOrBlock(positive, session); - not.setItem(0, positive); - } else if (positive instanceof CompositeItem) { - CompositeItem c = (CompositeItem) positive; - positive = rewrite(c, session); - not.setItem(0, positive); - } - - int length = not.getItemCount(); - // no need for keeping proximity in the negative branches, so we - // convert them one by one, _and_ always uses cheap transform - for (int i = 1; i < length; ++i) { - Item exclusion = not.getItem(i); - if (exclusion instanceof BlockItem || exclusion instanceof PhraseItem) { - exclusion = convertSinglePhraseOrBlock(exclusion, session); - not.setItem(i, exclusion); - } else if (exclusion instanceof CompositeItem) { - CompositeItem c = (CompositeItem) exclusion; - switch (chooseRewriteStrategy(c, session)) { - case NONE: - c = traverse(c, session); - break; - case CHEAP_AND: - case EXPENSIVE_AND: - c = cheapTransform(c, session); - break; - default: - c = flatTransform(c, session); - break; - } - not.setItem(i, c); - } - } - return not; - } - - private Item rewrite(CompositeItem c, IndexFacts.Session session) { - if (c instanceof NotItem) { - c = rewriteNot((NotItem) c, session); - return c; - } else { - switch (chooseRewriteStrategy(c, session)) { - case NONE: - c = traverse(c, session); - break; - case CHEAP_AND: - c = cheapTransform(c, session); - break; - case EXPENSIVE_AND: - c = expensiveTransform((AndItem) c, session); - break; - case FLAT: - c = flatTransform(c, session); - break; - default: - break; - } - } - return c; - } - - private CompositeItem traverse(CompositeItem c, IndexFacts.Session session) { - int length = c.getItemCount(); - for (int i = 0; i < length; ++i) { - Item word = c.getItem(i); - if (word instanceof CompositeItem && !(word instanceof PhraseItem) && !(word instanceof BlockItem)) { - c.setItem(i, rewrite((CompositeItem) word, session)); - } - } - return c; - } - - private CompositeItem expensiveTransform(AndItem c, IndexFacts.Session session) { - int[] indices = new int[2]; - int items = 0; - int length = c.getItemCount(); - Attribute[][] names = new Attribute[2][]; - CompositeItem result = null; - for (int i = 0; i < length; ++i) { - Item word = c.getItem(i); - if (word instanceof BlockItem || word instanceof PhraseItem) { - Attribute[] attributes = getIndices((HasIndexItem) word, session); - if (attributes == null) { - continue; - } - // this throwing an out of bounds if more than two candidates is intentional - names[items] = attributes; - indices[items++] = i; - } else if (word instanceof CompositeItem) { - c.setItem(i, rewrite((CompositeItem) word, session)); - } - } - switch (items) { - case 1: - result = linearAnd(c, names[0], indices[0]); - break; - case 2: - result = quadraticAnd(c, names[0], names[1], indices[0], indices[1]); - break; - default: - // should never happen - getLogger().log( - LogLevel.WARNING, - "Unexpected number of items for mixed recall, got " + items - + ", expected 1 or 2."); - break; - } - return result; - } - - private Attribute[] getIndices(HasIndexItem block, IndexFacts.Session session) { - return session.getIndex(block.getIndexName()).getMatchGroup(); - } - - private OrItem linearAnd(AndItem c, Attribute[] names, int brancherIndex) { - OrItem or = new OrItem(); - for (int i = 0; i < names.length; ++i) { - AndItem duck = (AndItem) c.clone(); - Item b = retarget(duck.getItem(brancherIndex), names[i]); - duck.setItem(brancherIndex, b); - or.addItem(duck); - } - return or; - } - - private OrItem quadraticAnd(AndItem c, Attribute[] firstNames, Attribute[] secondNames, int firstBrancher, int secondBrancher) { - OrItem or = new OrItem(); - for (int i = 0; i < firstNames.length; ++i) { - for (int j = 0; j < secondNames.length; ++j) { - AndItem duck = (AndItem) c.clone(); - Item b = retarget(duck.getItem(firstBrancher), firstNames[i]); - duck.setItem(firstBrancher, b); - b = retarget(duck.getItem(secondBrancher), secondNames[j]); - duck.setItem(secondBrancher, b); - or.addItem(duck); - } - } - return or; - } - - private CompositeItem flatTransform(CompositeItem c, IndexFacts.Session session) { - int maxIndex = c.getItemCount() - 1; - for (int i = maxIndex; i >= 0; --i) { - Item word = c.getItem(i); - if (word instanceof BlockItem || word instanceof PhraseItem) { - Attribute[] attributes = getIndices((HasIndexItem) word, session); - if (attributes == null) { - continue; - } - c.removeItem(i); - for (Attribute name : attributes) { - Item term = word.clone(); - Item forNewIndex = retarget(term, name); - c.addItem(forNewIndex); - } - } else if (word instanceof CompositeItem) { - c.setItem(i, rewrite((CompositeItem) word, session)); - } - } - return c; - } - - private CompositeItem cheapTransform(CompositeItem c, IndexFacts.Session session) { - if (c instanceof OrItem) { - return flatTransform(c, session); - } - int length = c.getItemCount(); - for (int i = 0; i < length; ++i) { - Item j = c.getItem(i); - if (j instanceof BlockItem || j instanceof PhraseItem) { - Attribute[] attributes = getIndices((HasIndexItem) j, session); - if (attributes == null) { - continue; - } - CompositeItem or = searchAllForItem(j, attributes); - c.setItem(i, or); - } else if (j instanceof CompositeItem) { - c.setItem(i, rewrite((CompositeItem) j, session)); - } - } - return c; - } - - private OrItem searchAllForItem(Item word, Attribute[] attributes) { - OrItem or = new OrItem(); - for (Attribute name : attributes) { - Item term = word.clone(); - term = retarget(term, name); - or.addItem(term); - } - return or; - } - - private Item retarget(Item word, Attribute newIndex) { - if (word instanceof PhraseItem && !newIndex.isTokenizedContent()) { - PhraseItem asPhrase = (PhraseItem) word; - WordItem newWord = new WordItem(asPhrase.getIndexedString(), newIndex.name, false); - return newWord; - } else if (word instanceof IndexedItem) { - word.setIndexName(newIndex.name); - } else if (word instanceof CompositeItem) { - CompositeItem asComposite = (CompositeItem) word; - for (Iterator<Item> i = asComposite.getItemIterator(); i.hasNext();) { - Item segment = i.next(); - segment.setIndexName(newIndex.name); - } - } - return word; - } - - private Item convertSinglePhraseOrBlock(Item item, IndexFacts.Session session) { - Item newItem; - Attribute[] attributes = getIndices((HasIndexItem) item, session); - if (attributes == null) { - return item; - } - newItem = searchAllForItem(item, attributes); - return newItem; - } - -} |