diff options
Diffstat (limited to 'container-search/src/main/java/com/yahoo/prelude/semantics/engine')
10 files changed, 1309 insertions, 0 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Choicepoint.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Choicepoint.java new file mode 100644 index 00000000000..f2650fef83a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Choicepoint.java @@ -0,0 +1,126 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +import com.yahoo.prelude.semantics.rule.Condition; + +/** + * A choice point in an rule evaluation. A choicepoint is open if there are other choices to make at the point, + * closed if there are no further choices. In addition it contains enough information to enable + * the rule evaluation to backtrack to this point + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class Choicepoint { + + /** Whether there are (or may be) open choices to explore at this choicepoint yet */ + private boolean open=true; + + /** The number of tries made at this choice point */ + private int tries=0; + + /** The condition creating this choicepoint */ + private Condition condition; + + /** The state this choice point can be rolled back to */ + private State state; + + private RuleEvaluation owner; + + public Choicepoint(RuleEvaluation e, Condition condition) { + this.owner=e; + state=new State(this,e); + this.condition=condition; + if (e.getTraceLevel()>=5) + e.trace(5,"Added choice point at " + e.currentItem() + " for '" + condition + "'"); + } + + /** Returns the condition which created this choice point */ + public Condition getCondition() { return condition; } + + /** Returns wether there are (or may be) open choices to explore at this choicepoint yet */ + public boolean isOpen() { return open; } + + /** Marks this choice point as closed (!open) - there are no further choices to explore */ + public void close() { this.open=false; } + + /** Returns the number open tries made at this point */ + public int tryCount() { return tries; } + + /** Registers that another try has been made */ + public void addTry() { + tries++; + } + + /** + * Backtrack to the evaluation state at the point where this choicepoint were instantiated. + */ + public void backtrack() { + state.backtrack(owner); + if (owner.getTraceLevel()>=5) + owner.trace(5,"Backtracked to " + owner.currentItem() + " for '" + condition + "'"); + } + + /** Backtracks the position only, not matches */ + public void backtrackPosition() { + state.backtrackPosition(owner); + } + + /** + * Updates the state of this choice point to the current state of its evaluation + */ + public void updateState() { + state.updateState(owner); + } + + /** Returns the state of this choice point */ + public State getState() { return state; } + + /** The state of this choicepoint */ + public final static class State { + + private int position=0; + + private int referencedMatchCount=0; + + private int nonreferencedMatchCount=0; + + public State(Choicepoint choicepoint,RuleEvaluation evaluation) { + updateState(evaluation); + } + + public void updateState(RuleEvaluation evaluation) { + position=evaluation.currentPosition(); + referencedMatchCount=evaluation.getReferencedMatchCount(); + nonreferencedMatchCount=evaluation.getNonreferencedMatchCount(); + } + + /** Backtrack to the evaluation state at the point where this choicepoint were instantiated */ + public void backtrack(RuleEvaluation e) { + backtrackPosition(e); + + // Is this check masking errors? + if (e.referencedMatches().size()>referencedMatchCount) + e.referencedMatches().subList(referencedMatchCount, + e.referencedMatches().size()) + .clear(); + // Is this check masking errors? + if (e.nonreferencedMatches().size()>nonreferencedMatchCount) + e.nonreferencedMatches().subList(nonreferencedMatchCount, + e.nonreferencedMatches().size()) + .clear(); + } + + public void backtrackPosition(RuleEvaluation e) { + e.setPosition(position); + } + + public int getPosition() { return position; } + + public int getReferencedMatchCount() { return referencedMatchCount; } + + public int getNonreferencedMatchCount() { return nonreferencedMatchCount; } + + } + +} + diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java new file mode 100644 index 00000000000..fe3543fc655 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java @@ -0,0 +1,453 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +import com.yahoo.prelude.query.*; +import com.yahoo.search.Query; +import com.yahoo.search.query.QueryTree; + +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +/** + * An evaluation of a query over a rule base. There is one evaluation for each evaluation + * of one query over one rule base. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class Evaluation { + + // TODO: Retrofit query into the namespace construct + private ParameterNameSpace parameterNameSpace=null; + + private Query query; + + /** The current index into the flattened item list */ + private int currentIndex = 0; + + /** Query items flattened to a list iterator */ + private List<FlattenedItem> flattenedItems; + + /** The rule evaluation context, can be reset once the rule is evaluated */ + private RuleEvaluation ruleEvaluation; + + /** + * The amount of context information to collect about this evaluation. + * 0 means no context information, higher numbers means more context information. + */ + private int traceLevel=0; + + private String traceIndentation=""; + + /** See RuleEngine */ + private Set<Integer> matchDigests=new HashSet<>(); + + /** The previous size of this query (see RuleEngine), set on matches only */ + private int previousQuerySize=0; + + /** Should we allow stemmed matches? */ + private boolean stemming=true; + + public Evaluation(Query query) { + this(query,0); + } + + /** + * Creates a new evaluation + * + * @param query the query this evaluation is for + * @param traceLevel the amount of tracing to do + */ + public Evaluation(Query query,int traceLevel) { + this.query=query; + this.traceLevel=traceLevel; + reset(); + ruleEvaluation=new RuleEvaluation(this); + } + + /** Resets the item iterator to point to the first item */ + public void reset() { + if (flattenedItems!=null) + previousQuerySize=flattenedItems.size(); + currentIndex=0; + traceIndentation=""; + flattenedItems=new java.util.ArrayList<>(); + flatten(query.getModel().getQueryTree().getRoot(),0,flattenedItems); + } + + /** Sets the item iterator to point to the last item: */ + public void setToLast() { // PGA + if (flattenedItems!=null) + currentIndex = flattenedItems.size()-1; + else + currentIndex = -1; + } + + /** Resets the item iterator to point to the last item: */ + public void resetToLast() { // PGA + if (flattenedItems!=null) + previousQuerySize=flattenedItems.size(); + traceIndentation=""; + flattenedItems=new java.util.ArrayList<>(); + flatten(query.getModel().getQueryTree().getRoot(),0,flattenedItems); + currentIndex = flattenedItems.size()-1; + } + + public Query getQuery() { return query; } + + /** Set to true to enable stemmed matches. True by default */ + public void setStemming(boolean stemming) { this.stemming=stemming; } + + /** Returns whether stemmed matches are allowed. True by default */ + public boolean getStemming() { return stemming; } + + void addMatchDigest(int digest) { matchDigests.add(new Integer(digest)); } + + boolean hasMatchDigest(int matchDigest) { return matchDigests.contains(new Integer(matchDigest)); } + + int getPreviousQuerySize() { return previousQuerySize; } + + public int getQuerySize() { return flattenedItems.size(); } + + /** Advances to the next item as current item */ + public void next() { + currentIndex++; + } + + public void previous() {//PGA + currentIndex--; + } + + + /** Returns the current item, or null if there is no more elements */ + public FlattenedItem currentItem() { + if ( (currentIndex>=flattenedItems.size()) || (currentIndex<0)) return null; //PGA + return flattenedItems.get(currentIndex); + } + + /** Returns a fresh rule evaluation starting at the current position of this */ + public RuleEvaluation freshRuleEvaluation() { + ruleEvaluation.initialize(flattenedItems,currentIndex); + return ruleEvaluation; + } + + /** Adds an item to the query being evaluated in a way consistent with the query type */ + // TODO: Add this functionality to Query? + public void addItem(Item item, TermType termType) { + Item root= query.getModel().getQueryTree().getRoot(); + if (root==null) + query.getModel().getQueryTree().setRoot(item); + else + query.getModel().getQueryTree().setRoot(combineItems(root,item,termType)); + } + + /** Removes this item */ + public void removeItem(Item item) { + item.getParent().removeItem(item); + } + + /** + * Removes this item by identity to ensure we remove the right one if there are multiple + * equal items + */ + public void removeItemByIdentity(Item item) { + int position=findIndexByIdentity(item); + if (position>=0) + item.getParent().removeItem(position); + else + item.getParent().removeItem(item); // Fallback to removeField by equal() + } + + private int findIndexByIdentity(Item item) { + int position=0; + for (Iterator<Item> i=item.getParent().getItemIterator(); i.hasNext(); ) { + Item child=i.next(); + if (item==child) { + return position; + } + position++; + } + return -1; + } + + /** Removes an item, prefers the one at/close to the given position if there are multiple ones */ + public void removeItem(int position,Item item) { + Item removeCandidate=item.getParent().getItem(position); + if (removeCandidate.equals(item)) // Remove based on position + item.getParent().removeItem(position); + else + item.getParent().removeItem(item); // Otherwise, just removeField any such item + } + + /** + * Convert segment items into their mutable counterpart, do not update query tree. + * Non-segment items are returned directly. + * + * @return a mutable CompositeItem instance + */ + private CompositeItem convertSegmentItem(CompositeItem item) { + if (!(item instanceof SegmentItem)) { + return item; + } + CompositeItem converted = null; + if (item instanceof AndSegmentItem) { + converted = new AndItem(); + } else if (item instanceof PhraseSegmentItem) { + PhraseItem p = new PhraseItem(); + PhraseSegmentItem old = (PhraseSegmentItem) item; + p.setIndexName(old.getIndexName()); + converted = p; + } else { + // TODO: Do something else than nothing for unknowns? + return item; + } + for (Iterator<Item> i = item.getItemIterator(); i.hasNext();) { + converted.addItem(i.next()); + } + return converted; + } + + + private void insertMutableInTree(CompositeItem mutable, CompositeItem original, CompositeItem parent) { + if (parent == null) { + query.getModel().getQueryTree().setRoot(mutable); + + } else { + int parentsIndex = parent.getItemIndex(original); + parent.setItem(parentsIndex, mutable); + } + } + + /** + * Convert The parent of this item into a mutable item. Note, this + * may change the shape of the query tree. (E.g. if the original parent is a + * segment phrase, and the original parent's parent is a phrase, the terms + * from the parent will be moved to the parent's parent.) + * + * @param item The item for which the parent shall be made mutable + */ + public void makeParentMutable(TermItem item) { + CompositeItem parent = item.getParent(); + CompositeItem mutable = convertSegmentItem(parent); + if (parent != mutable) { + CompositeItem parentsParent = parent.getParent(); + insertMutableInTree(mutable, parent, parentsParent); + } + } + + /** + * Inserts an item to the query being evaluated in a way consistent with the query type + * + * @param item the item to insert + * @param parent the parent of this item, or null to set the root + * @param index the index at which to insert this into the parent + * @param desiredParentType the desired type of the composite which contains item when this returns + */ + public void insertItem(Item item, CompositeItem parent, int index, TermType desiredParentType) { + if (parent==null) { // TODO: Accommodate for termtype in this case too + query.getModel().getQueryTree().setRoot(item); + + return; + } + + if (parent.getItemCount()>0 && parent instanceof QueryTree && parent.getItem(0) instanceof CompositeItem) { + // combine with the existing root instead + parent=(CompositeItem)parent.getItem(0); + if (index==1) { // that means adding it after the existing root + index=parent.getItemCount(); + } + } + + if (( desiredParentType==TermType.DEFAULT || desiredParentType.hasItemClass(parent.getClass()) ) + && equalIndexNameIfParentIsPhrase(item,parent)) { + addItem(parent,index,item,desiredParentType); + } + else { + insertIncompatibleItem(item,parent,query,desiredParentType); + } + } + + private void addItem(CompositeItem parent,int index,Item item,TermType desiredParentType) { + if (parent instanceof NotItem) { + if (index==0 && parent.getItem(0)==null) { // Case 1: The current positive is null and we are adding a positive + parent.setItem(0,item); + } + else if (index<=1 && !(parent.getItem(0) instanceof CompositeItem)) { // Case 2: The positive must become a composite + CompositeItem positiveComposite=(CompositeItem)desiredParentType.createItemClass(); + positiveComposite.addItem(parent.getItem(0)); + positiveComposite.addItem(index,item); + parent.setItem(0,positiveComposite); + } + else if (parent.getItem(0)!=null && parent.getItem(0) instanceof CompositeItem // Case 3: Add to the positive composite + && index<=((CompositeItem)parent.getItem(0)).getItemCount()) { + ((CompositeItem)parent.getItem(0)).addItem(index,item); + } + else { // Case 4: Add negative + parent.addItem(index,item); + } + } + else if (parent.getItemCount()>0 && parent instanceof QueryTree) { + CompositeItem composite=(CompositeItem)desiredParentType.createItemClass(); + composite.addItem(parent.getItem(0)); + composite.addItem(index,item); + parent.setItem(0,composite); + } + else { + parent.addItem(index,item); + } + } + + /** A special purpose check used to simplify the above */ + private boolean equalIndexNameIfParentIsPhrase(Item item,CompositeItem parent) { + if ( ! (parent instanceof PhraseItem)) return true; + if ( ! (item instanceof IndexedItem)) return true; + + return ((PhraseItem)parent).getIndexName().equals(((IndexedItem)item).getIndexName()); + } + + private void insertIncompatibleItem(Item item,CompositeItem parent,Query query,TermType desiredParentType) { + // Create new parent + CompositeItem newParent; + if (desiredParentType==TermType.DEFAULT) + newParent=new AndItem(); + else + newParent=(CompositeItem)desiredParentType.createItemClass(); + + // Save previous parent parent + CompositeItem parentsParent=parent.getParent(); + + // Add items to new parent + newParent.addItem(parent); + newParent.addItem(item); + + // Insert new parent as root or child of old parents parent + if (parentsParent==null) { + query.getModel().getQueryTree().setRoot(newParent); + + } + else { + int parentIndex=0; + if (parentsParent!=null) { + parentIndex=parentsParent.getItemIndex(parent); + } + parentsParent.setItem(parentIndex,newParent); + } + } + + private Item combineItems(Item first,Item second,TermType termType) { + if (first instanceof NullItem) { + return second; + } else if (first instanceof NotItem) { + NotItem notItem=(NotItem)first; + if (termType==TermType.NOT) { + notItem.addNegativeItem(second); + } + else { + Item newPositive=combineItems(notItem.getPositiveItem(),second,termType); + notItem.setPositiveItem(newPositive); + } + return notItem; + } + else if (first instanceof CompositeItem) { + CompositeItem composite=(CompositeItem)first; + CompositeItem combined=createType(termType); + if (combined.getClass().equals(composite.getClass())) { + composite.addItem(second); + return composite; + } + else { + combined.addItem(first); + combined.addItem(second); // Also works for nots + return combined; + } + } + else if (first instanceof TermItem) { + CompositeItem combined=createType(termType); + combined.addItem(first); + combined.addItem(second); + return combined; + } + else { + throw new RuntimeException("Don't know how to add an item to type " + first.getClass()); + } + } + + private CompositeItem createType(TermType termType) { + if (termType==TermType.DEFAULT) { + if (query.getModel().getType().equals(Query.Type.ANY)) + return new OrItem(); + else + return new AndItem(); + } + else if (termType==TermType.AND) { + return new AndItem(); + } + else if (termType==TermType.OR) { + return new OrItem(); + } + else if (termType==TermType.RANK) { + return new RankItem(); + } + else if (termType==TermType.NOT) { + return new NotItem(); + } + throw new IllegalArgumentException("Programing error, this method should be updated with add in RankType"); + } + + private void flatten(Item item,int position,List<FlattenedItem> toList) { + if (item==null) return; + if (item.isFilter()) return; + + if (item instanceof TermItem) { // make eligible for matching + toList.add(new FlattenedItem((TermItem)item,position)); + return; + } + + if (item instanceof CompositeItem) { // make children eligible for matching + CompositeItem composite=(CompositeItem)item; + int childPosition=0; + for (Iterator<?> i=composite.getItemIterator(); i.hasNext(); ) { + flatten((Item)i.next(),childPosition++,toList); + } + } + + // other terms are unmatchable + } + + public void trace(int level,String message) { + if (level>getTraceLevel()) return; + query.trace(traceIndentation + message,false,1); + } + + /** + * The amount of context information to collect about this evaluation. + * 0 (the default) means no context information, higher numbers means + * more context information. + */ + public int getTraceLevel() { return traceLevel; } + + public void indentTrace() { + traceIndentation=traceIndentation + " "; + } + + public void unindentTrace() { + if (traceIndentation.length()<3) + traceIndentation=""; + else + traceIndentation=traceIndentation.substring(3); + } + + public NameSpace getNameSpace(String nameSpaceName) { + if (nameSpaceName.equals("parameter")) { + if (parameterNameSpace==null) + parameterNameSpace=new ParameterNameSpace(); + return parameterNameSpace; + } + + // That's all for now + throw new RuntimeException("Unknown namespace '" + nameSpaceName + "'"); + + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/EvaluationException.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/EvaluationException.java new file mode 100644 index 00000000000..00a66206b46 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/EvaluationException.java @@ -0,0 +1,15 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +/** + * Thrown on semantic exceptions on evaluation over a rule base + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +@SuppressWarnings("serial") +public class EvaluationException extends RuntimeException { + + public EvaluationException(String message) { + super(message); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/FlattenedItem.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/FlattenedItem.java new file mode 100644 index 00000000000..1631d60df6b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/FlattenedItem.java @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +import com.yahoo.prelude.query.TermItem; + +/** + * An item which knows its position in its parent + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class FlattenedItem { + + private TermItem item; + + /** The position of this item in its parent */ + private int position; + + public FlattenedItem(TermItem item,int position) { + this.item=item; + this.position=position; + } + + public TermItem getItem() { return item; } + + public int getPosition() { return position; } + + public String toString() { + return position + ":" + item; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Match.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Match.java new file mode 100644 index 00000000000..fc7aec62412 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Match.java @@ -0,0 +1,80 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +import com.yahoo.prelude.query.CompositeItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.TermItem; +import com.yahoo.prelude.query.WordItem; + +/** + * A match + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class Match { + + /** The start position of this match */ + private int position; + + private TermItem item; + + /** The string to replace the match by, usually item.getIndexedString() */ + private String replaceValue; + + /** The parent of the matched item */ + private CompositeItem parent=null; + + /** + * Creates a match + * + * @param item the match to add + * @param replaceValue the string to replace this match by, usually the item.getIndexedString() + * which is what the replace value will be if it is passed as null here + */ + public Match(FlattenedItem item,String replaceValue) { + this.item=item.getItem(); + if (replaceValue==null) + this.replaceValue=item.getItem().getIndexedString(); + else + this.replaceValue=replaceValue; + this.parent=this.item.getParent(); + this.position=item.getPosition(); + } + + public int getPosition() { return position; } + + public TermItem getItem() { return item; } + + public String getReplaceValue() { + return replaceValue; + } + + /** + * Returns the parent in which the item was matched, or null if the item was root. + * Note that the item may subsequently have been removed, so it does not necessarily + * have this parent + */ + public CompositeItem getParent() { return parent; } + + public int hashCode() { + return + 17*item.getIndexedString().hashCode()+ + 33*item.getIndexName().hashCode(); + } + + /** Returns a new item representing this match */ + public Item toItem(String label) { + return new WordItem(getReplaceValue(),label); + } + + public boolean equals(Object o) { + if (! (o instanceof Match)) return false; + + Match other=(Match)o; + if (other.position!=position) return false; + if (!other.item.equals(item)) return false; + + return true; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/NameSpace.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/NameSpace.java new file mode 100644 index 00000000000..76eea63bd68 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/NameSpace.java @@ -0,0 +1,16 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +/** + * A collection of facts (addressed by namespace.fact in conditions) + * over which we may write conditions + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public abstract class NameSpace { + + public abstract boolean matches(String term,RuleEvaluation e); + + // TODO: public abstract void produce(RuleEvaluation e); + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/ParameterNameSpace.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/ParameterNameSpace.java new file mode 100644 index 00000000000..35427250511 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/ParameterNameSpace.java @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +import com.yahoo.search.Query; + +/** + * A name space representing the (http) parameters following this query + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class ParameterNameSpace extends NameSpace { + + public boolean matches(String term,RuleEvaluation e) { + Query query=e.getEvaluation().getQuery(); + String value=query.properties().getString(term); + if (value==null) return false; + e.setValue(value); + return true; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/ReferencedMatches.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/ReferencedMatches.java new file mode 100644 index 00000000000..cb7d2af8d19 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/ReferencedMatches.java @@ -0,0 +1,52 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +import java.util.Iterator; +import java.util.List; + +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.PhraseItem; + +/** + * The Matches referenced by a particular context name in a rule evaluation + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class ReferencedMatches { + + private String contextName; + + private List<Match> matches=new java.util.ArrayList<>(1); + + public ReferencedMatches(String contextName) { + this.contextName=contextName; + } + + public void addMatch(Match match) { + matches.add(match); + } + + public String getContextName() { return contextName; } + + public Iterator<Match> matchIterator() { + return matches.iterator(); + } + + /** + * Returns the item to insert from these referenced matches, or null if none + * + * @param label the label of the matches + */ + public Item toItem(String label) { + if (matches.size()==0) return null; + if (matches.size()==1) return matches.get(0).toItem(label); + + PhraseItem phrase=new PhraseItem(); // TODO: Somehow allow AND items instead here + phrase.setIndexName(label); + for (Iterator<Match> i=matches.iterator(); i.hasNext(); ) { + phrase.addItem(i.next().toItem(label)); + } + return phrase; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEngine.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEngine.java new file mode 100644 index 00000000000..ee874b76ed6 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEngine.java @@ -0,0 +1,169 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +import com.yahoo.search.Query; +import com.yahoo.prelude.query.QueryCanonicalizer; +import com.yahoo.prelude.semantics.RuleBase; +import com.yahoo.prelude.semantics.RuleBaseException; +import com.yahoo.prelude.semantics.rule.ProductionRule; + +import java.util.ListIterator; + +/** + * Evaluates the rules of a rule base. This method is thread safe on analyze calls, but + * not on modification calls. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class RuleEngine { + + private RuleBase rules; + + public RuleEngine(RuleBase rules) { + this.rules=rules; + } + + /** + * Evaluates a rule base over a query + * + * @param query the query to evaluate + * @param traceLevel the level of tracing to do + * @return the error caused by analyzing the query, or null if there was no error + * If there is an error, this query is destroyed (unusable) + */ + public String evaluate(Query query,int traceLevel) { + // TODO: This is O(query size*rule base size). We'll eventually need to create indices + // on rules to look up rule candidates per term to make it O(query size) instead + // Probably create indices on the first term like Prolog implementations use to + + boolean matchedAnything=false; + Evaluation evaluation=new Evaluation(query,traceLevel); + evaluation.setStemming(rules.getStemming()); + evaluation.trace(2,"Evaluating query '" + evaluation.getQuery().getModel().getQueryTree().getRoot() + "':"); + for (ListIterator<ProductionRule> i=rules.ruleIterator(); i.hasNext(); ) { + evaluation.reset(); + ProductionRule rule=i.next(); + boolean matched=matchRuleAtAllStartPoints(evaluation,rule); + matchedAnything|=matched; + } + + if (!matchedAnything) return null; + + String error=QueryCanonicalizer.canonicalize(query); + + if (query.getTraceLevel()>=1) + query.trace("SemanticSearcher: Rewrote query",true,1); + + return error; + } + + /** Match a rule at any starting point in the query */ + private boolean matchRuleAtAllStartPoints(Evaluation evaluation, ProductionRule rule) { + boolean matchedAtLeastOnce=false; + int iterationCount=0; + + /** + * Test if it is a removal rule, if so iterate backwards so that precalculated + * replacement positions does not become invalid as the query shrink + */ + boolean removalRule = false; + if ( (rule instanceof com.yahoo.prelude.semantics.rule.ReplacingProductionRule) && + (rule.getProduction().toString().length() == 0) ) { // empty replacement + removalRule = true; + evaluation.setToLast(); + } + + int loopLimit=Math.max(15,evaluation.getQuerySize()*3); + + while (evaluation.currentItem() != null) { + boolean matched=matchRule(evaluation,rule); + if (matched) { + if (removalRule) + evaluation.resetToLast(); + else + evaluation.reset(); + matchedAtLeastOnce = true; + if (rule.isLoop()) break; + } + else { + if (removalRule) + evaluation.previous(); + else + evaluation.next(); + } + + if (matched && iterationCount++ > loopLimit) { + throw new RuleBaseException("Rule '" + rule + "' has matched '" + + evaluation.getQuery().getModel().getQueryTree().getRoot() + + "' " + loopLimit + " times, aborting"); + } + } + + return matchedAtLeastOnce; + } + + /** + * Matches a rule at the current starting point of the evaluation, and carries + * out the production if there is a match + * + * @return whether this rule matched + */ + // TODO: Code cleanup + private boolean matchRule(Evaluation evaluation, ProductionRule rule) { + RuleEvaluation ruleEvaluation=evaluation.freshRuleEvaluation(); + + ruleEvaluation.indentTrace(); + if (ruleEvaluation.getTraceLevel()>=3) { + ruleEvaluation.trace(3,"Evaluating rule '" + rule + + "' on '" + ruleEvaluation.getEvaluation().getQuery().getModel().getQueryTree().getRoot() + + "' at '" + ruleEvaluation.currentItem() + "':"); + } + + ruleEvaluation.indentTrace(); + + boolean matches=rule.matches(ruleEvaluation); + + boolean matchedBefore=false; + int currentMatchDigest=ruleEvaluation.calculateMatchDigest(rule); + if (evaluation.hasMatchDigest(currentMatchDigest)) + matchedBefore=true; + + boolean queryGotShorter=false; + if (evaluation.getPreviousQuerySize()>evaluation.getQuerySize()) + queryGotShorter=true; + + boolean doProduction=!matchedBefore || queryGotShorter; + + ruleEvaluation.unindentTrace(); + + if (ruleEvaluation.getTraceLevel()>=2) { + if (matches && doProduction) + ruleEvaluation.trace(2,"Matched rule '" + rule + "' at " + ruleEvaluation.previousItem()); + else if (!matches) + ruleEvaluation.trace(2,"Did not match rule '" + rule + "' at " + ruleEvaluation.currentItem()); + else if (!doProduction) + ruleEvaluation.trace(2,"Ignoring repeated match of '" + rule + "'"); + } + + ruleEvaluation.unindentTrace(); + + if (!matches || !doProduction) return false; + + // Do production barrier + + evaluation.addMatchDigest(currentMatchDigest); + String preQuery=null; + if (evaluation.getTraceLevel()>=1) { + preQuery= evaluation.getQuery().getModel().getQueryTree().getRoot().toString(); + } + rule.produce(ruleEvaluation); + if (evaluation.getTraceLevel()>=1) { + evaluation.trace(1,"Transforming '" + preQuery + "' to '" + + evaluation.getQuery().getModel().getQueryTree().getRoot().toString() + + "' since '" + rule + "' matched"); + } + + return true; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEvaluation.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEvaluation.java new file mode 100644 index 00000000000..a6b90f98879 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEvaluation.java @@ -0,0 +1,346 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +import com.yahoo.prelude.query.CompositeItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.TermType; +import com.yahoo.prelude.semantics.rule.Condition; +import com.yahoo.prelude.semantics.rule.ProductionRule; + +import java.util.*; + +/** + * A particular evalutation of a particular rule. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class RuleEvaluation { + + // TODO: Create a query builder (or something) though which all query manipulation + // here and in Evaluation is done. This class must also hold all the matches + // and probably be able to update the match positions to keep them in sync with changes + // to the query + + // Remember that whenever state is added to this class, you + // must consider whether/how to make that state backtrackable + // by savinginformation in choicepoint.state + + /** The items to match in this evaluation */ + private List<FlattenedItem> items; + + /** The current position into the list of items */ + private int position; + + /** The start position into the item list */ + private int startPosition; + + /** The references to matched contexts to be made in this evaluation */ + private Set<String> matchReferences; + + /** The current context of this evaluation, or null we're currently not in an interesting context */ + private String currentContext; + + /** A list of referencedMatches */ + private List<ReferencedMatches> referencedMatchesList =new java.util.ArrayList<>(); + + private List<Match> nonreferencedMatches=new java.util.ArrayList<>(); + + /** The evaluation owning this */ + private Evaluation evaluation; + + /** The choice points saved in this evaluation */ + private Stack<Choicepoint> choicepoints=null; + + /* The last value returned by a condition evaluated in this, may be null */ + private Object value=null; + + /** True when we are evaluating inside a condition which inverts the truth value */ + private boolean inNegation=false; + + /** + * A label we should use to match candidate terms for. + * Used to propagate a label from e.g. reference conditions to named conditions + */ + private String currentLabel=null; + + public RuleEvaluation(Evaluation owner) { + this.evaluation=owner; + } + + public void initialize(List<FlattenedItem> list,int startPosition) { + this.startPosition=startPosition; + items=list; + reinitialize(); + } + + void reinitialize() { + position=startPosition; + currentContext=null; + referencedMatchesList.clear(); + nonreferencedMatches.clear(); + if (choicepoints!=null) + choicepoints.clear(); + } + + public void setMatchReferences(Set<String> matchReferences) { this.matchReferences=matchReferences; } + + /** + * <p>Calculates an id which is unique for each match (the totality of the matched terms) + * to a high probability. Why can we not simply look at the position + * of terms? Because rules are allowed to modify the query tree in ways that makes positions + * change.</p> + * + * <p>This digest is also problematic, because it's really the matching condition who should + * calculate a match digest for that term which incorporates the semantics of that kind + * of match (maybe not the word and index, but something else). This is a todo for when + * we add other kinds of conditions.</p> + */ + int calculateMatchDigest(ProductionRule rule) { + int matchDigest=rule.hashCode(); + int matchCounter=1; + for (Iterator<ReferencedMatches> i=referencedMatchesList.iterator(); i.hasNext(); ) { + ReferencedMatches matches=i.next(); + int termCounter=0; + for (Iterator<Match> j=matches.matchIterator(); j.hasNext(); ) { + Match match=j.next(); + matchDigest=7*matchDigest*matchCounter+ + 71*termCounter+ + match.hashCode(); + termCounter++; + } + matchCounter++; + } + for (Iterator<Match> i=nonreferencedMatches.iterator(); i.hasNext(); ) { + Match match=i.next(); + matchDigest=7*matchDigest*matchCounter+match.hashCode(); + matchCounter++; + } + return matchDigest; + } + + /** + * Returns the current term item to look at, + * or null if there are no more elements + */ + public FlattenedItem currentItem() { + if (position>=items.size()) return null; + return items.get(position); + } + + public FlattenedItem previousItem() { + if (position-1<0) return null; + return items.get(position-1); + } + + /** Returns the position of the current item */ + public int currentPosition() { + return position; + } + + /** Sets the current position */ + public void setPosition(int position) { + this.position=position; + } + + /** Returns the total number of items to match in this evaluation */ + public int itemCount() { + return items.size() - startPosition; + } + + /** Returns the last value returned by a condition in this evaluation, or null */ + public Object getValue() { return value; } + + /** Sets the last value returned by a condition in this evaluatiino, or null */ + public void setValue(Object value) { this.value=value; } + + /** Returns whether we are evaluating inside a condition which inverts the truth value */ + public boolean isInNegation() { return inNegation; } + + /** sets whether we are evaluating inside a condition which inverts the truth value */ + public void setInNegation(boolean inNegation) { this.inNegation=inNegation; } + + /** Returns the current position into the terms this evaluates over */ + public int getPosition() { return position; } + + /** Sets a new current label and returns the previous one */ + public String setCurrentLabel(String currentLabel) { + String oldLabel=currentLabel; + this.currentLabel=currentLabel; + return oldLabel; + } + + public String getCurrentLabel() { return currentLabel; } + + /** + * Advances currentItem to the next term item and returns thatItem. + * If the current item before this call is the last item, this will + * return (and set currentItem to) null. + */ + public FlattenedItem next() { + position++; + + if (position>=items.size()) { + position=items.size(); + return null; + } + + return items.get(position); + } + + // TODO: Simplistic yet. Nedd to support context nesting etc. + public void entering(String context) { + if (context==null) return; + if (matchReferences!=null && matchReferences.contains(context)) + currentContext=context; + + } + + public void leaving(String context) { + if (context==null) return; + if (currentContext==null) return; + if (currentContext.equals(context)) + currentContext=null; + } + + /** + * Adds a match + * + * @param item the match to add + * @param replaceString the string to replace this match by, usually the item.getIndexedValue() + */ + public void addMatch(FlattenedItem item,String replaceString) { + evaluation.makeParentMutable(item.getItem()); + Match match=new Match(item,replaceString); + if (currentContext!=null) { + ReferencedMatches matches=getReferencedMatches(currentContext); + if (matches==null) { + matches=new ReferencedMatches(currentContext); + referencedMatchesList.add(matches); + } + matches.addMatch(match); + } + else { + nonreferencedMatches.add(match); + } + } + + /** Returns the referenced matches for a context name, or null if none */ + public ReferencedMatches getReferencedMatches(String name) { + for (Iterator<ReferencedMatches> i=referencedMatchesList.iterator(); i.hasNext(); ) { + ReferencedMatches matches=i.next(); + if (name.equals(matches.getContextName())) + return matches; + } + return null; + } + + public int getReferencedMatchCount() { return referencedMatchesList.size(); } + + public int getNonreferencedMatchCount() { return nonreferencedMatches.size(); } + + /** Returns the evaluation this belongs to */ + public Evaluation getEvaluation() { return evaluation; } + + /** Adds an item to the query being evaluated in a way consistent with the query type */ + public void addItem(Item item, TermType termType) { + evaluation.addItem(item,termType); + } + + public void removeItem(Item item) { + evaluation.removeItem(item); + } + + public void removeItemByIdentity(Item item) { + evaluation.removeItemByIdentity(item); + } + + /** Removes an item, prefers the one at/close to the given position if there are multiple ones */ + public void removeItem(int position,Item item) { + evaluation.removeItem(position,item); + } + + + /** + * Inserts an item to the query being evaluated in a way consistent with the query type + * + * @param item the item to insert + * @param parent the parent of this item, or null to set the root + * @param index the index at which to insert this into the parent + * @param termType the kind of item to index, this decides the resulting structure + */ + public void insertItem(Item item, CompositeItem parent, int index, TermType termType) { + evaluation.insertItem(item,parent,index,termType); + } + + /** Returns a read-only view of the items of this */ + public List<FlattenedItem> items() { + return Collections.unmodifiableList(items); + } + + public Match getNonreferencedMatch(int index) { + return nonreferencedMatches.get(index); + } + + public void trace(int level,String string) { + evaluation.trace(level,string); + } + + public int getTraceLevel() { + return evaluation.getTraceLevel(); + } + + public void indentTrace() { + evaluation.indentTrace(); + } + + public void unindentTrace() { + evaluation.unindentTrace(); + } + + /** + * Add a choice point to this evaluation + * + * @param condition the creating condition + * @param create true to create this choicepoint if it is missing + * @return the choicepoint, or null if not present, and create is false + */ + public Choicepoint getChoicepoint(Condition condition,boolean create) { + if (choicepoints==null) { + if (!create) return null; + choicepoints=new java.util.Stack<>(); + } + Choicepoint choicepoint=lookupChoicepoint(condition); + if (choicepoint==null) { + if (!create) return null; + choicepoint=new Choicepoint(this,condition); + choicepoints.push(choicepoint); + } + return choicepoint; + } + + private Choicepoint lookupChoicepoint(Condition condition) { + for (Iterator<Choicepoint> i=choicepoints.iterator(); i.hasNext(); ) { + Choicepoint choicepoint=i.next(); + if (condition==choicepoint.getCondition()) + return choicepoint; + } + return null; + } + + List<ReferencedMatches> referencedMatches() { + return referencedMatchesList; + } + + List<Match> nonreferencedMatches() { + return nonreferencedMatches; + } + + /** Remove all the terms recognized by this match */ + public void removeMatches(ReferencedMatches matches) { + for (Iterator<Match> i=matches.matchIterator(); i.hasNext(); ) { + Match match=i.next(); + removeItemByIdentity(match.getItem()); + } + } + +} |