diff options
Diffstat (limited to 'container-search/src/main/java/com/yahoo/prelude/semantics')
45 files changed, 4401 insertions, 0 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBase.java b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBase.java new file mode 100644 index 00000000000..d3f51e76712 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBase.java @@ -0,0 +1,432 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics; + +import com.yahoo.search.Query; +import com.yahoo.prelude.querytransform.PhraseMatcher; +import com.yahoo.prelude.semantics.engine.RuleEngine; +import com.yahoo.prelude.semantics.parser.ParseException; +import com.yahoo.prelude.semantics.rule.*; +import com.yahoo.protect.Validator; + +import java.io.File; +import java.util.*; + +/** + * A set of semantic production rules and named conditions used to analyze + * and rewrite queries + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class RuleBase { + + /** The globally identifying name of this rule base */ + private String name; + + /** The name of the source of this rules */ + private String source; + + /** The name of the automata file used, or null if none */ + protected String automataFileName=null; + + /** + * True if this rule base is default. + * The semantics of default is left to the surrounding framework + */ + private boolean isDefault=false; + + private List<ProductionRule> productionRules=new java.util.ArrayList<>(); + + private Map<String, NamedCondition> namedConditions=new java.util.LinkedHashMap<>(); + + /** The analyzer used to do evaluations over this rule base */ + private RuleEngine analyzer=new RuleEngine(this); + + private static final PhraseMatcher nullPhraseMatcher=PhraseMatcher.getNullMatcher(); + + /** + * The matcher using an automata to match terms and phrases prior to matching rules + * or the null matcher if no matcher is used. + */ + private PhraseMatcher phraseMatcher=nullPhraseMatcher; + + /** + * The names of the rule bases included indirectly or directly in this + * Ordered by first to last included + */ + private Set<String> includedNames=new java.util.LinkedHashSet<>(); + + /** + * True if this uses an automata, even if an automata is not present right now. Useful to validate without + * having automatas available + */ + private boolean usesAutomata=false; + + /** Should we allow stemmed matches? */ + private boolean stemming=true; + + /** Creates an empty rule base. TODO: Disallow */ + public RuleBase() { + } + + /** Creates an empty rule base */ + public RuleBase(String name) { + setName(name); + } + + /** + * Creates a rule base from a file + * + * @param ruleFile the rule file to read. The name of the file (minus path) becomes the rule base name + * @param automataFile the automata file, or null to not use an automata + * @throws java.io.IOException if there is a problem reading one of the files + * @throws ParseException if the rule file can not be parsed correctly + * @throws RuleBaseException if the rule file contains inconsistencies + */ + public static RuleBase createFromFile(String ruleFile,String automataFile) throws java.io.IOException, ParseException { + return new RuleImporter().importFile(ruleFile,automataFile); + } + + /** + * Creates a rule base from a string + * + * @param name the name of the rule base + * @param ruleString the rule string to read + * @param automataFile the automata file, or null to not use an automata + * @throws java.io.IOException if there is a problem reading the automata file + * @throws com.yahoo.prelude.semantics.parser.ParseException if the rule file can not be parsed correctly + * @throws com.yahoo.prelude.semantics.RuleBaseException if the rule file contains inconsistencies + */ + public static RuleBase createFromString(String name,String ruleString,String automataFile) throws java.io.IOException, ParseException { + RuleBase base=new RuleImporter().importString(ruleString,automataFile,new RuleBase()); + base.setName(name); + return base; + } + + /** Set to true to enable stemmed matches. True by default */ + public void setStemming(boolean stemming) { this.stemming=stemming; } + + /** Returns whether stemmed matches are allowed. True by default */ + public boolean getStemming() { return stemming; } + + /** + * <p>Include another rule base into this. This <b>transfers ownership</b> + * of the given rule base - it can not be subsequently used for any purpose + * (including accessing).</p> + * + * <p>Each rule base will only be included by the first include directive enountered + * for that rule base.</p> + */ + public void include(RuleBase include) { + productionRules.add(new IncludeDirective(include)); + includedNames.addAll(include.includedNames); + includedNames.add(include.getName()); + } + + /** Rules are order based - they are included recursively depth first */ + private void inlineIncluded() { + // Re-add our own conditions last to - added later overrides + Map<String, NamedCondition> thisConditions=namedConditions; + namedConditions=new LinkedHashMap<>(); + + Set<RuleBase> included=new HashSet<>(); + included.add(this); + for (ListIterator<ProductionRule> i=productionRules.listIterator(); i.hasNext(); ) { + ProductionRule rule=i.next(); + if ( ! (rule instanceof IncludeDirective) ) continue; + + i.remove(); + RuleBase toInclude=((IncludeDirective)rule).getIncludedBase(); + if ( ! included.contains(toInclude)) + toInclude.inlineIn(this,i,included); + } + + namedConditions.putAll(thisConditions); + } + + /** + * Recursively include this and everything it includes into the given rule base. + * Skips bases already included in this. + */ + private void inlineIn(RuleBase receiver,ListIterator<ProductionRule> receiverRules,Set<RuleBase> included) { + if (included.contains(this)) return; + included.add(this); + + for (Iterator<ProductionRule> i=productionRules.iterator(); i.hasNext(); ) { + ProductionRule rule=i.next(); + if (rule instanceof IncludeDirective) + ((IncludeDirective)rule).getIncludedBase().inlineIn(receiver,receiverRules,included); + else + receiverRules.add(rule); + } + + receiver.namedConditions.putAll(namedConditions); + } + + /** Adds a named condition which can be referenced by rules */ + public void addCondition(NamedCondition namedCondition) { + namedConditions.put(namedCondition.getName(),namedCondition); + + Condition condition=namedCondition.getCondition(); + Condition superCondition=findIncludedCondition(namedCondition.getName()); + resolveSuper(condition,superCondition); + } + + private void resolveSuper(Condition condition,Condition superCondition) { + if (condition instanceof SuperCondition) { + ((SuperCondition)condition).setCondition(superCondition); + } + else if (condition instanceof CompositeCondition) { + for (Iterator<Condition> i=((CompositeCondition)condition).conditionIterator(); i.hasNext(); ) { + Condition subCondition=i.next(); + resolveSuper(subCondition,superCondition); + } + } + } + + private Condition findIncludedCondition(String name) { + for (Iterator<ProductionRule> i=productionRules.iterator(); i.hasNext(); ) { + ProductionRule rule=i.next(); + if ( ! (rule instanceof IncludeDirective) ) continue; + + RuleBase included=((IncludeDirective)rule).getIncludedBase(); + NamedCondition condition=included.getCondition(name); + if (condition!=null) return condition.getCondition(); + included.findIncludedCondition(name); + // FIXME: dead code commented out + // if (condition!=null) return condition.getCondition(); + } + return null; + } + + /** + * Returns whether this rule base - directly or through other includes - includes + * the rule base with the given name + */ + public boolean includes(String ruleBaseName) { + return includedNames.contains(ruleBaseName); + } + + /** + * Sets the name of this rule base. + * If this rule base is given to a searcher, it must be removed before the name + * change, and then re-added + */ + public void setName(String name) { + Validator.ensureNotNull("Rule base name",name); + this.name=name; + } + + /** Returns the name of this rule base. This is never null. */ + public String getName() { return name; } + + /** + * Sets the name of the automata file to use as a source of condition matches. + * To reload the automata, call this again. This can be done safely at any + * point by any thread while this rule base is in use. + * + * @throws IllegalArgumentException if the file is not found + */ + public void setAutomataFile(String automataFile) { + if ( ! new File(automataFile).exists()) + throw new IllegalArgumentException("Automata file '" + automataFile + "' " + + "included in " + this + " not found"); + phraseMatcher=new PhraseMatcher(automataFile); + phraseMatcher.setIgnorePluralForm(true); + phraseMatcher.setMatchAll(true); + phraseMatcher.setMatchPhraseItems(true); + phraseMatcher.setMatchSingleItems(true); + setPhraseMatcher(phraseMatcher); + this.automataFileName=automataFile; + } + + /** Returns the name of the automata file used, or null if none */ + public String getAutomataFile() { return automataFileName; } + + /** Sets whether this base is default, the semantics of default is left to the application */ + public void setDefault(boolean isDefault) { this.isDefault=isDefault; } + + /** Returns whether this base is default, the semantics of default is left to the application */ + public boolean isDefault() { return isDefault; } + + /** Thread safely sets the phrase matcher to use in this, or null to not use a phrase matcher */ + public synchronized void setPhraseMatcher(PhraseMatcher matcher) { + if (matcher==null) + this.phraseMatcher = nullPhraseMatcher; + else + this.phraseMatcher = matcher; + } + + /** Thread safely gets the phrase matcher to use in this */ + public synchronized PhraseMatcher getPhraseMatcher() { + return this.phraseMatcher; + } + + /** + * The identifying name of the source of this rule base. + * The absolute file name if this came from a file. + */ + public String getSource() { return source; } + + /** + * Sets the name of the source of this rule base. If this came from a file, + * the source must be set to the absolute file name of the rule base + */ + public void setSource(String source) { this.source = source; } + + /** Returns whether this uses a phrase matcher automata */ + public boolean usesAutomata() { + return usesAutomata || phraseMatcher!=nullPhraseMatcher; + } + + /** + * Set to truew if this uses an automata, even if an automata is not present right now. + * Useful to validate without having automatas available + */ + void setUsesAutomata(boolean usesAutomata) { this.usesAutomata=usesAutomata; } + + // Note that included rules are added though a list iterator, not this */ + public void addRule(ProductionRule productionRule) { + productionRules.add(productionRule); + } + + /** Returns a named condition, or null if no condition with that name exists */ + public NamedCondition getCondition(String name) { + return namedConditions.get(name); + } + + /** + * Call this when all rules are added, before any rule evaluation starts. + * + * @throws RuleBaseException if there is an inconsistency in the rule base. + */ + public void initialize() { + inlineIncluded(); + makeReferences(); + } + + /** + * Analyzes a query over this rule base + * + * @param query the query to analyze + * @param traceLevel the level of tracing to add to the query + * @return the error caused by analyzing the query, or null if there was no error + * If there is an error, this query is destroyed (unusable) + */ + public String analyze(Query query,int traceLevel) { + int queryTraceLevel=query.getTraceLevel(); + if (traceLevel>0 && queryTraceLevel==0) + query.setTraceLevel(1); + + matchAutomata(query,traceLevel); + String error=analyzer.evaluate(query,traceLevel); + + query.setTraceLevel(queryTraceLevel); + return error; + } + + protected void matchAutomata(Query query,int traceLevel) { + List<PhraseMatcher.Phrase> matches=getPhraseMatcher().matchPhrases(query.getModel().getQueryTree().getRoot()); + if (matches==null || matches.size()==0) return; + for (Iterator<PhraseMatcher.Phrase> i=matches.iterator(); i.hasNext(); ) { + PhraseMatcher.Phrase phrase= i.next(); + if (traceLevel>=3) + query.trace("Semantic searcher automata matched " + phrase,false,1); + + annotatePhrase(phrase,query,traceLevel); + } + } + + // Note: When changing this method, change CompatibleRuleBase as well! + // TODO: Values are not added right now + protected void annotatePhrase(PhraseMatcher.Phrase phrase,Query query,int traceLevel) { + for (StringTokenizer tokens=new StringTokenizer(phrase.getData(),"|",false) ; tokens.hasMoreTokens(); ) { + String token=tokens.nextToken(); + int semicolonIndex=token.indexOf(";"); + String annotation=token; + String value=""; + if (semicolonIndex>0) { + annotation=token.substring(0,semicolonIndex); + value=token.substring(semicolonIndex+1); + } + + // Annotate all matched items + phrase.getItem(0).addAnnotation(annotation,phrase); + if (traceLevel>=4) + query.trace(" Annotating '" + phrase + "' as " + annotation + + (value.equals("") ? "" :"=" + value),false,1); + } + } + + private void makeReferences() { + for (Iterator<ProductionRule> i=ruleIterator(); i.hasNext(); ) { + ProductionRule rule=i.next(); + rule.makeReferences(this); + } + for (Iterator<NamedCondition> i=conditionIterator(); i.hasNext(); ) { + NamedCondition namedCondition=i.next(); + namedCondition.getCondition().makeReferences(this); + } + } + + /** Returns the rules in added order */ + public ListIterator<ProductionRule> ruleIterator() { return productionRules.listIterator(); } + + /** Returns the rules unmodifiable */ + public List<ProductionRule> rules() { + return Collections.unmodifiableList(productionRules); + } + + /** Returns the named conditions in added order */ + public Iterator<NamedCondition> conditionIterator() { return namedConditions.values().iterator(); } + + /** Returns true if the given object is a rule base having the same name as this */ + public boolean equals(Object object) { + if ( ! (object instanceof RuleBase)) return false; + return ((RuleBase)object).getName().equals(this.getName()); + } + + public int hashCode() { + return getName().hashCode(); + } + + public String toString() { + return "rule base '" + getName() + "'"; + } + + /** + * Returns a string containing all the rules and conditions of this rule base + * in the form they will be evaluated, with all included rule bases inlined + */ + public String toContentString() { + StringBuilder b=new StringBuilder(); + for (Iterator<ProductionRule> i=productionRules.iterator(); i.hasNext(); ) { + b.append(i.next().toString()); + b.append("\n"); + } + b.append("\n"); + b.append("\n"); + for (Iterator<NamedCondition> i=namedConditions.values().iterator(); i.hasNext(); ) { + b.append(i.next().toString()); + b.append("\n"); + } + return b.toString(); + } + + /** A placeholder for an included rule base until it is inlined */ + private static class IncludeDirective extends ProductionRule { + + private RuleBase includedBase; + + public IncludeDirective(RuleBase ruleBase) { + this.includedBase=ruleBase; + } + + public RuleBase getIncludedBase() { return includedBase; } + + /** Not used */ + public String getSymbol() { return ""; } + + + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBaseException.java b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBaseException.java new file mode 100644 index 00000000000..34c113ceec8 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBaseException.java @@ -0,0 +1,20 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics; + +/** + * Thrown on rule base consistency problems + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +@SuppressWarnings("serial") +public class RuleBaseException extends RuntimeException { + + public RuleBaseException(String message) { + super(message); + } + + public RuleBaseException(String message,Exception cause) { + super(message,cause); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/RuleImporter.java b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleImporter.java new file mode 100644 index 00000000000..1dab816f22b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleImporter.java @@ -0,0 +1,285 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics; + +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.io.Reader; +import java.io.StringReader; +import java.util.Arrays; +import java.util.List; + +import com.yahoo.io.IOUtils; +import com.yahoo.io.reader.NamedReader; +import com.yahoo.prelude.semantics.parser.*; + +/** + * Imports rule bases from various sources. + * + * @author bratseth + */ +// Uses the JavaCC-generated parser to read rule bases. +// This is an intermediate between the parser and the rule base being loaded +// on implementation of some directives, for example, it knows where to find +// rule bases included into others, while neither the rule base or the parser knows. +public class RuleImporter { + + /** + * If this is set, imported rule bases are looked up in this config + * otherwise, they are looked up as files + */ + private SemanticRulesConfig config = null; + + /** + * Ignore requests to read automata files. + * Useful to validate rule bases without having automatas present + */ + private boolean ignoreAutomatas = false; + + /** + * Ignore requests to include files. + * Useful to validate rule bases one by one in config + */ + private boolean ignoreIncludes = false; + + /** Create a rule importer which will read from file */ + public RuleImporter() { + this(null, false); + } + + /** Create a rule importer which will read from a config object */ + public RuleImporter(SemanticRulesConfig config) { + this(config, false); + } + + public RuleImporter(boolean ignoreAutomatas) { + this(null, ignoreAutomatas); + } + + public RuleImporter(boolean ignoreAutomatas, boolean ignoreIncludes) { + this(null, ignoreAutomatas, ignoreIncludes); + } + + public RuleImporter(SemanticRulesConfig config, boolean ignoreAutomatas) { + this.config=config; + this.ignoreAutomatas=ignoreAutomatas; + } + + public RuleImporter(SemanticRulesConfig config, boolean ignoreAutomatas, boolean ignoreIncludes) { + this.config = config; + this.ignoreAutomatas = ignoreAutomatas; + this.ignoreIncludes = ignoreIncludes; + } + + /** + * Imports semantic rules from a file + * + * @param fileName the rule file to use + * @throws java.io.IOException if the file can not be read for some reason + * @throws ParseException if the file does not contain a valid semantic rule set + */ + public RuleBase importFile(String fileName) throws IOException, ParseException { + return importFile(fileName,null); + } + + /** + * Imports semantic rules from a file + * + * @param fileName the rule file to use + * @param automataFile the automata file to use, or null to not use any + * @throws java.io.IOException if the file can not be read for some reason + * @throws ParseException if the file does not contain a valid semantic rule set + */ + public RuleBase importFile(String fileName,String automataFile) throws IOException, ParseException { + return importFile(fileName,automataFile,null); + } + + /** + * Imports semantic rules from a file + * + * @param fileName the rule file to use + * @param automataFile the automata file to use, or null to not use any + * @param ruleBase an existing rule base to import these rules into, or null + * to create a new + * @throws java.io.IOException if the file can not be read for some reason + * @throws ParseException if the file does not contain a valid semantic rule set + */ + public RuleBase importFile(String fileName,String automataFile,RuleBase ruleBase) throws IOException, ParseException { + ruleBase=privateImportFile(fileName,automataFile,ruleBase); + ruleBase.initialize(); + return ruleBase; + } + + public RuleBase privateImportFile(String fileName,String automataFile,RuleBase ruleBase) throws IOException, ParseException { + BufferedReader reader=null; + try { + reader= IOUtils.createReader(fileName, "utf-8"); + File file=new File(fileName); + String absoluteFileName=file.getAbsolutePath(); + if (ruleBase==null) + ruleBase=new RuleBase(); + ruleBase.setName(stripLastName(file.getName())); + privateImportFromReader(reader,absoluteFileName,automataFile,ruleBase); + return ruleBase; + } + finally { + IOUtils.closeReader(reader); + } + } + + /** Imports all the rule files (files ending by "sr") in the given directory */ + public List<RuleBase> importDir(String ruleBaseDir) throws IOException, ParseException { + File ruleBaseDirFile=new File(ruleBaseDir); + if (!ruleBaseDirFile.exists()) + throw new IOException("Rule base dir '" + ruleBaseDirFile.getAbsolutePath() + "' does not exist"); + File[] files=ruleBaseDirFile.listFiles(); + Arrays.sort(files); + List<RuleBase> ruleBases=new java.util.ArrayList<>(); + for (File file : files) { + if (!file.getName().endsWith(".sr")) continue; + RuleBase base = importFile(file.getAbsolutePath()); + ruleBases.add(base); + } + return ruleBases; + } + + /** Read and include a rule base in another */ + public void include(String ruleBaseName,RuleBase ruleBase) throws java.io.IOException, ParseException { + if (ignoreIncludes) return; + RuleBase include; + if (config==null) { + include=privateImportFromDirectory(ruleBaseName,ruleBase); + } + else { + include=privateImportFromConfig(ruleBaseName); + } + ruleBase.include(include); + } + + /** Returns an unitialized rule base */ + private RuleBase privateImportFromDirectory(String ruleBaseName,RuleBase ruleBase) throws IOException, ParseException { + RuleBase include = new RuleBase(); + String includeDir=new File(ruleBase.getSource()).getParentFile().getAbsolutePath(); + if (!ruleBaseName.endsWith(".sr")) + ruleBaseName=ruleBaseName + ".sr"; + File importFile=new File(includeDir,ruleBaseName); + if (!importFile.exists()) + throw new IOException("No file named '" + shortenPath(importFile.getPath()) + "'"); + return privateImportFile(importFile.getPath(),null,include); + } + + /** Returns an unitialized rule base */ + private RuleBase privateImportFromConfig(String ruleBaseName) throws IOException, ParseException { + SemanticRulesConfig.Rulebase ruleBaseConfig=findRuleBaseConfig(config,ruleBaseName); + if (ruleBaseConfig==null) + ruleBaseConfig=findRuleBaseConfig(config,stripLastName(ruleBaseName)); + if (ruleBaseConfig==null) + throw new ParseException("Could not find included rule base '" + ruleBaseName + "'"); + return privateImportConfig(ruleBaseConfig); + } + + private SemanticRulesConfig.Rulebase findRuleBaseConfig(SemanticRulesConfig config,String ruleBaseName) { + for (Object aRulebase : config.rulebase()) { + SemanticRulesConfig.Rulebase ruleBaseConfig = (SemanticRulesConfig.Rulebase) aRulebase; + if (ruleBaseConfig.name().equals(ruleBaseName)) + return ruleBaseConfig; + } + return null; + } + + public void setAutomata(RuleBase base,String automata) { + if (ignoreAutomatas) + base.setUsesAutomata(true); // Stop it from failing on automata condition references + else + base.setAutomataFile(automata); + } + + static String stripLastName(String fileName) { + int lastDotIndex=fileName.lastIndexOf("."); + if (lastDotIndex<0) return fileName; + return fileName.substring(0,lastDotIndex); + } + + public RuleBase importString(String string, String automataFile) throws IOException, ParseException { + return importString(string, automataFile, null, null); + } + + public RuleBase importString(String string, String automataFile, String sourceName) throws IOException, ParseException { + return importString(string, automataFile, sourceName, null); + } + + public RuleBase importString(String string, String automataFile, RuleBase ruleBase) throws IOException, ParseException { + return importString(string, automataFile, null, ruleBase); + } + + public RuleBase importString(String string, String automataFile, String sourceName, RuleBase ruleBase) throws IOException, ParseException { + return importFromReader(new StringReader(string), sourceName, automataFile, ruleBase); + } + + public RuleBase importConfig(SemanticRulesConfig.Rulebase ruleBaseConfig) throws IOException, ParseException { + RuleBase ruleBase=privateImportConfig(ruleBaseConfig); + ruleBase.initialize(); + return ruleBase; + } + + /** Imports an unitialized rule base */ + public RuleBase privateImportConfig(SemanticRulesConfig.Rulebase ruleBaseConfig) throws IOException, ParseException { + if (config==null) throw new IllegalStateException("Must initialize with config if importing from config"); + RuleBase ruleBase = new RuleBase(); + ruleBase.setName(ruleBaseConfig.name()); + return privateImportFromReader(new StringReader(ruleBaseConfig.rules()),"semantic-rules.cfg", + ruleBaseConfig.automata(),ruleBase); + } + + public RuleBase importFromReader(Reader reader,String sourceInfo,String automataFile) throws ParseException { + return importFromReader(reader,sourceInfo,automataFile,null); + } + + /** + * Imports rules from a reader + * + * @param reader the reader containing rules on the proper syntax + * @param sourceName a string describing the source of the rules used for error messages + * @param ruleBase an existing rule base to import the rules into, or null to create a new one + * @return the rule base containing the rules added from the reader + * @throws ParseException if the reader contains illegal rule syntax + */ + public RuleBase importFromReader(Reader reader, String sourceName, String automataFile, RuleBase ruleBase) throws ParseException { + ruleBase=privateImportFromReader(reader, sourceName, automataFile,ruleBase); + ruleBase.initialize(); + return ruleBase; + } + + /** Returns an unitialized rule base */ + public RuleBase privateImportFromReader(Reader reader, String sourceName, String automataFile, RuleBase ruleBase) throws ParseException { + try { + if (ruleBase==null) { + ruleBase=new RuleBase(); + if (sourceName == null) + sourceName = "anonymous"; + ruleBase.setName(sourceName); + } + ruleBase.setSource(sourceName.replace('\\','/')); + new SemanticsParser(reader).semanticRules(ruleBase, this); + if (automataFile!=null && !automataFile.isEmpty()) + ruleBase.setAutomataFile(automataFile.replace('\\','/')); + return ruleBase; + } catch (Throwable t) { // also catches token mgr errors + ParseException p=new ParseException("Could not parse '" + shortenPath(sourceName) + "'"); + p.initCause(t); + throw p; + } + } + + /** + * Snips what's in from of rules/ if "rules/" is present in the string + * to avoid displaying details about where application content is copied + * (if rules/ is present, these rules are read from an applicatino package) + */ + private static String shortenPath(String path) { + int rulesIndex=path.indexOf("rules/"); + if (rulesIndex<0) return path; + return path.substring(rulesIndex); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/SemanticSearcher.java b/container-search/src/main/java/com/yahoo/prelude/semantics/SemanticSearcher.java new file mode 100644 index 00000000000..f4858bbb9e1 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/SemanticSearcher.java @@ -0,0 +1,127 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics; + +import com.google.inject.Inject; +import com.yahoo.component.chain.dependencies.After; +import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.prelude.ConfigurationException; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.processing.request.CompoundName; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.PhaseNames; + +import java.util.*; + +import static com.yahoo.prelude.querytransform.IndexCombinatorSearcher.MIXED_RECALL_REWRITE; +import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING; + +/** + * Analyzes query semantics and enhances the query to reflect findings + * + * @author bratseth + */ +@After(PhaseNames.RAW_QUERY) +@Before({PhaseNames.TRANSFORMED_QUERY, STEMMING, MIXED_RECALL_REWRITE}) +public class SemanticSearcher extends Searcher { + + private static final CompoundName rulesRulebase=new CompoundName("rules.rulebase"); + private static final CompoundName rulesOff=new CompoundName("rules.off"); + private static final CompoundName tracelevelRules=new CompoundName("tracelevel.rules"); + + /** The default rule base of this */ + private RuleBase defaultRuleBase; + + /** All rule bases of this (always including the default) */ + private final Map<String, RuleBase> ruleBases = new java.util.HashMap<>(); + + /** Creates a semantic searcher using the given default rule base */ + public SemanticSearcher(RuleBase ruleBase) { + this(Collections.singletonList(ruleBase)); + defaultRuleBase = ruleBase; + } + + public SemanticSearcher(RuleBase ... ruleBases) { + this(Arrays.asList(ruleBases)); + } + + @Inject + public SemanticSearcher(SemanticRulesConfig config) { + this(toList(config)); + } + + public SemanticSearcher(List<RuleBase> ruleBases) { + for (RuleBase ruleBase : ruleBases) { + if (ruleBase.isDefault()) + defaultRuleBase = ruleBase; + this.ruleBases.put(ruleBase.getName(),ruleBase); + } + } + + private static List<RuleBase> toList(SemanticRulesConfig config) { + try { + RuleImporter ruleImporter = new RuleImporter(config); + List<RuleBase> ruleBaseList = new java.util.ArrayList<>(); + for (SemanticRulesConfig.Rulebase ruleBaseConfig : config.rulebase()) { + RuleBase ruleBase = ruleImporter.importConfig(ruleBaseConfig); + if (ruleBaseConfig.isdefault()) + ruleBase.setDefault(true); + ruleBaseList.add(ruleBase); + } + return ruleBaseList; + } + catch (Exception e) { + throw new ConfigurationException("Failed configuring semantic rules",e); + } + } + + @Override + public Result search(Query query, Execution execution) { + if (query.properties().getBoolean(rulesOff)) + return execution.search(query); + + int traceLevel= query.properties().getInteger(tracelevelRules, query.getTraceLevel()-2); + if (traceLevel<0) traceLevel=0; + RuleBase ruleBase=resolveRuleBase(query); + if (ruleBase==null) + return execution.search(query); + + String error=ruleBase.analyze(query,traceLevel); + if (error!=null) + return handleError(ruleBase, query,error); + else + return execution.search(query); + } + + private RuleBase resolveRuleBase(Query query) { + String ruleBaseName=query.properties().getString(rulesRulebase); + if (ruleBaseName==null || ruleBaseName.equals("")) return getDefaultRuleBase(); + RuleBase ruleBase=getRuleBase(ruleBaseName); + if (ruleBase==null) + throw new RuleBaseException("Requested rule base '" + ruleBaseName + "' does not exist"); + return ruleBase; + } + + private Result handleError(RuleBase ruleBase,Query query,String error) { + String message="Evaluation of query '" + query.getModel().getQueryTree() + + "' over '" + ruleBase + "' caused the invalid query '" + + query.getModel().getQueryTree().getRoot() + "': " + error; + getLogger().warning(message); + return new Result(query,ErrorMessage.createInvalidQueryTransformation(message)); + } + + /** Returns the default rule base */ + public RuleBase getDefaultRuleBase() { return defaultRuleBase; } + + /** + * Returns the rule base of the given name, or null if none. + * The part of the name following the last dot (if any) is removed before lookup. + */ + public RuleBase getRuleBase(String ruleBaseName) { + ruleBaseName=RuleImporter.stripLastName(ruleBaseName); + return ruleBases.get(ruleBaseName); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/RuleBaseBenchmark.java b/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/RuleBaseBenchmark.java new file mode 100644 index 00000000000..b04e693089a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/RuleBaseBenchmark.java @@ -0,0 +1,86 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.benchmark; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Date; +import java.util.Iterator; + +import com.yahoo.search.Query; +import com.yahoo.prelude.semantics.RuleBase; +import com.yahoo.prelude.semantics.RuleImporter; +import com.yahoo.prelude.semantics.parser.ParseException; + +public class RuleBaseBenchmark { + + public void benchmark(String ruleBaseFile, String queryFile, int iterations) + throws IOException, ParseException { + + String fsaFile = null; + if(ruleBaseFile.endsWith(".sr")){ + fsaFile = ruleBaseFile.substring(0,ruleBaseFile.length()-3) + ".fsa"; + File fsa = new File(fsaFile); + if(!fsa.exists()){ + fsaFile = null; + } + } + RuleBase ruleBase = new RuleImporter().importFile(ruleBaseFile,fsaFile); + ArrayList<String> queries = new ArrayList<>(); + BufferedReader reader = new BufferedReader(new FileReader(queryFile)); + String line; + while((line=reader.readLine())!=null){ + queries.add(line); + } + Date start = new Date(); + for (int i=0;i<iterations;i++){ + for (Iterator<String> iter = queries.iterator(); iter.hasNext(); ){ + String queryString = iter.next(); + Query query = new Query("?query="+queryString); + ruleBase.analyze(query,0); + } + } + Date end = new Date(); + long elapsed = end.getTime()-start.getTime(); + System.out.print("BENCHMARK: rulebase=" + ruleBaseFile + + "\n fsa=" + fsaFile + + "\n queries=" + queryFile + + "\n iterations=" + iterations + + "\n elapsed=" + elapsed + "ms\n"); + } + + + public static void main(String[] args) { + if(args.length<3){ + System.out.println("USAGE: RuleBaseBenchmark ruleBaseFile queryFile iterations"); + System.exit(1); + } + + try { + new RuleBaseBenchmark().benchmark(args[0],args[1],Integer.parseInt(args[2])); + } + catch (Exception e) { + System.out.println("ERROR: " + collectMessage(e)); + //e.printStackTrace(); + System.exit(1); + } + } + + private static String collectMessage(Throwable e) { + if (e.getCause()==null) + return messageOrName(e); + else + return messageOrName(e) + ": " + collectMessage(e.getCause()); + } + + private static String messageOrName(Throwable e) { + if (e.getMessage()!=null) + return e.getMessage(); + else + return e.getClass().getName(); + } + + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/queries b/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/queries new file mode 100644 index 00000000000..3feebfb4698 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/queries @@ -0,0 +1,5 @@ +shop in geary street +foo +bar +aardwark +to be or not to be that is the question diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/rules.sr b/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/rules.sr new file mode 100644 index 00000000000..020699ba7cb --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/rules.sr @@ -0,0 +1,62 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +# Local use case + +[listing] [preposition] [place] -> listing:[listing] place:[place]; + +[listing] :- restaurant, shop, cafe, hotel; + +[preposition] :- in, at, near; + +[place] :- [street] [city], [street]; + +[street] :- geary street, geary; +[city] :- san francisco; + +# Shopping use case + +[brand] -> brand:[brand]; +[category] -> category:[category]; + +[brand] :- sony, dell; # Refer to automata later +[category] :- digital camera, camera, phone; # Ditto + +# Travel use case, note how explicit reference name overrides named condition as reference name + +# [from:place] [to:place] -> from:[from] to:[to] + +# Answers use case + +# why is [noun] ... [adjective] +> ?about:[noun] + +# Adding rule using the default query mode (and/or) + +[foobar] +> foobar:[foobar]; + +[foobar] :- foo, bar; + +# Adding rank rule + +[word] +> $foobar:[word]; + +[word] :- aardwark, word; + +# Literal production + +lotr -> lord of the rings; + +# Adding a negative + +java +> -coffee; + +# Adding another negative +# TODO: Term types in conditions +# java -coffee +> -island + +# "Stopwords" + +be -> ; +the -> ; + +[stopword] -> ; + +[stopword] :- to, or, not; diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/config/RuleConfigDeriver.java b/container-search/src/main/java/com/yahoo/prelude/semantics/config/RuleConfigDeriver.java new file mode 100644 index 00000000000..b0e50727773 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/config/RuleConfigDeriver.java @@ -0,0 +1,133 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.config; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.IOException; +import java.io.Writer; +import java.util.ArrayList; +import java.util.List; + +import com.yahoo.io.IOUtils; +import com.yahoo.io.reader.NamedReader; +import com.yahoo.prelude.semantics.RuleBase; +import com.yahoo.prelude.semantics.RuleImporter; +import com.yahoo.prelude.semantics.parser.ParseException; + +/** + * Reads the rule base files in the given directory and creates a + * semantic-rules.cfg file containing those rule bases in the given output dir. + * + * @author bratseth + */ +// Note: This is not used by the config model any more and can be removed +public class RuleConfigDeriver { + + public void derive(String ruleBaseDir, String outputDir) throws IOException, ParseException { + // Validate output dir + File outputDirFile=new File(outputDir); + if (!outputDirFile.exists()) + throw new IOException("Output dir " + outputDirFile.getAbsolutePath() + + " does not exist"); + + List<RuleBase> ruleBases = derive(ruleBaseDir); + // Convert file to config + exportConfig(ruleBases,outputDir); + } + + public List<RuleBase> derive(String ruleBaseDir) throws IOException, ParseException { + // Validate the rule bases + boolean ignoreAutomatas=true; // Don't fail if they are not available in config + List<RuleBase> ruleBases = new RuleImporter(ignoreAutomatas).importDir(ruleBaseDir); + ensureZeroOrOneDefault(ruleBases); + return ruleBases; + } + + public List<RuleBase> derive(List<NamedReader> readers) throws IOException, ParseException { + // Validate the rule bases + boolean ignoreAutomatas = true; // Don't fail if they are not available in config + List<RuleBase> ruleBases = new ArrayList<>(); + RuleImporter importer = new RuleImporter(ignoreAutomatas); + for (NamedReader reader : readers) { + ruleBases.add(importer.importFromReader(reader, reader.getName(), null)); + } + ensureZeroOrOneDefault(ruleBases); + return ruleBases; + } + + private void ensureZeroOrOneDefault(List<RuleBase> ruleBases) throws ParseException { + String defaultName=null; + for (RuleBase ruleBase : ruleBases) { + if (defaultName != null && ruleBase.isDefault()) + throw new ParseException("Both '" + defaultName + "' and '" + ruleBase.getName() + + "' is marked as default, there can only be one"); + if (ruleBase.isDefault()) + defaultName = ruleBase.getName(); + } + } + + private void exportConfig(List<RuleBase> ruleBases, String outputDir) + throws IOException { + BufferedWriter writer=null; + try { + writer=IOUtils.createWriter(outputDir + "/semantic-rules.cfg","utf-8",false); + writer.write("rulebase[" + ruleBases.size() + "]\n"); + for (int i=0; i<ruleBases.size(); i++) { + RuleBase ruleBase= ruleBases.get(i); + writer.write("rulebase[" + i + "].name \"" + ruleBase.getName() + "\"\n"); + writer.write("rulebase[" + i + "].rules \""); + writeRuleBaseAsLine(ruleBase.getSource(),writer); + writer.write("\"\n"); + } + } + finally { + IOUtils.closeWriter(writer); + } + } + + private void writeRuleBaseAsLine(String file, Writer writer) throws IOException { + BufferedReader reader=null; + try { + reader=IOUtils.createReader(file,"utf-8"); + String line; + while (null!=(line=reader.readLine())) { + writer.write(line); + writer.write("\\n"); + } + } + finally { + IOUtils.closeReader(reader); + } + } + + public static void main(String[] args) { + if(args.length<2){ + System.out.println("USAGE: RuleConfigDeriver ruleBaseDir outputDir"); + System.exit(1); + } + + try { + new RuleConfigDeriver().derive(args[0],args[1]); + } + catch (Exception e) { + System.out.println("ERROR: " + collectMessage(e)); + System.exit(1); + } + } + + private static String collectMessage(Throwable e) { + if (e.getCause()==null) + return messageOrName(e); + else + return messageOrName(e) + ": " + collectMessage(e.getCause()); + } + + private static String messageOrName(Throwable e) { + if (e.getMessage()!=null) + return e.getMessage(); + else + return e.getClass().getName(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/config/package-info.java b/container-search/src/main/java/com/yahoo/prelude/semantics/config/package-info.java new file mode 100644 index 00000000000..6b2801d10d7 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/config/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.prelude.semantics.config; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Choicepoint.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Choicepoint.java new file mode 100644 index 00000000000..f2650fef83a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Choicepoint.java @@ -0,0 +1,126 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +import com.yahoo.prelude.semantics.rule.Condition; + +/** + * A choice point in an rule evaluation. A choicepoint is open if there are other choices to make at the point, + * closed if there are no further choices. In addition it contains enough information to enable + * the rule evaluation to backtrack to this point + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class Choicepoint { + + /** Whether there are (or may be) open choices to explore at this choicepoint yet */ + private boolean open=true; + + /** The number of tries made at this choice point */ + private int tries=0; + + /** The condition creating this choicepoint */ + private Condition condition; + + /** The state this choice point can be rolled back to */ + private State state; + + private RuleEvaluation owner; + + public Choicepoint(RuleEvaluation e, Condition condition) { + this.owner=e; + state=new State(this,e); + this.condition=condition; + if (e.getTraceLevel()>=5) + e.trace(5,"Added choice point at " + e.currentItem() + " for '" + condition + "'"); + } + + /** Returns the condition which created this choice point */ + public Condition getCondition() { return condition; } + + /** Returns wether there are (or may be) open choices to explore at this choicepoint yet */ + public boolean isOpen() { return open; } + + /** Marks this choice point as closed (!open) - there are no further choices to explore */ + public void close() { this.open=false; } + + /** Returns the number open tries made at this point */ + public int tryCount() { return tries; } + + /** Registers that another try has been made */ + public void addTry() { + tries++; + } + + /** + * Backtrack to the evaluation state at the point where this choicepoint were instantiated. + */ + public void backtrack() { + state.backtrack(owner); + if (owner.getTraceLevel()>=5) + owner.trace(5,"Backtracked to " + owner.currentItem() + " for '" + condition + "'"); + } + + /** Backtracks the position only, not matches */ + public void backtrackPosition() { + state.backtrackPosition(owner); + } + + /** + * Updates the state of this choice point to the current state of its evaluation + */ + public void updateState() { + state.updateState(owner); + } + + /** Returns the state of this choice point */ + public State getState() { return state; } + + /** The state of this choicepoint */ + public final static class State { + + private int position=0; + + private int referencedMatchCount=0; + + private int nonreferencedMatchCount=0; + + public State(Choicepoint choicepoint,RuleEvaluation evaluation) { + updateState(evaluation); + } + + public void updateState(RuleEvaluation evaluation) { + position=evaluation.currentPosition(); + referencedMatchCount=evaluation.getReferencedMatchCount(); + nonreferencedMatchCount=evaluation.getNonreferencedMatchCount(); + } + + /** Backtrack to the evaluation state at the point where this choicepoint were instantiated */ + public void backtrack(RuleEvaluation e) { + backtrackPosition(e); + + // Is this check masking errors? + if (e.referencedMatches().size()>referencedMatchCount) + e.referencedMatches().subList(referencedMatchCount, + e.referencedMatches().size()) + .clear(); + // Is this check masking errors? + if (e.nonreferencedMatches().size()>nonreferencedMatchCount) + e.nonreferencedMatches().subList(nonreferencedMatchCount, + e.nonreferencedMatches().size()) + .clear(); + } + + public void backtrackPosition(RuleEvaluation e) { + e.setPosition(position); + } + + public int getPosition() { return position; } + + public int getReferencedMatchCount() { return referencedMatchCount; } + + public int getNonreferencedMatchCount() { return nonreferencedMatchCount; } + + } + +} + diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java new file mode 100644 index 00000000000..fe3543fc655 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Evaluation.java @@ -0,0 +1,453 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +import com.yahoo.prelude.query.*; +import com.yahoo.search.Query; +import com.yahoo.search.query.QueryTree; + +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +/** + * An evaluation of a query over a rule base. There is one evaluation for each evaluation + * of one query over one rule base. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class Evaluation { + + // TODO: Retrofit query into the namespace construct + private ParameterNameSpace parameterNameSpace=null; + + private Query query; + + /** The current index into the flattened item list */ + private int currentIndex = 0; + + /** Query items flattened to a list iterator */ + private List<FlattenedItem> flattenedItems; + + /** The rule evaluation context, can be reset once the rule is evaluated */ + private RuleEvaluation ruleEvaluation; + + /** + * The amount of context information to collect about this evaluation. + * 0 means no context information, higher numbers means more context information. + */ + private int traceLevel=0; + + private String traceIndentation=""; + + /** See RuleEngine */ + private Set<Integer> matchDigests=new HashSet<>(); + + /** The previous size of this query (see RuleEngine), set on matches only */ + private int previousQuerySize=0; + + /** Should we allow stemmed matches? */ + private boolean stemming=true; + + public Evaluation(Query query) { + this(query,0); + } + + /** + * Creates a new evaluation + * + * @param query the query this evaluation is for + * @param traceLevel the amount of tracing to do + */ + public Evaluation(Query query,int traceLevel) { + this.query=query; + this.traceLevel=traceLevel; + reset(); + ruleEvaluation=new RuleEvaluation(this); + } + + /** Resets the item iterator to point to the first item */ + public void reset() { + if (flattenedItems!=null) + previousQuerySize=flattenedItems.size(); + currentIndex=0; + traceIndentation=""; + flattenedItems=new java.util.ArrayList<>(); + flatten(query.getModel().getQueryTree().getRoot(),0,flattenedItems); + } + + /** Sets the item iterator to point to the last item: */ + public void setToLast() { // PGA + if (flattenedItems!=null) + currentIndex = flattenedItems.size()-1; + else + currentIndex = -1; + } + + /** Resets the item iterator to point to the last item: */ + public void resetToLast() { // PGA + if (flattenedItems!=null) + previousQuerySize=flattenedItems.size(); + traceIndentation=""; + flattenedItems=new java.util.ArrayList<>(); + flatten(query.getModel().getQueryTree().getRoot(),0,flattenedItems); + currentIndex = flattenedItems.size()-1; + } + + public Query getQuery() { return query; } + + /** Set to true to enable stemmed matches. True by default */ + public void setStemming(boolean stemming) { this.stemming=stemming; } + + /** Returns whether stemmed matches are allowed. True by default */ + public boolean getStemming() { return stemming; } + + void addMatchDigest(int digest) { matchDigests.add(new Integer(digest)); } + + boolean hasMatchDigest(int matchDigest) { return matchDigests.contains(new Integer(matchDigest)); } + + int getPreviousQuerySize() { return previousQuerySize; } + + public int getQuerySize() { return flattenedItems.size(); } + + /** Advances to the next item as current item */ + public void next() { + currentIndex++; + } + + public void previous() {//PGA + currentIndex--; + } + + + /** Returns the current item, or null if there is no more elements */ + public FlattenedItem currentItem() { + if ( (currentIndex>=flattenedItems.size()) || (currentIndex<0)) return null; //PGA + return flattenedItems.get(currentIndex); + } + + /** Returns a fresh rule evaluation starting at the current position of this */ + public RuleEvaluation freshRuleEvaluation() { + ruleEvaluation.initialize(flattenedItems,currentIndex); + return ruleEvaluation; + } + + /** Adds an item to the query being evaluated in a way consistent with the query type */ + // TODO: Add this functionality to Query? + public void addItem(Item item, TermType termType) { + Item root= query.getModel().getQueryTree().getRoot(); + if (root==null) + query.getModel().getQueryTree().setRoot(item); + else + query.getModel().getQueryTree().setRoot(combineItems(root,item,termType)); + } + + /** Removes this item */ + public void removeItem(Item item) { + item.getParent().removeItem(item); + } + + /** + * Removes this item by identity to ensure we remove the right one if there are multiple + * equal items + */ + public void removeItemByIdentity(Item item) { + int position=findIndexByIdentity(item); + if (position>=0) + item.getParent().removeItem(position); + else + item.getParent().removeItem(item); // Fallback to removeField by equal() + } + + private int findIndexByIdentity(Item item) { + int position=0; + for (Iterator<Item> i=item.getParent().getItemIterator(); i.hasNext(); ) { + Item child=i.next(); + if (item==child) { + return position; + } + position++; + } + return -1; + } + + /** Removes an item, prefers the one at/close to the given position if there are multiple ones */ + public void removeItem(int position,Item item) { + Item removeCandidate=item.getParent().getItem(position); + if (removeCandidate.equals(item)) // Remove based on position + item.getParent().removeItem(position); + else + item.getParent().removeItem(item); // Otherwise, just removeField any such item + } + + /** + * Convert segment items into their mutable counterpart, do not update query tree. + * Non-segment items are returned directly. + * + * @return a mutable CompositeItem instance + */ + private CompositeItem convertSegmentItem(CompositeItem item) { + if (!(item instanceof SegmentItem)) { + return item; + } + CompositeItem converted = null; + if (item instanceof AndSegmentItem) { + converted = new AndItem(); + } else if (item instanceof PhraseSegmentItem) { + PhraseItem p = new PhraseItem(); + PhraseSegmentItem old = (PhraseSegmentItem) item; + p.setIndexName(old.getIndexName()); + converted = p; + } else { + // TODO: Do something else than nothing for unknowns? + return item; + } + for (Iterator<Item> i = item.getItemIterator(); i.hasNext();) { + converted.addItem(i.next()); + } + return converted; + } + + + private void insertMutableInTree(CompositeItem mutable, CompositeItem original, CompositeItem parent) { + if (parent == null) { + query.getModel().getQueryTree().setRoot(mutable); + + } else { + int parentsIndex = parent.getItemIndex(original); + parent.setItem(parentsIndex, mutable); + } + } + + /** + * Convert The parent of this item into a mutable item. Note, this + * may change the shape of the query tree. (E.g. if the original parent is a + * segment phrase, and the original parent's parent is a phrase, the terms + * from the parent will be moved to the parent's parent.) + * + * @param item The item for which the parent shall be made mutable + */ + public void makeParentMutable(TermItem item) { + CompositeItem parent = item.getParent(); + CompositeItem mutable = convertSegmentItem(parent); + if (parent != mutable) { + CompositeItem parentsParent = parent.getParent(); + insertMutableInTree(mutable, parent, parentsParent); + } + } + + /** + * Inserts an item to the query being evaluated in a way consistent with the query type + * + * @param item the item to insert + * @param parent the parent of this item, or null to set the root + * @param index the index at which to insert this into the parent + * @param desiredParentType the desired type of the composite which contains item when this returns + */ + public void insertItem(Item item, CompositeItem parent, int index, TermType desiredParentType) { + if (parent==null) { // TODO: Accommodate for termtype in this case too + query.getModel().getQueryTree().setRoot(item); + + return; + } + + if (parent.getItemCount()>0 && parent instanceof QueryTree && parent.getItem(0) instanceof CompositeItem) { + // combine with the existing root instead + parent=(CompositeItem)parent.getItem(0); + if (index==1) { // that means adding it after the existing root + index=parent.getItemCount(); + } + } + + if (( desiredParentType==TermType.DEFAULT || desiredParentType.hasItemClass(parent.getClass()) ) + && equalIndexNameIfParentIsPhrase(item,parent)) { + addItem(parent,index,item,desiredParentType); + } + else { + insertIncompatibleItem(item,parent,query,desiredParentType); + } + } + + private void addItem(CompositeItem parent,int index,Item item,TermType desiredParentType) { + if (parent instanceof NotItem) { + if (index==0 && parent.getItem(0)==null) { // Case 1: The current positive is null and we are adding a positive + parent.setItem(0,item); + } + else if (index<=1 && !(parent.getItem(0) instanceof CompositeItem)) { // Case 2: The positive must become a composite + CompositeItem positiveComposite=(CompositeItem)desiredParentType.createItemClass(); + positiveComposite.addItem(parent.getItem(0)); + positiveComposite.addItem(index,item); + parent.setItem(0,positiveComposite); + } + else if (parent.getItem(0)!=null && parent.getItem(0) instanceof CompositeItem // Case 3: Add to the positive composite + && index<=((CompositeItem)parent.getItem(0)).getItemCount()) { + ((CompositeItem)parent.getItem(0)).addItem(index,item); + } + else { // Case 4: Add negative + parent.addItem(index,item); + } + } + else if (parent.getItemCount()>0 && parent instanceof QueryTree) { + CompositeItem composite=(CompositeItem)desiredParentType.createItemClass(); + composite.addItem(parent.getItem(0)); + composite.addItem(index,item); + parent.setItem(0,composite); + } + else { + parent.addItem(index,item); + } + } + + /** A special purpose check used to simplify the above */ + private boolean equalIndexNameIfParentIsPhrase(Item item,CompositeItem parent) { + if ( ! (parent instanceof PhraseItem)) return true; + if ( ! (item instanceof IndexedItem)) return true; + + return ((PhraseItem)parent).getIndexName().equals(((IndexedItem)item).getIndexName()); + } + + private void insertIncompatibleItem(Item item,CompositeItem parent,Query query,TermType desiredParentType) { + // Create new parent + CompositeItem newParent; + if (desiredParentType==TermType.DEFAULT) + newParent=new AndItem(); + else + newParent=(CompositeItem)desiredParentType.createItemClass(); + + // Save previous parent parent + CompositeItem parentsParent=parent.getParent(); + + // Add items to new parent + newParent.addItem(parent); + newParent.addItem(item); + + // Insert new parent as root or child of old parents parent + if (parentsParent==null) { + query.getModel().getQueryTree().setRoot(newParent); + + } + else { + int parentIndex=0; + if (parentsParent!=null) { + parentIndex=parentsParent.getItemIndex(parent); + } + parentsParent.setItem(parentIndex,newParent); + } + } + + private Item combineItems(Item first,Item second,TermType termType) { + if (first instanceof NullItem) { + return second; + } else if (first instanceof NotItem) { + NotItem notItem=(NotItem)first; + if (termType==TermType.NOT) { + notItem.addNegativeItem(second); + } + else { + Item newPositive=combineItems(notItem.getPositiveItem(),second,termType); + notItem.setPositiveItem(newPositive); + } + return notItem; + } + else if (first instanceof CompositeItem) { + CompositeItem composite=(CompositeItem)first; + CompositeItem combined=createType(termType); + if (combined.getClass().equals(composite.getClass())) { + composite.addItem(second); + return composite; + } + else { + combined.addItem(first); + combined.addItem(second); // Also works for nots + return combined; + } + } + else if (first instanceof TermItem) { + CompositeItem combined=createType(termType); + combined.addItem(first); + combined.addItem(second); + return combined; + } + else { + throw new RuntimeException("Don't know how to add an item to type " + first.getClass()); + } + } + + private CompositeItem createType(TermType termType) { + if (termType==TermType.DEFAULT) { + if (query.getModel().getType().equals(Query.Type.ANY)) + return new OrItem(); + else + return new AndItem(); + } + else if (termType==TermType.AND) { + return new AndItem(); + } + else if (termType==TermType.OR) { + return new OrItem(); + } + else if (termType==TermType.RANK) { + return new RankItem(); + } + else if (termType==TermType.NOT) { + return new NotItem(); + } + throw new IllegalArgumentException("Programing error, this method should be updated with add in RankType"); + } + + private void flatten(Item item,int position,List<FlattenedItem> toList) { + if (item==null) return; + if (item.isFilter()) return; + + if (item instanceof TermItem) { // make eligible for matching + toList.add(new FlattenedItem((TermItem)item,position)); + return; + } + + if (item instanceof CompositeItem) { // make children eligible for matching + CompositeItem composite=(CompositeItem)item; + int childPosition=0; + for (Iterator<?> i=composite.getItemIterator(); i.hasNext(); ) { + flatten((Item)i.next(),childPosition++,toList); + } + } + + // other terms are unmatchable + } + + public void trace(int level,String message) { + if (level>getTraceLevel()) return; + query.trace(traceIndentation + message,false,1); + } + + /** + * The amount of context information to collect about this evaluation. + * 0 (the default) means no context information, higher numbers means + * more context information. + */ + public int getTraceLevel() { return traceLevel; } + + public void indentTrace() { + traceIndentation=traceIndentation + " "; + } + + public void unindentTrace() { + if (traceIndentation.length()<3) + traceIndentation=""; + else + traceIndentation=traceIndentation.substring(3); + } + + public NameSpace getNameSpace(String nameSpaceName) { + if (nameSpaceName.equals("parameter")) { + if (parameterNameSpace==null) + parameterNameSpace=new ParameterNameSpace(); + return parameterNameSpace; + } + + // That's all for now + throw new RuntimeException("Unknown namespace '" + nameSpaceName + "'"); + + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/EvaluationException.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/EvaluationException.java new file mode 100644 index 00000000000..00a66206b46 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/EvaluationException.java @@ -0,0 +1,15 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +/** + * Thrown on semantic exceptions on evaluation over a rule base + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +@SuppressWarnings("serial") +public class EvaluationException extends RuntimeException { + + public EvaluationException(String message) { + super(message); + } +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/FlattenedItem.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/FlattenedItem.java new file mode 100644 index 00000000000..1631d60df6b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/FlattenedItem.java @@ -0,0 +1,31 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +import com.yahoo.prelude.query.TermItem; + +/** + * An item which knows its position in its parent + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class FlattenedItem { + + private TermItem item; + + /** The position of this item in its parent */ + private int position; + + public FlattenedItem(TermItem item,int position) { + this.item=item; + this.position=position; + } + + public TermItem getItem() { return item; } + + public int getPosition() { return position; } + + public String toString() { + return position + ":" + item; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Match.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Match.java new file mode 100644 index 00000000000..fc7aec62412 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/Match.java @@ -0,0 +1,80 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +import com.yahoo.prelude.query.CompositeItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.TermItem; +import com.yahoo.prelude.query.WordItem; + +/** + * A match + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class Match { + + /** The start position of this match */ + private int position; + + private TermItem item; + + /** The string to replace the match by, usually item.getIndexedString() */ + private String replaceValue; + + /** The parent of the matched item */ + private CompositeItem parent=null; + + /** + * Creates a match + * + * @param item the match to add + * @param replaceValue the string to replace this match by, usually the item.getIndexedString() + * which is what the replace value will be if it is passed as null here + */ + public Match(FlattenedItem item,String replaceValue) { + this.item=item.getItem(); + if (replaceValue==null) + this.replaceValue=item.getItem().getIndexedString(); + else + this.replaceValue=replaceValue; + this.parent=this.item.getParent(); + this.position=item.getPosition(); + } + + public int getPosition() { return position; } + + public TermItem getItem() { return item; } + + public String getReplaceValue() { + return replaceValue; + } + + /** + * Returns the parent in which the item was matched, or null if the item was root. + * Note that the item may subsequently have been removed, so it does not necessarily + * have this parent + */ + public CompositeItem getParent() { return parent; } + + public int hashCode() { + return + 17*item.getIndexedString().hashCode()+ + 33*item.getIndexName().hashCode(); + } + + /** Returns a new item representing this match */ + public Item toItem(String label) { + return new WordItem(getReplaceValue(),label); + } + + public boolean equals(Object o) { + if (! (o instanceof Match)) return false; + + Match other=(Match)o; + if (other.position!=position) return false; + if (!other.item.equals(item)) return false; + + return true; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/NameSpace.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/NameSpace.java new file mode 100644 index 00000000000..76eea63bd68 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/NameSpace.java @@ -0,0 +1,16 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +/** + * A collection of facts (addressed by namespace.fact in conditions) + * over which we may write conditions + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public abstract class NameSpace { + + public abstract boolean matches(String term,RuleEvaluation e); + + // TODO: public abstract void produce(RuleEvaluation e); + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/ParameterNameSpace.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/ParameterNameSpace.java new file mode 100644 index 00000000000..35427250511 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/ParameterNameSpace.java @@ -0,0 +1,21 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +import com.yahoo.search.Query; + +/** + * A name space representing the (http) parameters following this query + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class ParameterNameSpace extends NameSpace { + + public boolean matches(String term,RuleEvaluation e) { + Query query=e.getEvaluation().getQuery(); + String value=query.properties().getString(term); + if (value==null) return false; + e.setValue(value); + return true; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/ReferencedMatches.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/ReferencedMatches.java new file mode 100644 index 00000000000..cb7d2af8d19 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/ReferencedMatches.java @@ -0,0 +1,52 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +import java.util.Iterator; +import java.util.List; + +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.PhraseItem; + +/** + * The Matches referenced by a particular context name in a rule evaluation + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class ReferencedMatches { + + private String contextName; + + private List<Match> matches=new java.util.ArrayList<>(1); + + public ReferencedMatches(String contextName) { + this.contextName=contextName; + } + + public void addMatch(Match match) { + matches.add(match); + } + + public String getContextName() { return contextName; } + + public Iterator<Match> matchIterator() { + return matches.iterator(); + } + + /** + * Returns the item to insert from these referenced matches, or null if none + * + * @param label the label of the matches + */ + public Item toItem(String label) { + if (matches.size()==0) return null; + if (matches.size()==1) return matches.get(0).toItem(label); + + PhraseItem phrase=new PhraseItem(); // TODO: Somehow allow AND items instead here + phrase.setIndexName(label); + for (Iterator<Match> i=matches.iterator(); i.hasNext(); ) { + phrase.addItem(i.next().toItem(label)); + } + return phrase; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEngine.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEngine.java new file mode 100644 index 00000000000..ee874b76ed6 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEngine.java @@ -0,0 +1,169 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +import com.yahoo.search.Query; +import com.yahoo.prelude.query.QueryCanonicalizer; +import com.yahoo.prelude.semantics.RuleBase; +import com.yahoo.prelude.semantics.RuleBaseException; +import com.yahoo.prelude.semantics.rule.ProductionRule; + +import java.util.ListIterator; + +/** + * Evaluates the rules of a rule base. This method is thread safe on analyze calls, but + * not on modification calls. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class RuleEngine { + + private RuleBase rules; + + public RuleEngine(RuleBase rules) { + this.rules=rules; + } + + /** + * Evaluates a rule base over a query + * + * @param query the query to evaluate + * @param traceLevel the level of tracing to do + * @return the error caused by analyzing the query, or null if there was no error + * If there is an error, this query is destroyed (unusable) + */ + public String evaluate(Query query,int traceLevel) { + // TODO: This is O(query size*rule base size). We'll eventually need to create indices + // on rules to look up rule candidates per term to make it O(query size) instead + // Probably create indices on the first term like Prolog implementations use to + + boolean matchedAnything=false; + Evaluation evaluation=new Evaluation(query,traceLevel); + evaluation.setStemming(rules.getStemming()); + evaluation.trace(2,"Evaluating query '" + evaluation.getQuery().getModel().getQueryTree().getRoot() + "':"); + for (ListIterator<ProductionRule> i=rules.ruleIterator(); i.hasNext(); ) { + evaluation.reset(); + ProductionRule rule=i.next(); + boolean matched=matchRuleAtAllStartPoints(evaluation,rule); + matchedAnything|=matched; + } + + if (!matchedAnything) return null; + + String error=QueryCanonicalizer.canonicalize(query); + + if (query.getTraceLevel()>=1) + query.trace("SemanticSearcher: Rewrote query",true,1); + + return error; + } + + /** Match a rule at any starting point in the query */ + private boolean matchRuleAtAllStartPoints(Evaluation evaluation, ProductionRule rule) { + boolean matchedAtLeastOnce=false; + int iterationCount=0; + + /** + * Test if it is a removal rule, if so iterate backwards so that precalculated + * replacement positions does not become invalid as the query shrink + */ + boolean removalRule = false; + if ( (rule instanceof com.yahoo.prelude.semantics.rule.ReplacingProductionRule) && + (rule.getProduction().toString().length() == 0) ) { // empty replacement + removalRule = true; + evaluation.setToLast(); + } + + int loopLimit=Math.max(15,evaluation.getQuerySize()*3); + + while (evaluation.currentItem() != null) { + boolean matched=matchRule(evaluation,rule); + if (matched) { + if (removalRule) + evaluation.resetToLast(); + else + evaluation.reset(); + matchedAtLeastOnce = true; + if (rule.isLoop()) break; + } + else { + if (removalRule) + evaluation.previous(); + else + evaluation.next(); + } + + if (matched && iterationCount++ > loopLimit) { + throw new RuleBaseException("Rule '" + rule + "' has matched '" + + evaluation.getQuery().getModel().getQueryTree().getRoot() + + "' " + loopLimit + " times, aborting"); + } + } + + return matchedAtLeastOnce; + } + + /** + * Matches a rule at the current starting point of the evaluation, and carries + * out the production if there is a match + * + * @return whether this rule matched + */ + // TODO: Code cleanup + private boolean matchRule(Evaluation evaluation, ProductionRule rule) { + RuleEvaluation ruleEvaluation=evaluation.freshRuleEvaluation(); + + ruleEvaluation.indentTrace(); + if (ruleEvaluation.getTraceLevel()>=3) { + ruleEvaluation.trace(3,"Evaluating rule '" + rule + + "' on '" + ruleEvaluation.getEvaluation().getQuery().getModel().getQueryTree().getRoot() + + "' at '" + ruleEvaluation.currentItem() + "':"); + } + + ruleEvaluation.indentTrace(); + + boolean matches=rule.matches(ruleEvaluation); + + boolean matchedBefore=false; + int currentMatchDigest=ruleEvaluation.calculateMatchDigest(rule); + if (evaluation.hasMatchDigest(currentMatchDigest)) + matchedBefore=true; + + boolean queryGotShorter=false; + if (evaluation.getPreviousQuerySize()>evaluation.getQuerySize()) + queryGotShorter=true; + + boolean doProduction=!matchedBefore || queryGotShorter; + + ruleEvaluation.unindentTrace(); + + if (ruleEvaluation.getTraceLevel()>=2) { + if (matches && doProduction) + ruleEvaluation.trace(2,"Matched rule '" + rule + "' at " + ruleEvaluation.previousItem()); + else if (!matches) + ruleEvaluation.trace(2,"Did not match rule '" + rule + "' at " + ruleEvaluation.currentItem()); + else if (!doProduction) + ruleEvaluation.trace(2,"Ignoring repeated match of '" + rule + "'"); + } + + ruleEvaluation.unindentTrace(); + + if (!matches || !doProduction) return false; + + // Do production barrier + + evaluation.addMatchDigest(currentMatchDigest); + String preQuery=null; + if (evaluation.getTraceLevel()>=1) { + preQuery= evaluation.getQuery().getModel().getQueryTree().getRoot().toString(); + } + rule.produce(ruleEvaluation); + if (evaluation.getTraceLevel()>=1) { + evaluation.trace(1,"Transforming '" + preQuery + "' to '" + + evaluation.getQuery().getModel().getQueryTree().getRoot().toString() + + "' since '" + rule + "' matched"); + } + + return true; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEvaluation.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEvaluation.java new file mode 100644 index 00000000000..a6b90f98879 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEvaluation.java @@ -0,0 +1,346 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +import com.yahoo.prelude.query.CompositeItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.TermType; +import com.yahoo.prelude.semantics.rule.Condition; +import com.yahoo.prelude.semantics.rule.ProductionRule; + +import java.util.*; + +/** + * A particular evalutation of a particular rule. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class RuleEvaluation { + + // TODO: Create a query builder (or something) though which all query manipulation + // here and in Evaluation is done. This class must also hold all the matches + // and probably be able to update the match positions to keep them in sync with changes + // to the query + + // Remember that whenever state is added to this class, you + // must consider whether/how to make that state backtrackable + // by savinginformation in choicepoint.state + + /** The items to match in this evaluation */ + private List<FlattenedItem> items; + + /** The current position into the list of items */ + private int position; + + /** The start position into the item list */ + private int startPosition; + + /** The references to matched contexts to be made in this evaluation */ + private Set<String> matchReferences; + + /** The current context of this evaluation, or null we're currently not in an interesting context */ + private String currentContext; + + /** A list of referencedMatches */ + private List<ReferencedMatches> referencedMatchesList =new java.util.ArrayList<>(); + + private List<Match> nonreferencedMatches=new java.util.ArrayList<>(); + + /** The evaluation owning this */ + private Evaluation evaluation; + + /** The choice points saved in this evaluation */ + private Stack<Choicepoint> choicepoints=null; + + /* The last value returned by a condition evaluated in this, may be null */ + private Object value=null; + + /** True when we are evaluating inside a condition which inverts the truth value */ + private boolean inNegation=false; + + /** + * A label we should use to match candidate terms for. + * Used to propagate a label from e.g. reference conditions to named conditions + */ + private String currentLabel=null; + + public RuleEvaluation(Evaluation owner) { + this.evaluation=owner; + } + + public void initialize(List<FlattenedItem> list,int startPosition) { + this.startPosition=startPosition; + items=list; + reinitialize(); + } + + void reinitialize() { + position=startPosition; + currentContext=null; + referencedMatchesList.clear(); + nonreferencedMatches.clear(); + if (choicepoints!=null) + choicepoints.clear(); + } + + public void setMatchReferences(Set<String> matchReferences) { this.matchReferences=matchReferences; } + + /** + * <p>Calculates an id which is unique for each match (the totality of the matched terms) + * to a high probability. Why can we not simply look at the position + * of terms? Because rules are allowed to modify the query tree in ways that makes positions + * change.</p> + * + * <p>This digest is also problematic, because it's really the matching condition who should + * calculate a match digest for that term which incorporates the semantics of that kind + * of match (maybe not the word and index, but something else). This is a todo for when + * we add other kinds of conditions.</p> + */ + int calculateMatchDigest(ProductionRule rule) { + int matchDigest=rule.hashCode(); + int matchCounter=1; + for (Iterator<ReferencedMatches> i=referencedMatchesList.iterator(); i.hasNext(); ) { + ReferencedMatches matches=i.next(); + int termCounter=0; + for (Iterator<Match> j=matches.matchIterator(); j.hasNext(); ) { + Match match=j.next(); + matchDigest=7*matchDigest*matchCounter+ + 71*termCounter+ + match.hashCode(); + termCounter++; + } + matchCounter++; + } + for (Iterator<Match> i=nonreferencedMatches.iterator(); i.hasNext(); ) { + Match match=i.next(); + matchDigest=7*matchDigest*matchCounter+match.hashCode(); + matchCounter++; + } + return matchDigest; + } + + /** + * Returns the current term item to look at, + * or null if there are no more elements + */ + public FlattenedItem currentItem() { + if (position>=items.size()) return null; + return items.get(position); + } + + public FlattenedItem previousItem() { + if (position-1<0) return null; + return items.get(position-1); + } + + /** Returns the position of the current item */ + public int currentPosition() { + return position; + } + + /** Sets the current position */ + public void setPosition(int position) { + this.position=position; + } + + /** Returns the total number of items to match in this evaluation */ + public int itemCount() { + return items.size() - startPosition; + } + + /** Returns the last value returned by a condition in this evaluation, or null */ + public Object getValue() { return value; } + + /** Sets the last value returned by a condition in this evaluatiino, or null */ + public void setValue(Object value) { this.value=value; } + + /** Returns whether we are evaluating inside a condition which inverts the truth value */ + public boolean isInNegation() { return inNegation; } + + /** sets whether we are evaluating inside a condition which inverts the truth value */ + public void setInNegation(boolean inNegation) { this.inNegation=inNegation; } + + /** Returns the current position into the terms this evaluates over */ + public int getPosition() { return position; } + + /** Sets a new current label and returns the previous one */ + public String setCurrentLabel(String currentLabel) { + String oldLabel=currentLabel; + this.currentLabel=currentLabel; + return oldLabel; + } + + public String getCurrentLabel() { return currentLabel; } + + /** + * Advances currentItem to the next term item and returns thatItem. + * If the current item before this call is the last item, this will + * return (and set currentItem to) null. + */ + public FlattenedItem next() { + position++; + + if (position>=items.size()) { + position=items.size(); + return null; + } + + return items.get(position); + } + + // TODO: Simplistic yet. Nedd to support context nesting etc. + public void entering(String context) { + if (context==null) return; + if (matchReferences!=null && matchReferences.contains(context)) + currentContext=context; + + } + + public void leaving(String context) { + if (context==null) return; + if (currentContext==null) return; + if (currentContext.equals(context)) + currentContext=null; + } + + /** + * Adds a match + * + * @param item the match to add + * @param replaceString the string to replace this match by, usually the item.getIndexedValue() + */ + public void addMatch(FlattenedItem item,String replaceString) { + evaluation.makeParentMutable(item.getItem()); + Match match=new Match(item,replaceString); + if (currentContext!=null) { + ReferencedMatches matches=getReferencedMatches(currentContext); + if (matches==null) { + matches=new ReferencedMatches(currentContext); + referencedMatchesList.add(matches); + } + matches.addMatch(match); + } + else { + nonreferencedMatches.add(match); + } + } + + /** Returns the referenced matches for a context name, or null if none */ + public ReferencedMatches getReferencedMatches(String name) { + for (Iterator<ReferencedMatches> i=referencedMatchesList.iterator(); i.hasNext(); ) { + ReferencedMatches matches=i.next(); + if (name.equals(matches.getContextName())) + return matches; + } + return null; + } + + public int getReferencedMatchCount() { return referencedMatchesList.size(); } + + public int getNonreferencedMatchCount() { return nonreferencedMatches.size(); } + + /** Returns the evaluation this belongs to */ + public Evaluation getEvaluation() { return evaluation; } + + /** Adds an item to the query being evaluated in a way consistent with the query type */ + public void addItem(Item item, TermType termType) { + evaluation.addItem(item,termType); + } + + public void removeItem(Item item) { + evaluation.removeItem(item); + } + + public void removeItemByIdentity(Item item) { + evaluation.removeItemByIdentity(item); + } + + /** Removes an item, prefers the one at/close to the given position if there are multiple ones */ + public void removeItem(int position,Item item) { + evaluation.removeItem(position,item); + } + + + /** + * Inserts an item to the query being evaluated in a way consistent with the query type + * + * @param item the item to insert + * @param parent the parent of this item, or null to set the root + * @param index the index at which to insert this into the parent + * @param termType the kind of item to index, this decides the resulting structure + */ + public void insertItem(Item item, CompositeItem parent, int index, TermType termType) { + evaluation.insertItem(item,parent,index,termType); + } + + /** Returns a read-only view of the items of this */ + public List<FlattenedItem> items() { + return Collections.unmodifiableList(items); + } + + public Match getNonreferencedMatch(int index) { + return nonreferencedMatches.get(index); + } + + public void trace(int level,String string) { + evaluation.trace(level,string); + } + + public int getTraceLevel() { + return evaluation.getTraceLevel(); + } + + public void indentTrace() { + evaluation.indentTrace(); + } + + public void unindentTrace() { + evaluation.unindentTrace(); + } + + /** + * Add a choice point to this evaluation + * + * @param condition the creating condition + * @param create true to create this choicepoint if it is missing + * @return the choicepoint, or null if not present, and create is false + */ + public Choicepoint getChoicepoint(Condition condition,boolean create) { + if (choicepoints==null) { + if (!create) return null; + choicepoints=new java.util.Stack<>(); + } + Choicepoint choicepoint=lookupChoicepoint(condition); + if (choicepoint==null) { + if (!create) return null; + choicepoint=new Choicepoint(this,condition); + choicepoints.push(choicepoint); + } + return choicepoint; + } + + private Choicepoint lookupChoicepoint(Condition condition) { + for (Iterator<Choicepoint> i=choicepoints.iterator(); i.hasNext(); ) { + Choicepoint choicepoint=i.next(); + if (condition==choicepoint.getCondition()) + return choicepoint; + } + return null; + } + + List<ReferencedMatches> referencedMatches() { + return referencedMatchesList; + } + + List<Match> nonreferencedMatches() { + return nonreferencedMatches; + } + + /** Remove all the terms recognized by this match */ + public void removeMatches(ReferencedMatches matches) { + for (Iterator<Match> i=matches.matchIterator(); i.hasNext(); ) { + Match match=i.next(); + removeItemByIdentity(match.getItem()); + } + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/package-info.java b/container-search/src/main/java/com/yahoo/prelude/semantics/package-info.java new file mode 100644 index 00000000000..6adbd065352 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.prelude.semantics; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/parser/package-info.java b/container-search/src/main/java/com/yahoo/prelude/semantics/parser/package-info.java new file mode 100644 index 00000000000..309c3f7a456 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/parser/package-info.java @@ -0,0 +1,5 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.prelude.semantics.parser; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/AddingProductionRule.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/AddingProductionRule.java new file mode 100644 index 00000000000..91eef25a8b0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/AddingProductionRule.java @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +/** + * A production rule which <i>adds</i> the production to the matched query + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class AddingProductionRule extends ProductionRule { + + protected String getSymbol() { return "+>"; } + + public void setProduction(ProductionList productionList) { + super.setProduction(productionList); + productionList.setReplacing(false); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/AndCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/AndCondition.java new file mode 100644 index 00000000000..2c826df9196 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/AndCondition.java @@ -0,0 +1,39 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import java.util.Iterator; + +import com.yahoo.prelude.semantics.engine.Choicepoint; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A condition which consists of a list of alternatives to match at any location + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class AndCondition extends CompositeCondition { + + // TODO: Not in use. What was this for? Remove? + + public AndCondition() { + } + + public boolean doesMatch(RuleEvaluation e) { + Choicepoint choicepoint=e.getChoicepoint(this,true); + choicepoint.updateState(); + boolean matches=allSubConditionsMatches(e); + if (!matches) + choicepoint.backtrack(); + return matches; + } + + protected boolean useParentheses() { + return (getParent()!=null + && ! (getParent() instanceof ChoiceCondition)); + } + + protected String toInnerString() { + return toInnerString(" & "); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ChoiceCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ChoiceCondition.java new file mode 100644 index 00000000000..5cf3d4bf7a4 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ChoiceCondition.java @@ -0,0 +1,33 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import java.util.Iterator; + +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A condition which consists of a list of alternatives to match at a specific location + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class ChoiceCondition extends CompositeCondition { + + public ChoiceCondition() { + } + + public boolean doesMatch(RuleEvaluation e) { + //if (e.currentItem()==null) return false; + for (Iterator<Condition> i=conditionIterator(); i.hasNext(); ) { + Condition subCondition= i.next(); + if (subCondition.matches(e)) + return true; + } + + return false; + } + + protected String toInnerString() { + return toInnerString(", "); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ComparisonCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ComparisonCondition.java new file mode 100644 index 00000000000..0d24368cf28 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ComparisonCondition.java @@ -0,0 +1,170 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +import com.yahoo.prelude.semantics.engine.Choicepoint; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A condition which is true of the <i>values</i> of its two subconditions are true + * and both have the same value + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class ComparisonCondition extends CompositeCondition { + + private Operator operator; + + public ComparisonCondition(Condition leftCondition,String operatorString,Condition rightCondition) { + operator=Operator.get(operatorString); + addCondition(leftCondition); + addCondition(rightCondition); + } + + protected boolean doesMatch(RuleEvaluation evaluation) { + Object left=null; + Object right=null; + boolean matches=false; + Choicepoint choicepoint=evaluation.getChoicepoint(this,true); + try { + matches=getLeftCondition().matches(evaluation); + if (!matches) return false; + + left=evaluation.getValue(); + evaluation.setValue(null); + + choicepoint.backtrackPosition(); + matches=getRightCondition().matches(evaluation); + if (!matches) return false; + + right=evaluation.getValue(); + evaluation.setValue(right); + matches=operator.compare(left,right); + return matches; + } + finally { + if (!matches) + choicepoint.backtrack(); + traceResult(matches,evaluation,left,right); + } + } + + protected void traceResult(boolean matches,RuleEvaluation e) { + // Uses our own logging method instead + } + + protected void traceResult(boolean matches,RuleEvaluation e,Object left,Object right) { + if (matches && e.getTraceLevel()>=3) + e.trace(3,"Matched '" + this + "'" + getMatchInfoString(e) + " at " + e.previousItem() + " as " + left + operator + right + " is true"); + if (!matches && e.getTraceLevel()>=3) + e.trace(3,"Did not match '" + this + "' at " + e.currentItem() + " as " + left + operator + right + " is false"); + } + + public Condition getLeftCondition() { + return getCondition(0); + } + + public void setLeftCondition(Condition leftCondition) { + setCondition(0,leftCondition); + } + + public Condition getRightCondition() { + return getCondition(1); + } + + public void setRightCondition(Condition rightCondition) { + setCondition(1,rightCondition); + } + + protected String toInnerString() { + return toInnerString(operator.toString()); + } + + private static final class Operator { + + private String operatorString; + + private static Map<String, Operator> operators=new HashMap<>(); + + public static final Operator equals=new Operator("="); + public static final Operator largerequals=new Operator(">="); + public static final Operator smallerequals=new Operator("<="); + public static final Operator larger=new Operator(">"); + public static final Operator smaller=new Operator("<"); + public static final Operator different=new Operator("!="); + public static final Operator contains=new Operator("=~"); + + private Operator(String operator) { + this.operatorString=operator; + operators.put(operatorString,this); + } + + private static Operator get(String operatorString) { + Operator operator=operators.get(operatorString); + if (operator==null) + throw new IllegalArgumentException("Unknown operator '" + operatorString + "'"); + return operator; + } + + public boolean compare(Object left,Object right) { + if (this==equals) + return equals(left,right); + if (this==different) + return !equals(left,right); + + if (left==null || right==null) return false; + + if (this==contains) + return contains(left,right); + if (this==largerequals) + return larger(left,right) || equals(left,right); + if (this==smallerequals) + return !larger(left,right); + if (this==larger) + return larger(left,right); + if (this==smaller) + return !larger(left,right) && !equals(left,right); + throw new RuntimeException("Programming error, fix this method"); + } + + private boolean equals(Object left,Object right) { + if (left==null && right==null) return true; + if (left==null) return false; + return left.equals(right); + } + + /** True if left contains right */ + private boolean contains(Object left,Object right) { + if (left instanceof Collection) + return ((Collection<?>)left).contains(right); + else + return left.toString().indexOf(right.toString())>=0; + } + + /** true if left is larger than right */ + private boolean larger(Object left,Object right) { + if ((left instanceof Number) && (right instanceof Number)) + return ((Number)left).doubleValue()>((Number)right).doubleValue(); + else + return left.toString().compareTo(right.toString())>0; + } + + public int hashCode() { + return operatorString.hashCode(); + } + + public boolean equals(Object other) { + if ( ! (other instanceof Operator)) return false; + return other.toString().equals(this.toString()); + } + + public String toString() { + return operatorString; + } + + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/CompositeCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/CompositeCondition.java new file mode 100644 index 00000000000..e7fd8d599d4 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/CompositeCondition.java @@ -0,0 +1,126 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import java.util.Iterator; +import java.util.List; + +import com.yahoo.prelude.semantics.RuleBase; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A condition which contains a list of conditions + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public abstract class CompositeCondition extends Condition { + + private List<Condition> conditions=new java.util.ArrayList<>(); + + public CompositeCondition() { + } + + public void preMatchHook(RuleEvaluation e) { + super.preMatchHook(e); + if (e.getTraceLevel()>=3) { + e.trace(3,"Evaluating '" + this + "'" + " at " + e.currentItem()); + e.indentTrace(); + } + } + + public void postMatchHook(RuleEvaluation e) { + if (e.getTraceLevel()>=3) { + e.unindentTrace(); + } + } + + protected boolean hasOpenChoicepoint(RuleEvaluation evaluation) { + for (Iterator<Condition> i=conditionIterator(); i.hasNext(); ) { + Condition subCondition=i.next(); + if (subCondition.hasOpenChoicepoint(evaluation)) + return true; + } + return false; + } + + public void addCondition(Condition condition) { + conditions.add(condition); + condition.setParent(this); + } + + /** Sets the condition at the given index */ + public void setCondition(int index,Condition condition) { + conditions.set(index,condition); + } + + /** Returns the number of subconditions */ + public int conditionSize() { return conditions.size(); } + + /** + * Returns the condition at the given index + * + * @param i the 0-base index + * @return the condition at this index + * @throws IndexOutOfBoundsException if there is no condition at this index + */ + public Condition getCondition(int i) { + return conditions.get(i); + } + + /** + * Returns the condition at the given index + * + * @param i the 0-base index + * @return the removed condition + * @throws IndexOutOfBoundsException if there is no condition at this index + */ + public Condition removeCondition(int i) { + Condition condition=conditions.remove(i); + condition.setParent(null); + return condition; + } + + /** Returns an iterator of the immediate children of this condition */ + public Iterator<Condition> conditionIterator() { return conditions.iterator(); } + + public void makeReferences(RuleBase rules) { + for (Iterator<Condition> i=conditionIterator(); i.hasNext(); ) { + Condition condition=i.next(); + condition.makeReferences(rules); + } + } + + /** Whether this should be output with parentheses, default is parent!=null */ + protected boolean useParentheses() { + return getParent()!=null; + } + + protected String toInnerString(String conditionSeparator) { + if (getLabel()!=null) + return getLabel() + ":(" + conditionsToString(conditionSeparator) + ")"; + else if (useParentheses()) + return "(" + conditionsToString(conditionSeparator) + ")"; + else + return conditionsToString(conditionSeparator); + } + + protected final String conditionsToString(String conditionSeparator) { + StringBuilder buffer=new StringBuilder(); + for (Iterator<Condition> i=conditionIterator(); i.hasNext(); ) { + buffer.append(i.next().toString()); + if (i.hasNext()) + buffer.append(conditionSeparator); + } + return buffer.toString(); + } + + /** Returns whether all the conditions of this matches the current evaluation state */ + protected final boolean allSubConditionsMatches(RuleEvaluation e) { + for (Iterator<Condition> i=conditionIterator(); i.hasNext(); ) { + Condition subCondition=i.next(); + if (!subCondition.matches(e)) + return false; + } + return true; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/CompositeItemCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/CompositeItemCondition.java new file mode 100644 index 00000000000..18fbbb04412 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/CompositeItemCondition.java @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.query.PhraseItem; +import com.yahoo.prelude.semantics.engine.Choicepoint; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A condition on the presense of a particular kind of composite item (possibly also with a particular content) + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + * @since 5.1.15 + */ +public class CompositeItemCondition extends CompositeCondition { + + @Override + protected boolean doesMatch(RuleEvaluation e) { + Choicepoint choicepoint = e.getChoicepoint(this,true); + choicepoint.updateState(); + boolean matches = e.currentItem().getItem().getParent() instanceof PhraseItem + && allSubConditionsMatches(e); + if ( ! matches) + choicepoint.backtrack(); + return matches; + + } + + @Override + protected String toInnerString() { + if (getLabel()!=null) + return getLabel() + ":(" + toInnerStringBody() + ")"; + else if (useParentheses()) + return "(" + toInnerStringBody() + ")"; + else + return toInnerStringBody(); + } + + private String toInnerStringBody() { + return "\"" + conditionsToString(" ") + "\""; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/Condition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/Condition.java new file mode 100644 index 00000000000..f2029ede6fa --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/Condition.java @@ -0,0 +1,255 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.query.TermItem; +import com.yahoo.prelude.semantics.RuleBase; +import com.yahoo.prelude.semantics.engine.FlattenedItem; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * Superclass of all kinds of conditions of production rules + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public abstract class Condition { + + /** The parent of this condition, or null if this is not nested */ + private CompositeCondition parent=null; + + /** + * The label of this condition, or null if none. + * Specified by label:condition + * The label is also the default context is no context is speficied explicitly + */ + private String label=null; + + /** + * The name space refered by this match, or null if the default (query) + * Specified by namespace.condition in rules. + */ + private String nameSpace=null; + + /** + * The name of the context created by this, or null if none + * Specified by context/condition in rules + */ + private String contextName; + + /** Position constraints of the terms matched by this condition */ + private Anchor anchor=Anchor.NONE; + + public static enum Anchor { + NONE, START, END, BOTH; + public static Anchor create(boolean start,boolean end) { + if (start && end) return Anchor.BOTH; + if (start) return Anchor.START; + if (end) return Anchor.END; + return NONE; + } + } + + public Condition() { + this(null,null); + } + + public Condition(String label) { + this(label,null); + } + + public Condition(String label,String context) { + this.label=label; + this.contextName=context; + } + + /** + * Sets the name whatever is matched by this condition can be refered as, or null + * to make it unreferable + */ + public void setContextName(String contextName) { this.contextName=contextName; } + + /** + * Returns the name whatever is matched by this condition can be refered as, or null + * if it is unreferable + */ + public String getContextName() { return contextName; } + + /** Returns whether this is referable, returns context!=null by default */ + protected boolean isReferable() { return contextName!=null; } + + /** Sets the label of this. Set to null to use the default */ + public String getLabel() { return label; } + + /** Returns the label of this, or null if none (the default) */ + public void setLabel(String label) { this.label = label; } + + /** Returns the name of the namespace of this, or null if default (query) */ + public String getNameSpace() { return nameSpace; } + + /** Sets the name of the namespace of this */ + public void setNameSpace(String nameSpace) { this.nameSpace=nameSpace; } + + /** Returns the condition this is nested within, or null if it is not nested */ + public CompositeCondition getParent() { return parent; } + + /** Called by CompositeCondition.addCondition() */ + void setParent(CompositeCondition parent) { this.parent=parent; } + + /** Sets a positional constraint on this condition */ + public void setAnchor(Anchor anchor) { this.anchor=anchor; } + + /** Returns the positional constraint on this anchor. This is never null */ + public Anchor getAnchor() { return anchor; } + + /** + * <p>Returns whether this condition matches the given evaluation + * at the <i>current</i> location of the evaluation. Calls the doesMatch + * method of each condition subtype.</p> + */ + public final boolean matches(RuleEvaluation e) { + // TODO: With this algoritm, each choice point will move to the next choice on each reevaluation + // In the case where there are multiple ellipses, we may want to do globally coordinated + // moves of all the choice points instead + try { + preMatchHook(e); + + if (!matchesStartAnchor(e)) return false; + + String higherLabel=e.getCurrentLabel(); + if (getLabel()!=null) + e.setCurrentLabel(getLabel()); + + boolean matches=doesMatch(e); + while (!matches && hasOpenChoicepoint(e)) { + matches=doesMatch(e); + } + + e.setCurrentLabel(higherLabel); + + if (!matchesEndAnchor(e)) return false; + + traceResult(matches,e); + return matches; + } + finally { + postMatchHook(e); + } + + } + + /** Check start anchor. Trace level 4 if no match */ + protected boolean matchesStartAnchor(RuleEvaluation e) { + if (anchor!=Anchor.START && anchor!=Anchor.BOTH) return true; + if (e.getPosition()==0) return true; + if (e.getTraceLevel()>=4) + e.trace(4,this + " must be at the start, which " + e.currentItem() + " isn't"); + return false; + } + + /** Check start anchor. Trace level 4 if no match */ + protected boolean matchesEndAnchor(RuleEvaluation e) { + if (anchor!=Anchor.END && anchor!=Anchor.BOTH) return true; + if (e.getPosition()>=e.items().size()) return true; + if (e.getTraceLevel()>=4) + e.trace(4,this + " must be at the end, which " + e.currentItem() + " isn't"); + return false; + } + + protected void traceResult(boolean matches,RuleEvaluation e) { + if (matches && e.getTraceLevel()>=3) + e.trace(3,"Matched '" + this + "'" + getMatchInfoString(e) + " at " + e.previousItem()); + if (!matches && e.getTraceLevel()>=4) + e.trace(4,"Did not match '" + this + "' at " + e.currentItem()); + } + + protected String getMatchInfoString(RuleEvaluation e) { + String matchInfo=getMatchInfo(e); + if (matchInfo==null) return ""; + return " as '" + matchInfo + "'"; + } + + /** + * Called when match is called, before anything else. + * Always call super.preMatchHook when overriding. + */ + protected void preMatchHook(RuleEvaluation e) { + e.entering(contextName); + } + + /** + * Called just before match returns, on any return condition including exceptions. + * Always call super.postMatchHook when overriding + */ + protected void postMatchHook(RuleEvaluation e) { + e.leaving(contextName); + } + + /** + * Override this to return a string describing what this condition has matched in this evaluation. + * Will only be called when this condition is actually matched in this condition + * + * @return info about what is matched, or null if there is no info to return (default) + */ + protected String getMatchInfo(RuleEvaluation e) { return null; } + + /** + * Returns whether this condition matches the given evaluation + * at the <i>current</i> location of the evaluation. If there is a + * match, the evaluation must be advanced to the location beyond + * the matching item(s) before this method returns. + */ + protected abstract boolean doesMatch(RuleEvaluation e); + + /** + * Returns whether there is an <i>open choice</i> in this or any of its subconditions. + * Returns false by default, must be overriden by conditions which may generate + * choices open accross multiple calls to matches, or contain such conditions. + */ + protected boolean hasOpenChoicepoint(RuleEvaluation e) { + return false; + } + + /** Override if references needs to be set in this condition of its children */ + public void makeReferences(RuleBase rules) { } + + protected String getLabelString() { + if (label==null) return ""; + return label + ":"; + } + + /** Whether the label matches the current item, true if there is no current item */ + protected boolean labelMatches(RuleEvaluation e) { + FlattenedItem flattenedItem=e.currentItem(); + if (flattenedItem==null) return true; + TermItem item=flattenedItem.getItem(); + if (item==null) return true; + return labelMatches(item,e); + } + + protected boolean labelMatches(TermItem evaluationTerm,RuleEvaluation e) { + String indexName=evaluationTerm.getIndexName(); + String label=getLabel(); + if (label==null) + label=e.getCurrentLabel(); + if ("".equals(indexName) && label==null) return true; + if (indexName.equals(label)) return true; + if (e.getTraceLevel()>=4) + e.trace(4,"'" + this + "' does not match, label of " + e.currentItem() + " was required to be " + label); + return false; + } + + /** All instances of this produces a parseable string output */ + protected abstract String toInnerString(); + + protected boolean isDefaultContextName() { return false; } + + public String toString() { + String contextString=""; + String nameSpaceString=""; + if (contextName!=null && !isDefaultContextName()) + contextString=contextName + "/"; + if (getNameSpace()!=null) + nameSpaceString=getNameSpace() + "."; + return contextString + nameSpaceString + toInnerString(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ConditionReference.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ConditionReference.java new file mode 100644 index 00000000000..855a8b802ba --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ConditionReference.java @@ -0,0 +1,125 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.query.TermItem; +import com.yahoo.prelude.querytransform.PhraseMatcher; +import com.yahoo.prelude.semantics.RuleBase; +import com.yahoo.prelude.semantics.RuleBaseException; +import com.yahoo.prelude.semantics.engine.Choicepoint; +import com.yahoo.prelude.semantics.engine.EvaluationException; +import com.yahoo.prelude.semantics.engine.FlattenedItem; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; +import com.yahoo.protect.Validator; + +import java.util.Map; + +/** + * A reference to a named condition + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class ConditionReference extends Condition { + + /** The name of the referenced rule */ + private String conditionName; + + /** + * The actual condition references by this, or null if not initialized or not found, + * or if this is really an automata reference + */ + private NamedCondition namedCondition; + + /** + * True if this condition should be looked up in the automata + * annotations of the item instead of by reference to another item + */ + private boolean automataLookup=false; + + public ConditionReference(String conditionName) { + this(null,conditionName); + } + + public ConditionReference(String label,String conditionName) { + super(label); + Validator.ensureNotNull("Name of referenced condition",conditionName); + this.conditionName=conditionName; + setContextName(conditionName); + } + + /** Returns the name of the referenced rule, never null */ + public String getConditionName() { return conditionName; } + + public void setConditionName(String name) { this.conditionName=name; } + + public boolean doesMatch(RuleEvaluation e) { + if (automataLookup) return automataMatch(e); + + if (namedCondition==null) + throw new EvaluationException("Condition reference '" + conditionName + + "' not found or not initialized"); + + return namedCondition.matches(e); + } + + private boolean automataMatch(RuleEvaluation e) { + FlattenedItem current=e.currentItem(); + if (current==null) return false; + + Object annotation=current.getItem().getAnnotation(conditionName); + if (annotation==null) return false; + if (! (annotation instanceof PhraseMatcher.Phrase)) return false; + + PhraseMatcher.Phrase phrase=(PhraseMatcher.Phrase)annotation; + + Choicepoint choicePoint=e.getChoicepoint(this,true); + boolean matches=automataMatchPhrase(phrase,e); + + if (!matches && e.isInNegation()) { // TODO: Temporary hack! Works for single items only + e.addMatch(current,null); + } + + if ((!matches && !e.isInNegation() || (matches && e.isInNegation()))) + choicePoint.backtrackPosition(); + + return matches; + } + + private boolean automataMatchPhrase(PhraseMatcher.Phrase phrase,RuleEvaluation e) { + for (PhraseMatcher.Phrase.MatchIterator i=phrase.itemIterator(); i.hasNext(); ) { + i.next(); + FlattenedItem current=e.currentItem(); + if (current==null) return false; + if (!labelMatches(e.currentItem().getItem(),e)) return false; + if (!e.isInNegation()) + e.addMatch(current,i.getReplace()); + e.next(); + } + if (phrase.getLength()>phrase.getBackedLength()) return false; // The underlying composite item has changed + return true; + } + + public void makeReferences(RuleBase ruleBase) { + namedCondition=ruleBase.getCondition(conditionName); + if (namedCondition==null) { // Then this may reference some automata value, if we have an automata + if (ruleBase.usesAutomata()) + automataLookup=true; + else + throw new RuleBaseException("Referenced condition '" + conditionName + + "' does not exist in " + ruleBase); + } + } + + protected boolean hasOpenChoicepoint(RuleEvaluation e) { + if (namedCondition==null) return false; + return namedCondition.getCondition().hasOpenChoicepoint(e); + } + + protected boolean isDefaultContextName() { + return getContextName()==null || getContextName().equals(conditionName); + } + + protected String toInnerString() { + return "[" + conditionName + "]"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/EllipsisCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/EllipsisCondition.java new file mode 100644 index 00000000000..84a470ff64e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/EllipsisCondition.java @@ -0,0 +1,130 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import java.util.Iterator; +import java.util.List; + +import com.yahoo.prelude.semantics.engine.Choicepoint; +import com.yahoo.prelude.semantics.engine.FlattenedItem; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A condition which greedily matches anything, represented as "..." + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class EllipsisCondition extends Condition { + + /** Whether this ellipsis is actually referable (enclosed in []) or not */ + private boolean referable; + + /** Creates a referable ellipsis condition with no label */ + public EllipsisCondition() { + this(true); + } + + /** Creates an ellipsis condition with no label */ + public EllipsisCondition(boolean referable) { + this(null,referable); + } + + /** Creates an ellipsis condition */ + public EllipsisCondition(String label,boolean referable) { + super(label); + this.referable=referable; + if (referable) + setContextName("..."); + } + + public EllipsisCondition(String label,String context) { + super(label,context); + } + + public boolean doesMatch(RuleEvaluation e) { + // We use a choice point to remember which untried alternatives are not tried (if any) + // We never need to backtrack to this choice - backtracking is done by the parent + // if this choice gives a global invalid state + Choicepoint choicepoint=e.getChoicepoint(this,false); + if (choicepoint==null) { // First try + choicepoint=e.getChoicepoint(this,true); + } + else { + if (!choicepoint.isOpen()) return false; + } + + // Match all the rest of the items the first time, then all except the last item and so on + int numberOfTermsToMatch=e.itemCount() - e.currentPosition() - choicepoint.tryCount(); + if (numberOfTermsToMatch<0) { + choicepoint.close(); + return false; + } + choicepoint.addTry(); + + String matchedTerms=matchTerms(numberOfTermsToMatch,e); + e.setValue(matchedTerms); + return true; + } + + private String matchTerms(int numberOfTerms,RuleEvaluation e) { + StringBuilder b=new StringBuilder(); + for (int i=0; i<numberOfTerms; i++) { + e.addMatch(e.currentItem(),e.currentItem().getItem().getIndexedString()); + b.append(e.currentItem().getItem().stringValue()); + if (i<(numberOfTerms-1)) + b.append(" "); + e.next(); + } + return b.toString(); + } + + public String getMatchInfo(RuleEvaluation e) { + Choicepoint choicepoint=e.getChoicepoint(this,false); + if (choicepoint==null) return null; + + return spaceSeparated(e.items().subList(choicepoint.getState().getPosition(), + e.itemCount() - choicepoint.tryCount() +1 )); + } + + private String spaceSeparated(List<FlattenedItem> items) { + StringBuilder buffer=new StringBuilder(); + for (Iterator<FlattenedItem> i=items.iterator(); i.hasNext(); ) { + buffer.append(i.next().toString()); + if (i.hasNext()) + buffer.append(" "); + } + return buffer.toString(); + } + + /** Returns whether this ellipsis condition can be referred from a production */ + public boolean isReferable() { + return referable || super.isReferable(); + } + + /** Sets whether this ellipsis condition can be referred from a production or not */ + public void setReferable(boolean referable) { + this.referable=referable; + if (referable && getContextName()==null) + setContextName("..."); + if (!referable && "...".equals(getContextName())) + setContextName(null); + } + + protected boolean hasOpenChoicepoint(RuleEvaluation e) { + Choicepoint choicepoint=e.getChoicepoint(this,false); + if (choicepoint==null) return false; // Not tried yet + if (!choicepoint.isOpen()) return false; + return true; + } + + protected boolean isDefaultContextName() { + return (getContextName()==null || getContextName().equals("...")); + } + + protected String toInnerString() { + if (referable) + return "[...]"; + else + return "..."; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralCondition.java new file mode 100644 index 00000000000..3cde8bba5ff --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralCondition.java @@ -0,0 +1,30 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A condition which is always true, and which has it's own value as return value + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class LiteralCondition extends Condition { + + private String value; + + public LiteralCondition(String value) { + this.value=value; + } + + protected boolean doesMatch(RuleEvaluation e) { + e.setValue(value); + return true; + } + + public void setValue(String value) { this.value=value; } + + public String getValue() { return value; } + + public String toInnerString() { return "'" + value + "'"; } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralPhraseProduction.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralPhraseProduction.java new file mode 100644 index 00000000000..23404fbc6e2 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralPhraseProduction.java @@ -0,0 +1,79 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +import com.yahoo.prelude.query.PhraseItem; +import com.yahoo.prelude.query.WordItem; +import com.yahoo.prelude.semantics.engine.Match; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; +import com.yahoo.protect.Validator; + +/** + * A literal phrase produced by a production rule + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class LiteralPhraseProduction extends TermProduction { + + private List<String> terms=new ArrayList<>(); + + /** Creates a new produced literal phrase */ + public LiteralPhraseProduction() { + super(); + } + + /** + * Creates a new produced literal phrase + * + * @param label the label of the produced term + */ + public LiteralPhraseProduction(String label) { + super(label); + } + + /** Adds a term to this phrase */ + public void addTerm(String term) { + Validator.ensureNotNull("A term in a produced phrase",term); + terms.add(term); + } + + /** Returns a read only view of the terms produced by this, never null */ + public List<String> getTerms() { return Collections.unmodifiableList(terms); } + + public void produce(RuleEvaluation e,int offset) { + PhraseItem newPhrase=new PhraseItem(); + newPhrase.setIndexName(getLabel()); + for (String term : terms) + newPhrase.addItem(new WordItem(term)); + + if (replacing) { + Match matched=e.getNonreferencedMatch(0); + insertMatch(e,matched,newPhrase,offset); + } + else { + newPhrase.setWeight(getWeight()); + if (e.getTraceLevel()>=6) + e.trace(6,"Adding '" + newPhrase + "'"); + e.addItem(newPhrase,getTermType()); + } + } + + public String toInnerTermString() { + return getLabelString() + "\"" + getSpaceSeparated(terms) + "\""; + } + + private String getSpaceSeparated(List<String> terms) { + StringBuilder builder=new StringBuilder(); + for (Iterator<String> i=terms.iterator(); i.hasNext(); ) { + builder.append(i.next()); + if (i.hasNext()) + builder.append(" "); + } + return builder.toString(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralTermProduction.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralTermProduction.java new file mode 100644 index 00000000000..f157fd6901d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralTermProduction.java @@ -0,0 +1,79 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.query.TermType; +import com.yahoo.prelude.query.WordItem; +import com.yahoo.prelude.semantics.engine.Match; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; +import com.yahoo.protect.Validator; + +/** + * A literal term produced by a production rule + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class LiteralTermProduction extends TermProduction { + + private String literal; + + /** + * Creates a new produced literal term + * + * @param literal the label of the condition this should take it's value from + */ + public LiteralTermProduction(String literal) { + super(); + setLiteral(literal); + } + + /** + * Creates a new produced literal term + * + * @param literal the label of the condition this should take it's value from + * @param termType the type of term to produce + */ + public LiteralTermProduction(String literal, TermType termType) { + super(termType); + setLiteral(literal); + } + + /** + * Creates a new produced literal term + * + * @param label the label of the produced term + * @param literal this term word + * @param termType the type of term to produce + */ + public LiteralTermProduction(String label,String literal, TermType termType) { + super(label,termType); + setLiteral(literal); + } + + /** The literal term value, never null */ + public void setLiteral(String literal) { + Validator.ensureNotNull("A produced term",literal); + this.literal=literal; + } + + /** Returns the term word produced, never null */ + public String getLiteral() { return literal; } + + public void produce(RuleEvaluation e,int offset) { + WordItem newItem=new WordItem(literal,getLabel()); + if (replacing) { + Match matched=e.getNonreferencedMatch(0); + insertMatch(e,matched,newItem,offset); + } + else { + newItem.setWeight(getWeight()); + if (e.getTraceLevel()>=6) + e.trace(6,"Adding '" + newItem + "'"); + e.addItem(newItem,getTermType()); + } + } + + public String toInnerTermString() { + return getLabelString() + literal; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamedCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamedCondition.java new file mode 100644 index 00000000000..ca1d623847d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamedCondition.java @@ -0,0 +1,57 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A condition given a name which enables it to be referenced from other conditions. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class NamedCondition { + + private String conditionName; + + private Condition condition; + + public NamedCondition(String name,Condition condition) { + this.conditionName=name; + this.condition=condition; + } + + public String getName() { return conditionName; } + + public void setName(String name) { this.conditionName = name; } + + public Condition getCondition() { return condition; } + + public void setCondition(Condition condition) { this.condition = condition; } + + public boolean matches(RuleEvaluation e) { + if (e.getTraceLevel()>=3) { + e.trace(3,"Evaluating '" + this + "' at " + e.currentItem()); + e.indentTrace(); + } + + boolean matches=condition.matches(e); + + if (e.getTraceLevel()>=3) { + e.unindentTrace(); + if (matches) + e.trace(3,"Matched '" + this + "' at " + e.previousItem()); + else if (e.getTraceLevel()>=4) + e.trace(4,"Did not match '" + this + "' at " + e.currentItem()); + } + return matches; + } + + /** + * Returns the canonical string representation of this named condition. + * This string representation can always be reparsed to produce an + * identical rule to this one. + */ + public String toString() { + return "[" + conditionName + "] :- " + condition.toString(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamespaceProduction.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamespaceProduction.java new file mode 100644 index 00000000000..0c73427ad82 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamespaceProduction.java @@ -0,0 +1,56 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.semantics.RuleBaseException; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A production in a specified namespace + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class NamespaceProduction extends Production { + + /** The label in this namespace */ + private String namespace; + + /** The key ito set in the namespace */ + private String key; + + /** The value to set in the namespace */ + private String value=null; + + /** Creates a produced template term with no label and the default type */ + public NamespaceProduction(String namespace,String key,String value) { + setNamespace(namespace); + this.key=key; + this.value=value; + } + + public String getNamespace() { return namespace; } + + public final void setNamespace(String namespace) { + if (!namespace.equals("parameter")) + throw new RuleBaseException("Can not produce into namespace '" + namespace + + ". Only the 'parameter' name space can be referenced currently"); + this.namespace = namespace; + } + + public String getKey() { return key; } + + public void setKey(String key) { this.key = key; } + + public String getValue() { return value; } + + public void setValue(String value) { this.value = value; } + + public void produce(RuleEvaluation e,int offset) { + e.getEvaluation().getQuery().properties().set(key, value); + } + + /** All instances of this produces a parseable string output */ + public String toInnerString() { + return namespace + "." + key + "='" + value + "'"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NotCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NotCondition.java new file mode 100644 index 00000000000..64a10ea821a --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NotCondition.java @@ -0,0 +1,47 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.semantics.RuleBase; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A condition which matches if its contained condition doesn't. + * NotCondition inverts the term checking but not the label checking. + * That is, it means "label:!term", it does not mean "!label:term". + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon Bratseth</a> + */ +public class NotCondition extends Condition { + + private Condition condition; + + public NotCondition(Condition condition) { + this.condition=condition; + } + + public Condition getCondtiion() { return condition; } + + public void setCondition(Condition condition) { this.condition=condition; } + + protected boolean doesMatch(RuleEvaluation e) { + e.setInNegation(!e.isInNegation()); + boolean matches=!condition.matches(e); + e.setInNegation(!e.isInNegation()); + return matches; + } + + public String toInnerString() { + return "!" + condition; + } + + public void makeReferences(RuleBase ruleBase) { + condition.makeReferences(ruleBase); + } + + protected boolean hasOpenChoicepoint(RuleEvaluation evaluation) { + return condition.hasOpenChoicepoint(evaluation); + } + + + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/Production.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/Production.java new file mode 100644 index 00000000000..cc6a9c87fb0 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/Production.java @@ -0,0 +1,65 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import java.util.Set; + +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A new term produced by a production rule + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public abstract class Production { + + /** True to add, false to replace, default true */ + protected boolean replacing=true; + + /** The (0-base) position of this term in the productions of this rule */ + private int position=0; + + /** The weight (strength) of this production as a percentage (default is 100) */ + private int weight=100; + + /** Creates a produced template term with no label and the default type */ + public Production() { + } + + /** True to replace, false to add, if this production can do both. Default true. */ + public void setReplacing(boolean replacing) { this.replacing=replacing; } + + public int getPosition() { return position; } + + public void setPosition(int position) { this.position = position; } + + /** Sets the weight of this production as a percentage (default is 100) */ + public void setWeight(int weight) { this.weight=weight; } + + /** Returns the weight of this production as a percentage (default is 100) */ + public int getWeight() { return weight; } + + /** + * Produces this at the current match + * + * @param e the evaluation context containing the current match and the query + * @param offset the offset position at which to produce this. Offsets are used to produce multiple items + * at one position, inserted in the right order. + */ + public abstract void produce(RuleEvaluation e,int offset); + + /** + * Called to add the references into the condition of this rule made by this production + * into the given set. The default implementation is void, override for productions + * which refers to the condition + */ + void addMatchReferences(Set<String> matchReferences) { } + + /** All instances of this produces a parseable string output */ + public final String toString() { + return toInnerString() + (getWeight()!=100 ? ("!" + getWeight()) : ""); + } + + /** All instances of this produces a parseable string output */ + protected abstract String toInnerString(); + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ProductionList.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ProductionList.java new file mode 100644 index 00000000000..3397a9ada1e --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ProductionList.java @@ -0,0 +1,67 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A list of the productions of a rule + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class ProductionList { + + private List<Production> productions =new java.util.ArrayList<>(); + + /** True to replace by the production, false to add it */ + private boolean replacing=true; + + public void addProduction(Production term) { + term.setReplacing(replacing); + term.setPosition(productions.size()); + productions.add(term); + } + + /** True to replace, false to add, default true */ + void setReplacing(boolean replacing) { + for (Iterator<Production> i=productions.iterator(); i.hasNext(); ) { + Production production=i.next(); + production.setReplacing(replacing); + } + + this.replacing=replacing; + } + + /** Returns an unmodifiable view of the productions in this */ + public List<Production> productionList() { return Collections.unmodifiableList(productions); } + + public int getTermCount() { return productions.size(); } + + void addMatchReferences(Set<String> matchReferences) { + for (Iterator<Production> i=productions.iterator(); i.hasNext(); ) { + Production term=i.next(); + term.addMatchReferences(matchReferences); + } + } + + public void produce(RuleEvaluation e) { + for (int i=0; i<productions.size(); i++) { + productions.get(i).produce(e,i); + } + } + + public String toString() { + StringBuilder buffer=new StringBuilder(); + for (Iterator<Production> i=productions.iterator(); i.hasNext(); ) { + buffer.append(i.next().toString()); + if (i.hasNext()) + buffer.append(" "); + } + return buffer.toString(); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ProductionRule.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ProductionRule.java new file mode 100644 index 00000000000..55be2aa2afd --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ProductionRule.java @@ -0,0 +1,100 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import java.util.Collections; +import java.util.Iterator; +import java.util.Set; + +import com.yahoo.prelude.semantics.RuleBase; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A query rewriting rule. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public abstract class ProductionRule { + + /** What must be true for this rule to be true */ + private Condition condition; + + /** What is produced when this rule is true */ + private ProductionList production=new ProductionList(); + + /** The set of match name Strings which the production part of this rule references */ + private Set<String> matchReferences=new java.util.LinkedHashSet<>(); + + /** Sets what must be true for this rule to be true */ + public void setCondition(Condition condition) { this.condition=condition; } + + public Condition getCondition() { return condition; } + + /** Sets what is produced when this rule is true */ + public void setProduction(ProductionList production) { this.production=production; } + + public ProductionList getProduction() { return production; } + + /** Returns whether this rule matches the given query */ + public boolean matches(RuleEvaluation e) { + e.setMatchReferences(matchReferences); + return condition.matches(e); + } + + /** + * Returns the set of context names the production of this rule references + * + * @return an unmodifiable Set of condition context name Strings + */ + public Set<String> matchReferences() { + return Collections.unmodifiableSet(matchReferences); + } + + public void makeReferences(RuleBase rules) { + condition.makeReferences(rules); + production.addMatchReferences(matchReferences); + } + + /** Carries out the production of this rule */ + public void produce(RuleEvaluation e) { + production.produce(e); + } + + /** + * Returns the canonical string representation of this rule. + * This string representation can always be reparsed to produce an + * identical rule to this one. + */ + public String toString() { + return condition.toString() + " " + getSymbol() + " " + production.toString(); + } + + /** + * Returns the symbol of this production rule. + * All rules are on the form <code>condition symbol production</code>. + */ + protected abstract String getSymbol(); + + /** + * Returns true if it is known that this rule matches its own output. + * If it does, it will only be evaluated once, to avoid infinite loops. + * This default implementation returns false; + */ + public boolean isLoop() { + // TODO: There are many more possible loops, we should probably detect + // a few more obvious ones + if (conditionIsEllipsAndOtherNameSpacesOnly(getCondition())) return true; + return false; + } + + private boolean conditionIsEllipsAndOtherNameSpacesOnly(Condition condition) { + if (condition instanceof EllipsisCondition) return true; + if (! (condition instanceof CompositeCondition)) return false; + for (Iterator<Condition> i=((CompositeCondition)condition).conditionIterator(); i.hasNext(); ) { + Condition child= i.next(); + if (child.getNameSpace()==null && conditionIsEllipsAndOtherNameSpacesOnly(child)) + return true; + } + return false; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReferenceTermProduction.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReferenceTermProduction.java new file mode 100644 index 00000000000..319e1969174 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReferenceTermProduction.java @@ -0,0 +1,110 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import java.util.Set; + +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.TermType; +import com.yahoo.prelude.semantics.engine.EvaluationException; +import com.yahoo.prelude.semantics.engine.Match; +import com.yahoo.prelude.semantics.engine.ReferencedMatches; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; +import com.yahoo.protect.Validator; + +/** + * A term produced by a production rule which takes it's actual term value + * from one or more terms matched in the condition + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class ReferenceTermProduction extends TermProduction { + + private String reference; + + /** + * Creates a new produced reference term + * + * @param reference the label of the condition this should take it's value from + */ + public ReferenceTermProduction(String reference) { + super(); + setReference(reference); + } + + /** + * Creates a new produced reference term + * + * @param reference the label of the condition this should take it's value from + * @param termType the type of the term to produce + */ + public ReferenceTermProduction(String reference, TermType termType) { + super(termType); + setReference(reference); + } + + /** + * Creates a new produced reference term + * + * @param label the label of the produced term + * @param reference the label of the condition this should take it's value from + */ + public ReferenceTermProduction(String label,String reference) { + super(label); + setReference(reference); + } + + /** + * Creates a new produced reference term + * + * @param label the label of the produced term + * @param reference the label of the condition this should take it's value from + * @param termType the type of term to produce + */ + public ReferenceTermProduction(String label,String reference, TermType termType) { + super(label,termType); + setReference(reference); + } + + /** The label of the condition this should take its value from, never null */ + public void setReference(String reference) { + Validator.ensureNotNull("reference name of a produced reference term",reference); + this.reference =reference; + } + + /** Returns the label of the condition this should take its value from, never null */ + public String getReference() { return reference; } + + void addMatchReferences(Set<String> matchReferences) { + matchReferences.add(reference); + } + + public void produce(RuleEvaluation e,int offset) { + ReferencedMatches referencedMatches=e.getReferencedMatches(reference); + if (referencedMatches==null) + throw new EvaluationException("Referred match '" + reference + "' not found"); + if (replacing) { + replaceMatches(e,referencedMatches); + } + else { + addMatches(e,referencedMatches); + } + } + + public void replaceMatches(RuleEvaluation e,ReferencedMatches referencedMatches) { + Item referencedItem=referencedMatches.toItem(getLabel()); + if (referencedItem==null) return; + e.removeMatches(referencedMatches); + insertMatch(e, referencedMatches.matchIterator().next(),referencedItem,0); + } + + private void addMatches(RuleEvaluation e,ReferencedMatches referencedMatches) { + Item referencedItem=referencedMatches.toItem(getLabel()); + if (referencedItem==null) return; + e.addItem(referencedItem,getTermType()); + } + + public String toInnerTermString() { + return getLabelString() + "[" + reference + "]"; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReplacingProductionRule.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReplacingProductionRule.java new file mode 100644 index 00000000000..76433ec693c --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReplacingProductionRule.java @@ -0,0 +1,41 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.semantics.engine.Match; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A production rule which <i>replaces</i> matched terms by the production + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class ReplacingProductionRule extends ProductionRule { + + /** Carries out the production of this rule */ + public void produce(RuleEvaluation e) { + removeNonreferencedMatches(e); + if (e.getTraceLevel()>=5) { + e.trace(5,"Removed terms to get '" + e.getEvaluation().getQuery().getModel().getQueryTree().getRoot() + "', will add terms"); + } + super.produce(e); + } + + /** Remove items until there's only one item left */ + private void removeNonreferencedMatches(RuleEvaluation e) { + int itemCount=e.getEvaluation().getQuerySize(); + + // Remove items backwards to ease index handling + for (int i=e.getNonreferencedMatchCount()-1; i>=0; i--) { + // Ensure we don't produce an empty query + if (getProduction().getTermCount()==0 && itemCount==1) + break; + itemCount--; + + Match match=e.getNonreferencedMatch(i); + match.getItem().getParent().removeItem(match.getPosition()); + } + } + + protected String getSymbol() { return "->"; } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/SequenceCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/SequenceCondition.java new file mode 100644 index 00000000000..3ba929da021 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/SequenceCondition.java @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import java.util.Iterator; + +import com.yahoo.prelude.semantics.engine.Choicepoint; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A set of conditions which much match the query in sequence + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class SequenceCondition extends CompositeCondition { + + public SequenceCondition() { + } + + public boolean doesMatch(RuleEvaluation e) { + Choicepoint choicepoint=e.getChoicepoint(this,true); + choicepoint.updateState(); + boolean matches=allSubConditionsMatches(e); + if (!matches) + choicepoint.backtrack(); + return matches; + } + + protected boolean useParentheses() { + return (getParent()!=null + && ! (getParent() instanceof ChoiceCondition)); + } + + public String toInnerString() { + return toInnerString(" "); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/SuperCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/SuperCondition.java new file mode 100644 index 00000000000..0b7b3b4a30b --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/SuperCondition.java @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A condition which evaluates the <i>last included</i> version of + * the named condition this is a premise of. + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class SuperCondition extends Condition { + + private Condition condition; + + public void setCondition(Condition condition) { + this.condition=condition; + } + + public Condition getCondition() { + return condition; + } + + public boolean doesMatch(RuleEvaluation e) { + return condition.matches(e); + } + + public String toInnerString() { + if (condition==null) + return "@super"; + else + return condition.toString(); + } + + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermCondition.java new file mode 100644 index 00000000000..3558ef2b227 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermCondition.java @@ -0,0 +1,92 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.query.TermItem; +import com.yahoo.prelude.semantics.engine.NameSpace; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; + +/** + * A term in a rule + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public class TermCondition extends Condition { + + private String term,termPlusS; + + /** Creates an invalid term */ + public TermCondition() { } + + public TermCondition(String term) { + this(null,term); + } + + public TermCondition(String label,String term) { + super(label); + this.term=term; + termPlusS=term + "s"; + } + + public String getTerm() { return term; } + + public void setTerm(String term) { + this.term=term; + termPlusS=term + "s"; + } + + protected boolean doesMatch(RuleEvaluation e) { + // TODO: Move this into the respective namespaces when query becomes one */ + if (getNameSpace()!=null) { + NameSpace nameSpace=e.getEvaluation().getNameSpace(getNameSpace()); + return nameSpace.matches(term,e); + } + else { + if (e.currentItem()==null) + return false; + + if (!labelMatches(e)) return false; + + String matchedValue=termMatches(e.currentItem().getItem(),e.getEvaluation().getStemming()); + boolean matches=matchedValue!=null && labelMatches(e.currentItem().getItem(),e); + if ((matches && !e.isInNegation() || (!matches && e.isInNegation()))) { + e.addMatch(e.currentItem(),matchedValue); + e.setValue(term); + e.next(); + } + return matches; + } + } + + /** Returns a non-null replacement term if there is a match, null otherwise */ + private String termMatches(TermItem queryTerm,boolean stemming){ + String queryTermString=queryTerm.stringValue(); + + // The terms are the same + boolean matches=queryTermString.equals(term); + if (matches) return term; + + if (stemming) + if (termMatchesWithStemming(queryTermString)) return term; + + return null; + } + + private boolean termMatchesWithStemming(String queryTermString) { + if (queryTermString.length()<3) return false; // Don't stem very short terms + + // The query term minus s is the same + boolean matches=queryTermString.equals(termPlusS); + if (matches) return true; + + // The query term plus s is the same + matches=term.equals(queryTermString + "s"); + if (matches) return true; + + return false; + } + + public String toInnerString() { + return getLabelString() + term; + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermProduction.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermProduction.java new file mode 100644 index 00000000000..6490d21e319 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermProduction.java @@ -0,0 +1,93 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.rule; + +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.TermType; +import com.yahoo.prelude.semantics.engine.Match; +import com.yahoo.prelude.semantics.engine.RuleEvaluation; +import com.yahoo.protect.Validator; + +/** + * A new term produced by a production rule + * + * @author <a href="mailto:bratseth@yahoo-inc.com">Jon S Bratseth</a> + */ +public abstract class TermProduction extends Production { + + /** The label of this term, or null if none */ + private String label=null; + + /** The type of term to produce */ + private TermType termType; + + /** Creates a produced template term with no label and the default type */ + public TermProduction() { + this(null,TermType.DEFAULT); + } + + /** Creates a produced template term with the default term type */ + public TermProduction(String label) { + this(label,TermType.DEFAULT); + } + + /** Creates a produced template term with no label */ + public TermProduction(TermType termType) { + this(null,termType); + } + + public TermProduction(String label, TermType termType) { + this.label=label; + setTermType(termType); + } + + /** Sets the label of this. Set to null to use the default */ + public String getLabel() { return label; } + + /** Returns the label of this, or null if none (the default) */ + public void setLabel(String label) { this.label = label; } + + /** Returns the type of term to produce, never null. Default is DEFAULT */ + public TermType getTermType() { return termType; } + + /** Sets the term type to produce */ + public void setTermType(TermType termType) { + Validator.ensureNotNull("Type of produced Term",termType); + this.termType=termType; + } + + /** + * Inserts newItem at the position of this match + * TODO: Move to ruleevaluation + */ + protected void insertMatch(RuleEvaluation e,Match matched, Item newItem,int offset) { + newItem.setWeight(getWeight()); + int insertPosition=matched.getPosition()+offset; + + // This check is necessary (?) because earlier items may have been removed + // after we recorded the match position. It is sort of hackish. A cleaner + // solution would be to update the match position on changes + if (insertPosition>matched.getParent().getItemCount()) { + insertPosition=matched.getParent().getItemCount(); + } + + e.insertItem(newItem,matched.getParent(),insertPosition,getTermType()); + if (e.getTraceLevel()>=6) + e.trace(6,"Inserted item '" + newItem + "' at position " + insertPosition + " producing " + e.getEvaluation().getQuery().getModel().getQueryTree()); + } + + protected String getLabelString() { + if (label==null) return ""; + return label + ":"; + } + + /** All instances of this produces a parseable string output */ + public final String toInnerString() { + if (termType==null) + return toInnerTermString(); + else + return termType.toSign() + toInnerTermString(); + } + + protected abstract String toInnerTermString(); + +} |