From 0d095ccb083e66c99701bf0e2186cd0913227b58 Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Mon, 10 Jan 2022 19:24:03 +0100 Subject: Stem by linguistics in rule bases Also add a @language directive to stem in other languages than english. --- .../java/com/yahoo/prelude/semantics/RuleBase.java | 61 +++++++------- .../com/yahoo/prelude/semantics/RuleImporter.java | 94 ++++++++-------------- .../yahoo/prelude/semantics/SemanticSearcher.java | 15 ++-- .../semantics/benchmark/RuleBaseBenchmark.java | 7 +- .../semantics/engine/RuleBaseLinguistics.java | 54 +++++++++++++ .../yahoo/prelude/semantics/engine/RuleEngine.java | 3 +- .../prelude/semantics/rule/LiteralCondition.java | 2 +- .../prelude/semantics/rule/NamedCondition.java | 12 +-- .../semantics/rule/NamespaceProduction.java | 10 +-- .../semantics/rule/ReferenceTermProduction.java | 2 +- .../prelude/semantics/rule/TermCondition.java | 52 ++++-------- 11 files changed, 165 insertions(+), 147 deletions(-) create mode 100644 container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleBaseLinguistics.java (limited to 'container-search/src/main/java/com/yahoo/prelude') diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBase.java b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBase.java index 2b8515b6db8..8e137d99951 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBase.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBase.java @@ -1,19 +1,34 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.prelude.semantics; +import com.yahoo.language.Language; +import com.yahoo.language.Linguistics; +import com.yahoo.language.process.StemMode; +import com.yahoo.prelude.semantics.engine.RuleBaseLinguistics; +import com.yahoo.prelude.semantics.rule.CompositeCondition; +import com.yahoo.prelude.semantics.rule.Condition; +import com.yahoo.prelude.semantics.rule.NamedCondition; +import com.yahoo.prelude.semantics.rule.ProductionRule; +import com.yahoo.prelude.semantics.rule.SuperCondition; import com.yahoo.search.Query; import com.yahoo.prelude.querytransform.PhraseMatcher; import com.yahoo.prelude.semantics.engine.RuleEngine; import com.yahoo.prelude.semantics.parser.ParseException; -import com.yahoo.prelude.semantics.rule.*; import com.yahoo.protect.Validator; import java.io.File; -import java.util.*; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.ListIterator; +import java.util.Map; +import java.util.Set; +import java.util.StringTokenizer; /** - * A set of semantic production rules and named conditions used to analyze - * and rewrite queries + * A set of semantic production rules and named conditions used to analyze and rewrite queries * * @author bratseth */ @@ -26,7 +41,7 @@ public class RuleBase { private String source; /** The name of the automata file used, or null if none */ - protected String automataFileName = null; + private String automataFileName = null; /** * True if this rule base is default. @@ -61,29 +76,26 @@ public class RuleBase { */ private boolean usesAutomata = false; - /** Should we allow stemmed matches? */ - private boolean stemming = true; - - /** Creates an empty rule base. TODO: Disallow */ - public RuleBase() { - } + private RuleBaseLinguistics linguistics; /** Creates an empty rule base */ - public RuleBase(String name) { - setName(name); + public RuleBase(String name, Linguistics linguistics) { + this.name = name; + this.linguistics = new RuleBaseLinguistics(StemMode.BEST, Language.ENGLISH, linguistics); } /** - * Creates a rule base from a file + * Creates a rule base from file * - * @param ruleFile the rule file to read. The name of the file (minus path) becomes the rule base name + * @param ruleFile the rule file to read. The name of the file (minus path) becomes the rule base name. * @param automataFile the automata file, or null to not use an automata * @throws java.io.IOException if there is a problem reading one of the files * @throws ParseException if the rule file can not be parsed correctly * @throws RuleBaseException if the rule file contains inconsistencies */ - public static RuleBase createFromFile(String ruleFile, String automataFile) throws java.io.IOException, ParseException { - return new RuleImporter().importFile(ruleFile, automataFile); + public static RuleBase createFromFile(String ruleFile, String automataFile, Linguistics linguistics) + throws java.io.IOException, ParseException { + return new RuleImporter(linguistics).importFile(ruleFile, automataFile); } /** @@ -96,18 +108,13 @@ public class RuleBase { * @throws com.yahoo.prelude.semantics.parser.ParseException if the rule file can not be parsed correctly * @throws com.yahoo.prelude.semantics.RuleBaseException if the rule file contains inconsistencies */ - public static RuleBase createFromString(String name, String ruleString, String automataFile) throws java.io.IOException, ParseException { - RuleBase base = new RuleImporter().importString(ruleString, automataFile, new RuleBase()); + public static RuleBase createFromString(String name, String ruleString, String automataFile, Linguistics linguistics) + throws java.io.IOException, ParseException { + RuleBase base = new RuleImporter(linguistics).importString(ruleString, automataFile); base.setName(name); return base; } - /** Set to true to enable stemmed matches. True by default */ - public void setStemming(boolean stemming) { this.stemming = stemming; } - - /** Returns whether stemmed matches are allowed. True by default */ - public boolean getStemming() { return stemming; } - /** *

Include another rule base into this. This transfers ownership * of the given rule base - it can not be subsequently used for any purpose @@ -171,7 +178,7 @@ public class RuleBase { resolveSuper(condition, superCondition); } - private void resolveSuper(Condition condition,Condition superCondition) { + private void resolveSuper(Condition condition, Condition superCondition) { if (condition instanceof SuperCondition) { ((SuperCondition)condition).setCondition(superCondition); } @@ -336,7 +343,7 @@ public class RuleBase { // TODO: Values are not added right now protected void annotatePhrase(PhraseMatcher.Phrase phrase,Query query,int traceLevel) { - for (StringTokenizer tokens = new StringTokenizer(phrase.getData(),"|",false) ; tokens.hasMoreTokens(); ) { + for (StringTokenizer tokens = new StringTokenizer(phrase.getData(), "|", false); tokens.hasMoreTokens(); ) { String token = tokens.nextToken(); int semicolonIndex = token.indexOf(";"); String annotation = token; diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/RuleImporter.java b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleImporter.java index 45569050882..acbf9a7ffb6 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/RuleImporter.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleImporter.java @@ -10,8 +10,9 @@ import java.util.Arrays; import java.util.List; import com.yahoo.io.IOUtils; -import com.yahoo.io.reader.NamedReader; -import com.yahoo.prelude.semantics.parser.*; +import com.yahoo.language.Linguistics; +import com.yahoo.prelude.semantics.parser.ParseException; +import com.yahoo.prelude.semantics.parser.SemanticsParser; /** * Imports rule bases from various sources. @@ -24,51 +25,47 @@ import com.yahoo.prelude.semantics.parser.*; // rule bases included into others, while neither the rule base or the parser knows. public class RuleImporter { - /** - * If this is set, imported rule bases are looked up in this config - * otherwise, they are looked up as files - */ - private SemanticRulesConfig config; + /** If this is set, imported rule bases are looked up in this config otherwise, they are looked up as files. */ + private final SemanticRulesConfig config; - /** - * Ignore requests to read automata files. - * Useful to validate rule bases without having automatas present - */ - private boolean ignoreAutomatas; + /** Ignore requests to read automata files. Useful to validate rule bases without having automatas present. */ + private final boolean ignoreAutomatas; - /** - * Ignore requests to include files. - * Useful to validate rule bases one by one in config - */ - private boolean ignoreIncludes = false; + /** Ignore requests to include files. Useful to validate rule bases one by one in config. */ + private final boolean ignoreIncludes; + + private Linguistics linguistics; /** Create a rule importer which will read from file */ - public RuleImporter() { - this(null, false); + public RuleImporter(Linguistics linguistics) { + this(null, false, linguistics); } /** Create a rule importer which will read from a config object */ - public RuleImporter(SemanticRulesConfig config) { - this(config, false); + public RuleImporter(SemanticRulesConfig config, Linguistics linguistics) { + this(config, false, linguistics); } - public RuleImporter(boolean ignoreAutomatas) { - this(null, ignoreAutomatas); + public RuleImporter(boolean ignoreAutomatas, Linguistics linguistics) { + this(null, ignoreAutomatas, linguistics); } - public RuleImporter(boolean ignoreAutomatas, boolean ignoreIncludes) { - this(null, ignoreAutomatas, ignoreIncludes); + public RuleImporter(boolean ignoreAutomatas, boolean ignoreIncludes, Linguistics linguistics) { + this(null, ignoreAutomatas, ignoreIncludes, linguistics); } - public RuleImporter(SemanticRulesConfig config, boolean ignoreAutomatas) { - this.config = config; - this.ignoreAutomatas = ignoreAutomatas; + public RuleImporter(SemanticRulesConfig config, boolean ignoreAutomatas, Linguistics linguistics) { + this(config, ignoreAutomatas, false, linguistics); } - public RuleImporter(SemanticRulesConfig config, boolean ignoreAutomatas, boolean ignoreIncludes) { + public RuleImporter(SemanticRulesConfig config, + boolean ignoreAutomatas, + boolean ignoreIncludes, + Linguistics linguistics) { this.config = config; this.ignoreAutomatas = ignoreAutomatas; this.ignoreIncludes = ignoreIncludes; + this.linguistics = linguistics; } /** @@ -91,33 +88,18 @@ public class RuleImporter { * @throws ParseException if the file does not contain a valid semantic rule set */ public RuleBase importFile(String fileName, String automataFile) throws IOException, ParseException { - return importFile(fileName, automataFile, null); - } - - /** - * Imports semantic rules from a file - * - * @param fileName the rule file to use - * @param automataFile the automata file to use, or null to not use any - * @param ruleBase an existing rule base to import these rules into, or null to create a new - * @throws java.io.IOException if the file can not be read for some reason - * @throws ParseException if the file does not contain a valid semantic rule set - */ - public RuleBase importFile(String fileName, String automataFile, RuleBase ruleBase) throws IOException, ParseException { - ruleBase = privateImportFile(fileName, automataFile, ruleBase); + var ruleBase = privateImportFile(fileName, automataFile); ruleBase.initialize(); return ruleBase; } - public RuleBase privateImportFile(String fileName, String automataFile, RuleBase ruleBase) throws IOException, ParseException { + public RuleBase privateImportFile(String fileName, String automataFile) throws IOException, ParseException { BufferedReader reader = null; try { reader = IOUtils.createReader(fileName, "utf-8"); File file = new File(fileName); String absoluteFileName = file.getAbsolutePath(); - if (ruleBase == null) - ruleBase = new RuleBase(); - ruleBase.setName(stripLastName(file.getName())); + var ruleBase = new RuleBase(stripLastName(file.getName()), linguistics); privateImportFromReader(reader, absoluteFileName, automataFile, ruleBase); return ruleBase; } @@ -157,18 +139,17 @@ public class RuleImporter { /** Returns an unitialized rule base */ private RuleBase privateImportFromDirectory(String ruleBaseName, RuleBase ruleBase) throws IOException, ParseException { - RuleBase include = new RuleBase(); String includeDir = new File(ruleBase.getSource()).getParentFile().getAbsolutePath(); if (!ruleBaseName.endsWith(".sr")) ruleBaseName = ruleBaseName + ".sr"; File importFile = new File(includeDir, ruleBaseName); if ( ! importFile.exists()) throw new IOException("No file named '" + shortenPath(importFile.getPath()) + "'"); - return privateImportFile(importFile.getPath(), null, include); + return privateImportFile(importFile.getPath(), null); } /** Returns an unitialized rule base */ - private RuleBase privateImportFromConfig(String ruleBaseName) throws IOException, ParseException { + private RuleBase privateImportFromConfig(String ruleBaseName) throws ParseException { SemanticRulesConfig.Rulebase ruleBaseConfig = findRuleBaseConfig(config,ruleBaseName); if (ruleBaseConfig == null) ruleBaseConfig = findRuleBaseConfig(config, stripLastName(ruleBaseName)); @@ -224,8 +205,7 @@ public class RuleImporter { /** Imports an unitialized rule base */ public RuleBase privateImportConfig(SemanticRulesConfig.Rulebase ruleBaseConfig) throws ParseException { if (config == null) throw new IllegalStateException("Must initialize with config if importing from config"); - RuleBase ruleBase = new RuleBase(); - ruleBase.setName(ruleBaseConfig.name()); + RuleBase ruleBase = new RuleBase(ruleBaseConfig.name(), linguistics); return privateImportFromReader(new StringReader(ruleBaseConfig.rules()), "semantic-rules.cfg", ruleBaseConfig.automata(),ruleBase); @@ -253,14 +233,10 @@ public class RuleImporter { /** Returns an unitialized rule base */ public RuleBase privateImportFromReader(Reader reader, String sourceName, String automataFile, RuleBase ruleBase) throws ParseException { try { - if (ruleBase == null) { - ruleBase = new RuleBase(); - if (sourceName == null) - sourceName = "anonymous"; - ruleBase.setName(sourceName); - } + if (ruleBase == null) + ruleBase = new RuleBase(sourceName == null ? "anonymous" : sourceName, linguistics); ruleBase.setSource(sourceName.replace('\\', '/')); - new SemanticsParser(reader).semanticRules(ruleBase, this); + new SemanticsParser(reader, linguistics).semanticRules(ruleBase, this); if (automataFile != null && !automataFile.isEmpty()) ruleBase.setAutomataFile(automataFile.replace('\\', '/')); return ruleBase; diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/SemanticSearcher.java b/container-search/src/main/java/com/yahoo/prelude/semantics/SemanticSearcher.java index f9d968a3a4d..a8167fd2001 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/SemanticSearcher.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/SemanticSearcher.java @@ -4,6 +4,7 @@ package com.yahoo.prelude.semantics; import com.google.inject.Inject; import com.yahoo.component.chain.dependencies.After; import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.language.Linguistics; import com.yahoo.prelude.ConfigurationException; import com.yahoo.search.Query; import com.yahoo.search.Result; @@ -13,7 +14,9 @@ import com.yahoo.search.result.ErrorMessage; import com.yahoo.search.searchchain.Execution; import com.yahoo.search.searchchain.PhaseNames; -import java.util.*; +import java.util.Arrays; +import java.util.List; +import java.util.Map; import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING; @@ -38,7 +41,7 @@ public class SemanticSearcher extends Searcher { /** Creates a semantic searcher using the given default rule base */ public SemanticSearcher(RuleBase ruleBase) { - this(Collections.singletonList(ruleBase)); + this(List.of(ruleBase)); defaultRuleBase = ruleBase; } @@ -47,8 +50,8 @@ public class SemanticSearcher extends Searcher { } @Inject - public SemanticSearcher(SemanticRulesConfig config) { - this(toList(config)); + public SemanticSearcher(SemanticRulesConfig config, Linguistics linguistics) { + this(toList(config, linguistics)); } public SemanticSearcher(List ruleBases) { @@ -59,9 +62,9 @@ public class SemanticSearcher extends Searcher { } } - private static List toList(SemanticRulesConfig config) { + private static List toList(SemanticRulesConfig config, Linguistics linguistics) { try { - RuleImporter ruleImporter = new RuleImporter(config); + RuleImporter ruleImporter = new RuleImporter(config, linguistics); List ruleBaseList = new java.util.ArrayList<>(); for (SemanticRulesConfig.Rulebase ruleBaseConfig : config.rulebase()) { RuleBase ruleBase = ruleImporter.importConfig(ruleBaseConfig); diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/RuleBaseBenchmark.java b/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/RuleBaseBenchmark.java index 938d12b271b..75b6e831983 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/RuleBaseBenchmark.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/RuleBaseBenchmark.java @@ -9,6 +9,7 @@ import java.util.ArrayList; import java.util.Date; import java.util.Iterator; +import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.search.Query; import com.yahoo.prelude.semantics.RuleBase; import com.yahoo.prelude.semantics.RuleImporter; @@ -27,7 +28,7 @@ public class RuleBaseBenchmark { fsaFile = null; } } - RuleBase ruleBase = new RuleImporter().importFile(ruleBaseFile,fsaFile); + RuleBase ruleBase = new RuleImporter(new SimpleLinguistics()).importFile(ruleBaseFile, fsaFile); ArrayList queries = new ArrayList<>(); BufferedReader reader = new BufferedReader(new FileReader(queryFile)); String line; @@ -35,7 +36,7 @@ public class RuleBaseBenchmark { queries.add(line); } Date start = new Date(); - for (int i=0;i iter = queries.iterator(); iter.hasNext(); ){ String queryString = iter.next(); Query query = new Query("?query="+queryString); @@ -43,7 +44,7 @@ public class RuleBaseBenchmark { } } Date end = new Date(); - long elapsed = end.getTime()-start.getTime(); + long elapsed = end.getTime() - start.getTime(); System.out.print("BENCHMARK: rulebase=" + ruleBaseFile + "\n fsa=" + fsaFile + "\n queries=" + queryFile + diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleBaseLinguistics.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleBaseLinguistics.java new file mode 100644 index 00000000000..c5519632d6d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleBaseLinguistics.java @@ -0,0 +1,54 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +import com.yahoo.language.Language; +import com.yahoo.language.Linguistics; +import com.yahoo.language.process.StemList; +import com.yahoo.language.process.StemMode; + +import java.util.List; +import java.util.Objects; + +/** + * Linguistics for a rule base + * + * @author bratseth + */ +public class RuleBaseLinguistics { + + private final StemMode stemMode; + private final Language language; + private final Linguistics linguistics; + + /** Creates a rule base with default settings */ + public RuleBaseLinguistics(Linguistics linguistics) { + this(StemMode.BEST, Language.ENGLISH, linguistics); + } + + + public RuleBaseLinguistics(StemMode stemMode, Language language, Linguistics linguistics) { + this.stemMode = Objects.requireNonNull(stemMode); + this.language = Objects.requireNonNull(language); + this.linguistics = Objects.requireNonNull(linguistics); + } + + public RuleBaseLinguistics withStemMode(StemMode stemMode) { + return new RuleBaseLinguistics(stemMode, language, linguistics); + } + + public RuleBaseLinguistics withLanguage(Language language) { + return new RuleBaseLinguistics(stemMode, language, linguistics); + } + + public Linguistics linguistics() { return linguistics; } + + /** Processes this term according to the linguistics of this rule base */ + public String process(String term) { + if (stemMode == StemMode.NONE) return term; + List stems = linguistics.getStemmer().stem(term, StemMode.BEST, language); + if (stems.isEmpty()) return term; + if (stems.get(0).isEmpty()) return term; + return stems.get(0).get(0); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEngine.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEngine.java index e7ed05730cb..dd6610d1184 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEngine.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEngine.java @@ -17,7 +17,7 @@ import java.util.ListIterator; */ public class RuleEngine { - private RuleBase rules; + private final RuleBase rules; public RuleEngine(RuleBase rules) { this.rules=rules; @@ -38,7 +38,6 @@ public class RuleEngine { boolean matchedAnything = false; Evaluation evaluation = new Evaluation(query, traceLevel); - evaluation.setStemming(rules.getStemming()); if (traceLevel >= 2) evaluation.trace(2,"Evaluating query '" + evaluation.getQuery().getModel().getQueryTree().getRoot() + "':"); for (ListIterator i = rules.ruleIterator(); i.hasNext(); ) { diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralCondition.java index 42bf0560726..b85dd892047 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralCondition.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralCondition.java @@ -4,7 +4,7 @@ package com.yahoo.prelude.semantics.rule; import com.yahoo.prelude.semantics.engine.RuleEvaluation; /** - * A condition which is always true, and which has it's own value as return value + * A condition which is always true, and which has its own value as return value * * @author bratseth */ diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamedCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamedCondition.java index b2592a36353..a267d274d5a 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamedCondition.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamedCondition.java @@ -14,9 +14,9 @@ public class NamedCondition { private Condition condition; - public NamedCondition(String name,Condition condition) { - this.conditionName=name; - this.condition=condition; + public NamedCondition(String name, Condition condition) { + this.conditionName = name; + this.condition = condition; } public String getName() { return conditionName; } @@ -28,18 +28,18 @@ public class NamedCondition { public void setCondition(Condition condition) { this.condition = condition; } public boolean matches(RuleEvaluation e) { - if (e.getTraceLevel()>=3) { + if (e.getTraceLevel() >= 3) { e.trace(3,"Evaluating '" + this + "' at " + e.currentItem()); e.indentTrace(); } boolean matches=condition.matches(e); - if (e.getTraceLevel()>=3) { + if (e.getTraceLevel() >= 3) { e.unindentTrace(); if (matches) e.trace(3,"Matched '" + this + "' at " + e.previousItem()); - else if (e.getTraceLevel()>=4) + else if (e.getTraceLevel() >= 4) e.trace(4,"Did not match '" + this + "' at " + e.currentItem()); } return matches; diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamespaceProduction.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamespaceProduction.java index 099a8562ece..e6f32a83dd9 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamespaceProduction.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamespaceProduction.java @@ -18,13 +18,13 @@ public class NamespaceProduction extends Production { private String key; /** The value to set in the namespace */ - private String value=null; + private String value; /** Creates a produced template term with no label and the default type */ - public NamespaceProduction(String namespace,String key,String value) { + public NamespaceProduction(String namespace, String key, String value) { setNamespace(namespace); - this.key=key; - this.value=value; + this.key = key; + this.value = value; } public String getNamespace() { return namespace; } @@ -44,7 +44,7 @@ public class NamespaceProduction extends Production { public void setValue(String value) { this.value = value; } - public void produce(RuleEvaluation e,int offset) { + public void produce(RuleEvaluation e, int offset) { e.getEvaluation().getQuery().properties().set(key, value); } diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReferenceTermProduction.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReferenceTermProduction.java index b36744dc397..af7abf325e7 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReferenceTermProduction.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReferenceTermProduction.java @@ -12,7 +12,7 @@ import com.yahoo.prelude.semantics.engine.RuleEvaluation; import com.yahoo.protect.Validator; /** - * A term produced by a production rule which takes it's actual term value + * A term produced by a production rule which takes its actual term value * from one or more terms matched in the condition * * @author bratseth diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermCondition.java index 38d1fc9b83b..6e2d3de7d08 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermCondition.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermCondition.java @@ -3,6 +3,7 @@ package com.yahoo.prelude.semantics.rule; import com.yahoo.prelude.query.TermItem; import com.yahoo.prelude.semantics.engine.NameSpace; +import com.yahoo.prelude.semantics.engine.RuleBaseLinguistics; import com.yahoo.prelude.semantics.engine.RuleEvaluation; /** @@ -12,39 +13,38 @@ import com.yahoo.prelude.semantics.engine.RuleEvaluation; */ public class TermCondition extends Condition { - private String term, termPlusS; + private final RuleBaseLinguistics linguistics; + private String originalTerm; + private String term; - /** Creates an invalid term */ - public TermCondition() { } - - public TermCondition(String term) { - this(null,term); + public TermCondition(String term, RuleBaseLinguistics linguistics) { + this(null, term, linguistics); } - public TermCondition(String label, String term) { + public TermCondition(String label, String term, RuleBaseLinguistics linguistics) { super(label); - this.term = term; - termPlusS = term + "s"; + this.linguistics = linguistics; + this.originalTerm = term; + this.term = linguistics.process(term); } public String getTerm() { return term; } public void setTerm(String term) { this.term = term; - termPlusS = term + "s"; } protected boolean doesMatch(RuleEvaluation e) { // TODO: Move this into the respective namespaces when query becomes one */ if (getNameSpace() != null) { NameSpace nameSpace = e.getEvaluation().getNameSpace(getNameSpace()); - return nameSpace.matches(term, e); + return nameSpace.matches(originalTerm, e); // No processing of terms in namespaces } else { if (e.currentItem() == null) return false; if ( ! labelMatches(e)) return false; - String matchedValue = termMatches(e.currentItem().getItem(), e.getEvaluation().getStemming()); + String matchedValue = termMatches(e.currentItem().getItem()); boolean matches = matchedValue!=null && labelMatches(e.currentItem().getItem(), e); if ((matches && !e.isInNegation() || (!matches && e.isInNegation()))) { e.addMatch(e.currentItem(), matchedValue); @@ -56,31 +56,9 @@ public class TermCondition extends Condition { } /** Returns a non-null replacement term if there is a match, null otherwise */ - private String termMatches(TermItem queryTerm, boolean stemming) { - String queryTermString = queryTerm.stringValue(); - - // The terms are the same - boolean matches = queryTermString.equals(term); - if (matches) return term; - - if (stemming) - if (termMatchesWithStemming(queryTermString)) return term; - - return null; - } - - private boolean termMatchesWithStemming(String queryTermString) { - if (queryTermString.length() < 3) return false; // Don't stem very short terms - - // The query term minus s is the same - boolean matches = queryTermString.equals(termPlusS); - if (matches) return true; - - // The query term plus s is the same - matches = term.equals(queryTermString + "s"); - if (matches) return true; - - return false; + private String termMatches(TermItem queryTerm) { + boolean matches = linguistics.process(queryTerm.stringValue()).equals(term); + return matches ? term : null; } public String toInnerString() { -- cgit v1.2.3