diff options
author | Jon Bratseth <bratseth@gmail.com> | 2022-01-10 19:24:03 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2022-01-10 19:24:03 +0100 |
commit | 0d095ccb083e66c99701bf0e2186cd0913227b58 (patch) | |
tree | 920508b9106035a9a26cb2f1be6badc2fb1c417f /container-search | |
parent | 75852e3ce2a075c73c0845a8000df4db4c1f7260 (diff) |
Stem by linguistics in rule bases
Also add a @language directive to stem in languages other than English.
Diffstat (limited to 'container-search')
28 files changed, 499 insertions, 337 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBase.java b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBase.java index 2b8515b6db8..8e137d99951 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBase.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleBase.java @@ -1,19 +1,34 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.prelude.semantics; +import com.yahoo.language.Language; +import com.yahoo.language.Linguistics; +import com.yahoo.language.process.StemMode; +import com.yahoo.prelude.semantics.engine.RuleBaseLinguistics; +import com.yahoo.prelude.semantics.rule.CompositeCondition; +import com.yahoo.prelude.semantics.rule.Condition; +import com.yahoo.prelude.semantics.rule.NamedCondition; +import com.yahoo.prelude.semantics.rule.ProductionRule; +import com.yahoo.prelude.semantics.rule.SuperCondition; import com.yahoo.search.Query; import com.yahoo.prelude.querytransform.PhraseMatcher; import com.yahoo.prelude.semantics.engine.RuleEngine; import com.yahoo.prelude.semantics.parser.ParseException; -import com.yahoo.prelude.semantics.rule.*; import com.yahoo.protect.Validator; import java.io.File; -import java.util.*; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.ListIterator; +import java.util.Map; +import java.util.Set; +import java.util.StringTokenizer; /** - * A set of semantic production rules and named conditions used to analyze - * and rewrite queries + * A set of semantic production rules and named conditions used to analyze and rewrite queries * * @author bratseth */ @@ -26,7 +41,7 @@ public class RuleBase { private String source; /** The name of the automata file used, or null if none */ - protected String automataFileName = null; + private String automataFileName = null; 
/** * True if this rule base is default. @@ -61,29 +76,26 @@ public class RuleBase { */ private boolean usesAutomata = false; - /** Should we allow stemmed matches? */ - private boolean stemming = true; - - /** Creates an empty rule base. TODO: Disallow */ - public RuleBase() { - } + private RuleBaseLinguistics linguistics; /** Creates an empty rule base */ - public RuleBase(String name) { - setName(name); + public RuleBase(String name, Linguistics linguistics) { + this.name = name; + this.linguistics = new RuleBaseLinguistics(StemMode.BEST, Language.ENGLISH, linguistics); } /** - * Creates a rule base from a file + * Creates a rule base from file * - * @param ruleFile the rule file to read. The name of the file (minus path) becomes the rule base name + * @param ruleFile the rule file to read. The name of the file (minus path) becomes the rule base name. * @param automataFile the automata file, or null to not use an automata * @throws java.io.IOException if there is a problem reading one of the files * @throws ParseException if the rule file can not be parsed correctly * @throws RuleBaseException if the rule file contains inconsistencies */ - public static RuleBase createFromFile(String ruleFile, String automataFile) throws java.io.IOException, ParseException { - return new RuleImporter().importFile(ruleFile, automataFile); + public static RuleBase createFromFile(String ruleFile, String automataFile, Linguistics linguistics) + throws java.io.IOException, ParseException { + return new RuleImporter(linguistics).importFile(ruleFile, automataFile); } /** @@ -96,18 +108,13 @@ public class RuleBase { * @throws com.yahoo.prelude.semantics.parser.ParseException if the rule file can not be parsed correctly * @throws com.yahoo.prelude.semantics.RuleBaseException if the rule file contains inconsistencies */ - public static RuleBase createFromString(String name, String ruleString, String automataFile) throws java.io.IOException, ParseException { - RuleBase base = new 
RuleImporter().importString(ruleString, automataFile, new RuleBase()); + public static RuleBase createFromString(String name, String ruleString, String automataFile, Linguistics linguistics) + throws java.io.IOException, ParseException { + RuleBase base = new RuleImporter(linguistics).importString(ruleString, automataFile); base.setName(name); return base; } - /** Set to true to enable stemmed matches. True by default */ - public void setStemming(boolean stemming) { this.stemming = stemming; } - - /** Returns whether stemmed matches are allowed. True by default */ - public boolean getStemming() { return stemming; } - /** * <p>Include another rule base into this. This <b>transfers ownership</b> * of the given rule base - it can not be subsequently used for any purpose @@ -171,7 +178,7 @@ public class RuleBase { resolveSuper(condition, superCondition); } - private void resolveSuper(Condition condition,Condition superCondition) { + private void resolveSuper(Condition condition, Condition superCondition) { if (condition instanceof SuperCondition) { ((SuperCondition)condition).setCondition(superCondition); } @@ -336,7 +343,7 @@ public class RuleBase { // TODO: Values are not added right now protected void annotatePhrase(PhraseMatcher.Phrase phrase,Query query,int traceLevel) { - for (StringTokenizer tokens = new StringTokenizer(phrase.getData(),"|",false) ; tokens.hasMoreTokens(); ) { + for (StringTokenizer tokens = new StringTokenizer(phrase.getData(), "|", false); tokens.hasMoreTokens(); ) { String token = tokens.nextToken(); int semicolonIndex = token.indexOf(";"); String annotation = token; diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/RuleImporter.java b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleImporter.java index 45569050882..acbf9a7ffb6 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/RuleImporter.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/RuleImporter.java @@ -10,8 
+10,9 @@ import java.util.Arrays; import java.util.List; import com.yahoo.io.IOUtils; -import com.yahoo.io.reader.NamedReader; -import com.yahoo.prelude.semantics.parser.*; +import com.yahoo.language.Linguistics; +import com.yahoo.prelude.semantics.parser.ParseException; +import com.yahoo.prelude.semantics.parser.SemanticsParser; /** * Imports rule bases from various sources. @@ -24,51 +25,47 @@ import com.yahoo.prelude.semantics.parser.*; // rule bases included into others, while neither the rule base or the parser knows. public class RuleImporter { - /** - * If this is set, imported rule bases are looked up in this config - * otherwise, they are looked up as files - */ - private SemanticRulesConfig config; + /** If this is set, imported rule bases are looked up in this config otherwise, they are looked up as files. */ + private final SemanticRulesConfig config; - /** - * Ignore requests to read automata files. - * Useful to validate rule bases without having automatas present - */ - private boolean ignoreAutomatas; + /** Ignore requests to read automata files. Useful to validate rule bases without having automatas present. */ + private final boolean ignoreAutomatas; - /** - * Ignore requests to include files. - * Useful to validate rule bases one by one in config - */ - private boolean ignoreIncludes = false; + /** Ignore requests to include files. Useful to validate rule bases one by one in config. 
*/ + private final boolean ignoreIncludes; + + private Linguistics linguistics; /** Create a rule importer which will read from file */ - public RuleImporter() { - this(null, false); + public RuleImporter(Linguistics linguistics) { + this(null, false, linguistics); } /** Create a rule importer which will read from a config object */ - public RuleImporter(SemanticRulesConfig config) { - this(config, false); + public RuleImporter(SemanticRulesConfig config, Linguistics linguistics) { + this(config, false, linguistics); } - public RuleImporter(boolean ignoreAutomatas) { - this(null, ignoreAutomatas); + public RuleImporter(boolean ignoreAutomatas, Linguistics linguistics) { + this(null, ignoreAutomatas, linguistics); } - public RuleImporter(boolean ignoreAutomatas, boolean ignoreIncludes) { - this(null, ignoreAutomatas, ignoreIncludes); + public RuleImporter(boolean ignoreAutomatas, boolean ignoreIncludes, Linguistics linguistics) { + this(null, ignoreAutomatas, ignoreIncludes, linguistics); } - public RuleImporter(SemanticRulesConfig config, boolean ignoreAutomatas) { - this.config = config; - this.ignoreAutomatas = ignoreAutomatas; + public RuleImporter(SemanticRulesConfig config, boolean ignoreAutomatas, Linguistics linguistics) { + this(config, ignoreAutomatas, false, linguistics); } - public RuleImporter(SemanticRulesConfig config, boolean ignoreAutomatas, boolean ignoreIncludes) { + public RuleImporter(SemanticRulesConfig config, + boolean ignoreAutomatas, + boolean ignoreIncludes, + Linguistics linguistics) { this.config = config; this.ignoreAutomatas = ignoreAutomatas; this.ignoreIncludes = ignoreIncludes; + this.linguistics = linguistics; } /** @@ -91,33 +88,18 @@ public class RuleImporter { * @throws ParseException if the file does not contain a valid semantic rule set */ public RuleBase importFile(String fileName, String automataFile) throws IOException, ParseException { - return importFile(fileName, automataFile, null); - } - - /** - * Imports semantic 
rules from a file - * - * @param fileName the rule file to use - * @param automataFile the automata file to use, or null to not use any - * @param ruleBase an existing rule base to import these rules into, or null to create a new - * @throws java.io.IOException if the file can not be read for some reason - * @throws ParseException if the file does not contain a valid semantic rule set - */ - public RuleBase importFile(String fileName, String automataFile, RuleBase ruleBase) throws IOException, ParseException { - ruleBase = privateImportFile(fileName, automataFile, ruleBase); + var ruleBase = privateImportFile(fileName, automataFile); ruleBase.initialize(); return ruleBase; } - public RuleBase privateImportFile(String fileName, String automataFile, RuleBase ruleBase) throws IOException, ParseException { + public RuleBase privateImportFile(String fileName, String automataFile) throws IOException, ParseException { BufferedReader reader = null; try { reader = IOUtils.createReader(fileName, "utf-8"); File file = new File(fileName); String absoluteFileName = file.getAbsolutePath(); - if (ruleBase == null) - ruleBase = new RuleBase(); - ruleBase.setName(stripLastName(file.getName())); + var ruleBase = new RuleBase(stripLastName(file.getName()), linguistics); privateImportFromReader(reader, absoluteFileName, automataFile, ruleBase); return ruleBase; } @@ -157,18 +139,17 @@ public class RuleImporter { /** Returns an unitialized rule base */ private RuleBase privateImportFromDirectory(String ruleBaseName, RuleBase ruleBase) throws IOException, ParseException { - RuleBase include = new RuleBase(); String includeDir = new File(ruleBase.getSource()).getParentFile().getAbsolutePath(); if (!ruleBaseName.endsWith(".sr")) ruleBaseName = ruleBaseName + ".sr"; File importFile = new File(includeDir, ruleBaseName); if ( ! 
importFile.exists()) throw new IOException("No file named '" + shortenPath(importFile.getPath()) + "'"); - return privateImportFile(importFile.getPath(), null, include); + return privateImportFile(importFile.getPath(), null); } /** Returns an unitialized rule base */ - private RuleBase privateImportFromConfig(String ruleBaseName) throws IOException, ParseException { + private RuleBase privateImportFromConfig(String ruleBaseName) throws ParseException { SemanticRulesConfig.Rulebase ruleBaseConfig = findRuleBaseConfig(config,ruleBaseName); if (ruleBaseConfig == null) ruleBaseConfig = findRuleBaseConfig(config, stripLastName(ruleBaseName)); @@ -224,8 +205,7 @@ public class RuleImporter { /** Imports an unitialized rule base */ public RuleBase privateImportConfig(SemanticRulesConfig.Rulebase ruleBaseConfig) throws ParseException { if (config == null) throw new IllegalStateException("Must initialize with config if importing from config"); - RuleBase ruleBase = new RuleBase(); - ruleBase.setName(ruleBaseConfig.name()); + RuleBase ruleBase = new RuleBase(ruleBaseConfig.name(), linguistics); return privateImportFromReader(new StringReader(ruleBaseConfig.rules()), "semantic-rules.cfg", ruleBaseConfig.automata(),ruleBase); @@ -253,14 +233,10 @@ public class RuleImporter { /** Returns an unitialized rule base */ public RuleBase privateImportFromReader(Reader reader, String sourceName, String automataFile, RuleBase ruleBase) throws ParseException { try { - if (ruleBase == null) { - ruleBase = new RuleBase(); - if (sourceName == null) - sourceName = "anonymous"; - ruleBase.setName(sourceName); - } + if (ruleBase == null) + ruleBase = new RuleBase(sourceName == null ? 
"anonymous" : sourceName, linguistics); ruleBase.setSource(sourceName.replace('\\', '/')); - new SemanticsParser(reader).semanticRules(ruleBase, this); + new SemanticsParser(reader, linguistics).semanticRules(ruleBase, this); if (automataFile != null && !automataFile.isEmpty()) ruleBase.setAutomataFile(automataFile.replace('\\', '/')); return ruleBase; diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/SemanticSearcher.java b/container-search/src/main/java/com/yahoo/prelude/semantics/SemanticSearcher.java index f9d968a3a4d..a8167fd2001 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/SemanticSearcher.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/SemanticSearcher.java @@ -4,6 +4,7 @@ package com.yahoo.prelude.semantics; import com.google.inject.Inject; import com.yahoo.component.chain.dependencies.After; import com.yahoo.component.chain.dependencies.Before; +import com.yahoo.language.Linguistics; import com.yahoo.prelude.ConfigurationException; import com.yahoo.search.Query; import com.yahoo.search.Result; @@ -13,7 +14,9 @@ import com.yahoo.search.result.ErrorMessage; import com.yahoo.search.searchchain.Execution; import com.yahoo.search.searchchain.PhaseNames; -import java.util.*; +import java.util.Arrays; +import java.util.List; +import java.util.Map; import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING; @@ -38,7 +41,7 @@ public class SemanticSearcher extends Searcher { /** Creates a semantic searcher using the given default rule base */ public SemanticSearcher(RuleBase ruleBase) { - this(Collections.singletonList(ruleBase)); + this(List.of(ruleBase)); defaultRuleBase = ruleBase; } @@ -47,8 +50,8 @@ public class SemanticSearcher extends Searcher { } @Inject - public SemanticSearcher(SemanticRulesConfig config) { - this(toList(config)); + public SemanticSearcher(SemanticRulesConfig config, Linguistics linguistics) { + this(toList(config, linguistics)); } public 
SemanticSearcher(List<RuleBase> ruleBases) { @@ -59,9 +62,9 @@ public class SemanticSearcher extends Searcher { } } - private static List<RuleBase> toList(SemanticRulesConfig config) { + private static List<RuleBase> toList(SemanticRulesConfig config, Linguistics linguistics) { try { - RuleImporter ruleImporter = new RuleImporter(config); + RuleImporter ruleImporter = new RuleImporter(config, linguistics); List<RuleBase> ruleBaseList = new java.util.ArrayList<>(); for (SemanticRulesConfig.Rulebase ruleBaseConfig : config.rulebase()) { RuleBase ruleBase = ruleImporter.importConfig(ruleBaseConfig); diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/RuleBaseBenchmark.java b/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/RuleBaseBenchmark.java index 938d12b271b..75b6e831983 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/RuleBaseBenchmark.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/benchmark/RuleBaseBenchmark.java @@ -9,6 +9,7 @@ import java.util.ArrayList; import java.util.Date; import java.util.Iterator; +import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.search.Query; import com.yahoo.prelude.semantics.RuleBase; import com.yahoo.prelude.semantics.RuleImporter; @@ -27,7 +28,7 @@ public class RuleBaseBenchmark { fsaFile = null; } } - RuleBase ruleBase = new RuleImporter().importFile(ruleBaseFile,fsaFile); + RuleBase ruleBase = new RuleImporter(new SimpleLinguistics()).importFile(ruleBaseFile, fsaFile); ArrayList<String> queries = new ArrayList<>(); BufferedReader reader = new BufferedReader(new FileReader(queryFile)); String line; @@ -35,7 +36,7 @@ public class RuleBaseBenchmark { queries.add(line); } Date start = new Date(); - for (int i=0;i<iterations;i++){ + for (int i=0; i<iterations; i++){ for (Iterator<String> iter = queries.iterator(); iter.hasNext(); ){ String queryString = iter.next(); Query query = new 
Query("?query="+queryString); @@ -43,7 +44,7 @@ public class RuleBaseBenchmark { } } Date end = new Date(); - long elapsed = end.getTime()-start.getTime(); + long elapsed = end.getTime() - start.getTime(); System.out.print("BENCHMARK: rulebase=" + ruleBaseFile + "\n fsa=" + fsaFile + "\n queries=" + queryFile + diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleBaseLinguistics.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleBaseLinguistics.java new file mode 100644 index 00000000000..c5519632d6d --- /dev/null +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleBaseLinguistics.java @@ -0,0 +1,54 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.engine; + +import com.yahoo.language.Language; +import com.yahoo.language.Linguistics; +import com.yahoo.language.process.StemList; +import com.yahoo.language.process.StemMode; + +import java.util.List; +import java.util.Objects; + +/** + * Linguistics for a rule base + * + * @author bratseth + */ +public class RuleBaseLinguistics { + + private final StemMode stemMode; + private final Language language; + private final Linguistics linguistics; + + /** Creates a rule base with default settings */ + public RuleBaseLinguistics(Linguistics linguistics) { + this(StemMode.BEST, Language.ENGLISH, linguistics); + } + + + public RuleBaseLinguistics(StemMode stemMode, Language language, Linguistics linguistics) { + this.stemMode = Objects.requireNonNull(stemMode); + this.language = Objects.requireNonNull(language); + this.linguistics = Objects.requireNonNull(linguistics); + } + + public RuleBaseLinguistics withStemMode(StemMode stemMode) { + return new RuleBaseLinguistics(stemMode, language, linguistics); + } + + public RuleBaseLinguistics withLanguage(Language language) { + return new RuleBaseLinguistics(stemMode, language, linguistics); + } + + public 
Linguistics linguistics() { return linguistics; } + + /** Processes this term according to the linguistics of this rule base */ + public String process(String term) { + if (stemMode == StemMode.NONE) return term; + List<StemList> stems = linguistics.getStemmer().stem(term, StemMode.BEST, language); + if (stems.isEmpty()) return term; + if (stems.get(0).isEmpty()) return term; + return stems.get(0).get(0); + } + +} diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEngine.java b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEngine.java index e7ed05730cb..dd6610d1184 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEngine.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/engine/RuleEngine.java @@ -17,7 +17,7 @@ import java.util.ListIterator; */ public class RuleEngine { - private RuleBase rules; + private final RuleBase rules; public RuleEngine(RuleBase rules) { this.rules=rules; @@ -38,7 +38,6 @@ public class RuleEngine { boolean matchedAnything = false; Evaluation evaluation = new Evaluation(query, traceLevel); - evaluation.setStemming(rules.getStemming()); if (traceLevel >= 2) evaluation.trace(2,"Evaluating query '" + evaluation.getQuery().getModel().getQueryTree().getRoot() + "':"); for (ListIterator<ProductionRule> i = rules.ruleIterator(); i.hasNext(); ) { diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralCondition.java index 42bf0560726..b85dd892047 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralCondition.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/LiteralCondition.java @@ -4,7 +4,7 @@ package com.yahoo.prelude.semantics.rule; import com.yahoo.prelude.semantics.engine.RuleEvaluation; /** - * A condition which is always true, and which has it's own value as return 
value + * A condition which is always true, and which has its own value as return value * * @author bratseth */ diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamedCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamedCondition.java index b2592a36353..a267d274d5a 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamedCondition.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamedCondition.java @@ -14,9 +14,9 @@ public class NamedCondition { private Condition condition; - public NamedCondition(String name,Condition condition) { - this.conditionName=name; - this.condition=condition; + public NamedCondition(String name, Condition condition) { + this.conditionName = name; + this.condition = condition; } public String getName() { return conditionName; } @@ -28,18 +28,18 @@ public class NamedCondition { public void setCondition(Condition condition) { this.condition = condition; } public boolean matches(RuleEvaluation e) { - if (e.getTraceLevel()>=3) { + if (e.getTraceLevel() >= 3) { e.trace(3,"Evaluating '" + this + "' at " + e.currentItem()); e.indentTrace(); } boolean matches=condition.matches(e); - if (e.getTraceLevel()>=3) { + if (e.getTraceLevel() >= 3) { e.unindentTrace(); if (matches) e.trace(3,"Matched '" + this + "' at " + e.previousItem()); - else if (e.getTraceLevel()>=4) + else if (e.getTraceLevel() >= 4) e.trace(4,"Did not match '" + this + "' at " + e.currentItem()); } return matches; diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamespaceProduction.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamespaceProduction.java index 099a8562ece..e6f32a83dd9 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamespaceProduction.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/NamespaceProduction.java @@ -18,13 +18,13 @@ public class 
NamespaceProduction extends Production { private String key; /** The value to set in the namespace */ - private String value=null; + private String value; /** Creates a produced template term with no label and the default type */ - public NamespaceProduction(String namespace,String key,String value) { + public NamespaceProduction(String namespace, String key, String value) { setNamespace(namespace); - this.key=key; - this.value=value; + this.key = key; + this.value = value; } public String getNamespace() { return namespace; } @@ -44,7 +44,7 @@ public class NamespaceProduction extends Production { public void setValue(String value) { this.value = value; } - public void produce(RuleEvaluation e,int offset) { + public void produce(RuleEvaluation e, int offset) { e.getEvaluation().getQuery().properties().set(key, value); } diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReferenceTermProduction.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReferenceTermProduction.java index b36744dc397..af7abf325e7 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReferenceTermProduction.java +++ b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/ReferenceTermProduction.java @@ -12,7 +12,7 @@ import com.yahoo.prelude.semantics.engine.RuleEvaluation; import com.yahoo.protect.Validator; /** - * A term produced by a production rule which takes it's actual term value + * A term produced by a production rule which takes its actual term value * from one or more terms matched in the condition * * @author bratseth diff --git a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermCondition.java b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermCondition.java index 38d1fc9b83b..6e2d3de7d08 100644 --- a/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermCondition.java +++ 
b/container-search/src/main/java/com/yahoo/prelude/semantics/rule/TermCondition.java @@ -3,6 +3,7 @@ package com.yahoo.prelude.semantics.rule; import com.yahoo.prelude.query.TermItem; import com.yahoo.prelude.semantics.engine.NameSpace; +import com.yahoo.prelude.semantics.engine.RuleBaseLinguistics; import com.yahoo.prelude.semantics.engine.RuleEvaluation; /** @@ -12,39 +13,38 @@ import com.yahoo.prelude.semantics.engine.RuleEvaluation; */ public class TermCondition extends Condition { - private String term, termPlusS; + private final RuleBaseLinguistics linguistics; + private String originalTerm; + private String term; - /** Creates an invalid term */ - public TermCondition() { } - - public TermCondition(String term) { - this(null,term); + public TermCondition(String term, RuleBaseLinguistics linguistics) { + this(null, term, linguistics); } - public TermCondition(String label, String term) { + public TermCondition(String label, String term, RuleBaseLinguistics linguistics) { super(label); - this.term = term; - termPlusS = term + "s"; + this.linguistics = linguistics; + this.originalTerm = term; + this.term = linguistics.process(term); } public String getTerm() { return term; } public void setTerm(String term) { this.term = term; - termPlusS = term + "s"; } protected boolean doesMatch(RuleEvaluation e) { // TODO: Move this into the respective namespaces when query becomes one */ if (getNameSpace() != null) { NameSpace nameSpace = e.getEvaluation().getNameSpace(getNameSpace()); - return nameSpace.matches(term, e); + return nameSpace.matches(originalTerm, e); // No processing of terms in namespaces } else { if (e.currentItem() == null) return false; if ( ! 
labelMatches(e)) return false; - String matchedValue = termMatches(e.currentItem().getItem(), e.getEvaluation().getStemming()); + String matchedValue = termMatches(e.currentItem().getItem()); boolean matches = matchedValue!=null && labelMatches(e.currentItem().getItem(), e); if ((matches && !e.isInNegation() || (!matches && e.isInNegation()))) { e.addMatch(e.currentItem(), matchedValue); @@ -56,31 +56,9 @@ public class TermCondition extends Condition { } /** Returns a non-null replacement term if there is a match, null otherwise */ - private String termMatches(TermItem queryTerm, boolean stemming) { - String queryTermString = queryTerm.stringValue(); - - // The terms are the same - boolean matches = queryTermString.equals(term); - if (matches) return term; - - if (stemming) - if (termMatchesWithStemming(queryTermString)) return term; - - return null; - } - - private boolean termMatchesWithStemming(String queryTermString) { - if (queryTermString.length() < 3) return false; // Don't stem very short terms - - // The query term minus s is the same - boolean matches = queryTermString.equals(termPlusS); - if (matches) return true; - - // The query term plus s is the same - matches = term.equals(queryTermString + "s"); - if (matches) return true; - - return false; + private String termMatches(TermItem queryTerm) { + boolean matches = linguistics.process(queryTerm.stringValue()).equals(term); + return matches ? term : null; } public String toInnerString() { diff --git a/container-search/src/main/java/com/yahoo/search/result/FeatureData.java b/container-search/src/main/java/com/yahoo/search/result/FeatureData.java index 72a8b02a960..b1d64329927 100644 --- a/container-search/src/main/java/com/yahoo/search/result/FeatureData.java +++ b/container-search/src/main/java/com/yahoo/search/result/FeatureData.java @@ -23,6 +23,8 @@ import java.util.Set; /** * A wrapper for structured data representing feature values: A map of floats and tensors. 
* This class is immutable but not thread safe. + * + * @author bratseth */ public class FeatureData implements Inspectable, JsonProducer { diff --git a/container-search/src/main/javacc/com/yahoo/prelude/semantics/parser/SemanticsParser.jj b/container-search/src/main/javacc/com/yahoo/prelude/semantics/parser/SemanticsParser.jj index d79f78ef896..46117374e59 100644 --- a/container-search/src/main/javacc/com/yahoo/prelude/semantics/parser/SemanticsParser.jj +++ b/container-search/src/main/javacc/com/yahoo/prelude/semantics/parser/SemanticsParser.jj @@ -6,7 +6,6 @@ options { CACHE_TOKENS = true; DEBUG_PARSER = false; ERROR_REPORTING = true; - STATIC = false; UNICODE_INPUT = true; } @@ -15,12 +14,23 @@ PARSER_BEGIN(SemanticsParser) package com.yahoo.prelude.semantics.parser; import com.yahoo.javacc.UnicodeUtilities; +import com.yahoo.language.process.StemMode; +import com.yahoo.language.Linguistics; +import com.yahoo.language.Language; import com.yahoo.prelude.semantics.*; import com.yahoo.prelude.semantics.rule.*; +import com.yahoo.prelude.semantics.engine.RuleBaseLinguistics; import com.yahoo.prelude.query.TermType; public class SemanticsParser { + private RuleBaseLinguistics linguistics; + + public SemanticsParser(java.io.Reader stream, Linguistics linguistics) { + this(stream); + this.linguistics = new RuleBaseLinguistics(linguistics); + } + } PARSER_END(SemanticsParser) @@ -77,6 +87,7 @@ TOKEN : <SMALLER: "<"> | <SMALLEREQUALS: "<="> | <STEMMINGDIRECTIVE: "@stemming"> | + <LANGUAGEDIRECTIVE: "@language"> | <SUPERDIRECTIVE: "@super"> | <IDENTIFIER: (~[ "\u0000"-"\u002f","\u003a"-"\u003f","\u005b"-"\u005d","\u007b"-"\u00a7","\u00a9","\u00ab"-"\u00ae","\u00b0"-"\u00b3","\u00b6"-"\u00b7","\u00b9","\u00bb"-"\u00bf", @@ -114,16 +125,20 @@ RuleBase semanticRules(RuleBase rules,RuleImporter importer) : // ---------------------------------- Directive --------------------------------------- -RuleBase directive(RuleBase rules,RuleImporter importer) : +RuleBase 
directive(RuleBase rules, RuleImporter importer) : { String name; } { - ( includeDirective(rules,importer) | defaultDirective(rules) | automataDirective(rules,importer) | stemmingDirective(rules) ) + ( includeDirective(rules, importer) | + defaultDirective(rules) | + automataDirective(rules, importer) | + stemmingDirective(rules) | + languageDirective(rules) ) { return rules; } } -void includeDirective(RuleBase rules,RuleImporter importer) : +void includeDirective(RuleBase rules, RuleImporter importer) : { String name; } @@ -131,25 +146,24 @@ void includeDirective(RuleBase rules,RuleImporter importer) : <INCLUDEDIRECTIVE> <LEFTBRACE> name=stringOrLiteral() <RIGHTBRACE> (<SEMICOLON>)? { try { - importer.include(name,rules); + importer.include(name, rules); } catch (java.io.IOException e) { - ParseException ep=new ParseException("Could not read included rule base '" + - name + "'"); + ParseException ep=new ParseException("Could not read included rule base '" + name + "'"); ep.initCause(e); throw ep; } } } -void automataDirective(RuleBase rules,RuleImporter importer) : +void automataDirective(RuleBase rules, RuleImporter importer) : { String name; } { - <AUTOMATADIRECTIVE> <LEFTBRACE> name=stringOrLiteral() <RIGHTBRACE> (<SEMICOLON>)? + <AUTOMATADIRECTIVE> <LEFTBRACE> name = stringOrLiteral() <RIGHTBRACE> (<SEMICOLON>)? { - importer.setAutomata(rules,name); + importer.setAutomata(rules, name); } } @@ -168,9 +182,20 @@ void stemmingDirective(RuleBase rules) : String booleanString; } { - <STEMMINGDIRECTIVE> <LEFTBRACE> booleanString=stringOrLiteral() <RIGHTBRACE> (<SEMICOLON>)? + <STEMMINGDIRECTIVE> <LEFTBRACE> booleanString = stringOrLiteral() <RIGHTBRACE> (<SEMICOLON>)? + { + linguistics = linguistics.withStemMode(Boolean.parseBoolean(booleanString) ? StemMode.BEST : StemMode.NONE); + } +} + +void languageDirective(RuleBase rules) : +{ + String languageString; +} +{ + <LANGUAGEDIRECTIVE> <LEFTBRACE> languageString = stringOrLiteral() <RIGHTBRACE> (<SEMICOLON>)? 
{ - rules.setStemming(Boolean.parseBoolean(booleanString)); + linguistics = linguistics.withLanguage(Language.from(languageString)); } } @@ -183,10 +208,10 @@ void productionRule(RuleBase rules) : ProductionList production=null; } { - condition=topLevelCondition() rule=productionRuleType() ( production=productionList() )? <SEMICOLON> + condition = topLevelCondition() rule = productionRuleType() ( production = productionList() )? <SEMICOLON> { rule.setCondition(condition); - if (production!=null) rule.setProduction(production); + if (production != null) rule.setProduction(production); rules.addRule(rule); } } @@ -201,16 +226,16 @@ ProductionRule productionRuleType() : ProductionList productionList() : { - ProductionList productionList=new ProductionList(); + ProductionList productionList = new ProductionList(); Production production; int weight=100; } { - ( production=production() (<EXCLAMATION> weight=number())? + ( production = production() (<EXCLAMATION> weight = number())? { production.setWeight(weight); productionList.addProduction(production); - weight=100; + weight = 100; } (<NL>)* ) + { return productionList; } @@ -221,7 +246,7 @@ Production production() : Production production; } { - ( LOOKAHEAD(2) production=namespaceProduction() | production=termProduction() ) + ( LOOKAHEAD(2) production = namespaceProduction() | production = termProduction() ) { return production; } } @@ -229,12 +254,12 @@ TermProduction termProduction() : { TermProduction termProduction; TermType termType; - String label=null; + String label = null; } { - termType=termType() - ( LOOKAHEAD(2) label=label() )? - ( termProduction=nonphraseTermProduction() | termProduction=phraseProduction() ) + termType = termType() + ( LOOKAHEAD(2) label = label() )? 
+ ( termProduction = nonphraseTermProduction() | termProduction = phraseProduction() ) { termProduction.setLabel(label); @@ -248,8 +273,8 @@ TermProduction nonphraseTermProduction() : TermProduction termProduction; } { - ( termProduction=referenceTermProduction() | - termProduction=literalTermProduction() ) + ( termProduction = referenceTermProduction() | + termProduction = literalTermProduction() ) { return termProduction; } @@ -257,14 +282,14 @@ TermProduction nonphraseTermProduction() : LiteralPhraseProduction phraseProduction() : { - LiteralPhraseProduction phraseProduction=new LiteralPhraseProduction(); - String term=null; + LiteralPhraseProduction phraseProduction = new LiteralPhraseProduction(); + String term = null; } { <QUOTE> ( - term=identifier() + term = identifier() { phraseProduction.addTerm(term); } )+ <QUOTE> @@ -277,11 +302,11 @@ NamespaceProduction namespaceProduction() : { String namespace; String key; - String value=null; + String value = null; } { - namespace=identifier() <DOT> key=stringOrLiteral() <EQUALS> value=identifierOrLiteral() - { return new NamespaceProduction(namespace,key,value); } + namespace = identifier() <DOT> key = stringOrLiteral() <EQUALS> value = identifierOrLiteral() + { return new NamespaceProduction(namespace, key, value); } } ReferenceTermProduction referenceTermProduction() : @@ -289,7 +314,7 @@ ReferenceTermProduction referenceTermProduction() : String reference; } { - <LEFTSQUAREBRACKET> reference=referenceIdentifier() <RIGHTSQUAREBRACKET> + <LEFTSQUAREBRACKET> reference = referenceIdentifier() <RIGHTSQUAREBRACKET> { return new ReferenceTermProduction(reference); } } @@ -298,7 +323,7 @@ LiteralTermProduction literalTermProduction() : String literal; } { - literal=identifier() + literal = identifier() { return new LiteralTermProduction(literal); } } @@ -319,7 +344,7 @@ String referenceIdentifier() : String reference; } { - ( reference=identifier() { return reference; } ) + ( reference = identifier() { return reference; 
} ) | ( <ELLIPSIS> { return "..."; } ) } @@ -332,25 +357,25 @@ void namedCondition(RuleBase rules) : Condition condition; } { - <LEFTSQUAREBRACKET> conditionName=identifier() <RIGHTSQUAREBRACKET> <CONDITION> condition=topLevelCondition() <SEMICOLON> - { rules.addCondition(new NamedCondition(conditionName,condition)); } + <LEFTSQUAREBRACKET> conditionName = identifier() <RIGHTSQUAREBRACKET> <CONDITION> condition = topLevelCondition() <SEMICOLON> + { rules.addCondition(new NamedCondition(conditionName, condition)); } } Condition topLevelCondition() : { Condition condition; - boolean startAnchor=false; - boolean endAnchor=false; + boolean startAnchor = false; + boolean endAnchor = false; } { - ( <DOT> { startAnchor=true; } )? + ( <DOT> { startAnchor = true; } )? ( - LOOKAHEAD(3) condition=choiceCondition() | - LOOKAHEAD(3) condition=sequenceCondition() + LOOKAHEAD(3) condition = choiceCondition() | + LOOKAHEAD(3) condition = sequenceCondition() ) - ( LOOKAHEAD(2) <DOT> { endAnchor=true; } )? + ( LOOKAHEAD(2) <DOT> { endAnchor = true; } )? 
{ - condition.setAnchor(Condition.Anchor.create(startAnchor,endAnchor)); + condition.setAnchor(Condition.Anchor.create(startAnchor, endAnchor)); return condition; } } @@ -361,8 +386,8 @@ Condition condition() : } { ( - ( LOOKAHEAD(3) condition=choiceCondition() - | condition=terminalCondition() ) + ( LOOKAHEAD(3) condition = choiceCondition() + | condition = terminalCondition() ) { return condition; } @@ -374,8 +399,8 @@ Condition terminalOrSequenceCondition() : Condition condition; } { - ( LOOKAHEAD(3) condition=sequenceCondition() | - condition=terminalCondition() ) + ( LOOKAHEAD(3) condition = sequenceCondition() | + condition = terminalCondition() ) { return condition; } } @@ -384,20 +409,20 @@ Condition terminalCondition() : Condition condition; } { - ( condition=notCondition() | condition=terminalOrComparisonCondition() ) + ( condition = notCondition() | condition = terminalOrComparisonCondition() ) { return condition; } } Condition terminalOrComparisonCondition() : { - Condition condition,rightCondition; + Condition condition, rightCondition; String comparison; } { - condition=reallyTerminalCondition() - ( comparison=comparison() ( LOOKAHEAD(2) rightCondition=nestedCondition() | rightCondition=reallyTerminalCondition() ) -// ( comparison=comparison() rightCondition=condition() - { condition=new ComparisonCondition(condition,comparison,rightCondition); } + condition = reallyTerminalCondition() + ( comparison = comparison() ( LOOKAHEAD(2) rightCondition = nestedCondition() | rightCondition = reallyTerminalCondition() ) +// ( comparison = comparison() rightCondition = condition() + { condition = new ComparisonCondition(condition, comparison, rightCondition); } ) ? 
{ return condition; } @@ -405,10 +430,10 @@ Condition terminalOrComparisonCondition() : Condition reallyTerminalCondition() : { - String label=null; - String context=null; - String nameSpace=null; - Condition condition=null; + String label = null; + String context = null; + String nameSpace = null; + Condition condition = null; } { // This body looks like this to distinguish these two cases @@ -416,20 +441,20 @@ Condition reallyTerminalCondition() : // condition . (end anchor) ( LOOKAHEAD(8) ( - ( LOOKAHEAD(2) context=context() )? - ( nameSpace=nameSpace() ) - ( LOOKAHEAD(2) label=label() )? - condition=terminalConditionBody() + ( LOOKAHEAD(2) context = context() )? + ( nameSpace = nameSpace() ) + ( LOOKAHEAD(2) label = label() )? + condition = terminalConditionBody() ) | ( - ( LOOKAHEAD(2) context=context() )? - ( LOOKAHEAD(2) label=label() )? - condition=terminalConditionBody() + ( LOOKAHEAD(2) context = context() )? + ( LOOKAHEAD(2) label = label() )? + condition = terminalConditionBody() ) ) { - if (context!=null) + if (context != null) condition.setContextName(context); condition.setLabel(label); condition.setNameSpace(nameSpace); @@ -440,18 +465,18 @@ Condition reallyTerminalCondition() : Condition terminalConditionBody() : { - Condition condition=null; + Condition condition = null; } { ( - LOOKAHEAD(2) condition=conditionReference() | - condition=termCondition() | - condition=nestedCondition() | - condition=nonReferableEllipsisCondition() | - condition=referableEllipsisCondition() | - condition=superCondition() | - condition=literalCondition() | - condition=compositeItemCondition()) + LOOKAHEAD(2) condition = conditionReference() | + condition = termCondition() | + condition = nestedCondition() | + condition = nonReferableEllipsisCondition() | + condition = referableEllipsisCondition() | + condition = superCondition() | + condition = literalCondition() | + condition = compositeItemCondition()) { return condition; } } @@ -460,7 +485,7 @@ Condition 
notCondition() : Condition condition; } { - <EXCLAMATION> condition=terminalOrComparisonCondition() + <EXCLAMATION> condition = terminalOrComparisonCondition() { return new NotCondition(condition); } } @@ -470,7 +495,7 @@ ConditionReference conditionReference() : String conditionName; } { - <LEFTSQUAREBRACKET> conditionName=identifier() <RIGHTSQUAREBRACKET> + <LEFTSQUAREBRACKET> conditionName = identifier() <RIGHTSQUAREBRACKET> { return new ConditionReference(conditionName); } } @@ -494,23 +519,23 @@ Condition nestedCondition() : Condition condition; } { - <LEFTBRACE> condition=choiceCondition() <RIGHTBRACE> + <LEFTBRACE> condition = choiceCondition() <RIGHTBRACE> { return condition; } } Condition sequenceCondition() : { - SequenceCondition sequenceCondition=new SequenceCondition(); + SequenceCondition sequenceCondition = new SequenceCondition(); Condition condition; } { - condition=terminalCondition() + condition = terminalCondition() { sequenceCondition.addCondition(condition); } - ( LOOKAHEAD(2) condition=terminalCondition() + ( LOOKAHEAD(2) condition = terminalCondition() { sequenceCondition.addCondition(condition); } )* { - if (sequenceCondition.conditionSize()==1) + if (sequenceCondition.conditionSize() == 1) return sequenceCondition.removeCondition(0); else return sequenceCondition; @@ -519,17 +544,17 @@ Condition sequenceCondition() : Condition choiceCondition() : { - ChoiceCondition choiceCondition=new ChoiceCondition(); + ChoiceCondition choiceCondition = new ChoiceCondition(); Condition condition; } { - condition=terminalOrSequenceCondition() + condition = terminalOrSequenceCondition() { choiceCondition.addCondition(condition); } - ( LOOKAHEAD(3) (<NL>)* <COMMA> (<NL>)* condition=terminalOrSequenceCondition() + ( LOOKAHEAD(3) (<NL>)* <COMMA> (<NL>)* condition = terminalOrSequenceCondition() { choiceCondition.addCondition(condition); } ) * { - if (choiceCondition.conditionSize()==1) + if (choiceCondition.conditionSize() == 1) return 
choiceCondition.removeCondition(0); else return choiceCondition; @@ -542,7 +567,7 @@ TermCondition termCondition() : } { ( str = identifier() ) - { return new TermCondition(str); } + { return new TermCondition(str, linguistics); } } SuperCondition superCondition() : { } @@ -566,7 +591,7 @@ CompositeItemCondition compositeItemCondition() : CompositeItemCondition compositeItemCondition = new CompositeItemCondition(); } { - ( <QUOTE> ( condition=terminalConditionBody() { compositeItemCondition.addCondition(condition); } ) <QUOTE> ) + ( <QUOTE> ( condition = terminalConditionBody() { compositeItemCondition.addCondition(condition); } ) <QUOTE> ) { return compositeItemCondition; } } diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/parser/test/SemanticsParserTestCase.java b/container-search/src/test/java/com/yahoo/prelude/semantics/parser/test/SemanticsParserTestCase.java index ca5bb4d4cd2..bf99a709df3 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/parser/test/SemanticsParserTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/parser/test/SemanticsParserTestCase.java @@ -4,6 +4,7 @@ package com.yahoo.prelude.semantics.parser.test; import java.util.Iterator; import com.yahoo.javacc.UnicodeUtilities; +import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.prelude.semantics.RuleBase; import com.yahoo.prelude.semantics.RuleImporter; import com.yahoo.prelude.semantics.parser.ParseException; @@ -24,8 +25,8 @@ public class SemanticsParserTestCase { @Test public void testRuleReading() throws java.io.IOException, ParseException { - RuleBase rules=new RuleImporter().importFile(ROOT + "rules.sr"); - Iterator<?> i=rules.ruleIterator(); + RuleBase rules = new RuleImporter(new SimpleLinguistics()).importFile(ROOT + "rules.sr"); + Iterator<?> i = rules.ruleIterator(); assertEquals("[listing] [preposition] [place] -> listing:[listing] place:[place]!150", i.next().toString()); assertEquals("[listing] 
[place] +> place:[place]", @@ -36,10 +37,10 @@ public class SemanticsParserTestCase { i.next().toString()); assertEquals("digital camera -> digicamera", i.next().toString()); - assertEquals("(parameter.ranking='cat'), (parameter.ranking='cat0') -> one",i.next().toString()); + assertEquals("(parameter.ranking='cat'), (parameter.ranking='cat0') -> one", i.next().toString()); assertFalse(i.hasNext()); - i=rules.conditionIterator(); + i = rules.conditionIterator(); assertEquals("[listing] :- restaurant, shop, cafe, hotel", i.next().toString()); assertEquals("[preposition] :- in, at, near", @@ -53,7 +54,7 @@ public class SemanticsParserTestCase { assertFalse(i.hasNext()); assertTrue(rules.isDefault()); - assertEquals(ROOT + "semantics.fsa",rules.getAutomataFile()); + assertEquals(ROOT + "semantics.fsa", rules.getAutomataFile()); } } diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/BacktrackingTestCase.java b/container-search/src/test/java/com/yahoo/prelude/semantics/test/BacktrackingTestCase.java index ac1791ae91a..394752f8aa1 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/BacktrackingTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/BacktrackingTestCase.java @@ -30,7 +30,7 @@ public class BacktrackingTestCase { static { try { - searcher = new SemanticSearcher(new RuleImporter().importFile(root + "backtrackingrules.sr")); + searcher = new SemanticSearcher(new RuleImporter(new SimpleLinguistics()).importFile(root + "backtrackingrules.sr")); } catch (Exception e) { throw new RuntimeException(e); diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/ConditionTestCase.java b/container-search/src/test/java/com/yahoo/prelude/semantics/test/ConditionTestCase.java index 86ee9b5948b..eb69372c22b 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/ConditionTestCase.java +++ 
b/container-search/src/test/java/com/yahoo/prelude/semantics/test/ConditionTestCase.java @@ -1,6 +1,8 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.prelude.semantics.test; +import com.yahoo.language.simple.SimpleLinguistics; +import com.yahoo.prelude.semantics.engine.RuleBaseLinguistics; import com.yahoo.search.Query; import com.yahoo.prelude.semantics.RuleBase; import com.yahoo.prelude.semantics.engine.Evaluation; @@ -24,15 +26,17 @@ public class ConditionTestCase { @Test public void testTermCondition() { - TermCondition term=new TermCondition("foo"); - Query query=new Query("?query=foo"); + var linguistics = new RuleBaseLinguistics(new SimpleLinguistics()); + TermCondition term = new TermCondition("foo", linguistics); + Query query = new Query("?query=foo"); assertTrue(term.matches(new Evaluation(query).freshRuleEvaluation())); } @Test public void testSequenceCondition() { - TermCondition term1 = new TermCondition("foo"); - TermCondition term2 = new TermCondition("bar"); + var linguistics = new RuleBaseLinguistics(new SimpleLinguistics()); + TermCondition term1 = new TermCondition("foo", linguistics); + TermCondition term2 = new TermCondition("bar",linguistics); SequenceCondition sequence = new SequenceCondition(); sequence.addCondition(term1); sequence.addCondition(term2); @@ -46,8 +50,9 @@ public class ConditionTestCase { @Test public void testChoiceCondition() { - TermCondition term1 = new TermCondition("foo"); - TermCondition term2 = new TermCondition("bar"); + var linguistics = new RuleBaseLinguistics(new SimpleLinguistics()); + TermCondition term1 = new TermCondition("foo", linguistics); + TermCondition term2 = new TermCondition("bar", linguistics); ChoiceCondition choice = new ChoiceCondition(); choice.addCondition(term1); choice.addCondition(term2); @@ -61,7 +66,8 @@ public class ConditionTestCase { @Test public void testNamedConditionReference() { - TermCondition term = new 
TermCondition("foo"); + var linguistics = new RuleBaseLinguistics(new SimpleLinguistics()); + TermCondition term = new TermCondition("foo", linguistics); NamedCondition named = new NamedCondition("cond",term); ConditionReference reference = new ConditionReference("cond"); @@ -69,8 +75,7 @@ public class ConditionTestCase { ProductionRule rule = new ReplacingProductionRule(); rule.setCondition(reference); rule.setProduction(new ProductionList()); - RuleBase ruleBase = new RuleBase(); - ruleBase.setName("test"); + RuleBase ruleBase = new RuleBase("test", linguistics.linguistics()); ruleBase.addCondition(named); ruleBase.addRule(rule); ruleBase.initialize(); diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/ConfigurationTestCase.java b/container-search/src/test/java/com/yahoo/prelude/semantics/test/ConfigurationTestCase.java index 80c9e898302..6d5b9459833 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/ConfigurationTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/ConfigurationTestCase.java @@ -37,7 +37,7 @@ public class ConfigurationTestCase { static { semanticRulesConfig = new ConfigGetter<>(SemanticRulesConfig.class).getConfig("file:" + root + "semantic-rules.cfg"); - searcher=new SemanticSearcher(semanticRulesConfig); + searcher = new SemanticSearcher(semanticRulesConfig, new SimpleLinguistics()); } protected void assertSemantics(String result, String input, String baseName) { @@ -54,46 +54,46 @@ public class ConfigurationTestCase { @Test public void testReadingConfigurationRuleBase() { - RuleBase parent=searcher.getRuleBase("parent"); + RuleBase parent = searcher.getRuleBase("parent"); assertNotNull(parent); - assertEquals("parent",parent.getName()); - assertEquals("semantic-rules.cfg",parent.getSource()); + assertEquals("parent", parent.getName()); + assertEquals("semantic-rules.cfg", parent.getSource()); } @Test - public void testParent() throws Exception { - 
assertSemantics("vehiclebrand:audi","audi cars","parent"); - assertSemantics("vehiclebrand:alfa","alfa bus","parent"); - assertSemantics("AND vehiclebrand:bmw expensivetv","bmw motorcycle","parent.sr"); - assertSemantics("AND vw car", "vw cars","parent"); - assertSemantics("AND skoda car", "skoda cars","parent.sr"); + public void testParent() { + assertSemantics("vehiclebrand:audi", "audi cars", "parent"); + assertSemantics("vehiclebrand:alfa", "alfa bus", "parent"); + assertSemantics("AND vehiclebrand:bmw expensivetv", "bmw motorcycle", "parent.sr"); + assertSemantics("AND vw car", "vw cars", "parent"); + assertSemantics("AND skoda car", "skoda cars", "parent.sr"); } @Test - public void testChild1() throws Exception { - assertSemantics("vehiclebrand:skoda","audi cars","child1.sr"); - assertSemantics("vehiclebrand:alfa", "alfa bus","child1"); - assertSemantics("AND vehiclebrand:bmw expensivetv","bmw motorcycle","child1"); - assertSemantics("vehiclebrand:skoda","vw cars","child1"); - assertSemantics("AND skoda car", "skoda cars","child1"); + public void testChild1() { + assertSemantics("vehiclebrand:skoda", "audi cars", "child1.sr"); + assertSemantics("vehiclebrand:alfa", "alfa bus", "child1"); + assertSemantics("AND vehiclebrand:bmw expensivetv", "bmw motorcycle", "child1"); + assertSemantics("vehiclebrand:skoda", "vw cars", "child1"); + assertSemantics("AND skoda car", "skoda cars", "child1"); } @Test - public void testChild2() throws Exception { - assertSemantics("vehiclebrand:audi","audi cars","child2"); - assertSemantics("vehiclebrand:alfa","alfa bus","child2.sr"); - assertSemantics("AND vehiclebrand:bmw expensivetv","bmw motorcycle","child2.sr"); - assertSemantics("AND vw car","vw cars","child2"); - assertSemantics("vehiclebrand:skoda","skoda cars","child2"); + public void testChild2() { + assertSemantics("vehiclebrand:audi", "audi cars", "child2"); + assertSemantics("vehiclebrand:alfa", "alfa bus", "child2.sr"); + assertSemantics("AND vehiclebrand:bmw 
expensivetv", "bmw motorcycle", "child2.sr"); + assertSemantics("AND vw car", "vw cars", "child2"); + assertSemantics("vehiclebrand:skoda", "skoda cars", "child2"); } @Test - public void testGrandchild() throws Exception { - assertSemantics("vehiclebrand:skoda","audi cars","grandchild.sr"); - assertSemantics("vehiclebrand:alfa","alfa bus","grandchild"); - assertSemantics("AND vehiclebrand:bmw expensivetv","bmw motorcycle","grandchild"); - assertSemantics("vehiclebrand:skoda","vw cars","grandchild"); - assertSemantics("vehiclebrand:skoda","skoda cars","grandchild"); + public void testGrandchild() { + assertSemantics("vehiclebrand:skoda", "audi cars", "grandchild.sr"); + assertSemantics("vehiclebrand:alfa", "alfa bus", "grandchild"); + assertSemantics("AND vehiclebrand:bmw expensivetv", "bmw motorcycle", "grandchild"); + assertSemantics("vehiclebrand:skoda", "vw cars", "grandchild"); + assertSemantics("vehiclebrand:skoda", "skoda cars", "grandchild"); } @Test diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/DuplicateRuleTestCase.java b/container-search/src/test/java/com/yahoo/prelude/semantics/test/DuplicateRuleTestCase.java index fb86beaa9bc..76c8c3966b7 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/DuplicateRuleTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/DuplicateRuleTestCase.java @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.prelude.semantics.test; +import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.prelude.semantics.RuleBaseException; import com.yahoo.prelude.semantics.RuleImporter; import com.yahoo.prelude.semantics.parser.ParseException; @@ -14,18 +15,18 @@ import static org.junit.Assert.fail; */ public class DuplicateRuleTestCase { - private final String root="src/test/java/com/yahoo/prelude/semantics/test/rulebases/"; + private final String root = "src/test/java/com/yahoo/prelude/semantics/test/rulebases/"; @Test public void testDuplicateRuleBaseLoading() throws java.io.IOException, ParseException { if (System.currentTimeMillis() > 0) return; // TODO: Include this test... try { - new RuleImporter().importFile(root + "rules.sr"); + new RuleImporter(new SimpleLinguistics()).importFile(root + "rules.sr"); fail("Did not detect duplicate condition names"); } catch (RuleBaseException e) { - assertEquals("Duplicate condition 'something' in 'duplicaterules.sr'",e.getMessage()); + assertEquals("Duplicate condition 'something' in 'duplicaterules.sr'", e.getMessage()); } } diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/InheritanceTestCase.java b/container-search/src/test/java/com/yahoo/prelude/semantics/test/InheritanceTestCase.java index d93fd218259..e9364074281 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/InheritanceTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/InheritanceTestCase.java @@ -6,6 +6,7 @@ import java.util.List; import java.util.StringTokenizer; import com.yahoo.component.chain.Chain; +import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.search.Query; import com.yahoo.prelude.semantics.RuleBase; import com.yahoo.prelude.semantics.RuleBaseException; @@ -24,7 +25,6 @@ import static org.junit.Assert.fail; /** * @author bratseth */ -@SuppressWarnings("deprecation") public class InheritanceTestCase { private static final String root 
= "src/test/java/com/yahoo/prelude/semantics/test/rulebases/"; @@ -34,10 +34,10 @@ public class InheritanceTestCase { static { try { - parent = RuleBase.createFromFile(root + "inheritingrules/parent.sr", null); - child1 = RuleBase.createFromFile(root + "inheritingrules/child1.sr", null); - child2 = RuleBase.createFromFile(root + "inheritingrules/child2.sr", null); - grandchild = RuleBase.createFromFile(root + "inheritingrules/grandchild.sr", null); + parent = RuleBase.createFromFile(root + "inheritingrules/parent.sr", null, new SimpleLinguistics()); + child1 = RuleBase.createFromFile(root + "inheritingrules/child1.sr", null, new SimpleLinguistics()); + child2 = RuleBase.createFromFile(root + "inheritingrules/child2.sr", null, new SimpleLinguistics()); + grandchild = RuleBase.createFromFile(root + "inheritingrules/grandchild.sr", null, new SimpleLinguistics()); grandchild.setDefault(true); searcher = new SemanticSearcher(parent, child1, child2, grandchild); @@ -77,7 +77,7 @@ public class InheritanceTestCase { public void testInclusionOrderAndContentDump() { StringTokenizer lines = new StringTokenizer(grandchild.toContentString(),"\n",false); assertEquals("vw -> audi", lines.nextToken()); - assertEquals("cars -> car", lines.nextToken()); + assertEquals("car -> car", lines.nextToken()); assertEquals("[brand] [vehicle] -> vehiclebrand:[brand]", lines.nextToken()); assertEquals("vehiclebrand:bmw +> expensivetv", lines.nextToken()); assertEquals("vehiclebrand:audi -> vehiclebrand:skoda", lines.nextToken()); diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/ParameterTestCase.java b/container-search/src/test/java/com/yahoo/prelude/semantics/test/ParameterTestCase.java index cd5743c6d77..376da065f4d 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/ParameterTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/ParameterTestCase.java @@ -20,66 +20,65 @@ public class ParameterTestCase extends 
RuleBaseAbstractTestCase { /** Tests parameter literal matching */ @Test public void testLiteralEquals() { - assertSemantics("a","a"); - assertSemantics("RANK a foo:a","a&ranking=category"); - assertSemantics("a","a&ranking=somethingelse"); - assertSemantics("a","a&otherparam=category"); + assertSemantics("a", "a"); + assertSemantics("RANK a foo:a", "a&ranking=category"); + assertSemantics("a", "a&ranking=somethingelse"); + assertSemantics("a", "a&otherparam=category"); } /** Tests parameter matching of larger */ @Test public void testLarger() { - assertSemantics("a","a"); - assertSemantics("AND a largepage","a&hits=11"); - assertSemantics("AND a largepage","a&hits=12"); + assertSemantics("a", "a"); + assertSemantics("AND a largepage", "a&hits=11"); + assertSemantics("AND a largepage", "a&hits=12"); } /** Tests parameter containment matching */ @Test public void testContainsAsList() { assertSemantics("a","a"); - assertSemantics("AND a intent:music","a&search=music"); - assertSemantics("AND a intent:music","a&search=music,books"); - assertSemantics("AND a intent:music","a&search=kanoos,music,books"); + assertSemantics("AND a intent:music", "a&search=music"); + assertSemantics("AND a intent:music", "a&search=music,books"); + assertSemantics("AND a intent:music", "a&search=kanoos,music,books"); } /** Tests parameter production */ @Test public void testParameterProduction() { - assertParameterSemantics("AND a b c","a b c","search","[letters, alphabet]"); - assertParameterSemantics("AND a c d","a c d","search","[letters, someletters]"); - assertParameterSemantics("+(AND a d e) -letter:c","a d e","search","[someletters]"); - assertParameterSemantics("AND a d f","a d f","rank-profile","foo"); - assertParameterSemantics("AND a f g","a f g","grouping.nolearning","true"); + assertParameterSemantics("AND a b c", "a b c", "search", "[letters, alphabet]"); + assertParameterSemantics("AND a c d", "a c d", "search", "[letters, someletters]"); + assertParameterSemantics("+(AND a d 
e) -letter:c", "a d e", "search", "[someletters]"); + assertParameterSemantics("AND a d f", "a d f", "rank-profile", "foo"); + assertParameterSemantics("AND a f g", "a f g", "grouping.nolearning", "true"); } @Test public void testMultipleAlternativeParameterValuesInCondition() { - assertInputRankParameterSemantics("one","foo","cat"); - assertInputRankParameterSemantics("one","foo","cat0"); - assertInputRankParameterSemantics("one","bar","cat"); - assertInputRankParameterSemantics("one","bar","cat0"); - assertInputRankParameterSemantics("AND one one","foo+bar","cat0"); - assertInputRankParameterSemantics("AND fuki sushi","fuki+sushi","cat0"); + assertInputRankParameterSemantics("one", "foo", "cat"); + assertInputRankParameterSemantics("one", "foo", "cat0"); + assertInputRankParameterSemantics("one", "bar", "cat"); + assertInputRankParameterSemantics("one", "bar", "cat0"); + assertInputRankParameterSemantics("AND one one", "foo+bar", "cat0"); + assertInputRankParameterSemantics("AND fuki sushi", "fuki+sushi", "cat0"); } - private void assertInputRankParameterSemantics(String producedQuery,String inputQuery, - String rankParameterValue) { - assertInputRankParameterSemantics(producedQuery,inputQuery,rankParameterValue,0); + private void assertInputRankParameterSemantics(String producedQuery,String inputQuery, String rankParameterValue) { + assertInputRankParameterSemantics(producedQuery, inputQuery, rankParameterValue, 0); } - private void assertInputRankParameterSemantics(String producedQuery,String inputQuery, - String rankParameterValue,int tracelevel) { - Query query=new Query("?query=" + inputQuery + "&tracelevel=0&tracelevel.rules=" + tracelevel); + private void assertInputRankParameterSemantics(String producedQuery, String inputQuery, + String rankParameterValue, int tracelevel) { + Query query = new Query("?query=" + inputQuery + "&tracelevel=0&tracelevel.rules=" + tracelevel); query.getRanking().setProfile(rankParameterValue); 
query.properties().set("tracelevel.rules", tracelevel); assertSemantics(producedQuery, query); } - private void assertParameterSemantics(String producedQuery,String inputQuery, - String producedParameterName,String producedParameterValue) { - Query query=assertSemantics(producedQuery,inputQuery); - assertEquals(producedParameterValue,query.properties().getString(producedParameterName)); + private void assertParameterSemantics(String producedQuery, String inputQuery, + String producedParameterName, String producedParameterValue) { + Query query = assertSemantics(producedQuery, inputQuery); + assertEquals(producedParameterValue, query.properties().getString(producedParameterName)); } } diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/ProductionRuleTestCase.java b/container-search/src/test/java/com/yahoo/prelude/semantics/test/ProductionRuleTestCase.java index 8b883759215..b91e9441a2b 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/ProductionRuleTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/ProductionRuleTestCase.java @@ -1,6 +1,8 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.prelude.semantics.test; +import com.yahoo.language.simple.SimpleLinguistics; +import com.yahoo.prelude.semantics.engine.RuleBaseLinguistics; import com.yahoo.search.Query; import com.yahoo.prelude.semantics.RuleBase; import com.yahoo.prelude.semantics.engine.Evaluation; @@ -25,7 +27,8 @@ public class ProductionRuleTestCase { @Test public void testProductionRule() { - TermCondition term = new TermCondition("sony"); + var linguistics = new RuleBaseLinguistics(new SimpleLinguistics()); + TermCondition term = new TermCondition("sony", linguistics); NamedCondition named = new NamedCondition("brand", term); ConditionReference reference = new ConditionReference("brand"); @@ -38,8 +41,7 @@ public class ProductionRuleTestCase { rule.setProduction(productionList); // To initialize the condition reference... - RuleBase ruleBase = new RuleBase(); - ruleBase.setName("test"); + RuleBase ruleBase = new RuleBase("test", linguistics.linguistics()); ruleBase.addCondition(named); ruleBase.addRule(rule); ruleBase.initialize(); diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/RuleBaseAbstractTestCase.java b/container-search/src/test/java/com/yahoo/prelude/semantics/test/RuleBaseAbstractTestCase.java index baccb73cd93..84e47edae29 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/RuleBaseAbstractTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/RuleBaseAbstractTestCase.java @@ -2,6 +2,7 @@ package com.yahoo.prelude.semantics.test; import com.yahoo.component.chain.Chain; +import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.search.Query; import com.yahoo.prelude.semantics.RuleBase; import com.yahoo.prelude.semantics.RuleBaseException; @@ -16,7 +17,7 @@ import java.util.List; import static org.junit.Assert.assertEquals; /** - * Tests semantic searching + * DO NOT USE. 
Use RuleBaseTester instead * * @author bratseth */ @@ -37,7 +38,7 @@ public abstract class RuleBaseAbstractTestCase { try { if (automataFileName != null) automataFileName = root + automataFileName; - RuleBase ruleBase = RuleBase.createFromFile(root + ruleBaseName, automataFileName); + RuleBase ruleBase = RuleBase.createFromFile(root + ruleBaseName, automataFileName, new SimpleLinguistics()); return new SemanticSearcher(ruleBase); } catch (Exception e) { throw new RuleBaseException("Initialization of rule base '" + ruleBaseName + "' failed",e); diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/RuleBaseTester.java b/container-search/src/test/java/com/yahoo/prelude/semantics/test/RuleBaseTester.java new file mode 100644 index 00000000000..cc9e758a0e0 --- /dev/null +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/RuleBaseTester.java @@ -0,0 +1,79 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.prelude.semantics.test; + +import com.yahoo.component.chain.Chain; +import com.yahoo.language.opennlp.OpenNlpLinguistics; +import com.yahoo.prelude.semantics.RuleBase; +import com.yahoo.prelude.semantics.RuleBaseException; +import com.yahoo.prelude.semantics.SemanticSearcher; +import com.yahoo.search.Query; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.test.QueryTestCase; + +import java.util.ArrayList; +import java.util.List; + +import static org.junit.Assert.assertEquals; + +/** + * Helper for testing with a rule base. + * Replace subclassing of RuleBaseAbstractTestCase by this. 
+ * + * @author bratseth + */ +public class RuleBaseTester { + + private final String root = "src/test/java/com/yahoo/prelude/semantics/test/rulebases/"; + private final SemanticSearcher searcher; + + public RuleBaseTester(String ruleBaseName) { + this(ruleBaseName, null); + } + + public RuleBaseTester(String ruleBaseName, String automataFileName) { + searcher = createSearcher(ruleBaseName, automataFileName); + } + + private SemanticSearcher createSearcher(String ruleBaseName,String automataFileName) { + try { + if (automataFileName != null) + automataFileName = root + automataFileName; + RuleBase ruleBase = RuleBase.createFromFile(root + ruleBaseName, automataFileName, new OpenNlpLinguistics()); + return new SemanticSearcher(ruleBase); + } catch (Exception e) { + throw new RuleBaseException("Initialization of rule base '" + ruleBaseName + "' failed", e); + } + } + + public Query assertSemantics(String result, String input) { + return assertSemantics(result, input, 0); + } + + public Query assertSemantics(String result, String input, int tracelevel) { + return assertSemantics(result, input, tracelevel, Query.Type.ALL); + } + + public Query assertSemantics(String result, String input, int tracelevel, Query.Type queryType) { + Query query = new Query("?query=" + QueryTestCase.httpEncode(input) + "&tracelevel=0&tracelevel.rules=" + tracelevel + + "&language=und&type=" + queryType); + return assertSemantics(result, query); + } + + public Query assertSemantics(String result, Query query) { + createExecution(searcher).search(query); + assertEquals(result, query.getModel().getQueryTree().getRoot().toString()); + return query; + } + + private Execution createExecution(Searcher searcher) { + return new Execution(chainedAsSearchChain(searcher), Execution.Context.createContextStub()); + } + + private Chain<Searcher> chainedAsSearchChain(Searcher topOfChain) { + List<Searcher> searchers = new ArrayList<>(); + searchers.add(topOfChain); + return new Chain<>(searchers); + } + +} 
diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/SemanticSearcherTestCase.java b/container-search/src/test/java/com/yahoo/prelude/semantics/test/SemanticSearcherTestCase.java index 29cc5c6e23a..76b2d3991c1 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/SemanticSearcherTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/SemanticSearcherTestCase.java @@ -142,11 +142,6 @@ public class SemanticSearcherTestCase extends RuleBaseAbstractTestCase { } @Test - public void testPluralReplaceBecomesSingular() { - assertSemantics("AND from:paris to:texas","pariss to texass"); - } - - @Test public void testOrProduction() { assertSemantics("OR something somethingelse", "something"); } @@ -155,7 +150,7 @@ public class SemanticSearcherTestCase extends RuleBaseAbstractTestCase { @Test public void testWeightedSetItem() { Query q = new Query(); - WeightedSetItem weightedSet=new WeightedSetItem("fieldName"); + WeightedSetItem weightedSet = new WeightedSetItem("fieldName"); weightedSet.addToken("a", 1); weightedSet.addToken("b", 2); q.getModel().getQueryTree().setRoot(weightedSet); diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/StemmingTestCase.java b/container-search/src/test/java/com/yahoo/prelude/semantics/test/StemmingTestCase.java index 6702a1ca1d9..b8efbf7422b 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/StemmingTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/StemmingTestCase.java @@ -4,34 +4,52 @@ package com.yahoo.prelude.semantics.test; import org.junit.Test; /** - * Tests a case reported by tularam + * Tests stemming. 
* * @author bratseth */ -public class StemmingTestCase extends RuleBaseAbstractTestCase { +public class StemmingTestCase { - public StemmingTestCase() { - super("stemming.sr"); + @Test + public void testRewritingDueToStemmingInQuery() { + var tester = new RuleBaseTester("stemming.sr"); + tester.assertSemantics("+(AND i:vehicle TRUE) -i:s", "i:cars -i:s"); } @Test - public void testRewritingDueToStemmingInQuery() { - assertSemantics("+(AND i:vehicle TRUE) -i:s","i:cars -i:s"); + public void testNoRewritingDueToStemmingInQueryWhenStemmingDisabled() { + var tester = new RuleBaseTester("stemming-none.sr"); + tester.assertSemantics("+i:cars -i:s", "i:cars -i:s"); } @Test public void testRewritingDueToStemmingInRule() { - assertSemantics("+(AND i:animal TRUE) -i:s","i:horse -i:s"); + var tester = new RuleBaseTester("stemming.sr"); + tester.assertSemantics("+(AND i:animal TRUE) -i:s", "i:horse -i:s"); + } + + @Test + public void testNoRewritingDueToStemmingInRuleWhenStemmingDisabled() { + var tester = new RuleBaseTester("stemming-none.sr"); + tester.assertSemantics("+i:horse -i:s", "i:horse -i:s"); } @Test public void testRewritingDueToExactMatch() { - assertSemantics("+(AND i:arts i:sciences TRUE) -i:s","i:as -i:s"); + var tester = new RuleBaseTester("stemming.sr"); + tester.assertSemantics("+(AND i:arts i:sciences TRUE) -i:s", "i:as -i:s"); + } + + @Test + public void testEnglishStemming() { + var tester = new RuleBaseTester("stemming.sr"); + tester.assertSemantics("i:drive", "i:going"); } @Test - public void testNoRewritingBecauseShortWordsAreNotStemmed() { - assertSemantics("+i:a -i:s","i:a -i:s"); + public void testFrenchStemming() { + var tester = new RuleBaseTester("stemming-french.sr"); + tester.assertSemantics("i:going", "i:going"); } } diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/stemming-french.sr b/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/stemming-french.sr new file mode 100644 index 
00000000000..1ccafd04344 --- /dev/null +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/stemming-french.sr @@ -0,0 +1,8 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@stemming(true) +@language(fr) + +i:as -> i:arts i:sciences; +i:car -> i:vehicle; +i:horses -> i:animal; +i:go -> i:drive; diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/stemming-none.sr b/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/stemming-none.sr new file mode 100644 index 00000000000..44f6e40a308 --- /dev/null +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/stemming-none.sr @@ -0,0 +1,6 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@stemming(false) + +i:car -> i:vehicle; +i:horses -> i:animal; +i:go -> i:drive; diff --git a/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/stemming.sr b/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/stemming.sr index f68706646c2..ea73e385b3a 100644 --- a/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/stemming.sr +++ b/container-search/src/test/java/com/yahoo/prelude/semantics/test/rulebases/stemming.sr @@ -1,5 +1,7 @@ # Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. @stemming(true) + i:as -> i:arts i:sciences; i:car -> i:vehicle; i:horses -> i:animal; +i:go -> i:drive; |