diff options
author | bjormel <bjormel@yahooinc.com> | 2023-10-01 12:23:12 +0000 |
---|---|---|
committer | bjormel <bjormel@yahooinc.com> | 2023-10-01 12:23:12 +0000 |
commit | e9058b555d4dfea2f6c872d9a677e8678b569569 (patch) | |
tree | fa1b67c6e39712c1e0d9f308b0dd55573b43f913 /lucene-linguistics/src/main/java/com | |
parent | 0ad931fa86658904fe9212b014d810236b0e00e4 (diff) | |
parent | 16030193ec04ee41e98779a3d7ee6a6c1d0d0d6f (diff) |
Merge branch 'master' into bjormel/aws-main-controller
Diffstat (limited to 'lucene-linguistics/src/main/java/com')
-rw-r--r-- | lucene-linguistics/src/main/java/com/yahoo/language/lucene/AnalyzerFactory.java | 45 |
1 files changed, 35 insertions, 10 deletions
diff --git a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/AnalyzerFactory.java b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/AnalyzerFactory.java index 67a430a28dc..92ea77cdc13 100644 --- a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/AnalyzerFactory.java +++ b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/AnalyzerFactory.java @@ -11,9 +11,9 @@ import org.apache.lucene.analysis.custom.CustomAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import java.io.IOException; -import java.nio.file.Path; import java.util.HashMap; import java.util.Map; +import java.util.Objects; import java.util.concurrent.ConcurrentHashMap; import java.util.logging.Logger; @@ -60,13 +60,15 @@ class AnalyzerFactory { } private Analyzer createAnalyzer(AnalyzerKey analyzerKey) { - if (null != config.analysis(analyzerKey.languageCode())) { + LuceneAnalysisConfig.Analysis analysis = analysisConfig(analyzerKey); + if (null != analysis) { log.config("Creating analyzer for " + analyzerKey + " from config"); - return createAnalyzer(analyzerKey, config.analysis(analyzerKey.languageCode())); + return createAnalyzer(analyzerKey, analysis); } - if (null != analyzerComponents.getComponent(analyzerKey.languageCode())) { + Analyzer analyzerFromComponents = fromComponents(analyzerKey); + if (null != analyzerFromComponents) { log.config("Using analyzer for " + analyzerKey + " from components"); - return analyzerComponents.getComponent(analyzerKey.languageCode()); + return analyzerFromComponents; } if (null != defaultAnalyzers.get(analyzerKey.language())) { log.config("Using Analyzer for " + analyzerKey + " from a list of default language analyzers"); @@ -77,6 +79,24 @@ class AnalyzerFactory { return defaultAnalyzer; } + /** + * First, checks if more specific (language + stemMode) analysis is configured. + * Second, checks if analysis is configured only for a languageCode. + */ + private LuceneAnalysisConfig.Analysis analysisConfig(AnalyzerKey analyzerKey) { + LuceneAnalysisConfig.Analysis analysis = config.analysis(analyzerKey.languageCodeAndStemMode()); + return (null != analysis) ? analysis : config.analysis(analyzerKey.languageCode()); + } + + /** + * First, checks if a component is configured for a languageCode + StemMode. + * Second, checks if Analyzer is configured only for a languageCode. + */ + private Analyzer fromComponents(AnalyzerKey analyzerKey) { + Analyzer analyzer = analyzerComponents.getComponent(analyzerKey.languageCodeAndStemMode()); + return (null != analyzer) ? analyzer : analyzerComponents.getComponent(analyzerKey.languageCode()); + } + private Analyzer createAnalyzer(AnalyzerKey analyzerKey, LuceneAnalysisConfig.Analysis analysis) { try { CustomAnalyzer.Builder builder = config.configDir() @@ -143,9 +163,14 @@ class AnalyzerFactory { private record AnalyzerKey(Language language, StemMode stemMode, boolean removeAccents) { - // TODO: Identity here is determined by language only. - // Would it make sense to combine language + stemMode + removeAccents to make - // a composite key so we can have more variations possible? + /** + * Combines the languageCode and the stemMode. + * It allows to specify up to 6 (5 StemModes and only language code) analyzers per language. + * The `/` is used so that it doesn't conflict with ComponentRegistry keys. + */ + public String languageCodeAndStemMode() { + return language.languageCode() + "/" + stemMode.toString(); + } public String languageCode() { return language.languageCode(); @@ -155,12 +180,12 @@ class AnalyzerFactory { public boolean equals(Object o) { if (o == this) return true; if ( ! (o instanceof AnalyzerKey other)) return false; - return other.language == this.language; + return other.language == this.language && other.stemMode == this.stemMode; } @Override public int hashCode() { - return language.hashCode(); + return Objects.hash(language, stemMode); } } |