aboutsummaryrefslogtreecommitdiffstats
path: root/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java
diff options
context:
space:
mode:
Diffstat (limited to 'lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java')
-rw-r--r--lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java110
1 files changed, 110 insertions, 0 deletions
diff --git a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java
new file mode 100644
index 00000000000..955e18474f7
--- /dev/null
+++ b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java
@@ -0,0 +1,110 @@
+package com.yahoo.language.lucene;
+
+import com.yahoo.language.Language;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.ar.ArabicAnalyzer;
+import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
+import org.apache.lucene.analysis.bn.BengaliAnalyzer;
+import org.apache.lucene.analysis.ca.CatalanAnalyzer;
+import org.apache.lucene.analysis.ckb.SoraniAnalyzer;
+import org.apache.lucene.analysis.cz.CzechAnalyzer;
+import org.apache.lucene.analysis.da.DanishAnalyzer;
+import org.apache.lucene.analysis.de.GermanAnalyzer;
+import org.apache.lucene.analysis.el.GreekAnalyzer;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
+import org.apache.lucene.analysis.es.SpanishAnalyzer;
+import org.apache.lucene.analysis.et.EstonianAnalyzer;
+import org.apache.lucene.analysis.eu.BasqueAnalyzer;
+import org.apache.lucene.analysis.fa.PersianAnalyzer;
+import org.apache.lucene.analysis.fi.FinnishAnalyzer;
+import org.apache.lucene.analysis.fr.FrenchAnalyzer;
+import org.apache.lucene.analysis.ga.IrishAnalyzer;
+import org.apache.lucene.analysis.gl.GalicianAnalyzer;
+import org.apache.lucene.analysis.hi.HindiAnalyzer;
+import org.apache.lucene.analysis.hu.HungarianAnalyzer;
+import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
+import org.apache.lucene.analysis.id.IndonesianAnalyzer;
+import org.apache.lucene.analysis.it.ItalianAnalyzer;
+import org.apache.lucene.analysis.lt.LithuanianAnalyzer;
+import org.apache.lucene.analysis.lv.LatvianAnalyzer;
+import org.apache.lucene.analysis.ne.NepaliAnalyzer;
+import org.apache.lucene.analysis.nl.DutchAnalyzer;
+import org.apache.lucene.analysis.no.NorwegianAnalyzer;
+import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
+import org.apache.lucene.analysis.ro.RomanianAnalyzer;
+import org.apache.lucene.analysis.ru.RussianAnalyzer;
+import org.apache.lucene.analysis.sr.SerbianAnalyzer;
+import org.apache.lucene.analysis.sv.SwedishAnalyzer;
+import org.apache.lucene.analysis.ta.TamilAnalyzer;
+import org.apache.lucene.analysis.te.TeluguAnalyzer;
+import org.apache.lucene.analysis.th.ThaiAnalyzer;
+import org.apache.lucene.analysis.tr.TurkishAnalyzer;
+
+import java.util.Map;
+
+import static java.util.Map.entry;
+
+public class DefaultAnalyzers {
+
+ private static DefaultAnalyzers INSTANCE;
+ private final Map<Language, Analyzer> analyzerClasses;
+
+ private DefaultAnalyzers() {
+ analyzerClasses = Map.ofEntries(
+ entry(Language.ARABIC, new ArabicAnalyzer()),
+ entry(Language.BULGARIAN, new BulgarianAnalyzer()),
+ entry(Language.BENGALI, new BengaliAnalyzer()),
+ // analyzerClasses.put(Language.BRASILIAN, new BrazilianAnalyzer())
+ entry(Language.CATALAN, new CatalanAnalyzer()),
+ // cjk analyzer?
+ entry(Language.KURDISH, new SoraniAnalyzer()),
+ entry(Language.CZECH, new CzechAnalyzer()),
+ entry(Language.DANISH, new DanishAnalyzer()),
+ entry(Language.GERMAN, new GermanAnalyzer()),
+ entry(Language.GREEK, new GreekAnalyzer()),
+ entry(Language.ENGLISH, new EnglishAnalyzer()),
+ entry(Language.SPANISH, new SpanishAnalyzer()),
+ entry(Language.ESTONIAN, new EstonianAnalyzer()),
+ entry(Language.BASQUE, new BasqueAnalyzer()),
+ entry(Language.PERSIAN, new PersianAnalyzer()),
+ entry(Language.FINNISH, new FinnishAnalyzer()),
+ entry(Language.FRENCH, new FrenchAnalyzer()),
+ entry(Language.IRISH, new IrishAnalyzer()),
+ entry(Language.GALICIAN, new GalicianAnalyzer()),
+ entry(Language.HINDI, new HindiAnalyzer()),
+ entry(Language.HUNGARIAN, new HungarianAnalyzer()),
+ entry(Language.ARMENIAN, new ArmenianAnalyzer()),
+ entry(Language.INDONESIAN, new IndonesianAnalyzer()),
+ entry(Language.ITALIAN, new ItalianAnalyzer()),
+ entry(Language.LITHUANIAN, new LithuanianAnalyzer()),
+ entry(Language.LATVIAN, new LatvianAnalyzer()),
+ entry(Language.NEPALI, new NepaliAnalyzer()),
+ entry(Language.DUTCH, new DutchAnalyzer()),
+ entry(Language.NORWEGIAN_BOKMAL, new NorwegianAnalyzer()),
+ entry(Language.PORTUGUESE, new PortugueseAnalyzer()),
+ entry(Language.ROMANIAN, new RomanianAnalyzer()),
+ entry(Language.RUSSIAN, new RussianAnalyzer()),
+ entry(Language.SERBIAN, new SerbianAnalyzer()),
+ entry(Language.SWEDISH, new SwedishAnalyzer()),
+ entry(Language.TAMIL, new TamilAnalyzer()),
+ entry(Language.TELUGU, new TeluguAnalyzer()),
+ entry(Language.THAI, new ThaiAnalyzer()),
+ entry(Language.TURKISH, new TurkishAnalyzer())
+ );
+ }
+
+ public static DefaultAnalyzers getInstance() {
+ if (INSTANCE == null) {
+ INSTANCE = new DefaultAnalyzers();
+ }
+ return INSTANCE;
+ }
+
+ public Analyzer get(Language language) {
+ return analyzerClasses.get(language);
+ }
+
+ public Analyzer get(String languageCode) {
+ return analyzerClasses.get(Language.fromLanguageTag(languageCode));
+ }
+}