aboutsummaryrefslogtreecommitdiffstats
path: root/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java
blob: 95b11301d47a90134744513e10ca438e6d97d625 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
package com.yahoo.language.lucene;

import com.yahoo.language.Language;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.apache.lucene.analysis.bn.BengaliAnalyzer;
import org.apache.lucene.analysis.ca.CatalanAnalyzer;
import org.apache.lucene.analysis.ckb.SoraniAnalyzer;
import org.apache.lucene.analysis.cz.CzechAnalyzer;
import org.apache.lucene.analysis.da.DanishAnalyzer;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.el.GreekAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.es.SpanishAnalyzer;
import org.apache.lucene.analysis.et.EstonianAnalyzer;
import org.apache.lucene.analysis.eu.BasqueAnalyzer;
import org.apache.lucene.analysis.fa.PersianAnalyzer;
import org.apache.lucene.analysis.fi.FinnishAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.ga.IrishAnalyzer;
import org.apache.lucene.analysis.gl.GalicianAnalyzer;
import org.apache.lucene.analysis.hi.HindiAnalyzer;
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
import org.apache.lucene.analysis.id.IndonesianAnalyzer;
import org.apache.lucene.analysis.it.ItalianAnalyzer;
import org.apache.lucene.analysis.lt.LithuanianAnalyzer;
import org.apache.lucene.analysis.lv.LatvianAnalyzer;
import org.apache.lucene.analysis.ne.NepaliAnalyzer;
import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.apache.lucene.analysis.no.NorwegianAnalyzer;
import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
import org.apache.lucene.analysis.ro.RomanianAnalyzer;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.sr.SerbianAnalyzer;
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.apache.lucene.analysis.ta.TamilAnalyzer;
import org.apache.lucene.analysis.te.TeluguAnalyzer;
import org.apache.lucene.analysis.th.ThaiAnalyzer;
import org.apache.lucene.analysis.tr.TurkishAnalyzer;

import java.util.Map;

import static java.util.Map.entry;

/**
 * Lookup table holding one Lucene {@link Analyzer} instance per {@link Language}
 * for which a default analyzer is registered. All analyzers are created with their
 * no-argument constructors at the time this object is constructed.
 *
 * @author dainiusjocas
 */
class DefaultAnalyzers {

    /** Unmodifiable language-to-analyzer table, populated once in the constructor. */
    private final Map<Language, Analyzer> analyzersByLanguage;

    /** Creates every default analyzer and stores them in the lookup table. */
    public DefaultAnalyzers() {
        this.analyzersByLanguage = createAnalyzers();
    }

    /**
     * Looks up the default analyzer registered for a language.
     *
     * @param language the language to look up
     * @return the analyzer registered for {@code language},
     *         or {@code null} when no default analyzer is registered for it
     */
    public Analyzer get(Language language) {
        Analyzer analyzer = analyzersByLanguage.get(language);
        return analyzer;
    }

    /** Builds the unmodifiable table of default analyzers, one fresh instance per language. */
    private static Map<Language, Analyzer> createAnalyzers() {
        return Map.ofEntries(
                Map.entry(Language.ARABIC, new ArabicAnalyzer()),
                Map.entry(Language.BULGARIAN, new BulgarianAnalyzer()),
                Map.entry(Language.BENGALI, new BengaliAnalyzer()),
                // Not registered: a Brazilian entry (Language.BRASILIAN -> BrazilianAnalyzer).
                Map.entry(Language.CATALAN, new CatalanAnalyzer()),
                // Open question: should a CJK analyzer be registered here?
                Map.entry(Language.KURDISH, new SoraniAnalyzer()),
                Map.entry(Language.CZECH, new CzechAnalyzer()),
                Map.entry(Language.DANISH, new DanishAnalyzer()),
                Map.entry(Language.GERMAN, new GermanAnalyzer()),
                Map.entry(Language.GREEK, new GreekAnalyzer()),
                Map.entry(Language.ENGLISH, new EnglishAnalyzer()),
                Map.entry(Language.SPANISH, new SpanishAnalyzer()),
                Map.entry(Language.ESTONIAN, new EstonianAnalyzer()),
                Map.entry(Language.BASQUE, new BasqueAnalyzer()),
                Map.entry(Language.PERSIAN, new PersianAnalyzer()),
                Map.entry(Language.FINNISH, new FinnishAnalyzer()),
                Map.entry(Language.FRENCH, new FrenchAnalyzer()),
                Map.entry(Language.IRISH, new IrishAnalyzer()),
                Map.entry(Language.GALICIAN, new GalicianAnalyzer()),
                Map.entry(Language.HINDI, new HindiAnalyzer()),
                Map.entry(Language.HUNGARIAN, new HungarianAnalyzer()),
                Map.entry(Language.ARMENIAN, new ArmenianAnalyzer()),
                Map.entry(Language.INDONESIAN, new IndonesianAnalyzer()),
                Map.entry(Language.ITALIAN, new ItalianAnalyzer()),
                Map.entry(Language.LITHUANIAN, new LithuanianAnalyzer()),
                Map.entry(Language.LATVIAN, new LatvianAnalyzer()),
                Map.entry(Language.NEPALI, new NepaliAnalyzer()),
                Map.entry(Language.DUTCH, new DutchAnalyzer()),
                Map.entry(Language.NORWEGIAN_BOKMAL, new NorwegianAnalyzer()),
                Map.entry(Language.PORTUGUESE, new PortugueseAnalyzer()),
                Map.entry(Language.ROMANIAN, new RomanianAnalyzer()),
                Map.entry(Language.RUSSIAN, new RussianAnalyzer()),
                Map.entry(Language.SERBIAN, new SerbianAnalyzer()),
                Map.entry(Language.SWEDISH, new SwedishAnalyzer()),
                Map.entry(Language.TAMIL, new TamilAnalyzer()),
                Map.entry(Language.TELUGU, new TeluguAnalyzer()),
                Map.entry(Language.THAI, new ThaiAnalyzer()),
                Map.entry(Language.TURKISH, new TurkishAnalyzer())
        );
    }

}