aboutsummaryrefslogtreecommitdiffstats
path: root/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java
blob: e550d8aea4330e9a9746846e0674b9a40d7cb9de (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.language.lucene;

import com.yahoo.language.Language;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.apache.lucene.analysis.bn.BengaliAnalyzer;
import org.apache.lucene.analysis.ca.CatalanAnalyzer;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.ckb.SoraniAnalyzer;
import org.apache.lucene.analysis.cz.CzechAnalyzer;
import org.apache.lucene.analysis.da.DanishAnalyzer;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.el.GreekAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.es.SpanishAnalyzer;
import org.apache.lucene.analysis.et.EstonianAnalyzer;
import org.apache.lucene.analysis.eu.BasqueAnalyzer;
import org.apache.lucene.analysis.fa.PersianAnalyzer;
import org.apache.lucene.analysis.fi.FinnishAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.ga.IrishAnalyzer;
import org.apache.lucene.analysis.gl.GalicianAnalyzer;
import org.apache.lucene.analysis.hi.HindiAnalyzer;
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
import org.apache.lucene.analysis.id.IndonesianAnalyzer;
import org.apache.lucene.analysis.it.ItalianAnalyzer;
import org.apache.lucene.analysis.lt.LithuanianAnalyzer;
import org.apache.lucene.analysis.lv.LatvianAnalyzer;
import org.apache.lucene.analysis.ne.NepaliAnalyzer;
import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.apache.lucene.analysis.no.NorwegianAnalyzer;
import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
import org.apache.lucene.analysis.ro.RomanianAnalyzer;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.sr.SerbianAnalyzer;
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.apache.lucene.analysis.ta.TamilAnalyzer;
import org.apache.lucene.analysis.te.TeluguAnalyzer;
import org.apache.lucene.analysis.th.ThaiAnalyzer;
import org.apache.lucene.analysis.tr.TurkishAnalyzer;

import java.util.Map;

import static java.util.Map.entry;

/**
 * Lookup table from {@link Language} to a ready-made Lucene {@link Analyzer}.
 *
 * <p>Every analyzer is created eagerly with its no-argument constructor when an instance of
 * this class is constructed. Each of the four CJK languages gets its own {@link CJKAnalyzer}
 * instance.</p>
 *
 * @author dainiusjocas
 */
class DefaultAnalyzers {

    /** Unmodifiable language-to-analyzer table, populated once by the constructor. */
    private final Map<Language, Analyzer> analyzersByLanguage;

    /** Creates one analyzer per registered language and stores them in the lookup table. */
    public DefaultAnalyzers() {
        this.analyzersByLanguage = createAnalyzers();
    }

    /**
     * Looks up the analyzer registered for a language.
     *
     * @param language the language to look up
     * @return the analyzer registered for {@code language}, or {@code null} when there is none
     */
    public Analyzer get(Language language) {
        Analyzer registered = analyzersByLanguage.get(language);
        return registered;
    }

    /** Builds the unmodifiable table of analyzers, one entry per registered language. */
    private static Map<Language, Analyzer> createAnalyzers() {
        return Map.ofEntries(
            Map.entry(Language.ARABIC, new ArabicAnalyzer()),
            Map.entry(Language.BULGARIAN, new BulgarianAnalyzer()),
            Map.entry(Language.BENGALI, new BengaliAnalyzer()),
            // No entry for BrazilianAnalyzer: a Language.BRASILIAN mapping is not registered.
            Map.entry(Language.CATALAN, new CatalanAnalyzer()),
            Map.entry(Language.CHINESE_SIMPLIFIED, new CJKAnalyzer()),
            Map.entry(Language.CHINESE_TRADITIONAL, new CJKAnalyzer()),
            Map.entry(Language.JAPANESE, new CJKAnalyzer()),
            Map.entry(Language.KOREAN, new CJKAnalyzer()),
            Map.entry(Language.KURDISH, new SoraniAnalyzer()),
            Map.entry(Language.CZECH, new CzechAnalyzer()),
            Map.entry(Language.DANISH, new DanishAnalyzer()),
            Map.entry(Language.GERMAN, new GermanAnalyzer()),
            Map.entry(Language.GREEK, new GreekAnalyzer()),
            Map.entry(Language.ENGLISH, new EnglishAnalyzer()),
            Map.entry(Language.SPANISH, new SpanishAnalyzer()),
            Map.entry(Language.ESTONIAN, new EstonianAnalyzer()),
            Map.entry(Language.BASQUE, new BasqueAnalyzer()),
            Map.entry(Language.PERSIAN, new PersianAnalyzer()),
            Map.entry(Language.FINNISH, new FinnishAnalyzer()),
            Map.entry(Language.FRENCH, new FrenchAnalyzer()),
            Map.entry(Language.IRISH, new IrishAnalyzer()),
            Map.entry(Language.GALICIAN, new GalicianAnalyzer()),
            Map.entry(Language.HINDI, new HindiAnalyzer()),
            Map.entry(Language.HUNGARIAN, new HungarianAnalyzer()),
            Map.entry(Language.ARMENIAN, new ArmenianAnalyzer()),
            Map.entry(Language.INDONESIAN, new IndonesianAnalyzer()),
            Map.entry(Language.ITALIAN, new ItalianAnalyzer()),
            Map.entry(Language.LITHUANIAN, new LithuanianAnalyzer()),
            Map.entry(Language.LATVIAN, new LatvianAnalyzer()),
            Map.entry(Language.NEPALI, new NepaliAnalyzer()),
            Map.entry(Language.DUTCH, new DutchAnalyzer()),
            Map.entry(Language.NORWEGIAN_BOKMAL, new NorwegianAnalyzer()),
            Map.entry(Language.PORTUGUESE, new PortugueseAnalyzer()),
            Map.entry(Language.ROMANIAN, new RomanianAnalyzer()),
            Map.entry(Language.RUSSIAN, new RussianAnalyzer()),
            Map.entry(Language.SERBIAN, new SerbianAnalyzer()),
            Map.entry(Language.SWEDISH, new SwedishAnalyzer()),
            Map.entry(Language.TAMIL, new TamilAnalyzer()),
            Map.entry(Language.TELUGU, new TeluguAnalyzer()),
            Map.entry(Language.THAI, new ThaiAnalyzer()),
            Map.entry(Language.TURKISH, new TurkishAnalyzer())
        );
    }

}