diff options
author | Dainius Jocas <dainius.jocas@gmail.com> | 2023-10-02 12:03:28 +0300 |
---|---|---|
committer | Dainius Jocas <dainius.jocas@gmail.com> | 2023-10-02 12:03:28 +0300 |
commit | c00ce99dffab347e0466c33de4a8eeda58fb4bd9 (patch) | |
tree | 6611154edc9e57aaa77dc637e6e31a346f603050 /lucene-linguistics | |
parent | 9f846510fcbd73285e87df0404c8477e66c836d5 (diff) |
lucene-linguistics: add default Chinese, Japanese, and Korean support
Diffstat (limited to 'lucene-linguistics')
-rw-r--r-- | lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java | 6 |
1 file changed, 5 insertions, 1 deletion
diff --git a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java
index 95b11301d47..82d3cad7fdb 100644
--- a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java
+++ b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java
@@ -6,6 +6,7 @@ import org.apache.lucene.analysis.ar.ArabicAnalyzer;
 import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
 import org.apache.lucene.analysis.bn.BengaliAnalyzer;
 import org.apache.lucene.analysis.ca.CatalanAnalyzer;
+import org.apache.lucene.analysis.cjk.CJKAnalyzer;
 import org.apache.lucene.analysis.ckb.SoraniAnalyzer;
 import org.apache.lucene.analysis.cz.CzechAnalyzer;
 import org.apache.lucene.analysis.da.DanishAnalyzer;
@@ -58,7 +59,10 @@ class DefaultAnalyzers {
  entry(Language.BENGALI, new BengaliAnalyzer()),
  // analyzerClasses.put(Language.BRASILIAN, new BrazilianAnalyzer())
  entry(Language.CATALAN, new CatalanAnalyzer()),
- // cjk analyzer?
+ entry(Language.CHINESE_SIMPLIFIED, new CJKAnalyzer()),
+ entry(Language.CHINESE_TRADITIONAL, new CJKAnalyzer()),
+ entry(Language.JAPANESE, new CJKAnalyzer()),
+ entry(Language.KOREAN, new CJKAnalyzer()),
  entry(Language.KURDISH, new SoraniAnalyzer()),
  entry(Language.CZECH, new CzechAnalyzer()),
  entry(Language.DANISH, new DanishAnalyzer()),