summary | refs | log | tree | commit | diff | stats
path: root/lucene-linguistics
diff options
context:
space:
mode:
author: Dainius Jocas <dainius.jocas@gmail.com> 2023-10-02 12:03:28 +0300
committer: Dainius Jocas <dainius.jocas@gmail.com> 2023-10-02 12:03:28 +0300
commit: c00ce99dffab347e0466c33de4a8eeda58fb4bd9 (patch)
tree: 6611154edc9e57aaa77dc637e6e31a346f603050 /lucene-linguistics
parent: 9f846510fcbd73285e87df0404c8477e66c836d5 (diff)
lucene-linguistics: add default Chinese, Japanese, and Korean support
Diffstat (limited to 'lucene-linguistics')
-rw-r--r-- lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java 6
1 file changed, 5 insertions, 1 deletion
diff --git a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java
index 95b11301d47..82d3cad7fdb 100644
--- a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java
+++ b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java
@@ -6,6 +6,7 @@ import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.apache.lucene.analysis.bn.BengaliAnalyzer;
import org.apache.lucene.analysis.ca.CatalanAnalyzer;
+import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.ckb.SoraniAnalyzer;
import org.apache.lucene.analysis.cz.CzechAnalyzer;
import org.apache.lucene.analysis.da.DanishAnalyzer;
@@ -58,7 +59,10 @@ class DefaultAnalyzers {
entry(Language.BENGALI, new BengaliAnalyzer()),
// analyzerClasses.put(Language.BRASILIAN, new BrazilianAnalyzer())
entry(Language.CATALAN, new CatalanAnalyzer()),
- // cjk analyzer?
+ entry(Language.CHINESE_SIMPLIFIED, new CJKAnalyzer()),
+ entry(Language.CHINESE_TRADITIONAL, new CJKAnalyzer()),
+ entry(Language.JAPANESE, new CJKAnalyzer()),
+ entry(Language.KOREAN, new CJKAnalyzer()),
entry(Language.KURDISH, new SoraniAnalyzer()),
entry(Language.CZECH, new CzechAnalyzer()),
entry(Language.DANISH, new DanishAnalyzer()),