about | summary | refs | log | tree | commit | diff | stats
path: root/lucene-linguistics
diff options
context:
space:
mode:
Diffstat (limited to 'lucene-linguistics')
-rw-r--r-- lucene-linguistics/README.md | 1
-rw-r--r-- lucene-linguistics/pom.xml | 13
-rw-r--r-- lucene-linguistics/src/main/java/com/yahoo/language/lucene/AnalyzerFactory.java | 1
-rw-r--r-- lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java | 7
-rw-r--r-- lucene-linguistics/src/main/java/com/yahoo/language/lucene/LuceneLinguistics.java | 1
-rw-r--r-- lucene-linguistics/src/main/java/com/yahoo/language/lucene/LuceneTokenizer.java | 1
-rw-r--r-- lucene-linguistics/src/main/java/com/yahoo/language/lucene/package-info.java | 1
-rw-r--r-- lucene-linguistics/src/main/resources/configdefinitions/lucene-analysis.def | 1
-rw-r--r-- lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java | 1
9 files changed, 15 insertions, 12 deletions
diff --git a/lucene-linguistics/README.md b/lucene-linguistics/README.md
index 3ada42f6125..192b6b52524 100644
--- a/lucene-linguistics/README.md
+++ b/lucene-linguistics/README.md
@@ -1,3 +1,4 @@
+<!-- Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
# Vespa Lucene Linguistics
Linguistics implementation based on the [Apache Lucene](https://lucene.apache.org).
diff --git a/lucene-linguistics/pom.xml b/lucene-linguistics/pom.xml
index 18f2b1a8574..50b850b93d2 100644
--- a/lucene-linguistics/pom.xml
+++ b/lucene-linguistics/pom.xml
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+<!-- Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
@@ -41,29 +42,19 @@
</dependency>
<dependency>
<groupId>com.yahoo.vespa</groupId>
- <artifactId>configdefinitions</artifactId>
- <version>${project.version}</version>
- </dependency>
- <dependency>
- <groupId>com.yahoo.vespa</groupId>
<artifactId>annotations</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.yahoo.vespa</groupId>
- <artifactId>vespajlib</artifactId>
- <version>${project.version}</version>
- </dependency>
- <dependency>
- <groupId>com.yahoo.vespa</groupId>
<artifactId>linguistics</artifactId>
<version>${project.version}</version>
+ <scope>provided</scope>
</dependency>
<dependency>
<groupId>com.google.inject</groupId>
<artifactId>guice</artifactId>
-
<scope>provided</scope>
</dependency>
<dependency>
diff --git a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/AnalyzerFactory.java b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/AnalyzerFactory.java
index 92ea77cdc13..dd338fb7d44 100644
--- a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/AnalyzerFactory.java
+++ b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/AnalyzerFactory.java
@@ -1,3 +1,4 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.language.lucene;
import com.yahoo.component.provider.ComponentRegistry;
diff --git a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java
index 95b11301d47..e550d8aea43 100644
--- a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java
+++ b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java
@@ -1,3 +1,4 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.language.lucene;
import com.yahoo.language.Language;
@@ -6,6 +7,7 @@ import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.apache.lucene.analysis.bn.BengaliAnalyzer;
import org.apache.lucene.analysis.ca.CatalanAnalyzer;
+import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.ckb.SoraniAnalyzer;
import org.apache.lucene.analysis.cz.CzechAnalyzer;
import org.apache.lucene.analysis.da.DanishAnalyzer;
@@ -58,7 +60,10 @@ class DefaultAnalyzers {
entry(Language.BENGALI, new BengaliAnalyzer()),
// analyzerClasses.put(Language.BRASILIAN, new BrazilianAnalyzer())
entry(Language.CATALAN, new CatalanAnalyzer()),
- // cjk analyzer?
+ entry(Language.CHINESE_SIMPLIFIED, new CJKAnalyzer()),
+ entry(Language.CHINESE_TRADITIONAL, new CJKAnalyzer()),
+ entry(Language.JAPANESE, new CJKAnalyzer()),
+ entry(Language.KOREAN, new CJKAnalyzer()),
entry(Language.KURDISH, new SoraniAnalyzer()),
entry(Language.CZECH, new CzechAnalyzer()),
entry(Language.DANISH, new DanishAnalyzer()),
diff --git a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/LuceneLinguistics.java b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/LuceneLinguistics.java
index 8b193c103d6..6d184d9ddb2 100644
--- a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/LuceneLinguistics.java
+++ b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/LuceneLinguistics.java
@@ -1,3 +1,4 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.language.lucene;
import com.google.inject.Inject;
diff --git a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/LuceneTokenizer.java b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/LuceneTokenizer.java
index c1fa4da4989..0beb850ca6e 100644
--- a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/LuceneTokenizer.java
+++ b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/LuceneTokenizer.java
@@ -1,3 +1,4 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.language.lucene;
import com.yahoo.component.provider.ComponentRegistry;
diff --git a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/package-info.java b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/package-info.java
index 14330723224..9977a5e4710 100644
--- a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/package-info.java
+++ b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/package-info.java
@@ -1,3 +1,4 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
@ExportPackage
package com.yahoo.language.lucene;
diff --git a/lucene-linguistics/src/main/resources/configdefinitions/lucene-analysis.def b/lucene-linguistics/src/main/resources/configdefinitions/lucene-analysis.def
index 081d93ec580..19cf5087ad1 100644
--- a/lucene-linguistics/src/main/resources/configdefinitions/lucene-analysis.def
+++ b/lucene-linguistics/src/main/resources/configdefinitions/lucene-analysis.def
@@ -1,3 +1,4 @@
+# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package=com.yahoo.language.lucene
# The schema ("type") for an application specified config type
diff --git a/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java b/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java
index fc29fcc0071..44bed2d4a75 100644
--- a/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java
+++ b/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java
@@ -1,3 +1,4 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.language.lucene;
import com.yahoo.component.provider.ComponentRegistry;