diff options
Diffstat (limited to 'lucene-linguistics')
9 files changed, 15 insertions, 12 deletions
diff --git a/lucene-linguistics/README.md b/lucene-linguistics/README.md index 3ada42f6125..192b6b52524 100644 --- a/lucene-linguistics/README.md +++ b/lucene-linguistics/README.md @@ -1,3 +1,4 @@ +<!-- Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> # Vespa Lucene Linguistics Linguistics implementation based on the [Apache Lucene](https://lucene.apache.org). diff --git a/lucene-linguistics/pom.xml b/lucene-linguistics/pom.xml index 18f2b1a8574..50b850b93d2 100644 --- a/lucene-linguistics/pom.xml +++ b/lucene-linguistics/pom.xml @@ -1,4 +1,5 @@ <?xml version="1.0" encoding="UTF-8"?> +<!-- Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. --> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> @@ -41,29 +42,19 @@ </dependency> <dependency> <groupId>com.yahoo.vespa</groupId> - <artifactId>configdefinitions</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>com.yahoo.vespa</groupId> <artifactId>annotations</artifactId> <version>${project.version}</version> <scope>provided</scope> </dependency> <dependency> <groupId>com.yahoo.vespa</groupId> - <artifactId>vespajlib</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>com.yahoo.vespa</groupId> <artifactId>linguistics</artifactId> <version>${project.version}</version> + <scope>provided</scope> </dependency> <dependency> <groupId>com.google.inject</groupId> <artifactId>guice</artifactId> - <scope>provided</scope> </dependency> <dependency> diff --git a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/AnalyzerFactory.java b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/AnalyzerFactory.java index 92ea77cdc13..dd338fb7d44 100644 --- a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/AnalyzerFactory.java +++ b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/AnalyzerFactory.java @@ -1,3 +1,4 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.language.lucene; import com.yahoo.component.provider.ComponentRegistry; diff --git a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java index 95b11301d47..e550d8aea43 100644 --- a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java +++ b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/DefaultAnalyzers.java @@ -1,3 +1,4 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.language.lucene; import com.yahoo.language.Language; @@ -6,6 +7,7 @@ import org.apache.lucene.analysis.ar.ArabicAnalyzer; import org.apache.lucene.analysis.bg.BulgarianAnalyzer; import org.apache.lucene.analysis.bn.BengaliAnalyzer; import org.apache.lucene.analysis.ca.CatalanAnalyzer; +import org.apache.lucene.analysis.cjk.CJKAnalyzer; import org.apache.lucene.analysis.ckb.SoraniAnalyzer; import org.apache.lucene.analysis.cz.CzechAnalyzer; import org.apache.lucene.analysis.da.DanishAnalyzer; @@ -58,7 +60,10 @@ class DefaultAnalyzers { entry(Language.BENGALI, new BengaliAnalyzer()), // analyzerClasses.put(Language.BRASILIAN, new BrazilianAnalyzer()) entry(Language.CATALAN, new CatalanAnalyzer()), - // cjk analyzer? + entry(Language.CHINESE_SIMPLIFIED, new CJKAnalyzer()), + entry(Language.CHINESE_TRADITIONAL, new CJKAnalyzer()), + entry(Language.JAPANESE, new CJKAnalyzer()), + entry(Language.KOREAN, new CJKAnalyzer()), entry(Language.KURDISH, new SoraniAnalyzer()), entry(Language.CZECH, new CzechAnalyzer()), entry(Language.DANISH, new DanishAnalyzer()), diff --git a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/LuceneLinguistics.java b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/LuceneLinguistics.java index 8b193c103d6..6d184d9ddb2 100644 --- a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/LuceneLinguistics.java +++ b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/LuceneLinguistics.java @@ -1,3 +1,4 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.language.lucene; import com.google.inject.Inject; diff --git a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/LuceneTokenizer.java b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/LuceneTokenizer.java index c1fa4da4989..0beb850ca6e 100644 --- a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/LuceneTokenizer.java +++ b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/LuceneTokenizer.java @@ -1,3 +1,4 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.language.lucene; import com.yahoo.component.provider.ComponentRegistry; diff --git a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/package-info.java b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/package-info.java index 14330723224..9977a5e4710 100644 --- a/lucene-linguistics/src/main/java/com/yahoo/language/lucene/package-info.java +++ b/lucene-linguistics/src/main/java/com/yahoo/language/lucene/package-info.java @@ -1,3 +1,4 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. @ExportPackage package com.yahoo.language.lucene; diff --git a/lucene-linguistics/src/main/resources/configdefinitions/lucene-analysis.def b/lucene-linguistics/src/main/resources/configdefinitions/lucene-analysis.def index 081d93ec580..19cf5087ad1 100644 --- a/lucene-linguistics/src/main/resources/configdefinitions/lucene-analysis.def +++ b/lucene-linguistics/src/main/resources/configdefinitions/lucene-analysis.def @@ -1,3 +1,4 @@ +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package=com.yahoo.language.lucene # The schema ("type") for an application specified config type diff --git a/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java b/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java index fc29fcc0071..44bed2d4a75 100644 --- a/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java +++ b/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java @@ -1,3 +1,4 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.language.lucene; import com.yahoo.component.provider.ComponentRegistry; |