diff options
19 files changed, 208 insertions, 195 deletions
diff --git a/application/pom.xml b/application/pom.xml index 61cea1a1826..af25bda0f07 100644 --- a/application/pom.xml +++ b/application/pom.xml @@ -97,17 +97,6 @@ <scope>compile</scope> </dependency> <dependency> - <groupId>com.optimaize.languagedetector</groupId> - <artifactId>language-detector</artifactId> - <exclusions> - <exclusion> - <!-- We want to get this via jdisc-core --> - <groupId>org.slf4j</groupId> - <artifactId>slf4j-api</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> <groupId>org.antlr</groupId> <artifactId>antlr-runtime</artifactId> </dependency> diff --git a/cloud-tenant-base-dependencies-enforcer/pom.xml b/cloud-tenant-base-dependencies-enforcer/pom.xml index da50fadbd16..b62d355480f 100644 --- a/cloud-tenant-base-dependencies-enforcer/pom.xml +++ b/cloud-tenant-base-dependencies-enforcer/pom.xml @@ -219,7 +219,6 @@ <include>com.ibm.icu:icu4j:57.1:jar:test</include> <include>com.intellij:annotations:12.0:jar:test</include> <include>com.microsoft.onnxruntime:onnxruntime:[${onnxruntime.version}]:jar:test</include> - <include>com.optimaize.languagedetector:language-detector:0.6:jar:test</include> <include>com.thaiopensource:jing:20091111:jar:test</include> <include>com.yahoo.athenz:athenz-auth-core:[${athenz.version}]:jar:test</include> <include>com.yahoo.athenz:athenz-client-common:[${athenz.version}]:jar:test</include> @@ -249,7 +248,7 @@ <include>org.apache.httpcomponents.core5:httpcore5-h2:${httpclient5.version}:jar:test</include> <include>org.apache.httpcomponents:httpclient:4.5.12:jar:test</include> <include>org.apache.httpcomponents:httpcore:4.4.13:jar:test</include> - <include>org.apache.opennlp:opennlp-tools:1.8.4:jar:test</include> + <include>org.apache.opennlp:opennlp-tools:1.9.3:jar:test</include> <include>org.apiguardian:apiguardian-api:1.1.0:jar:test</include> <include>org.codehaus.woodstox:stax2-api:3.1.4:jar:test</include> <include>org.eclipse.jetty.alpn:alpn-api:[${jetty-alpn.version}]:jar:test</include> diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java index e8be43fdc96..8396aab022e 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java @@ -105,6 +105,7 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat addSimpleComponent("com.yahoo.language.provider.DefaultLinguisticsProvider"); addSimpleComponent("com.yahoo.language.provider.DefaultEmbedderProvider"); + addSimpleComponent("com.yahoo.language.opennlp.LangDetectModel183"); addSimpleComponent("com.yahoo.container.jdisc.SecretStoreProvider"); addSimpleComponent("com.yahoo.container.jdisc.DeprecatedSecretStoreProvider"); addSimpleComponent("com.yahoo.container.jdisc.CertificateStoreProvider"); diff --git a/config-model/src/test/java/com/yahoo/vespa/model/container/ContainerClusterTest.java b/config-model/src/test/java/com/yahoo/vespa/model/container/ContainerClusterTest.java index 560ac28b6f7..94344172a1f 100755 --- a/config-model/src/test/java/com/yahoo/vespa/model/container/ContainerClusterTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/container/ContainerClusterTest.java @@ -205,7 +205,9 @@ public class ContainerClusterTest { MockRoot root = createRoot(false); ClusterControllerContainerCluster cluster = createClusterControllerCluster(root); addClusterController(root.deployLogger(), cluster, "host-c1", root.getDeployState()); + assertFalse(contains("com.yahoo.language.provider.DefaultLinguisticsProvider", cluster.getAllComponents())); + assertFalse(contains("com.yahoo.language.opennlp.LangDetectModel183", cluster.getAllComponents())); } private static boolean contains(String componentId, Collection<Component<?, ?>> componentList) { diff --git a/container-core/src/main/java/com/yahoo/language/provider/DefaultLinguisticsProvider.java b/container-core/src/main/java/com/yahoo/language/provider/DefaultLinguisticsProvider.java index 563c9a8bdff..11ed2157401 100644 --- a/container-core/src/main/java/com/yahoo/language/provider/DefaultLinguisticsProvider.java +++ b/container-core/src/main/java/com/yahoo/language/provider/DefaultLinguisticsProvider.java @@ -6,6 +6,7 @@ import com.google.common.base.Suppliers; import com.google.inject.Inject; import com.yahoo.container.di.componentgraph.Provider; import com.yahoo.language.Linguistics; +import com.yahoo.language.opennlp.LangDetectModel; import com.yahoo.language.opennlp.OpenNlpLinguistics; /** @@ -18,10 +19,12 @@ import com.yahoo.language.opennlp.OpenNlpLinguistics; public class DefaultLinguisticsProvider implements Provider<Linguistics> { // Use lazy initialization to avoid expensive (memory-wise) instantiation - private final Supplier<Linguistics> linguisticsSupplier = Suppliers.memoize(OpenNlpLinguistics::new); + private final Supplier<Linguistics> linguisticsSupplier; @Inject - public DefaultLinguisticsProvider() { } + public DefaultLinguisticsProvider(LangDetectModel detectorModel) { + linguisticsSupplier = Suppliers.memoize(() -> new OpenNlpLinguistics(detectorModel)); + } @Override public Linguistics get() { return linguisticsSupplier.get(); } diff --git a/container-dev/pom.xml b/container-dev/pom.xml index a76f295a7b1..034081f4620 100644 --- a/container-dev/pom.xml +++ b/container-dev/pom.xml @@ -194,10 +194,6 @@ <version>${project.version}</version> <exclusions> <exclusion> - <groupId>com.optimaize.languagedetector</groupId> - <artifactId>language-detector</artifactId> - </exclusion> - <exclusion> <groupId>org.apache.opennlp</groupId> <artifactId>opennlp-tools</artifactId> </exclusion> diff --git a/linguistics-components/src/main/java/com/yahoo/language/opennlp/LangDetectModel183.java b/linguistics-components/src/main/java/com/yahoo/language/opennlp/LangDetectModel183.java new file mode 100644 index 00000000000..c9e78259336 --- /dev/null +++ b/linguistics-components/src/main/java/com/yahoo/language/opennlp/LangDetectModel183.java @@ -0,0 +1,29 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.language.opennlp; + +import opennlp.tools.langdetect.LanguageDetectorModel; + +import java.io.IOException; +import java.io.UncheckedIOException; + +public class LangDetectModel183 implements LangDetectModel { + + private final Object monitor = new Object(); + private LanguageDetectorModel loaded; + + @Override + public LanguageDetectorModel load() { + synchronized (monitor) { + if (loaded == null) { + try { + loaded = new LanguageDetectorModel(LangDetectModel183.class.getResourceAsStream("/models/langdetect-183.bin")); + } + catch (IOException e) { + throw new UncheckedIOException(e); + } + } + } + return loaded; + } + +} diff --git a/linguistics-components/src/main/java/com/yahoo/language/opennlp/package-info.java b/linguistics-components/src/main/java/com/yahoo/language/opennlp/package-info.java new file mode 100644 index 00000000000..9606578b3ac --- /dev/null +++ b/linguistics-components/src/main/java/com/yahoo/language/opennlp/package-info.java @@ -0,0 +1,5 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.language.opennlp; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/linguistics-components/src/main/resources/models/langdetect-183.bin b/linguistics-components/src/main/resources/models/langdetect-183.bin Binary files differnew file mode 100644 index 00000000000..0b4ea89690e --- /dev/null +++ b/linguistics-components/src/main/resources/models/langdetect-183.bin diff --git a/linguistics-components/src/test/java/com/yahoo/language/opennlp/OpenNlpDetectorTestCase.java b/linguistics-components/src/test/java/com/yahoo/language/opennlp/OpenNlpDetectorTestCase.java new file mode 100644 index 00000000000..5b095ae0681 --- /dev/null +++ b/linguistics-components/src/test/java/com/yahoo/language/opennlp/OpenNlpDetectorTestCase.java @@ -0,0 +1,62 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.language.opennlp; + +import com.yahoo.language.Language; +import com.yahoo.language.detect.Detector; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +/** + * @author jonmv + */ +public class OpenNlpDetectorTestCase { + + @Test + public void testDetection() { + Detector detector = new OpenNlpDetector(new LangDetectModel183().load()); + + assertLanguage(Language.UNKNOWN, + "", + detector); + + assertLanguage(Language.UNKNOWN, + "Hello!", + detector); + + // from https://en.wikipedia.org/wiki/Yahoo + assertLanguage(Language.ENGLISH, + "Yahoo became a public company via an initial public offering in April 1996 and its stock price rose 600% within two years.", + detector); + + // from https://de.wikipedia.org/wiki/Yahoo + assertLanguage(Language.GERMAN, + "1996 ging Yahoo mit 46 Angestellten an die Börse. 2009 arbeiteten insgesamt rund 13.500 Mitarbeiter für Yahoo.", + detector); + + // from https://fr.wikipedia.org/wiki/Yahoo + assertLanguage(Language.FRENCH, + "À l'origine, Yahoo! était uniquement un annuaire Web.", + detector); + + // Test fallback to SimpleDetector + assertLanguage(Language.CHINESE_TRADITIONAL, // CHINESE_SIMPLIFIED input + "\u6211\u80FD\u541E\u4E0B\u73BB\u7483\u800C\u4E0D\u4F24\u8EAB\u4F53\u3002", + detector); + + // from https://ru.wikipedia.org/wiki/%D0%A0%D0%BE%D1%81%D1%81%D0%B8%D1%8F + assertLanguage(Language.RUSSIAN, + "7 февраля 2000 года Yahoo.com подвергся DDoS атаке и на несколько часов приостановил работу.", + detector); + + // https://he.wikipedia.org/wiki/Yahoo! + assertLanguage(Language.HEBREW, + "אתר יאהו! הוא אחד מאתרי האינטרנט הפופולריים ביותר בעולם, עם מעל 500 מיליון כניסות בכל יום", + detector); + } + + private void assertLanguage(Language language, String input, Detector detector) { + assertEquals(language, detector.detect(input, null).getLanguage()); + } + +} diff --git a/linguistics/pom.xml b/linguistics/pom.xml index a09f2ecb031..c48054911a2 100644 --- a/linguistics/pom.xml +++ b/linguistics/pom.xml @@ -61,10 +61,6 @@ <groupId>org.apache.opennlp</groupId> <artifactId>opennlp-tools</artifactId> </dependency> - <dependency> - <groupId>com.optimaize.languagedetector</groupId> - <artifactId>language-detector</artifactId> - </dependency> </dependencies> <build> <plugins> diff --git a/linguistics/src/main/java/com/yahoo/language/opennlp/LangDetectModel.java b/linguistics/src/main/java/com/yahoo/language/opennlp/LangDetectModel.java new file mode 100644 index 00000000000..70664b8f79f --- /dev/null +++ b/linguistics/src/main/java/com/yahoo/language/opennlp/LangDetectModel.java @@ -0,0 +1,13 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.language.opennlp; + +import opennlp.tools.langdetect.LanguageDetectorModel; + +/** + * @author jonmv + */ +public interface LangDetectModel { + + LanguageDetectorModel load(); + +} diff --git a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpDetector.java b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpDetector.java new file mode 100644 index 00000000000..e0c0960b920 --- /dev/null +++ b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpDetector.java @@ -0,0 +1,77 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.language.opennlp; + +import com.yahoo.language.Language; +import com.yahoo.language.detect.Detection; +import com.yahoo.language.detect.Detector; +import com.yahoo.language.detect.Hint; +import com.yahoo.language.simple.SimpleDetector; +import opennlp.tools.cmdline.langdetect.LanguageDetectorModelLoader; +import opennlp.tools.langdetect.LanguageDetectorConfig; +import opennlp.tools.langdetect.LanguageDetectorME; +import opennlp.tools.langdetect.LanguageDetectorModel; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.nio.file.Paths; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; + +import static java.nio.charset.StandardCharsets.UTF_8; + +/** + * Detects the language of some sample text using {@link SimpleDetector} for CJK input, and OpenNLP otherwise. + * + * @author jonmv + */ +class OpenNlpDetector implements Detector { + + private final SimpleDetector simple = new SimpleDetector(); + private final Map<String, Language> languagesByISO3 = new HashMap<>(); + private final LanguageDetectorME detector; + private final LanguageDetectorConfig config; + + OpenNlpDetector(LanguageDetectorModel model) { + detector = new LanguageDetectorME(model); + config = new LanguageDetectorConfig(); + config.setMinDiff(0.02); + config.setChunkSize(64); + for (Locale locale : Locale.getAvailableLocales()) + languagesByISO3.put(locale.getISO3Language(), Language.fromLocale(locale)); + } + + @Override + public Detection detect(byte[] input, int offset, int length, Hint hint) { + Charset encoding = Charset.forName(simple.guessEncoding(input, offset, length)); + return new Detection(detectLanguage(new String(input, offset, length, encoding)), encoding.name(), false); + } + + @Override + public Detection detect(ByteBuffer input, Hint hint) { + if (input.hasArray()) + return detect(input.array(), input.arrayOffset() + input.position(), input.remaining(), hint); + + byte[] buffer = new byte[input.remaining()]; + input.get(buffer); + return detect(buffer, 0, buffer.length, hint); + } + + @Override + public Detection detect(String input, Hint hint) { + return new Detection(detectLanguage(input), UTF_8.name(), false); + } + + private Language detectLanguage(String input) { + Language simpleGuess = simple.guessLanguage(input); + if (simpleGuess != Language.UNKNOWN) + return simpleGuess; + + var prediction = detector.probingPredictLanguages(input, config).getLanguages()[0]; + return prediction.getConfidence() > 0.03 ? languagesByISO3.getOrDefault(prediction.getLang(), Language.UNKNOWN) + : Language.UNKNOWN; + } + +} diff --git a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpLinguistics.java b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpLinguistics.java index a27e726cda8..7ee17559d88 100644 --- a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpLinguistics.java +++ b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpLinguistics.java @@ -7,36 +7,22 @@ import com.yahoo.language.detect.Detector; import com.yahoo.language.process.Tokenizer; import com.yahoo.language.simple.SimpleDetector; import com.yahoo.language.simple.SimpleLinguistics; -import java.util.logging.Logger; -import java.util.logging.Level; +import opennlp.tools.langdetect.LanguageDetectorModel; /** - * Returns a linguistics implementation based on OpenNlp, - * and (optionally, default on) Optimaize for language detection. + * Returns a linguistics implementation based on OpenNlp. * * @author bratseth + * @author jonmv */ public class OpenNlpLinguistics extends SimpleLinguistics { - private static final Logger log = Logger.getLogger(OpenNlpLinguistics.class.getName()); private final Detector detector; - public OpenNlpLinguistics() { - this(true); - } - @Inject - public OpenNlpLinguistics(OpennlpLinguisticsConfig config) { - this(config.detector().enableOptimaize()); - } - - public OpenNlpLinguistics(boolean enableOptimaize) { - this(enableOptimaize ? new OptimaizeDetector() : new SimpleDetector()); - log.log(Level.FINE, "using "+(enableOptimaize ? "Optimaize" : "Simple")+" detector"); - } - - private OpenNlpLinguistics(Detector detector) { - this.detector = detector; + public OpenNlpLinguistics(LangDetectModel model) { + LanguageDetectorModel loaded = model.load(); + this.detector = loaded != null ? new OpenNlpDetector(loaded) : new SimpleDetector(); } @Override diff --git a/linguistics/src/main/java/com/yahoo/language/opennlp/OptimaizeDetector.java b/linguistics/src/main/java/com/yahoo/language/opennlp/OptimaizeDetector.java deleted file mode 100644 index 83947c795fb..00000000000 --- a/linguistics/src/main/java/com/yahoo/language/opennlp/OptimaizeDetector.java +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.language.opennlp; - -import com.google.common.base.Optional; -import com.optimaize.langdetect.LanguageDetector; -import com.optimaize.langdetect.LanguageDetectorBuilder; -import com.optimaize.langdetect.i18n.LdLocale; -import com.optimaize.langdetect.ngram.NgramExtractors; -import com.optimaize.langdetect.profiles.LanguageProfile; -import com.optimaize.langdetect.profiles.LanguageProfileReader; -import com.optimaize.langdetect.text.CommonTextObjectFactories; -import com.optimaize.langdetect.text.TextObjectFactory; -import com.yahoo.language.Language; -import com.yahoo.language.detect.Detection; -import com.yahoo.language.detect.Detector; -import com.yahoo.language.detect.Hint; -import com.yahoo.language.simple.SimpleDetector; -import com.yahoo.text.Utf8; - -import java.io.IOException; -import java.io.UncheckedIOException; -import java.nio.ByteBuffer; -import java.util.List; -import java.util.Locale; -import java.util.logging.Logger; -import java.util.logging.Level; - -/** - * Detects the language of some sample text using SimpleDetector for CJK and Optimaize otherwise. - * - * @author bratseth - */ -public class OptimaizeDetector implements Detector { - - private static final Object initGuard = new Object(); - private static TextObjectFactory textObjectFactory = null; - private static LanguageDetector languageDetector = null; - private static final Logger log = Logger.getLogger(OptimaizeDetector.class.getName()); - - static private void initOptimaize() { - synchronized (initGuard) { - if ((textObjectFactory != null) && (languageDetector != null)) return; - - // origin: https://github.com/optimaize/language-detector - // load all languages: - List<LanguageProfile> languageProfiles; - try { - languageProfiles = new LanguageProfileReader().readAllBuiltIn(); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - - //build language detector: - languageDetector = LanguageDetectorBuilder.create(NgramExtractors.standard()) - .withProfiles(languageProfiles) - .build(); - - //create a text object factory - textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText(); - } - } - - private final SimpleDetector simpleDetector = new SimpleDetector(); - - public OptimaizeDetector() { - initOptimaize(); - } - - @Override - public Detection detect(byte[] input, int offset, int length, Hint hint) { - return new Detection(guessLanguage(input, offset, length), simpleDetector.guessEncoding(input), false); - } - - @Override - public Detection detect(ByteBuffer input, Hint hint) { - byte[] buf = new byte[input.remaining()]; - input.get(buf, 0, buf.length); - return detect(buf, 0, buf.length, hint); - } - - @Override - public Detection detect(String input, Hint hint) { - return new Detection(guessLanguage(input), Utf8.getCharset().name(), false); - } - - private Language guessLanguage(byte[] buf, int offset, int length) { - return guessLanguage(Utf8.toString(buf, offset, length)); - } - - public Language guessLanguage(String input) { - if (input == null || input.length() == 0) return Language.UNKNOWN; - - Language result = simpleDetector.guessLanguage(input); - if (result != Language.UNKNOWN) return result; - - return guessLanguageUsingOptimaize(input); - } - - private static Language guessLanguageUsingOptimaize(String input) { - Optional<LdLocale> result = languageDetector.detect(textObjectFactory.forText(input)); - if ( ! result.isPresent()) return Language.UNKNOWN; - log.log(Level.FINE, () -> "guessing language "+result.get()+" from input: "+input); - - return Language.fromLocale(new Locale(result.get().getLanguage())); - } - -} diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java index 53b8ad7ad70..61d446cd8d0 100644 --- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java +++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleDetector.java @@ -130,10 +130,14 @@ public class SimpleDetector implements Detector { } public String guessEncoding(byte[] input) { + return guessEncoding(input, 0, input.length); + } + + public String guessEncoding(byte[] input, int offset, int length) { boolean isUtf8 = true; boolean hasHighs = false; scan: - for (int i = 0; i < input.length; i++) { + for (int i = offset; i < offset + length; i++) { final int l = isLeadingFor(input[i]); if (l < 0 || i + l >= input.length) { hasHighs = true; diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java index 3ca46dcc4f1..b10beb8c9af 100644 --- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java +++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java @@ -2,8 +2,7 @@ package com.yahoo.language.simple; import com.google.inject.Inject; -import com.yahoo.collections.Tuple2; -import com.yahoo.component.Version; +import com.yahoo.component.AbstractComponent; import com.yahoo.language.Linguistics; import com.yahoo.language.detect.Detector; import com.yahoo.language.process.CharacterClasses; @@ -16,7 +15,6 @@ import com.yahoo.language.process.Stemmer; import com.yahoo.language.process.StemmerImpl; import com.yahoo.language.process.Tokenizer; import com.yahoo.language.process.Transformer; -import com.yahoo.vespa.configdefinition.SpecialtokensConfig; import java.util.List; diff --git a/linguistics/src/test/java/com/yahoo/language/opennlp/OptimaizeDetectorTestCase.java b/linguistics/src/test/java/com/yahoo/language/opennlp/OptimaizeDetectorTestCase.java deleted file mode 100644 index 20b5de3b165..00000000000 --- a/linguistics/src/test/java/com/yahoo/language/opennlp/OptimaizeDetectorTestCase.java +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.language.opennlp; - -import com.yahoo.language.Language; -import com.yahoo.language.detect.Detector; -import org.junit.Test; - -import static org.junit.Assert.assertEquals; - -/** - * @author bratseth - */ -public class OptimaizeDetectorTestCase { - - private static final Detector detector = new OptimaizeDetector(); - - @Test - public void testDetection() { - assertLanguage(Language.UNKNOWN, "Hello!"); - - // Test fallback to SimpleDetector - assertLanguage(Language.CHINESE_TRADITIONAL, // CHINESE_SIMPLIFIED input - "\u6211\u80FD\u541E\u4E0B\u73BB\u7483\u800C\u4E0D\u4F24\u8EAB\u4F53\u3002"); - - // from https://ru.wikipedia.org/wiki/%D0%A0%D0%BE%D1%81%D1%81%D0%B8%D1%8F - assertLanguage(Language.RUSSIAN, "Материал из Википедии — свободной энциклопедии"); - // https://he.wikipedia.org/wiki/Yahoo! - assertLanguage(Language.HEBREW, "אתר יאהו! הוא אחד מאתרי האינטרנט הפופולריים ביותר בעולם, עם מעל 500 מיליון כניסות בכל יום"); - } - - private static void assertLanguage(Language language, String input) { - assertEquals(language, detector.detect(input, null).getLanguage()); - } - -} diff --git a/parent/pom.xml b/parent/pom.xml index 2cebaf21833..b1dcaab2d83 100644 --- a/parent/pom.xml +++ b/parent/pom.xml @@ -496,11 +496,6 @@ <version>${onnxruntime.version}</version> </dependency> <dependency> - <groupId>com.optimaize.languagedetector</groupId> - <artifactId>language-detector</artifactId> - <version>0.6</version> - </dependency> - <dependency> <groupId>com.yahoo.athenz</groupId> <artifactId>athenz-zms-java-client</artifactId> <version>${athenz.version}</version> @@ -704,7 +699,7 @@ <dependency> <groupId>org.apache.opennlp</groupId> <artifactId>opennlp-tools</artifactId> - <version>1.8.4</version> + <version>1.9.3</version> </dependency> <dependency> <groupId>org.apache.velocity</groupId> |