diff options
author | Jon Marius Venstad <venstad@gmail.com> | 2021-12-18 08:08:47 +0100 |
---|---|---|
committer | Jon Marius Venstad <venstad@gmail.com> | 2021-12-18 08:08:47 +0100 |
commit | df1c80b0987a8b7a82a2adc8e29befc05f2a326c (patch) | |
tree | 4aa6b122cc61f0ce00548c33d5d62ac90cc8e3ac /linguistics/src/main/java/com/yahoo | |
parent | d2cd1f45cddb57e9f48b07610d7ef70f3c394dac (diff) |
Move model to module where it is needed, to simplify, at the cost of larger bundles
Diffstat (limited to 'linguistics/src/main/java/com/yahoo')
3 files changed, 21 insertions, 22 deletions
```diff
diff --git a/linguistics/src/main/java/com/yahoo/language/opennlp/LangDetectModel.java b/linguistics/src/main/java/com/yahoo/language/opennlp/LangDetectModel.java
deleted file mode 100644
index 144b4612005..00000000000
--- a/linguistics/src/main/java/com/yahoo/language/opennlp/LangDetectModel.java
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.language.opennlp;
-
-import opennlp.tools.langdetect.LanguageDetectorModel;
-
-/**
- * Wrapper to lazily load a langdetect model for OpenNLP.
- *
- * @author jonmv
- */
-public interface LangDetectModel {
-
-    /** Loads a {@link LanguageDetectorModel}, or throws if this fails. */
-    LanguageDetectorModel load();
-
-}
diff --git a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpDetector.java b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpDetector.java
index e0c0960b920..849452aeafd 100644
--- a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpDetector.java
+++ b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpDetector.java
@@ -6,7 +6,6 @@ import com.yahoo.language.detect.Detection;
 import com.yahoo.language.detect.Detector;
 import com.yahoo.language.detect.Hint;
 import com.yahoo.language.simple.SimpleDetector;
-import opennlp.tools.cmdline.langdetect.LanguageDetectorModelLoader;
 import opennlp.tools.langdetect.LanguageDetectorConfig;
 import opennlp.tools.langdetect.LanguageDetectorME;
 import opennlp.tools.langdetect.LanguageDetectorModel;
@@ -15,7 +14,6 @@ import java.io.IOException;
 import java.io.UncheckedIOException;
 import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
-import java.nio.file.Paths;
 import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;
@@ -29,13 +27,16 @@ import static java.nio.charset.StandardCharsets.UTF_8;
  */
 class OpenNlpDetector implements Detector {
 
+    private static final Object monitor = new Object();
+    private static LanguageDetectorModel model;
+
     private final SimpleDetector simple = new SimpleDetector();
     private final Map<String, Language> languagesByISO3 = new HashMap<>();
     private final LanguageDetectorME detector;
     private final LanguageDetectorConfig config;
 
-    OpenNlpDetector(LanguageDetectorModel model) {
-        detector = new LanguageDetectorME(model);
+    OpenNlpDetector() {
+        detector = new LanguageDetectorME(loadModel());
         config = new LanguageDetectorConfig();
         config.setMinDiff(0.02);
         config.setChunkSize(64);
@@ -43,6 +44,20 @@ class OpenNlpDetector implements Detector {
             languagesByISO3.put(locale.getISO3Language(), Language.fromLocale(locale));
     }
 
+    private static LanguageDetectorModel loadModel() {
+        synchronized (monitor) {
+            if (model == null) {
+                try {
+                    model = new LanguageDetectorModel(OpenNlpDetector.class.getResourceAsStream("/models/langdetect-183.bin"));
+                }
+                catch (IOException e) {
+                    throw new UncheckedIOException(e);
+                }
+            }
+        }
+        return model;
+    }
+
     @Override
     public Detection detect(byte[] input, int offset, int length, Hint hint) {
         Charset encoding = Charset.forName(simple.guessEncoding(input, offset, length));
diff --git a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpLinguistics.java b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpLinguistics.java
index 1f4ec4e261b..c749679024a 100644
--- a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpLinguistics.java
+++ b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpLinguistics.java
@@ -20,8 +20,8 @@ public class OpenNlpLinguistics extends SimpleLinguistics {
     private final Detector detector;
 
     @Inject
-    public OpenNlpLinguistics(LangDetectModel model) {
-        this.detector = new OpenNlpDetector(model.load());
+    public OpenNlpLinguistics() {
+        this.detector = new OpenNlpDetector();
     }
 
     @Override
```