summary refs log tree commit diff stats
path: root/linguistics
diff options
context:
space:
mode:
author: Jon Marius Venstad <venstad@gmail.com> 2021-12-18 08:08:47 +0100
committer: Jon Marius Venstad <venstad@gmail.com> 2021-12-18 08:08:47 +0100
commit: df1c80b0987a8b7a82a2adc8e29befc05f2a326c (patch)
tree: 4aa6b122cc61f0ce00548c33d5d62ac90cc8e3ac /linguistics
parent: d2cd1f45cddb57e9f48b07610d7ef70f3c394dac (diff)
Move model to module where it is needed, to simplify, at the cost of larger bundles
Diffstat (limited to 'linguistics')
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/opennlp/LangDetectModel.java 16
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpDetector.java 23
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpLinguistics.java 4
3 files changed, 21 insertions, 22 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/opennlp/LangDetectModel.java b/linguistics/src/main/java/com/yahoo/language/opennlp/LangDetectModel.java
deleted file mode 100644
index 144b4612005..00000000000
--- a/linguistics/src/main/java/com/yahoo/language/opennlp/LangDetectModel.java
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.language.opennlp;
-
-import opennlp.tools.langdetect.LanguageDetectorModel;
-
-/**
- * Wrapper to lazily load a langdetect model for OpenNLP.
- *
- * @author jonmv
- */
-public interface LangDetectModel {
-
- /** Loads a {@link LanguageDetectorModel}, or throws if this fails. */
- LanguageDetectorModel load();
-
-}
diff --git a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpDetector.java b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpDetector.java
index e0c0960b920..849452aeafd 100644
--- a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpDetector.java
+++ b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpDetector.java
@@ -6,7 +6,6 @@ import com.yahoo.language.detect.Detection;
import com.yahoo.language.detect.Detector;
import com.yahoo.language.detect.Hint;
import com.yahoo.language.simple.SimpleDetector;
-import opennlp.tools.cmdline.langdetect.LanguageDetectorModelLoader;
import opennlp.tools.langdetect.LanguageDetectorConfig;
import opennlp.tools.langdetect.LanguageDetectorME;
import opennlp.tools.langdetect.LanguageDetectorModel;
@@ -15,7 +14,6 @@ import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
-import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
@@ -29,13 +27,16 @@ import static java.nio.charset.StandardCharsets.UTF_8;
*/
class OpenNlpDetector implements Detector {
+ private static final Object monitor = new Object();
+ private static LanguageDetectorModel model;
+
private final SimpleDetector simple = new SimpleDetector();
private final Map<String, Language> languagesByISO3 = new HashMap<>();
private final LanguageDetectorME detector;
private final LanguageDetectorConfig config;
- OpenNlpDetector(LanguageDetectorModel model) {
- detector = new LanguageDetectorME(model);
+ OpenNlpDetector() {
+ detector = new LanguageDetectorME(loadModel());
config = new LanguageDetectorConfig();
config.setMinDiff(0.02);
config.setChunkSize(64);
@@ -43,6 +44,20 @@ class OpenNlpDetector implements Detector {
languagesByISO3.put(locale.getISO3Language(), Language.fromLocale(locale));
}
+ private static LanguageDetectorModel loadModel() {
+ synchronized (monitor) {
+ if (model == null) {
+ try {
+ model = new LanguageDetectorModel(OpenNlpDetector.class.getResourceAsStream("/models/langdetect-183.bin"));
+ }
+ catch (IOException e) {
+ throw new UncheckedIOException(e);
+ }
+ }
+ }
+ return model;
+ }
+
@Override
public Detection detect(byte[] input, int offset, int length, Hint hint) {
Charset encoding = Charset.forName(simple.guessEncoding(input, offset, length));
diff --git a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpLinguistics.java b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpLinguistics.java
index 1f4ec4e261b..c749679024a 100644
--- a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpLinguistics.java
+++ b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpLinguistics.java
@@ -20,8 +20,8 @@ public class OpenNlpLinguistics extends SimpleLinguistics {
private final Detector detector;
@Inject
- public OpenNlpLinguistics(LangDetectModel model) {
- this.detector = new OpenNlpDetector(model.load());
+ public OpenNlpLinguistics() {
+ this.detector = new OpenNlpDetector();
}
@Override