diff options
author | Jon Marius Venstad <venstad@gmail.com> | 2021-12-20 13:41:21 +0100 |
---|---|---|
committer | Jon Marius Venstad <venstad@gmail.com> | 2021-12-20 13:41:21 +0100 |
commit | 76c192666396a934dc0d419a81e3c67a8e82509d (patch) | |
tree | c76f0a0b66954fcdf89851f1c71a61892304df8b /linguistics/src | |
parent | 5e63f91b4a806931d23d43f24fefb0790802535e (diff) |
Use smaller chunks for faster detection
Diffstat (limited to 'linguistics/src')
-rw-r--r-- | linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpDetector.java | 4 |
1 file changed, 2 insertions, 2 deletions
```diff
diff --git a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpDetector.java b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpDetector.java
index cb4580a9c01..d7a7d3a4744 100644
--- a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpDetector.java
+++ b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpDetector.java
@@ -21,7 +21,7 @@ import java.util.Map;
 import static java.nio.charset.StandardCharsets.UTF_8;
 
 /**
- * Detects the language of some sample text using {@link SimpleDetector} for CJK input, and OpenNLP otherwise.
+ * Detects text language using patched OpenNLP, with fallback to {@link SimpleDetector} for undetected CJK input.
  *
  * @author jonmv
  */
@@ -39,7 +39,7 @@ class OpenNlpDetector implements Detector {
         detector = new LanguageDetectorME(loadModel());
         config = new LanguageDetectorConfig();
         config.setMinDiff(0.02);
-        config.setChunkSize(64);
+        config.setChunkSize(32);
         config.setMaxLength(256);
         for (Locale locale : Locale.getAvailableLocales()) {
             Language language = Language.fromLocale(locale);
```