summary | refs | log | tree | commit | diff | stats
path: root/linguistics
diff options
context:
space:
mode:
author: Jon Marius Venstad <venstad@gmail.com>, 2021-12-20 13:41:21 +0100
committer: Jon Marius Venstad <venstad@gmail.com>, 2021-12-20 13:41:21 +0100
commit: 76c192666396a934dc0d419a81e3c67a8e82509d (patch)
tree: c76f0a0b66954fcdf89851f1c71a61892304df8b /linguistics
parent: 5e63f91b4a806931d23d43f24fefb0790802535e (diff)
Use smaller chunks for faster detection
Diffstat (limited to 'linguistics')
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpDetector.java 4
1 files changed, 2 insertions, 2 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpDetector.java b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpDetector.java
index cb4580a9c01..d7a7d3a4744 100644
--- a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpDetector.java
+++ b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpDetector.java
@@ -21,7 +21,7 @@ import java.util.Map;
import static java.nio.charset.StandardCharsets.UTF_8;
/**
- * Detects the language of some sample text using {@link SimpleDetector} for CJK input, and OpenNLP otherwise.
+ * Detects text language using patched OpenNLP, with fallback to {@link SimpleDetector} for undetected CJK input.
*
* @author jonmv
*/
@@ -39,7 +39,7 @@ class OpenNlpDetector implements Detector {
detector = new LanguageDetectorME(loadModel());
config = new LanguageDetectorConfig();
config.setMinDiff(0.02);
- config.setChunkSize(64);
+ config.setChunkSize(32);
config.setMaxLength(256);
for (Locale locale : Locale.getAvailableLocales()) {
Language language = Language.fromLocale(locale);