summary | refs | log | tree | commit | diff | stats
path: root/linguistics/src/main/java/com/yahoo/language/opennlp/LanguageDetectorFactory.java
diff options
context:
space:
mode:
author Arne Juul <arnej@yahooinc.com> 2022-10-06 14:01:37 +0000
committer Arne Juul <arnej@yahooinc.com> 2022-10-06 14:19:34 +0000
commit 91629f81f8425b46e71026b1e733dad2f8ea270c (patch)
tree 9bccfe6fcb47ce668c576ee29da8afa2ebea1037 /linguistics/src/main/java/com/yahoo/language/opennlp/LanguageDetectorFactory.java
parent 3cbbac35a188b578f1360ede59de6175b5d43665 (diff)
much simpler CharSequenceNormalizer
Diffstat (limited to 'linguistics/src/main/java/com/yahoo/language/opennlp/LanguageDetectorFactory.java')
-rw-r--r-- linguistics/src/main/java/com/yahoo/language/opennlp/LanguageDetectorFactory.java 10
1 files changed, 1 insertions, 9 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/opennlp/LanguageDetectorFactory.java b/linguistics/src/main/java/com/yahoo/language/opennlp/LanguageDetectorFactory.java
index 305aead056b..0cf4634c6c3 100644
--- a/linguistics/src/main/java/com/yahoo/language/opennlp/LanguageDetectorFactory.java
+++ b/linguistics/src/main/java/com/yahoo/language/opennlp/LanguageDetectorFactory.java
@@ -2,10 +2,6 @@
package com.yahoo.language.opennlp;
import opennlp.tools.langdetect.LanguageDetectorContextGenerator;
-import opennlp.tools.util.normalizer.EmojiCharSequenceNormalizer;
-import opennlp.tools.util.normalizer.NumberCharSequenceNormalizer;
-import opennlp.tools.util.normalizer.ShrinkCharSequenceNormalizer;
-import opennlp.tools.util.normalizer.TwitterCharSequenceNormalizer;
/**
* Overrides the UrlCharSequenceNormalizer, which has a bad regex, until fixed: https://issues.apache.org/jira/browse/OPENNLP-1350
@@ -18,11 +14,7 @@ public class LanguageDetectorFactory extends opennlp.tools.langdetect.LanguageDe
@Override
public LanguageDetectorContextGenerator getContextGenerator() {
return new DefaultLanguageDetectorContextGenerator(1, 3,
- EmojiCharSequenceNormalizer.getInstance(),
- UrlCharSequenceNormalizer.getInstance(),
- TwitterCharSequenceNormalizer.getInstance(),
- NumberCharSequenceNormalizer.getInstance(),
- ShrinkCharSequenceNormalizer.getInstance());
+ VespaCharSequenceNormalizer.getInstance());
}
}