diff options
author | Arne Juul <arnej@yahooinc.com> | 2022-10-06 14:01:37 +0000 |
---|---|---|
committer | Arne Juul <arnej@yahooinc.com> | 2022-10-06 14:19:34 +0000 |
commit | 91629f81f8425b46e71026b1e733dad2f8ea270c (patch) | |
tree | 9bccfe6fcb47ce668c576ee29da8afa2ebea1037 /linguistics/src/main/java/com/yahoo/language/opennlp/LanguageDetectorFactory.java | |
parent | 3cbbac35a188b578f1360ede59de6175b5d43665 (diff) |
much simpler CharSequenceNormalizer
Diffstat (limited to 'linguistics/src/main/java/com/yahoo/language/opennlp/LanguageDetectorFactory.java')
-rw-r--r-- | linguistics/src/main/java/com/yahoo/language/opennlp/LanguageDetectorFactory.java | 10 |
1 file changed, 1 insertion, 9 deletions
```diff
diff --git a/linguistics/src/main/java/com/yahoo/language/opennlp/LanguageDetectorFactory.java b/linguistics/src/main/java/com/yahoo/language/opennlp/LanguageDetectorFactory.java
index 305aead056b..0cf4634c6c3 100644
--- a/linguistics/src/main/java/com/yahoo/language/opennlp/LanguageDetectorFactory.java
+++ b/linguistics/src/main/java/com/yahoo/language/opennlp/LanguageDetectorFactory.java
@@ -2,10 +2,6 @@
 package com.yahoo.language.opennlp;
 
 import opennlp.tools.langdetect.LanguageDetectorContextGenerator;
-import opennlp.tools.util.normalizer.EmojiCharSequenceNormalizer;
-import opennlp.tools.util.normalizer.NumberCharSequenceNormalizer;
-import opennlp.tools.util.normalizer.ShrinkCharSequenceNormalizer;
-import opennlp.tools.util.normalizer.TwitterCharSequenceNormalizer;
 
 /**
  * Overrides the UrlCharSequenceNormalizer, which has a bad regex, until fixed: https://issues.apache.org/jira/browse/OPENNLP-1350
@@ -18,11 +14,7 @@ public class LanguageDetectorFactory extends opennlp.tools.langdetect.LanguageDe
     @Override
     public LanguageDetectorContextGenerator getContextGenerator() {
         return new DefaultLanguageDetectorContextGenerator(1, 3,
-                                                           EmojiCharSequenceNormalizer.getInstance(),
-                                                           UrlCharSequenceNormalizer.getInstance(),
-                                                           TwitterCharSequenceNormalizer.getInstance(),
-                                                           NumberCharSequenceNormalizer.getInstance(),
-                                                           ShrinkCharSequenceNormalizer.getInstance());
+                                                           VespaCharSequenceNormalizer.getInstance());
     }
 
 }
```