blob: fdca5355008cdae15fbf51c0b7b4dd36c2a24600 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.language.opennlp;
import opennlp.tools.langdetect.LanguageDetectorContextGenerator;
import opennlp.tools.util.normalizer.EmojiCharSequenceNormalizer;
import opennlp.tools.util.normalizer.NumberCharSequenceNormalizer;
import opennlp.tools.util.normalizer.ShrinkCharSequenceNormalizer;
import opennlp.tools.util.normalizer.TwitterCharSequenceNormalizer;
/**
* Overrides the UrlCharSequenceNormalizer, which has a bad regex, until fixed: https://issues.apache.org/jira/browse/OPENNLP-1350
*
* @author jonmv
*/
@SuppressWarnings("unused") // Loaded by black magic.
public class LanguageDetectorFactory extends opennlp.tools.langdetect.LanguageDetectorFactory {
@Override
public LanguageDetectorContextGenerator getContextGenerator() {
return new DefaultLanguageDetectorContextGenerator(1, 3,
EmojiCharSequenceNormalizer.getInstance(),
UrlCharSequenceNormalizer.getInstance(),
TwitterCharSequenceNormalizer.getInstance(),
NumberCharSequenceNormalizer.getInstance(),
ShrinkCharSequenceNormalizer.getInstance());
}
}
|