diff options
Diffstat (limited to 'linguistics-components/src/main')
5 files changed, 0 insertions, 94 deletions
diff --git a/linguistics-components/src/main/java/com/yahoo/language/opennlp/LangDetectModel183.java b/linguistics-components/src/main/java/com/yahoo/language/opennlp/LangDetectModel183.java deleted file mode 100644 index c9e78259336..00000000000 --- a/linguistics-components/src/main/java/com/yahoo/language/opennlp/LangDetectModel183.java +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.language.opennlp; - -import opennlp.tools.langdetect.LanguageDetectorModel; - -import java.io.IOException; -import java.io.UncheckedIOException; - -public class LangDetectModel183 implements LangDetectModel { - - private final Object monitor = new Object(); - private LanguageDetectorModel loaded; - - @Override - public LanguageDetectorModel load() { - synchronized (monitor) { - if (loaded == null) { - try { - loaded = new LanguageDetectorModel(LangDetectModel183.class.getResourceAsStream("/models/langdetect-183.bin")); - } - catch (IOException e) { - throw new UncheckedIOException(e); - } - } - } - return loaded; - } - -} diff --git a/linguistics-components/src/main/java/com/yahoo/language/opennlp/LanguageDetectorFactory.java b/linguistics-components/src/main/java/com/yahoo/language/opennlp/LanguageDetectorFactory.java deleted file mode 100644 index aa4387bcc45..00000000000 --- a/linguistics-components/src/main/java/com/yahoo/language/opennlp/LanguageDetectorFactory.java +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.language.opennlp; - -import opennlp.tools.langdetect.DefaultLanguageDetectorContextGenerator; -import opennlp.tools.langdetect.LanguageDetectorContextGenerator; -import opennlp.tools.util.normalizer.EmojiCharSequenceNormalizer; -import opennlp.tools.util.normalizer.NumberCharSequenceNormalizer; -import opennlp.tools.util.normalizer.ShrinkCharSequenceNormalizer; -import opennlp.tools.util.normalizer.TwitterCharSequenceNormalizer; - -/** - * Overrides the UrlCharSequenceNormalizer, which has a bad regex, until fixed: https://issues.apache.org/jira/browse/OPENNLP-1350 - * - * @author jonmv - */ -@SuppressWarnings("unused") // Loaded by black magic. -public class LanguageDetectorFactory extends opennlp.tools.langdetect.LanguageDetectorFactory { - - @Override - public LanguageDetectorContextGenerator getContextGenerator() { - return new DefaultLanguageDetectorContextGenerator(1, 3, - EmojiCharSequenceNormalizer.getInstance(), - UrlCharSequenceNormalizer.getInstance(), - TwitterCharSequenceNormalizer.getInstance(), - NumberCharSequenceNormalizer.getInstance(), - ShrinkCharSequenceNormalizer.getInstance()); - } - -} diff --git a/linguistics-components/src/main/java/com/yahoo/language/opennlp/UrlCharSequenceNormalizer.java b/linguistics-components/src/main/java/com/yahoo/language/opennlp/UrlCharSequenceNormalizer.java deleted file mode 100644 index 883319e2f8b..00000000000 --- a/linguistics-components/src/main/java/com/yahoo/language/opennlp/UrlCharSequenceNormalizer.java +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.language.opennlp; - -import opennlp.tools.util.normalizer.CharSequenceNormalizer; - -import java.util.regex.Pattern; - -/** - * Modifies {@link opennlp.tools.util.normalizer.UrlCharSequenceNormalizer} to avoid the bad email regex. - * - * @author jonmv - */ -public class UrlCharSequenceNormalizer implements CharSequenceNormalizer { - - private static final Pattern URL_REGEX = - Pattern.compile("https?://[-_.?&~;+=/#0-9A-Za-z]+"); - private static final Pattern MAIL_REGEX = - Pattern.compile("(?<![-+_.0-9A-Za-z])[-+_.0-9A-Za-z]+@[-0-9A-Za-z]+[-.0-9A-Za-z]+"); - - private static final UrlCharSequenceNormalizer INSTANCE = new UrlCharSequenceNormalizer(); - - public static UrlCharSequenceNormalizer getInstance() { - return INSTANCE; - } - - public CharSequence normalize(CharSequence text) { - String modified = URL_REGEX.matcher(text).replaceAll(" "); - return MAIL_REGEX.matcher(modified).replaceAll(" "); - } - -} diff --git a/linguistics-components/src/main/java/com/yahoo/language/opennlp/package-info.java b/linguistics-components/src/main/java/com/yahoo/language/opennlp/package-info.java deleted file mode 100644 index 9606578b3ac..00000000000 --- a/linguistics-components/src/main/java/com/yahoo/language/opennlp/package-info.java +++ /dev/null @@ -1,5 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -@ExportPackage -package com.yahoo.language.opennlp; - -import com.yahoo.osgi.annotation.ExportPackage; diff --git a/linguistics-components/src/main/resources/models/langdetect-183.bin b/linguistics-components/src/main/resources/models/langdetect-183.bin Binary files differdeleted file mode 100644 index c3cde217050..00000000000 --- a/linguistics-components/src/main/resources/models/langdetect-183.bin +++ /dev/null |