summaryrefslogtreecommitdiffstats
path: root/linguistics-components/src/main
diff options
context:
space:
mode:
Diffstat (limited to 'linguistics-components/src/main')
-rw-r--r--linguistics-components/src/main/java/com/yahoo/language/opennlp/LangDetectModel183.java29
-rw-r--r--linguistics-components/src/main/java/com/yahoo/language/opennlp/LanguageDetectorFactory.java29
-rw-r--r--linguistics-components/src/main/java/com/yahoo/language/opennlp/UrlCharSequenceNormalizer.java31
-rw-r--r--linguistics-components/src/main/java/com/yahoo/language/opennlp/package-info.java5
-rw-r--r--linguistics-components/src/main/resources/models/langdetect-183.binbin10568240 -> 0 bytes
5 files changed, 0 insertions, 94 deletions
diff --git a/linguistics-components/src/main/java/com/yahoo/language/opennlp/LangDetectModel183.java b/linguistics-components/src/main/java/com/yahoo/language/opennlp/LangDetectModel183.java
deleted file mode 100644
index c9e78259336..00000000000
--- a/linguistics-components/src/main/java/com/yahoo/language/opennlp/LangDetectModel183.java
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.language.opennlp;
-
-import opennlp.tools.langdetect.LanguageDetectorModel;
-
-import java.io.IOException;
-import java.io.UncheckedIOException;
-
-public class LangDetectModel183 implements LangDetectModel {
-
- private final Object monitor = new Object();
- private LanguageDetectorModel loaded;
-
- @Override
- public LanguageDetectorModel load() {
- synchronized (monitor) {
- if (loaded == null) {
- try {
- loaded = new LanguageDetectorModel(LangDetectModel183.class.getResourceAsStream("/models/langdetect-183.bin"));
- }
- catch (IOException e) {
- throw new UncheckedIOException(e);
- }
- }
- }
- return loaded;
- }
-
-}
diff --git a/linguistics-components/src/main/java/com/yahoo/language/opennlp/LanguageDetectorFactory.java b/linguistics-components/src/main/java/com/yahoo/language/opennlp/LanguageDetectorFactory.java
deleted file mode 100644
index aa4387bcc45..00000000000
--- a/linguistics-components/src/main/java/com/yahoo/language/opennlp/LanguageDetectorFactory.java
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.language.opennlp;
-
-import opennlp.tools.langdetect.DefaultLanguageDetectorContextGenerator;
-import opennlp.tools.langdetect.LanguageDetectorContextGenerator;
-import opennlp.tools.util.normalizer.EmojiCharSequenceNormalizer;
-import opennlp.tools.util.normalizer.NumberCharSequenceNormalizer;
-import opennlp.tools.util.normalizer.ShrinkCharSequenceNormalizer;
-import opennlp.tools.util.normalizer.TwitterCharSequenceNormalizer;
-
-/**
- * Overrides the UrlCharSequenceNormalizer, which has a bad regex, until fixed: https://issues.apache.org/jira/browse/OPENNLP-1350
- *
- * @author jonmv
- */
-@SuppressWarnings("unused") // Loaded by black magic.
-public class LanguageDetectorFactory extends opennlp.tools.langdetect.LanguageDetectorFactory {
-
- @Override
- public LanguageDetectorContextGenerator getContextGenerator() {
- return new DefaultLanguageDetectorContextGenerator(1, 3,
- EmojiCharSequenceNormalizer.getInstance(),
- UrlCharSequenceNormalizer.getInstance(),
- TwitterCharSequenceNormalizer.getInstance(),
- NumberCharSequenceNormalizer.getInstance(),
- ShrinkCharSequenceNormalizer.getInstance());
- }
-
-}
diff --git a/linguistics-components/src/main/java/com/yahoo/language/opennlp/UrlCharSequenceNormalizer.java b/linguistics-components/src/main/java/com/yahoo/language/opennlp/UrlCharSequenceNormalizer.java
deleted file mode 100644
index 883319e2f8b..00000000000
--- a/linguistics-components/src/main/java/com/yahoo/language/opennlp/UrlCharSequenceNormalizer.java
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.language.opennlp;
-
-import opennlp.tools.util.normalizer.CharSequenceNormalizer;
-
-import java.util.regex.Pattern;
-
-/**
- * Modifies {@link opennlp.tools.util.normalizer.UrlCharSequenceNormalizer} to avoid the bad email regex.
- *
- * @author jonmv
- */
-public class UrlCharSequenceNormalizer implements CharSequenceNormalizer {
-
- private static final Pattern URL_REGEX =
- Pattern.compile("https?://[-_.?&~;+=/#0-9A-Za-z]+");
- private static final Pattern MAIL_REGEX =
- Pattern.compile("(?<![-+_.0-9A-Za-z])[-+_.0-9A-Za-z]+@[-0-9A-Za-z]+[-.0-9A-Za-z]+");
-
- private static final UrlCharSequenceNormalizer INSTANCE = new UrlCharSequenceNormalizer();
-
- public static UrlCharSequenceNormalizer getInstance() {
- return INSTANCE;
- }
-
- public CharSequence normalize(CharSequence text) {
- String modified = URL_REGEX.matcher(text).replaceAll(" ");
- return MAIL_REGEX.matcher(modified).replaceAll(" ");
- }
-
-}
diff --git a/linguistics-components/src/main/java/com/yahoo/language/opennlp/package-info.java b/linguistics-components/src/main/java/com/yahoo/language/opennlp/package-info.java
deleted file mode 100644
index 9606578b3ac..00000000000
--- a/linguistics-components/src/main/java/com/yahoo/language/opennlp/package-info.java
+++ /dev/null
@@ -1,5 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-@ExportPackage
-package com.yahoo.language.opennlp;
-
-import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/linguistics-components/src/main/resources/models/langdetect-183.bin b/linguistics-components/src/main/resources/models/langdetect-183.bin
deleted file mode 100644
index c3cde217050..00000000000
--- a/linguistics-components/src/main/resources/models/langdetect-183.bin
+++ /dev/null
Binary files differ