diff options
Diffstat (limited to 'linguistics/src/test/java/com/yahoo')
-rw-r--r-- | linguistics/src/test/java/com/yahoo/language/opennlp/OptimaizeDetectorTestCase.java | 35 | ||||
-rw-r--r-- | linguistics/src/test/java/com/yahoo/language/simple/SimpleDetectorTestCase.java | 7 |
2 files changed, 36 insertions, 6 deletions
diff --git a/linguistics/src/test/java/com/yahoo/language/opennlp/OptimaizeDetectorTestCase.java b/linguistics/src/test/java/com/yahoo/language/opennlp/OptimaizeDetectorTestCase.java new file mode 100644 index 00000000000..ef3248ee0bb --- /dev/null +++ b/linguistics/src/test/java/com/yahoo/language/opennlp/OptimaizeDetectorTestCase.java @@ -0,0 +1,35 @@ +package com.yahoo.language.opennlp; + +import com.yahoo.language.Language; +import com.yahoo.language.detect.Detector; +import com.yahoo.language.simple.SimpleDetector; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +/** + * @author bratseth + */ +public class OptimaizeDetectorTestCase { + + private static final Detector detector = new OptimaizeDetector(); + + @Test + public void testDetection() { + assertLanguage(Language.UNKNOWN, "Hello!"); + + // Test fallback to SimpleDetector + assertLanguage(Language.CHINESE_TRADITIONAL, // CHINESE_SIMPLIFIED input + "\u6211\u80FD\u541E\u4E0B\u73BB\u7483\u800C\u4E0D\u4F24\u8EAB\u4F53\u3002"); + + // from https://ru.wikipedia.org/wiki/%D0%A0%D0%BE%D1%81%D1%81%D0%B8%D1%8F + assertLanguage(Language.RUSSIAN, "Материал из Википедии — свободной энциклопедии"); + // https://he.wikipedia.org/wiki/Yahoo! + assertLanguage(Language.HEBREW, "אתר יאהו! הוא אחד מאתרי האינטרנט הפופולריים ביותר בעולם, עם מעל 500 מיליון כניסות בכל יום"); + } + + private static void assertLanguage(Language language, String input) { + assertEquals(language, detector.detect(input, null).getLanguage()); + } + +} diff --git a/linguistics/src/test/java/com/yahoo/language/simple/SimpleDetectorTestCase.java b/linguistics/src/test/java/com/yahoo/language/simple/SimpleDetectorTestCase.java index 1905c6d98a9..0f5fbceccf2 100644 --- a/linguistics/src/test/java/com/yahoo/language/simple/SimpleDetectorTestCase.java +++ b/linguistics/src/test/java/com/yahoo/language/simple/SimpleDetectorTestCase.java @@ -16,7 +16,7 @@ import static org.junit.Assert.assertEquals; public class SimpleDetectorTestCase { @Test - public void requireThatLanguageCanDetected() { + public void testDetection() { assertLanguage(Language.UNKNOWN, "Hello!"); // "Chinese language" @@ -50,11 +50,6 @@ public class SimpleDetectorTestCase { // a string from http://www.columbia.edu/kermit/utf8.html that says "I can eat glass (and it doesn't hurt me)". assertLanguage(Language.KOREAN, "\ub098\ub294 \uc720\ub9ac\ub97c \uba39\uc744 \uc218 \uc788\uc5b4\uc694. " + "\uadf8\ub798\ub3c4 \uc544\ud504\uc9c0 \uc54a\uc544\uc694"); - - // from https://ru.wikipedia.org/wiki/%D0%A0%D0%BE%D1%81%D1%81%D0%B8%D1%8F - assertLanguage(Language.RUSSIAN, "Материал из Википедии — свободной энциклопедии"); - // https://he.wikipedia.org/wiki/Yahoo! - assertLanguage(Language.HEBREW, "אתר יאהו! הוא אחד מאתרי האינטרנט הפופולריים ביותר בעולם, עם מעל 500 מיליון כניסות בכל יום"); } @Test |