about | summary | refs | log | tree | commit | diff | stats
path: root/linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenizerTestCase.java
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@yahoo-inc.com> 2016-06-15 23:09:44 +0200
committer Jon Bratseth <bratseth@yahoo-inc.com> 2016-06-15 23:09:44 +0200
commit 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch)
tree 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenizerTestCase.java
Publish
Diffstat (limited to 'linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenizerTestCase.java')
-rw-r--r-- linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenizerTestCase.java 36
1 files changed, 36 insertions, 0 deletions
diff --git a/linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenizerTestCase.java b/linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenizerTestCase.java
new file mode 100644
index 00000000000..8760da56415
--- /dev/null
+++ b/linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenizerTestCase.java
@@ -0,0 +1,36 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.language.simple;
+
+import com.yahoo.language.process.AbstractTokenizerTestCase;
+import com.yahoo.language.process.StemMode;
+import org.junit.Test;
+
+/** Feeds one mixed-script sentence through a TokenizerTester, with stemming disabled and enabled, and checks the exact token list.
+ * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a>
+ * @author bratseth
+ */
+public class SimpleTokenizerTestCase extends AbstractTokenizerTestCase { // NOTE(review): TokenizerTester is not declared in this file; presumably inherited from the base class - confirm
+
+ @Test
+ public void testTokenizingNoStemming() { // StemMode.NONE: every word is expected back unstemmed
+ TokenizerTester tester = new TokenizerTester().setStemMode(StemMode.NONE);
+ tester.assertTokens("a\u030a tralalala n4lle. \uD800\uDFC8 (old Persian sign Auramazda, sorry if " + // first argument: the input text (continues on the next line)
+ "anyone 1s offended by ancien7 gods.Running)", // remaining arguments: the expected tokens in order, whitespace and punctuation included
+ "\u00E5", " ", "tralalala"," ","n4lle", ".", " ","\uD800\uDFC8", " ", "(", // U+00E5 is the precomposed form of the input's 'a' + U+030A; the surrogate pair (U+103C8) is expected as one token
+ "old", " ", "persian", " ", "sign", " ", "auramazda", ",", " ", "sorry", " ", // expected words are lower case although the input has "Persian" and "Auramazda"
+ "if", " ", "anyone", " ", "1s", " ", "offended", " ", "by", " ", "ancien7", // "offended" is unchanged; the digit-bearing words "1s" and "ancien7" stay whole
+ " ", "gods", ".", "running", ")"); // "gods.Running" is expected as three tokens with no whitespace token between them
+ }
+
+ @Test
+ public void testTokenizingStemming() { // StemMode.ALL: same input text as the no-stemming test
+ TokenizerTester tester = new TokenizerTester().setStemMode(StemMode.ALL);
+ tester.assertTokens("a\u030a tralalala n4lle. \uD800\uDFC8 (old Persian sign Auramazda, sorry if " + // first argument: the input text (continues on the next line)
+ "anyone 1s offended by ancien7 gods.Running)", // remaining arguments: the expected tokens in order
+ "\u00E5", " ", "tralalala"," ","n4lle", ".", " ","\uD800\uDFC8", " ", "(", // same expectations as the no-stemming test on this line
+ "old", " ", "persian", " ", "sign", " ", "auramazda", ",", " ", "sorry", " ", // same expectations as the no-stemming test on this line
+ "if", " ", "anyone", " ", "1s", " ", "offend", " ", "by", " ", "ancien7", // the only difference from the no-stemming expectations: "offended" becomes "offend"
+ " ", "gods", ".", "running", ")"); // "gods" and "running" are expected unchanged even with stemming on
+ }
+
+}