diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
commit | 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch) | |
tree | 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenizerTestCase.java |
Publish
Diffstat (limited to 'linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenizerTestCase.java')
-rw-r--r-- | linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenizerTestCase.java | 36 |
1 files changed, 36 insertions, 0 deletions
diff --git a/linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenizerTestCase.java b/linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenizerTestCase.java new file mode 100644 index 00000000000..8760da56415 --- /dev/null +++ b/linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenizerTestCase.java @@ -0,0 +1,36 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.language.simple; + +import com.yahoo.language.process.AbstractTokenizerTestCase; +import com.yahoo.language.process.StemMode; +import org.junit.Test; + +/** + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author bratseth + */ +public class SimpleTokenizerTestCase extends AbstractTokenizerTestCase { + + @Test + public void testTokenizingNoStemming() { + TokenizerTester tester = new TokenizerTester().setStemMode(StemMode.NONE); + tester.assertTokens("a\u030a tralalala n4lle. \uD800\uDFC8 (old Persian sign Auramazda, sorry if " + + "anyone 1s offended by ancien7 gods.Running)", + "\u00E5", " ", "tralalala"," ","n4lle", ".", " ","\uD800\uDFC8", " ", "(", + "old", " ", "persian", " ", "sign", " ", "auramazda", ",", " ", "sorry", " ", + "if", " ", "anyone", " ", "1s", " ", "offended", " ", "by", " ", "ancien7", + " ", "gods", ".", "running", ")"); + } + + @Test + public void testTokenizingStemming() { + TokenizerTester tester = new TokenizerTester().setStemMode(StemMode.ALL); + tester.assertTokens("a\u030a tralalala n4lle. \uD800\uDFC8 (old Persian sign Auramazda, sorry if " + + "anyone 1s offended by ancien7 gods.Running)", + "\u00E5", " ", "tralalala"," ","n4lle", ".", " ","\uD800\uDFC8", " ", "(", + "old", " ", "persian", " ", "sign", " ", "auramazda", ",", " ", "sorry", " ", + "if", " ", "anyone", " ", "1s", " ", "offend", " ", "by", " ", "ancien7", + " ", "gods", ".", "running", ")"); + } + +} |