diff options
Diffstat (limited to 'linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenTypeTestCase.java')
-rw-r--r-- | linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenTypeTestCase.java | 43 |
1 file changed, 43 insertions, 0 deletions
diff --git a/linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenTypeTestCase.java b/linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenTypeTestCase.java new file mode 100644 index 00000000000..2d258be7af0 --- /dev/null +++ b/linguistics/src/test/java/com/yahoo/language/simple/SimpleTokenTypeTestCase.java @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.language.simple; + +import com.yahoo.language.process.TokenType; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +/** + * Check simple token types. + * + * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + */ +public class SimpleTokenTypeTestCase { + + @Test + public final void test() { + assertEquals(TokenType.ALPHABETIC, tokenType('a')); + assertEquals(TokenType.ALPHABETIC, tokenType('\u02c1')); + assertEquals(TokenType.ALPHABETIC, tokenType('\u02c1')); + assertEquals(TokenType.ALPHABETIC, tokenType('\u01c0')); + assertEquals(TokenType.SYMBOL, tokenType('\u20dd')); + assertEquals(TokenType.ALPHABETIC, tokenType('\u0912')); + assertEquals(TokenType.NUMERIC, tokenType('1')); + assertEquals(TokenType.PUNCTUATION, tokenType('.')); + assertEquals(TokenType.PUNCTUATION, tokenType('\u0f3b')); + assertEquals(TokenType.PUNCTUATION, tokenType('\u0f3c')); + assertEquals(TokenType.PUNCTUATION, tokenType('\u203f')); + assertEquals(TokenType.SYMBOL, tokenType('\u2044')); + assertEquals(TokenType.SYMBOL, tokenType('$')); + assertEquals(TokenType.ALPHABETIC, tokenType('\u2132')); + assertEquals(TokenType.ALPHABETIC, tokenType('\uD800', '\uDFC8')); + } + + private static TokenType tokenType(char c) { + return SimpleTokenType.valueOf(c); + } + + private static TokenType tokenType(char high, char low) { + return SimpleTokenType.valueOf(Character.toCodePoint(high, low)); + } + +} |