diff options
Diffstat (limited to 'linguistics/src/main/java/com/yahoo/language')
-rw-r--r-- | linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java | 10 |
1 files changed, 7 insertions, 3 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java
index 02232b61e89..4888fd8676f 100644
--- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java
+++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleTokenizer.java
@@ -44,7 +44,7 @@ public class SimpleTokenizer implements Tokenizer {
     public Iterable<Token> tokenize(String input, Language language, StemMode stemMode, boolean removeAccents) {
         if (input.isEmpty()) return Collections.emptyList();
 
-        opennlp.tools.stemmer.Stemmer stemmer = getStemmerForLanguage(language, stemMode);
+        opennlp.tools.stemmer.Stemmer stemmer = getStemmerForLanguage(language);
         List<Token> tokens = new ArrayList<>();
         int nextCode = input.codePointAt(0);
 
@@ -76,7 +76,11 @@ public class SimpleTokenizer implements Tokenizer {
         return token;
     }
 
-    private static Stemmer getStemmerForLanguage(Language language, StemMode stemMode) {
+    private static Stemmer getStemmerForLanguage(Language language) {
+        Stemmer stemmer = charSequence -> charSequence == null ? null : new KStemmer().stem(charSequence.toString());
+        if (language == null) {
+            return stemmer;
+        }
         SnowballStemmer.ALGORITHM alg;
         switch (language) {
             case DANISH:
@@ -126,7 +130,7 @@ public class SimpleTokenizer implements Tokenizer {
                 alg = SnowballStemmer.ALGORITHM.TURKISH;
                 break;
             default:
-                return charSequence -> charSequence == null ? null : new KStemmer().stem(charSequence.toString());
+                return stemmer;
         }
         return new SnowballStemmer(alg);
     }