diff options
Diffstat (limited to 'linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java')
-rw-r--r-- | linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java | 9 |
1 files changed, 8 insertions, 1 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java
index e1185cb2457..73518876c3f 100644
--- a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java
+++ b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java
@@ -4,6 +4,7 @@ package com.yahoo.language.opennlp;
 import com.yahoo.language.Language;
 import com.yahoo.language.LinguisticsCase;
 import com.yahoo.language.process.Normalizer;
+import com.yahoo.language.process.SpecialTokenRegistry;
 import com.yahoo.language.process.StemMode;
 import com.yahoo.language.process.Token;
 import com.yahoo.language.process.TokenType;
@@ -32,15 +33,21 @@ public class OpenNlpTokenizer implements Tokenizer {
     private final Normalizer normalizer;
     private final Transformer transformer;
     private final SimpleTokenizer simpleTokenizer;
+    private final SpecialTokenRegistry specialTokenRegistry;
 
     public OpenNlpTokenizer() {
         this(new SimpleNormalizer(), new SimpleTransformer());
     }
 
     public OpenNlpTokenizer(Normalizer normalizer, Transformer transformer) {
+        this(normalizer, transformer, new SpecialTokenRegistry(List.of()));
+    }
+
+    public OpenNlpTokenizer(Normalizer normalizer, Transformer transformer, SpecialTokenRegistry specialTokenRegistry) {
         this.normalizer = normalizer;
         this.transformer = transformer;
-        simpleTokenizer = new SimpleTokenizer(normalizer, transformer);
+        this.specialTokenRegistry = specialTokenRegistry;
+        this.simpleTokenizer = new SimpleTokenizer(normalizer, transformer, specialTokenRegistry);
     }
 
     @Override