diff options
author | Jon Bratseth <bratseth@gmail.com> | 2021-05-05 12:12:25 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2021-05-05 12:12:25 +0200 |
commit | 4b3e615954fa5cbaf761a55cddba786f51ab26f3 (patch) | |
tree | 31cc973d490bdc7b8257c664b31018ac8915efc4 /linguistics/src/main/java/com/yahoo/language/opennlp | |
parent | f8c8b6ab958f930b318eadc92b760dd26f414e58 (diff) |
Revert "Merge pull request #17746 from vespa-engine/revert-17738-revert-17737-revert-17736-bratseth/special-tokens"
This reverts commit 491856b396d003885e159345fe3f533f0fa35933, reversing
changes made to 3720186303f4aef1d185525eaf61092097a64ec9.
Diffstat (limited to 'linguistics/src/main/java/com/yahoo/language/opennlp')
-rw-r--r-- | linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java | 9 |
1 files changed, 8 insertions, 1 deletions
```diff
diff --git a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java
index e1185cb2457..73518876c3f 100644
--- a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java
+++ b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java
@@ -4,6 +4,7 @@ package com.yahoo.language.opennlp;
 import com.yahoo.language.Language;
 import com.yahoo.language.LinguisticsCase;
 import com.yahoo.language.process.Normalizer;
+import com.yahoo.language.process.SpecialTokenRegistry;
 import com.yahoo.language.process.StemMode;
 import com.yahoo.language.process.Token;
 import com.yahoo.language.process.TokenType;
@@ -32,15 +33,21 @@ public class OpenNlpTokenizer implements Tokenizer {
     private final Normalizer normalizer;
     private final Transformer transformer;
     private final SimpleTokenizer simpleTokenizer;
+    private final SpecialTokenRegistry specialTokenRegistry;
 
     public OpenNlpTokenizer() {
         this(new SimpleNormalizer(), new SimpleTransformer());
     }
 
     public OpenNlpTokenizer(Normalizer normalizer, Transformer transformer) {
+        this(normalizer, transformer, new SpecialTokenRegistry(List.of()));
+    }
+
+    public OpenNlpTokenizer(Normalizer normalizer, Transformer transformer, SpecialTokenRegistry specialTokenRegistry) {
         this.normalizer = normalizer;
         this.transformer = transformer;
-        simpleTokenizer = new SimpleTokenizer(normalizer, transformer);
+        this.specialTokenRegistry = specialTokenRegistry;
+        this.simpleTokenizer = new SimpleTokenizer(normalizer, transformer, specialTokenRegistry);
     }
 
     @Override
```