summaryrefslogtreecommitdiffstats
path: root/linguistics/src/main/java/com/yahoo/language/opennlp
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@gmail.com>2021-05-05 12:12:25 +0200
committerJon Bratseth <bratseth@gmail.com>2021-05-05 12:12:25 +0200
commit4b3e615954fa5cbaf761a55cddba786f51ab26f3 (patch)
tree31cc973d490bdc7b8257c664b31018ac8915efc4 /linguistics/src/main/java/com/yahoo/language/opennlp
parentf8c8b6ab958f930b318eadc92b760dd26f414e58 (diff)
Revert "Merge pull request #17746 from vespa-engine/revert-17738-revert-17737-revert-17736-bratseth/special-tokens"
This reverts commit 491856b396d003885e159345fe3f533f0fa35933, reversing changes made to 3720186303f4aef1d185525eaf61092097a64ec9.
Diffstat (limited to 'linguistics/src/main/java/com/yahoo/language/opennlp')
-rw-r--r--linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java9
1 files changed, 8 insertions, 1 deletions
diff --git a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java
index e1185cb2457..73518876c3f 100644
--- a/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java
+++ b/linguistics/src/main/java/com/yahoo/language/opennlp/OpenNlpTokenizer.java
@@ -4,6 +4,7 @@ package com.yahoo.language.opennlp;
import com.yahoo.language.Language;
import com.yahoo.language.LinguisticsCase;
import com.yahoo.language.process.Normalizer;
+import com.yahoo.language.process.SpecialTokenRegistry;
import com.yahoo.language.process.StemMode;
import com.yahoo.language.process.Token;
import com.yahoo.language.process.TokenType;
@@ -32,15 +33,21 @@ public class OpenNlpTokenizer implements Tokenizer {
private final Normalizer normalizer;
private final Transformer transformer;
private final SimpleTokenizer simpleTokenizer;
+ private final SpecialTokenRegistry specialTokenRegistry;
public OpenNlpTokenizer() {
this(new SimpleNormalizer(), new SimpleTransformer());
}
public OpenNlpTokenizer(Normalizer normalizer, Transformer transformer) {
+ this(normalizer, transformer, new SpecialTokenRegistry(List.of()));
+ }
+
+ public OpenNlpTokenizer(Normalizer normalizer, Transformer transformer, SpecialTokenRegistry specialTokenRegistry) {
this.normalizer = normalizer;
this.transformer = transformer;
- simpleTokenizer = new SimpleTokenizer(normalizer, transformer);
+ this.specialTokenRegistry = specialTokenRegistry;
+ this.simpleTokenizer = new SimpleTokenizer(normalizer, transformer, specialTokenRegistry);
}
@Override