diff options
author | Jon Bratseth <bratseth@oath.com> | 2021-05-04 22:54:45 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-05-04 22:54:45 +0200 |
commit | 8e1475c69c80e937cfa3eb47372c8008786196af (patch) | |
tree | c3f1c1f77083975756e2b146e0860c1b9d2656d8 /linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java | |
parent | 90bfeda99d269486682fd5a82f73d99ced6aedd1 (diff) | |
parent | 89764523bef6b240fcbc926d21e6c92da57375aa (diff) |
Merge pull request #17736 from vespa-engine/bratseth/special-tokens
Bratseth/special tokens
Diffstat (limited to 'linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java')
-rw-r--r-- | linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java | 7 |
1 file changed, 6 insertions, 1 deletion
```diff
diff --git a/linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java b/linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java
index e1a04b2985d..4ffe2a866d8 100644
--- a/linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java
+++ b/linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java
@@ -11,10 +11,14 @@ import com.yahoo.language.process.GramSplitter;
 import com.yahoo.language.process.Normalizer;
 import com.yahoo.language.process.Segmenter;
 import com.yahoo.language.process.SegmenterImpl;
+import com.yahoo.language.process.SpecialTokenRegistry;
 import com.yahoo.language.process.Stemmer;
 import com.yahoo.language.process.StemmerImpl;
 import com.yahoo.language.process.Tokenizer;
 import com.yahoo.language.process.Transformer;
+import com.yahoo.vespa.configdefinition.SpecialtokensConfig;
+
+import java.util.List;
 
 /**
  * Factory of simple linguistic processor implementations.
@@ -31,6 +35,7 @@ public class SimpleLinguistics implements Linguistics {
     private final Detector detector;
     private final CharacterClasses characterClasses;
     private final GramSplitter gramSplitter;
+    private final SpecialTokenRegistry specialTokenRegistry = new SpecialTokenRegistry(List.of());
 
     @Inject
     public SimpleLinguistics() {
@@ -45,7 +50,7 @@ public class SimpleLinguistics implements Linguistics {
     public Stemmer getStemmer() { return new StemmerImpl(getTokenizer()); }
 
     @Override
-    public Tokenizer getTokenizer() { return new SimpleTokenizer(normalizer, transformer); }
+    public Tokenizer getTokenizer() { return new SimpleTokenizer(normalizer, transformer, specialTokenRegistry); }
 
     @Override
     public Normalizer getNormalizer() { return normalizer; }
```