diff options
author | Dainius Jocas <dainius.jocas@gmail.com> | 2023-09-27 12:12:35 +0300 |
---|---|---|
committer | Dainius Jocas <dainius.jocas@gmail.com> | 2023-09-27 12:13:54 +0300 |
commit | fda0d74dc1c5e833f01d96197bd1dac40ced7ad7 (patch) | |
tree | 58c51d9589292d1f0a6dc30fc2b050f72c5e6216 /lucene-linguistics/src/test | |
parent | 65c585ffcc50626b171b65eb6b2a0027c8798eff (diff) |
LuceneLinguistics: optionally consider StemMode as analysis key
Diffstat (limited to 'lucene-linguistics/src/test')
-rw-r--r-- | lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java | 29 |
1 files changed, 29 insertions, 0 deletions
diff --git a/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java b/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java index 92c369bc60c..fc29fcc0071 100644 --- a/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java +++ b/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java @@ -197,4 +197,33 @@ public class LuceneTokenizerTest { .tokenize("Dogs and Cats", Language.ENGLISH, StemMode.ALL, false); assertEquals(List.of("and", "Cat"), tokenStrings(tokens)); } + + @Test + public void compositeConfigKey() { + String reversingAnalyzerKey = Language.ENGLISH.languageCode() + + "/" + + StemMode.ALL; + LuceneAnalysisConfig enConfig = new LuceneAnalysisConfig.Builder() + .analysis( + Map.of(reversingAnalyzerKey, + new LuceneAnalysisConfig.Analysis.Builder().tokenFilters(List.of( + new LuceneAnalysisConfig + .Analysis + .TokenFilters + .Builder() + .name("reverseString")))) + ).build(); + LuceneLinguistics linguistics = new LuceneLinguistics(enConfig, new ComponentRegistry<>()); + // Matching StemMode + Iterable<Token> tokens = linguistics + .getTokenizer() + .tokenize("Dogs and Cats", Language.ENGLISH, StemMode.ALL, false); + assertEquals(List.of("sgoD", "dna", "staC"), tokenStrings(tokens)); + // StemMode is different + Iterable<Token> stemModeTokens = linguistics + .getTokenizer() + .tokenize("Dogs and Cats", Language.ENGLISH, StemMode.BEST, false); + assertEquals(List.of("dog", "cat"), tokenStrings(stemModeTokens)); + + } } |