diff options
author | bjormel <bjormel@yahooinc.com> | 2023-10-01 12:23:12 +0000 |
---|---|---|
committer | bjormel <bjormel@yahooinc.com> | 2023-10-01 12:23:12 +0000 |
commit | e9058b555d4dfea2f6c872d9a677e8678b569569 (patch) | |
tree | fa1b67c6e39712c1e0d9f308b0dd55573b43f913 /lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java | |
parent | 0ad931fa86658904fe9212b014d810236b0e00e4 (diff) | |
parent | 16030193ec04ee41e98779a3d7ee6a6c1d0d0d6f (diff) |
Merge branch 'master' into bjormel/aws-main-controller
Diffstat (limited to 'lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java')
-rw-r--r-- | lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java | 29 |
1 files changed, 29 insertions, 0 deletions
diff --git a/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java b/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java
index 92c369bc60c..fc29fcc0071 100644
--- a/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java
+++ b/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java
@@ -197,4 +197,33 @@ public class LuceneTokenizerTest {
                 .tokenize("Dogs and Cats", Language.ENGLISH, StemMode.ALL, false);
         assertEquals(List.of("and", "Cat"), tokenStrings(tokens));
     }
+
+    @Test
+    public void compositeConfigKey() {
+        String reversingAnalyzerKey = Language.ENGLISH.languageCode()
+                + "/"
+                + StemMode.ALL;
+        LuceneAnalysisConfig enConfig = new LuceneAnalysisConfig.Builder()
+                .analysis(
+                        Map.of(reversingAnalyzerKey,
+                                new LuceneAnalysisConfig.Analysis.Builder().tokenFilters(List.of(
+                                        new LuceneAnalysisConfig
+                                                .Analysis
+                                                .TokenFilters
+                                                .Builder()
+                                                .name("reverseString"))))
+                ).build();
+        LuceneLinguistics linguistics = new LuceneLinguistics(enConfig, new ComponentRegistry<>());
+        // Matching StemMode
+        Iterable<Token> tokens = linguistics
+                .getTokenizer()
+                .tokenize("Dogs and Cats", Language.ENGLISH, StemMode.ALL, false);
+        assertEquals(List.of("sgoD", "dna", "staC"), tokenStrings(tokens));
+        // StemMode is different
+        Iterable<Token> stemModeTokens = linguistics
+                .getTokenizer()
+                .tokenize("Dogs and Cats", Language.ENGLISH, StemMode.BEST, false);
+        assertEquals(List.of("dog", "cat"), tokenStrings(stemModeTokens));
+
+    }
 }