aboutsummaryrefslogtreecommitdiffstats
path: root/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java
diff options
context:
space:
mode:
authorbjormel <bjormel@yahooinc.com>2023-10-01 12:23:12 +0000
committerbjormel <bjormel@yahooinc.com>2023-10-01 12:23:12 +0000
commite9058b555d4dfea2f6c872d9a677e8678b569569 (patch)
treefa1b67c6e39712c1e0d9f308b0dd55573b43f913 /lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java
parent0ad931fa86658904fe9212b014d810236b0e00e4 (diff)
parent16030193ec04ee41e98779a3d7ee6a6c1d0d0d6f (diff)
Merge branch 'master' into bjormel/aws-main-controller
Diffstat (limited to 'lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java')
-rw-r--r--lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java29
1 files changed, 29 insertions, 0 deletions
diff --git a/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java b/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java
index 92c369bc60c..fc29fcc0071 100644
--- a/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java
+++ b/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java
@@ -197,4 +197,33 @@ public class LuceneTokenizerTest {
.tokenize("Dogs and Cats", Language.ENGLISH, StemMode.ALL, false);
assertEquals(List.of("and", "Cat"), tokenStrings(tokens));
}
+
+ @Test
+ public void compositeConfigKey() {
+ String reversingAnalyzerKey = Language.ENGLISH.languageCode()
+ + "/"
+ + StemMode.ALL;
+ LuceneAnalysisConfig enConfig = new LuceneAnalysisConfig.Builder()
+ .analysis(
+ Map.of(reversingAnalyzerKey,
+ new LuceneAnalysisConfig.Analysis.Builder().tokenFilters(List.of(
+ new LuceneAnalysisConfig
+ .Analysis
+ .TokenFilters
+ .Builder()
+ .name("reverseString"))))
+ ).build();
+ LuceneLinguistics linguistics = new LuceneLinguistics(enConfig, new ComponentRegistry<>());
+ // Matching StemMode
+ Iterable<Token> tokens = linguistics
+ .getTokenizer()
+ .tokenize("Dogs and Cats", Language.ENGLISH, StemMode.ALL, false);
+ assertEquals(List.of("sgoD", "dna", "staC"), tokenStrings(tokens));
+ // StemMode is different
+ Iterable<Token> stemModeTokens = linguistics
+ .getTokenizer()
+ .tokenize("Dogs and Cats", Language.ENGLISH, StemMode.BEST, false);
+ assertEquals(List.of("dog", "cat"), tokenStrings(stemModeTokens));
+
+ }
}