summary | refs | log | tree | commit | diff | stats
path: root/lucene-linguistics
diff options
context:
space:
mode:
author: Dainius Jocas <dainius.jocas@gmail.com> 2023-09-14 22:10:58 +0300
committer: Dainius Jocas <dainius.jocas@gmail.com> 2023-09-14 22:10:58 +0300
commit: f089bfed28febfa82bfe0ad06e269de317d7a4e0 (patch)
tree: ec7350ede88959265d032529466e46a6274a7bca /lucene-linguistics
parent: 8beac01933b5121187d1cf6dd97cef0b34d1afd2 (diff)
test: load stopwords file from the classpath
Diffstat (limited to 'lucene-linguistics')
-rw-r--r-- lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java | 26
-rw-r--r-- lucene-linguistics/src/test/resources/classpath-stopwords.txt | 1
2 files changed, 27 insertions, 0 deletions
diff --git a/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java b/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java
index 35373479bff..20a7351d0a0 100644
--- a/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java
+++ b/lucene-linguistics/src/test/java/com/yahoo/language/lucene/LuceneTokenizerTest.java
@@ -167,4 +167,30 @@ public class LuceneTokenizerTest {
.tokenize("Dogs and Cats", Language.ENGLISH, StemMode.ALL, false);
assertEquals(List.of("Dog", "and", "Cat"), tokenStrings(tokens));
}
+
+ @Test
+ public void testOptionalPathWithClasspathResources() { // "stop" filter configured with a bare file name as its "words" value
+ String languageCode = Language.ENGLISH.languageCode();
+ LuceneAnalysisConfig enConfig = new LuceneAnalysisConfig.Builder()
+ .analysis(
+ Map.of(languageCode, // the analysis map is keyed by the English language code
+ new LuceneAnalysisConfig.Analysis.Builder().tokenFilters(List.of(
+ new LuceneAnalysisConfig
+ .Analysis
+ .TokenFilters
+ .Builder()
+ .name("englishMinimalStem"), // listed before "stop"; presumably applied first -- TODO confirm filter ordering
+ new LuceneAnalysisConfig
+ .Analysis
+ .TokenFilters
+ .Builder()
+ .name("stop")
+ .conf("words", "classpath-stopwords.txt")))) // file added under src/test/resources in this same commit; its only line is "Dog"
+ ).build();
+ LuceneLinguistics linguistics = new LuceneLinguistics(enConfig, new ComponentRegistry<>());
+ Iterable<Token> tokens = linguistics
+ .getTokenizer()
+ .tokenize("Dogs and Cats", Language.ENGLISH, StemMode.ALL, false);
+ assertEquals(List.of("and", "Cat"), tokenStrings(tokens)); // no "Dog" token expected, i.e. the stop-word entry was honoured
+ }
}
diff --git a/lucene-linguistics/src/test/resources/classpath-stopwords.txt b/lucene-linguistics/src/test/resources/classpath-stopwords.txt
new file mode 100644
index 00000000000..4dda64888cb
--- /dev/null
+++ b/lucene-linguistics/src/test/resources/classpath-stopwords.txt
@@ -0,0 +1 @@
+Dog