aboutsummaryrefslogtreecommitdiffstats
path: root/linguistics/src/test/java/com/yahoo
diff options
context:
space:
mode:
author Marius Arhaug <mariusarhaug@hotmail.com> 2024-04-09 16:33:04 +0200
committer GitHub <noreply@github.com> 2024-04-09 16:33:04 +0200
commit 07010100192978eea266f7cb15b315b57a95438e (patch)
tree a232aba9475b273058179872a7ca251b42e39d5c /linguistics/src/test/java/com/yahoo
parent e1f023d0e14c3351948beed1ee0af6e466581251 (diff)
parent 07eedf3b30af36fc05da6c98778ecda23bd0d304 (diff)
Merge pull request #30816 from vespa-engine/marius/add-significance-model-registry
Add significance model registry to linguistics
Diffstat (limited to 'linguistics/src/test/java/com/yahoo')
-rw-r--r-- linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java 48
-rw-r--r-- linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelTest.java 31
2 files changed, 79 insertions, 0 deletions
diff --git a/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java b/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java
new file mode 100644
index 00000000000..d1de63a994d
--- /dev/null
+++ b/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java
@@ -0,0 +1,48 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.language.significance;
+
+import com.yahoo.language.Language;
+import com.yahoo.language.significance.impl.DefaultSignificanceModelRegistry;
+import org.junit.Test;
+
+import java.nio.file.Path;
+import java.util.HashMap;
+
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+
+/**
+ * @author MariusArhaug
+ */
+public class DefaultSignificanceModelRegistryTest {
+
+    /** Registers the English and Norwegian test models and checks lookups made through the registry. */
+    @Test
+    public void testDefaultSignificanceModelRegistry() {
+        HashMap<Language, Path> modelPaths = new HashMap<>();
+        modelPaths.put(Language.ENGLISH, Path.of("src/test/models/en.json"));
+        modelPaths.put(Language.NORWEGIAN_BOKMAL, Path.of("src/test/models/no.json"));
+
+        var registry = new DefaultSignificanceModelRegistry(modelPaths);
+        var english = registry.getModel(Language.ENGLISH);
+        var norwegian = registry.getModel(Language.NORWEGIAN_BOKMAL);
+
+        // No model was registered for French above, so this lookup is expected to throw.
+        assertThrows(IllegalArgumentException.class, () -> registry.getModel(Language.FRENCH));
+
+        assertNotNull(english);
+        assertNotNull(norwegian);
+
+        assertEquals(2, english.documentFrequency("test").frequency());
+        assertEquals(10, english.documentFrequency("test").corpusSize());
+
+        assertEquals(3, norwegian.documentFrequency("nei").frequency());
+        assertEquals(20, norwegian.documentFrequency("nei").corpusSize());
+
+        // "non-existent-word" is presumably absent from no.json; frequency 1 is still expected for it.
+        assertEquals(1, norwegian.documentFrequency("non-existent-word").frequency());
+        assertEquals(20, norwegian.documentFrequency("non-existent-word").corpusSize());
+    }
+}
diff --git a/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelTest.java b/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelTest.java
new file mode 100644
index 00000000000..137f8d4513a
--- /dev/null
+++ b/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelTest.java
@@ -0,0 +1,31 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.language.significance;
+
+import com.yahoo.language.significance.impl.DefaultSignificanceModel;
+import org.junit.jupiter.api.Test;
+
+import java.nio.file.Path;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+
+/**
+ * @author MariusArhaug
+
+ */
+public class DefaultSignificanceModelTest {
+
+    /** Checks document frequency and corpus size lookups against the English test model file. */
+    @Test
+    public void testDocumentFrequency() {
+        DefaultSignificanceModel significanceModel = new DefaultSignificanceModel(Path.of("src/test/models/en.json"));
+        assertEquals(2, significanceModel.documentFrequency("test").frequency());
+        assertEquals(10, significanceModel.documentFrequency("test").corpusSize());
+
+        assertEquals(3, significanceModel.documentFrequency("hello").frequency());
+        assertEquals(10, significanceModel.documentFrequency("hello").corpusSize());
+        // Both assertions below query the same word, so its corpus size is verified too ("hello" is covered above).
+        assertEquals(1, significanceModel.documentFrequency("non-existent-word").frequency());
+        assertEquals(10, significanceModel.documentFrequency("non-existent-word").corpusSize());
+    }
+}