diff options
author | Marius Arhaug <mariusarhaug@hotmail.com> | 2024-04-09 16:33:04 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-09 16:33:04 +0200 |
commit | 07010100192978eea266f7cb15b315b57a95438e (patch) | |
tree | a232aba9475b273058179872a7ca251b42e39d5c /linguistics/src/test/java/com/yahoo | |
parent | e1f023d0e14c3351948beed1ee0af6e466581251 (diff) | |
parent | 07eedf3b30af36fc05da6c98778ecda23bd0d304 (diff) |
Merge pull request #30816 from vespa-engine/marius/add-significance-model-registry
Add significance model registry to linguistics
Diffstat (limited to 'linguistics/src/test/java/com/yahoo')
2 files changed, 79 insertions, 0 deletions
diff --git a/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java b/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java
new file mode 100644
index 00000000000..d1de63a994d
--- /dev/null
+++ b/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java
@@ -0,0 +1,48 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.language.significance;
+
+import com.yahoo.language.Language;
+import com.yahoo.language.significance.impl.DefaultSignificanceModelRegistry;
+import org.junit.jupiter.api.Test;
+
+import java.nio.file.Path;
+import java.util.HashMap;
+
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+/**
+ * Tests per-language model lookup in {@link DefaultSignificanceModelRegistry}.
+ * @author MariusArhaug
+ */
+public class DefaultSignificanceModelRegistryTest {
+
+    @Test
+    public void testDefaultSignificanceModelRegistry() {
+        HashMap<Language, Path> models = new HashMap<>();
+
+        models.put(Language.ENGLISH, Path.of("src/test/models/en.json"));
+        models.put(Language.NORWEGIAN_BOKMAL, Path.of("src/test/models/no.json"));
+
+        DefaultSignificanceModelRegistry defaultSignificanceModelRegistry = new DefaultSignificanceModelRegistry(models);
+
+        var englishModel = defaultSignificanceModelRegistry.getModel(Language.ENGLISH);
+        var norwegianModel = defaultSignificanceModelRegistry.getModel(Language.NORWEGIAN_BOKMAL);
+        // No model was registered for French above, so this lookup is expected to be rejected.
+        assertThrows(IllegalArgumentException.class, () -> defaultSignificanceModelRegistry.getModel(Language.FRENCH));
+
+        assertNotNull(englishModel);
+        assertNotNull(norwegianModel);
+
+        assertEquals(2, englishModel.documentFrequency("test").frequency());
+        assertEquals(10, englishModel.documentFrequency("test").corpusSize());
+
+        assertEquals(3, norwegianModel.documentFrequency("nei").frequency());
+        assertEquals(20, norwegianModel.documentFrequency("nei").corpusSize());
+        // A word assumed to be absent from the model should yield frequency 1 and the model's corpus size.
+        assertEquals(1, norwegianModel.documentFrequency("non-existent-word").frequency());
+        assertEquals(20, norwegianModel.documentFrequency("non-existent-word").corpusSize());
+
+    }
+}
diff --git a/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelTest.java b/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelTest.java
new file mode 100644
index 00000000000..137f8d4513a
--- /dev/null
+++ b/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelTest.java
@@ -0,0 +1,31 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.language.significance;
+
+import com.yahoo.language.significance.impl.DefaultSignificanceModel;
+import org.junit.jupiter.api.Test;
+
+import java.nio.file.Path;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+
+/**
+ * Tests document frequency lookups in {@link DefaultSignificanceModel}.
+ * @author MariusArhaug
+ */
+public class DefaultSignificanceModelTest {
+
+    @Test
+    public void testDocumentFrequency() {
+        DefaultSignificanceModel significanceModel = new DefaultSignificanceModel(Path.of("src/test/models/en.json"));
+
+        assertEquals(2, significanceModel.documentFrequency("test").frequency());
+        assertEquals(10, significanceModel.documentFrequency("test").corpusSize());
+
+        assertEquals(3, significanceModel.documentFrequency("hello").frequency());
+        assertEquals(10, significanceModel.documentFrequency("hello").corpusSize());
+        // A word assumed to be absent from the model should yield frequency 1 and the model's corpus size.
+        assertEquals(1, significanceModel.documentFrequency("non-existent-word").frequency());
+        assertEquals(10, significanceModel.documentFrequency("non-existent-word").corpusSize());
+    }
+}