From 9125b714f995bcf8d734cf155a7f17d60d43fdca Mon Sep 17 00:00:00 2001 From: MariusArhaug Date: Wed, 10 Apr 2024 14:33:36 +0200 Subject: add tests --- .../com/yahoo/search/significance/model/en.json | 14 +++ .../test/SignificanceSearcherTest.java | 113 +++++++++++++++++++++ 2 files changed, 127 insertions(+) create mode 100644 container-search/src/test/java/com/yahoo/search/significance/model/en.json create mode 100644 container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java (limited to 'container-search/src/test/java/com/yahoo') diff --git a/container-search/src/test/java/com/yahoo/search/significance/model/en.json b/container-search/src/test/java/com/yahoo/search/significance/model/en.json new file mode 100644 index 00000000000..50bae5e3451 --- /dev/null +++ b/container-search/src/test/java/com/yahoo/search/significance/model/en.json @@ -0,0 +1,14 @@ +{ + "version" : "1.0", + "id" : "test::1", + "description" : "desc", + "corpus-size" : 10, + "language" : "en", + "word-count" : 4, + "frequencies" : { + "usa" : 2, + "hello": 3, + "world": 5, + "test": 2 + } +} diff --git a/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java b/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java new file mode 100644 index 00000000000..389236af31b --- /dev/null +++ b/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java @@ -0,0 +1,113 @@ +package com.yahoo.search.significance.test; + +import com.yahoo.component.chain.Chain; +import com.yahoo.language.Language; +import com.yahoo.language.significance.SignificanceModel; +import com.yahoo.language.significance.SignificanceModelRegistry; +import com.yahoo.language.significance.impl.DefaultSignificanceModelRegistry; +import com.yahoo.prelude.query.AndItem; +import com.yahoo.prelude.query.WordItem; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.significance.SignificanceSearcher; +import org.junit.jupiter.api.Test; + +import java.nio.file.Path; +import java.util.HashMap; + +import static com.yahoo.test.JunitCompat.assertEquals; + +public class SignificanceSearcherTest { + SignificanceModelRegistry significanceModelRegistry; + SignificanceSearcher searcher; + + public SignificanceSearcherTest() { + HashMap map = new HashMap<>(); + map.put(Language.ENGLISH, Path.of("src/test/java/com/yahoo/search/significance/model/en.json")); + + significanceModelRegistry = new DefaultSignificanceModelRegistry(map); + // TODO change to mock + searcher = new SignificanceSearcher(significanceModelRegistry); + } + + private Execution createExecution() { + return new Execution(new Chain<>(searcher), Execution.Context.createContextStub()); + } + + @Test + void testSimpleSignificanceValue() { + + Query q = new Query(); + AndItem root = new AndItem(); + WordItem tmp; + tmp = new WordItem("Hello", true); + root.addItem(tmp); + tmp = new WordItem("world", true); + root.addItem(tmp); + + q.getModel().getQueryTree().setRoot(root); + + SignificanceModel model = significanceModelRegistry.getModel(Language.ENGLISH); + var helloFrequency = model.documentFrequency("Hello"); + var helloSignificanceValue = SignificanceSearcher.calculateIDF(helloFrequency.corpusSize(), helloFrequency.frequency()); + + var worldFrequency = model.documentFrequency("world"); + var worldSignificanceValue = SignificanceSearcher.calculateIDF(worldFrequency.corpusSize(), worldFrequency.frequency()); + + Result r = createExecution().search(q); + + root = (AndItem) r.getQuery().getModel().getQueryTree().getRoot(); + WordItem w0 = (WordItem) root.getItem(0); + WordItem w1 = (WordItem) root.getItem(1); + + assertEquals(helloSignificanceValue, w0.getSignificance()); + assertEquals(worldSignificanceValue, w1.getSignificance()); + + } + + @Test + void testRecursiveSignificanceValues() { + Query q = new Query(); + AndItem root = new AndItem(); + WordItem child1 = new WordItem("hello", true); + + AndItem child2 = new AndItem(); + WordItem child2_1 = new WordItem("test", true); + + AndItem child3 = new AndItem(); + AndItem child3_1 = new AndItem(); + WordItem child3_1_1 = new WordItem("usa", true); + + root.addItem(child1); + root.addItem(child2); + root.addItem(child3); + + child2.addItem(child2_1); + child3.addItem(child3_1); + child3_1.addItem(child3_1_1); + + q.getModel().getQueryTree().setRoot(root); + + SignificanceModel model = significanceModelRegistry.getModel(Language.ENGLISH); + var helloFrequency = model.documentFrequency("hello"); + var helloSignificanceValue = SignificanceSearcher.calculateIDF(helloFrequency.corpusSize(), helloFrequency.frequency()); + + var testFrequency = model.documentFrequency("test"); + var testSignificanceValue = SignificanceSearcher.calculateIDF(testFrequency.corpusSize(), testFrequency.frequency()); + + + + Result r = createExecution().search(q); + + root = (AndItem) r.getQuery().getModel().getQueryTree().getRoot(); + WordItem w0 = (WordItem) root.getItem(0); + WordItem w1 = (WordItem) ((AndItem) root.getItem(1)).getItem(0); + WordItem w3 = (WordItem) ((AndItem) ((AndItem) root.getItem(2)).getItem(0)).getItem(0); + + assertEquals(helloSignificanceValue, w0.getSignificance()); + assertEquals(testSignificanceValue, w1.getSignificance()); + assertEquals(SignificanceSearcher.calculateIDF(10, 2), w3.getSignificance()); + + } +} -- cgit v1.2.3 From 444cdaee63602faf3bee850b9fa4fcb5eb77bc60 Mon Sep 17 00:00:00 2001 From: MariusArhaug Date: Wed, 10 Apr 2024 14:38:57 +0200 Subject: add author tags --- .../java/com/yahoo/search/significance/SignificanceSearcher.java | 7 +++++++ .../yahoo/search/significance/test/SignificanceSearcherTest.java | 6 ++++++ 2 files changed, 13 insertions(+) (limited to 'container-search/src/test/java/com/yahoo') diff --git a/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java b/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java index 7403d57b71f..f1e0067b185 100644 --- a/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java +++ b/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java @@ -1,3 +1,4 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.search.significance; import com.yahoo.component.annotation.Inject; @@ -17,6 +18,12 @@ import com.yahoo.search.searchchain.Execution; import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING; +/** + * Sets significance values on word items in the query tree. + * + * @author MariusArhaug + */ + @Provides(SignificanceSearcher.SIGNIFICANCE) @Before(STEMMING) public class SignificanceSearcher extends Searcher { diff --git a/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java b/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java index 389236af31b..56d7b53825d 100644 --- a/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java +++ b/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java @@ -1,3 +1,4 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.search.significance.test; import com.yahoo.component.chain.Chain; @@ -18,6 +19,11 @@ import java.util.HashMap; import static com.yahoo.test.JunitCompat.assertEquals; +/** + * Tests significance term in the search chain. + * + * @author MariusArhaug + */ public class SignificanceSearcherTest { SignificanceModelRegistry significanceModelRegistry; SignificanceSearcher searcher; -- cgit v1.2.3 From 59ccf886b9f35f12e40334a1aac66eda6819090c Mon Sep 17 00:00:00 2001 From: MariusArhaug Date: Mon, 15 Apr 2024 12:25:06 +0200 Subject: fix cr failures --- .../model/federation/LocalProviderSpec.java | 3 +- .../search/significance/SignificanceSearcher.java | 8 +++- .../test/SignificanceSearcherTest.java | 50 ++++++++++++++++++---- .../significance/SignificanceModelRegistry.java | 4 +- .../impl/DefaultSignificanceModelRegistry.java | 7 +-- .../DefaultSignificanceModelRegistryTest.java | 16 ++++--- 6 files changed, 68 insertions(+), 20 deletions(-) (limited to 'container-search/src/test/java/com/yahoo') diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java b/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java index 9ea35339f8d..97220725fec 100644 --- a/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java +++ b/container-search/src/main/java/com/yahoo/search/searchchain/model/federation/LocalProviderSpec.java @@ -43,7 +43,8 @@ public class LocalProviderSpec { com.yahoo.search.searchers.ValidateFuzzySearcher.class, com.yahoo.search.yql.FieldFiller.class, com.yahoo.search.searchers.InputCheckingSearcher.class, - com.yahoo.search.searchers.ContainerLatencySearcher.class); + com.yahoo.search.searchers.ContainerLatencySearcher.class, + com.yahoo.search.significance.SignificanceSearcher.class); public final String clusterName; diff --git a/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java b/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java index f1e0067b185..1a0f82cc068 100644 --- a/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java +++ b/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java @@ -16,6 +16,8 @@ import com.yahoo.search.Result; import com.yahoo.search.Searcher; import com.yahoo.search.searchchain.Execution; +import java.util.Optional; + import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING; /** @@ -42,7 +44,11 @@ public class SignificanceSearcher extends Searcher { if (significanceModelRegistry == null) return execution.search(query); Language language = query.getModel().getParsingLanguage(); - setIDF(query.getModel().getQueryTree().getRoot(), significanceModelRegistry.getModel(language)); + Optional model = significanceModelRegistry.getModel(language); + + if (model.isEmpty()) return execution.search(query); + + setIDF(query.getModel().getQueryTree().getRoot(), model.get()); return execution.search(query); } diff --git a/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java b/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java index 56d7b53825d..890db3abb51 100644 --- a/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java +++ b/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java @@ -17,6 +17,7 @@ import org.junit.jupiter.api.Test; import java.nio.file.Path; import java.util.HashMap; + import static com.yahoo.test.JunitCompat.assertEquals; /** @@ -33,16 +34,19 @@ public class SignificanceSearcherTest { map.put(Language.ENGLISH, Path.of("src/test/java/com/yahoo/search/significance/model/en.json")); significanceModelRegistry = new DefaultSignificanceModelRegistry(map); - // TODO change to mock searcher = new SignificanceSearcher(significanceModelRegistry); } - private Execution createExecution() { + private Execution createExecution(SignificanceSearcher searcher) { return new Execution(new Chain<>(searcher), Execution.Context.createContextStub()); } + private Execution createExecution() { + return new Execution(new Chain<>(), Execution.Context.createContextStub()); + } + @Test - void testSimpleSignificanceValue() { + void testSignificanceValueOnSimpleQuery() { Query q = new Query(); AndItem root = new AndItem(); @@ -54,14 +58,14 @@ public class SignificanceSearcherTest { q.getModel().getQueryTree().setRoot(root); - SignificanceModel model = significanceModelRegistry.getModel(Language.ENGLISH); + SignificanceModel model = significanceModelRegistry.getModel(Language.ENGLISH).get(); var helloFrequency = model.documentFrequency("Hello"); var helloSignificanceValue = SignificanceSearcher.calculateIDF(helloFrequency.corpusSize(), helloFrequency.frequency()); var worldFrequency = model.documentFrequency("world"); var worldSignificanceValue = SignificanceSearcher.calculateIDF(worldFrequency.corpusSize(), worldFrequency.frequency()); - Result r = createExecution().search(q); + Result r = createExecution(searcher).search(q); root = (AndItem) r.getQuery().getModel().getQueryTree().getRoot(); WordItem w0 = (WordItem) root.getItem(0); @@ -73,7 +77,7 @@ public class SignificanceSearcherTest { } @Test - void testRecursiveSignificanceValues() { + void testSignificanceValueOnRecursiveQuery() { Query q = new Query(); AndItem root = new AndItem(); WordItem child1 = new WordItem("hello", true); @@ -95,7 +99,7 @@ public class SignificanceSearcherTest { q.getModel().getQueryTree().setRoot(root); - SignificanceModel model = significanceModelRegistry.getModel(Language.ENGLISH); + SignificanceModel model = significanceModelRegistry.getModel(Language.ENGLISH).get(); var helloFrequency = model.documentFrequency("hello"); var helloSignificanceValue = SignificanceSearcher.calculateIDF(helloFrequency.corpusSize(), helloFrequency.frequency()); @@ -104,7 +108,7 @@ public class SignificanceSearcherTest { - Result r = createExecution().search(q); + Result r = createExecution(searcher).search(q); root = (AndItem) r.getQuery().getModel().getQueryTree().getRoot(); WordItem w0 = (WordItem) root.getItem(0); @@ -116,4 +120,34 @@ public class SignificanceSearcherTest { assertEquals(SignificanceSearcher.calculateIDF(10, 2), w3.getSignificance()); } + + @Test + void testSignificanceValueOnEmptyQuery() { + Query q = new Query(); + q.getModel().setLanguage(Language.NORWEGIAN_BOKMAL); + AndItem root = new AndItem(); + WordItem tmp; + tmp = new WordItem("Hei", true); + root.addItem(tmp); + tmp = new WordItem("Verden", true); + root.addItem(tmp); + + + q.getModel().getQueryTree().setRoot(root); + Result r = createExecution(searcher).search(q); + root = (AndItem) r.getQuery().getModel().getQueryTree().getRoot(); + + WordItem w0 = (WordItem) root.getItem(0); + WordItem w1 = (WordItem) root.getItem(1); + + Result r0 = createExecution().search(q); + root = (AndItem) r0.getQuery().getModel().getQueryTree().getRoot(); + + WordItem w0_0 = (WordItem) root.getItem(0); + WordItem w0_1 = (WordItem) root.getItem(1); + + assertEquals(w0_0.getSignificance(), w0.getSignificance()); + assertEquals(w0_1.getSignificance(), w1.getSignificance()); + + } } diff --git a/linguistics/src/main/java/com/yahoo/language/significance/SignificanceModelRegistry.java b/linguistics/src/main/java/com/yahoo/language/significance/SignificanceModelRegistry.java index 6d8dcc00e0a..95d5b5e69d8 100644 --- a/linguistics/src/main/java/com/yahoo/language/significance/SignificanceModelRegistry.java +++ b/linguistics/src/main/java/com/yahoo/language/significance/SignificanceModelRegistry.java @@ -4,10 +4,12 @@ package com.yahoo.language.significance; import com.yahoo.api.annotations.Beta; import com.yahoo.language.Language; +import java.util.Optional; + /** * @author MariusArhaug */ @Beta public interface SignificanceModelRegistry { - SignificanceModel getModel(Language language); + Optional getModel(Language language); } diff --git a/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java b/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java index d44eab39cdf..1be1d3f13b5 100644 --- a/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java +++ b/linguistics/src/main/java/com/yahoo/language/significance/impl/DefaultSignificanceModelRegistry.java @@ -11,6 +11,7 @@ import java.nio.file.Path; import java.util.EnumMap; import java.util.HashMap; import java.util.Map; +import java.util.Optional; import java.util.function.Supplier; import static com.yahoo.yolean.Exceptions.uncheck; @@ -43,12 +44,12 @@ public class DefaultSignificanceModelRegistry implements SignificanceModelRegist @Override - public SignificanceModel getModel(Language language) throws IllegalArgumentException { + public Optional getModel(Language language) { if (!models.containsKey(language)) { - throw new IllegalArgumentException("No model for language " + language); + return Optional.empty(); } - return models.get(language); + return Optional.of(models.get(language)); } diff --git a/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java b/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java index d1de63a994d..d4849571b5e 100644 --- a/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java +++ b/linguistics/src/test/java/com/yahoo/language/significance/DefaultSignificanceModelRegistryTest.java @@ -8,9 +8,7 @@ import org.junit.Test; import java.nio.file.Path; import java.util.HashMap; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.*; /** @@ -27,10 +25,16 @@ public class DefaultSignificanceModelRegistryTest { DefaultSignificanceModelRegistry defaultSignificanceModelRegistry = new DefaultSignificanceModelRegistry(models); - var englishModel = defaultSignificanceModelRegistry.getModel(Language.ENGLISH); - var norwegianModel = defaultSignificanceModelRegistry.getModel(Language.NORWEGIAN_BOKMAL); + var optionalEnglishModel = defaultSignificanceModelRegistry.getModel(Language.ENGLISH); + var optionalNorwegianModel = defaultSignificanceModelRegistry.getModel(Language.NORWEGIAN_BOKMAL); - assertThrows(IllegalArgumentException.class, () -> defaultSignificanceModelRegistry.getModel(Language.FRENCH)); + assertTrue(optionalEnglishModel.isPresent()); + assertTrue(optionalNorwegianModel.isPresent()); + + var englishModel = optionalEnglishModel.get(); + var norwegianModel = optionalNorwegianModel.get(); + + assertTrue( defaultSignificanceModelRegistry.getModel(Language.FRENCH).isEmpty()); assertNotNull(englishModel); assertNotNull(norwegianModel); -- cgit v1.2.3