aboutsummaryrefslogtreecommitdiffstats
path: root/container-search/src/test/java/com/yahoo
diff options
context:
space:
mode:
authorMarius Arhaug <mariusarhaug@hotmail.com>2024-04-24 15:27:32 +0200
committerGitHub <noreply@github.com>2024-04-24 15:27:32 +0200
commitff1d604e77b1943a72fc6b585b09db82a5ee791d (patch)
tree2195429e568fe6de8a680454d22efcac4b6a4120 /container-search/src/test/java/com/yahoo
parent802c854e5190d37914f237d8626949781f3db9c2 (diff)
parent8f69128279305dacd077b540d5e9be746508efc9 (diff)
Merge pull request #30871 from vespa-engine/marius/add-significance-searcher
Add significance searcher
Diffstat (limited to 'container-search/src/test/java/com/yahoo')
-rw-r--r--container-search/src/test/java/com/yahoo/search/significance/model/en.json14
-rw-r--r--container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java153
2 files changed, 167 insertions, 0 deletions
diff --git a/container-search/src/test/java/com/yahoo/search/significance/model/en.json b/container-search/src/test/java/com/yahoo/search/significance/model/en.json
new file mode 100644
index 00000000000..50bae5e3451
--- /dev/null
+++ b/container-search/src/test/java/com/yahoo/search/significance/model/en.json
@@ -0,0 +1,14 @@
+{
+ "version" : "1.0",
+ "id" : "test::1",
+ "description" : "desc",
+ "corpus-size" : 10,
+ "language" : "en",
+ "word-count" : 4,
+ "frequencies" : {
+ "usa" : 2,
+ "hello": 3,
+ "world": 5,
+ "test": 2
+ }
+}
diff --git a/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java b/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java
new file mode 100644
index 00000000000..890db3abb51
--- /dev/null
+++ b/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java
@@ -0,0 +1,153 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.search.significance.test;
+
+import com.yahoo.component.chain.Chain;
+import com.yahoo.language.Language;
+import com.yahoo.language.significance.SignificanceModel;
+import com.yahoo.language.significance.SignificanceModelRegistry;
+import com.yahoo.language.significance.impl.DefaultSignificanceModelRegistry;
+import com.yahoo.prelude.query.AndItem;
+import com.yahoo.prelude.query.WordItem;
+import com.yahoo.search.Query;
+import com.yahoo.search.Result;
+import com.yahoo.search.searchchain.Execution;
+import com.yahoo.search.significance.SignificanceSearcher;
+import org.junit.jupiter.api.Test;
+
+import java.nio.file.Path;
+import java.util.HashMap;
+
+
+import static com.yahoo.test.JunitCompat.assertEquals;
+
+/**
+ * Tests significance term in the search chain.
+ *
+ * @author MariusArhaug
+ */
+public class SignificanceSearcherTest {
+ SignificanceModelRegistry significanceModelRegistry;
+ SignificanceSearcher searcher;
+
+ public SignificanceSearcherTest() {
+ HashMap<Language, Path> map = new HashMap<>();
+ map.put(Language.ENGLISH, Path.of("src/test/java/com/yahoo/search/significance/model/en.json"));
+
+ significanceModelRegistry = new DefaultSignificanceModelRegistry(map);
+ searcher = new SignificanceSearcher(significanceModelRegistry);
+ }
+
+ private Execution createExecution(SignificanceSearcher searcher) {
+ return new Execution(new Chain<>(searcher), Execution.Context.createContextStub());
+ }
+
+ private Execution createExecution() {
+ return new Execution(new Chain<>(), Execution.Context.createContextStub());
+ }
+
+ @Test
+ void testSignificanceValueOnSimpleQuery() {
+
+ Query q = new Query();
+ AndItem root = new AndItem();
+ WordItem tmp;
+ tmp = new WordItem("Hello", true);
+ root.addItem(tmp);
+ tmp = new WordItem("world", true);
+ root.addItem(tmp);
+
+ q.getModel().getQueryTree().setRoot(root);
+
+ SignificanceModel model = significanceModelRegistry.getModel(Language.ENGLISH).get();
+ var helloFrequency = model.documentFrequency("Hello");
+ var helloSignificanceValue = SignificanceSearcher.calculateIDF(helloFrequency.corpusSize(), helloFrequency.frequency());
+
+ var worldFrequency = model.documentFrequency("world");
+ var worldSignificanceValue = SignificanceSearcher.calculateIDF(worldFrequency.corpusSize(), worldFrequency.frequency());
+
+ Result r = createExecution(searcher).search(q);
+
+ root = (AndItem) r.getQuery().getModel().getQueryTree().getRoot();
+ WordItem w0 = (WordItem) root.getItem(0);
+ WordItem w1 = (WordItem) root.getItem(1);
+
+ assertEquals(helloSignificanceValue, w0.getSignificance());
+ assertEquals(worldSignificanceValue, w1.getSignificance());
+
+ }
+
+ @Test
+ void testSignificanceValueOnRecursiveQuery() {
+ Query q = new Query();
+ AndItem root = new AndItem();
+ WordItem child1 = new WordItem("hello", true);
+
+ AndItem child2 = new AndItem();
+ WordItem child2_1 = new WordItem("test", true);
+
+ AndItem child3 = new AndItem();
+ AndItem child3_1 = new AndItem();
+ WordItem child3_1_1 = new WordItem("usa", true);
+
+ root.addItem(child1);
+ root.addItem(child2);
+ root.addItem(child3);
+
+ child2.addItem(child2_1);
+ child3.addItem(child3_1);
+ child3_1.addItem(child3_1_1);
+
+ q.getModel().getQueryTree().setRoot(root);
+
+ SignificanceModel model = significanceModelRegistry.getModel(Language.ENGLISH).get();
+ var helloFrequency = model.documentFrequency("hello");
+ var helloSignificanceValue = SignificanceSearcher.calculateIDF(helloFrequency.corpusSize(), helloFrequency.frequency());
+
+ var testFrequency = model.documentFrequency("test");
+ var testSignificanceValue = SignificanceSearcher.calculateIDF(testFrequency.corpusSize(), testFrequency.frequency());
+
+
+
+ Result r = createExecution(searcher).search(q);
+
+ root = (AndItem) r.getQuery().getModel().getQueryTree().getRoot();
+ WordItem w0 = (WordItem) root.getItem(0);
+ WordItem w1 = (WordItem) ((AndItem) root.getItem(1)).getItem(0);
+ WordItem w3 = (WordItem) ((AndItem) ((AndItem) root.getItem(2)).getItem(0)).getItem(0);
+
+ assertEquals(helloSignificanceValue, w0.getSignificance());
+ assertEquals(testSignificanceValue, w1.getSignificance());
+ assertEquals(SignificanceSearcher.calculateIDF(10, 2), w3.getSignificance());
+
+ }
+
+ @Test
+ void testSignificanceValueOnEmptyQuery() {
+ Query q = new Query();
+ q.getModel().setLanguage(Language.NORWEGIAN_BOKMAL);
+ AndItem root = new AndItem();
+ WordItem tmp;
+ tmp = new WordItem("Hei", true);
+ root.addItem(tmp);
+ tmp = new WordItem("Verden", true);
+ root.addItem(tmp);
+
+
+ q.getModel().getQueryTree().setRoot(root);
+ Result r = createExecution(searcher).search(q);
+ root = (AndItem) r.getQuery().getModel().getQueryTree().getRoot();
+
+ WordItem w0 = (WordItem) root.getItem(0);
+ WordItem w1 = (WordItem) root.getItem(1);
+
+ Result r0 = createExecution().search(q);
+ root = (AndItem) r0.getQuery().getModel().getQueryTree().getRoot();
+
+ WordItem w0_0 = (WordItem) root.getItem(0);
+ WordItem w0_1 = (WordItem) root.getItem(1);
+
+ assertEquals(w0_0.getSignificance(), w0.getSignificance());
+ assertEquals(w0_1.getSignificance(), w1.getSignificance());
+
+ }
+}