aboutsummaryrefslogtreecommitdiffstats
path: root/container-search/src/main/java/com/yahoo/search
diff options
context:
space:
mode:
authorMariusArhaug <mariusarhaug@hotmail.com>2024-04-09 16:38:50 +0200
committerMariusArhaug <mariusarhaug@hotmail.com>2024-04-09 16:39:36 +0200
commit91cc80f7accdc9a5456c4fcb1c03002552586aa5 (patch)
tree7f0de0f6008bcb971df19da916553523aebb4892 /container-search/src/main/java/com/yahoo/search
parent887cb5b3c98472ae521f2104216b15ffde5d8acb (diff)
add significance searcher
Diffstat (limited to 'container-search/src/main/java/com/yahoo/search')
-rw-r--r--container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java63
1 files changed, 63 insertions, 0 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java b/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java
new file mode 100644
index 00000000000..f33d1468334
--- /dev/null
+++ b/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java
@@ -0,0 +1,63 @@
+package com.yahoo.search.significance;
+
+import com.yahoo.component.annotation.Inject;
+import com.yahoo.component.chain.dependencies.Provides;
+import com.yahoo.language.significance.SignificanceModel;
+import com.yahoo.language.significance.SignificanceModelRegistry;
+import com.yahoo.prelude.query.CompositeItem;
+import com.yahoo.prelude.query.Item;
+import com.yahoo.prelude.query.NullItem;
+import com.yahoo.prelude.query.WordItem;
+import com.yahoo.search.Query;
+import com.yahoo.search.Result;
+import com.yahoo.search.Searcher;
+import com.yahoo.search.searchchain.Execution;
+
+/** Searcher that sets a significance value on the word items of the query tree before passing the query on. */
+@Provides(SignificanceSearcher.SIGNIFICANCE)
+public class SignificanceSearcher extends Searcher {
+
+    public final static String SIGNIFICANCE = "Significance";
+    private final SignificanceModelRegistry significanceModelRegistry;
+
+    @Inject
+    public SignificanceSearcher(SignificanceModelRegistry significanceModelRegistry) {
+        this.significanceModelRegistry = significanceModelRegistry;
+    }
+
+    /** Annotates the query tree when a model registry is available, then always forwards the query down the chain. */
+    @Override
+    public Result search(Query query, Execution execution) {
+        if (significanceModelRegistry != null) {
+            setIDF(query.getModel().getQueryTree().getRoot());
+        }
+        return execution.search(query);
+    }
+
+    /** Recursively visits the tree under root; word items get a significance, other non-composite items are left as is. */
+    private void setIDF(Item root) {
+        if (root == null || root instanceof NullItem) return;
+        if (root instanceof WordItem) {
+            WordItem word = (WordItem) root;
+            SignificanceModel model = significanceModelRegistry.getModel(root.getLanguage());
+            var frequency = model.documentFrequency(word.getWord());
+            long matchingDocuments = frequency.frequency();
+            long corpusSize = frequency.corpusSize();
+            word.setSignificance(calculateIDF(corpusSize, matchingDocuments));
+        } else if (root instanceof CompositeItem) {
+            CompositeItem composite = (CompositeItem) root;
+            for (int i = 0; i < composite.getItemCount(); i++) {
+                setIDF(composite.getItem(i));
+            }
+        }
+    }
+
+    /** Returns ln(1 + (N - nq_i + 0.5) / (nq_i + 0.5)), where N is the corpus size and nq_i the term's document frequency. */
+    private static double calculateIDF(long N, long nq_i) {
+        double numerator = N - nq_i + 0.5;
+        double denominator = nq_i + 0.5;
+        return Math.log(1 + numerator / denominator);
+    }
+}
+
+