diff options
author | MariusArhaug <mariusarhaug@hotmail.com> | 2024-04-09 16:38:50 +0200 |
---|---|---|
committer | MariusArhaug <mariusarhaug@hotmail.com> | 2024-04-09 16:39:36 +0200 |
commit | 91cc80f7accdc9a5456c4fcb1c03002552586aa5 (patch) | |
tree | 7f0de0f6008bcb971df19da916553523aebb4892 /container-search/src/main | |
parent | 887cb5b3c98472ae521f2104216b15ffde5d8acb (diff) |
add significance searcher
Diffstat (limited to 'container-search/src/main')
-rw-r--r-- | container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java | 63 |
1 files changed, 63 insertions, 0 deletions
diff --git a/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java b/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java new file mode 100644 index 00000000000..f33d1468334 --- /dev/null +++ b/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java @@ -0,0 +1,63 @@ +package com.yahoo.search.significance; + +import com.yahoo.component.annotation.Inject; +import com.yahoo.component.chain.dependencies.Provides; +import com.yahoo.language.significance.SignificanceModel; +import com.yahoo.language.significance.SignificanceModelRegistry; +import com.yahoo.prelude.query.CompositeItem; +import com.yahoo.prelude.query.Item; +import com.yahoo.prelude.query.NullItem; +import com.yahoo.prelude.query.WordItem; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; + +@Provides(SignificanceSearcher.SIGNIFICANCE) +public class SignificanceSearcher extends Searcher { + + public final static String SIGNIFICANCE = "Significance"; + private final SignificanceModelRegistry significanceModelRegistry; + + + @Inject + public SignificanceSearcher(SignificanceModelRegistry significanceModelRegistry) { + this.significanceModelRegistry = significanceModelRegistry; + } + + @Override + public Result search(Query query, Execution execution) { + if (significanceModelRegistry == null) return execution.search(query); + + + setIDF(query.getModel().getQueryTree().getRoot()); + + return execution.search(query); + } + + private void setIDF(Item root) { + if (root == null || root instanceof NullItem) return; + + if (root instanceof WordItem) { + + SignificanceModel significanceModel = significanceModelRegistry.getModel(root.getLanguage()); + + var documentFrequency = significanceModel.documentFrequency(((WordItem) root).getWord()); + long nq_i = documentFrequency.frequency(); + long N = documentFrequency.corpusSize(); + double idf = calculateIDF(N, nq_i); + + ((WordItem) root).setSignificance(idf); + } else if (root instanceof CompositeItem) { + for (int i = 0; i < ((CompositeItem) root).getItemCount(); i++) { + setIDF(((CompositeItem) root).getItem(i)); + } + } + } + + private static double calculateIDF(long N, long nq_i) { + return Math.log(1 + (N - nq_i + 0.5) / (nq_i + 0.5)); + } +} + + |