blob: f33d146833432496d1748b63a72cf7dcdacc4fe0 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
|
package com.yahoo.search.significance;
import com.yahoo.component.annotation.Inject;
import com.yahoo.component.chain.dependencies.Provides;
import com.yahoo.language.significance.SignificanceModel;
import com.yahoo.language.significance.SignificanceModelRegistry;
import com.yahoo.prelude.query.CompositeItem;
import com.yahoo.prelude.query.Item;
import com.yahoo.prelude.query.NullItem;
import com.yahoo.prelude.query.WordItem;
import com.yahoo.search.Query;
import com.yahoo.search.Result;
import com.yahoo.search.Searcher;
import com.yahoo.search.searchchain.Execution;
/**
 * Searcher that assigns a significance value to every {@link WordItem} in the
 * query tree before passing the query on down the chain.
 *
 * <p>The significance of a word is derived from the document frequency and
 * corpus size reported by the {@link SignificanceModel} that the registry
 * returns for the item's language. If no registry was supplied, the query is
 * forwarded untouched.
 */
@Provides(SignificanceSearcher.SIGNIFICANCE)
public class SignificanceSearcher extends Searcher {

    /** Name under which this searcher is announced via {@link Provides}. */
    public final static String SIGNIFICANCE = "Significance";

    /** Source of per-language significance models; may be null, see {@link #search}. */
    private final SignificanceModelRegistry significanceModelRegistry;

    /**
     * Creates a searcher backed by the given model registry.
     *
     * @param significanceModelRegistry the registry used to look up models by language
     */
    @Inject
    public SignificanceSearcher(SignificanceModelRegistry significanceModelRegistry) {
        this.significanceModelRegistry = significanceModelRegistry;
    }

    /**
     * Annotates the word items of the query with significance values (when a
     * registry is available) and then continues the search chain.
     *
     * @param query     the query whose tree is annotated in place
     * @param execution the execution used to invoke the rest of the chain
     * @return the result produced by the rest of the chain
     */
    @Override
    public Result search(Query query, Execution execution) {
        if (significanceModelRegistry != null) {
            Item queryRoot = query.getModel().getQueryTree().getRoot();
            setIDF(queryRoot);
        }
        return execution.search(query);
    }

    /**
     * Walks the tree rooted at the given item and sets the significance of
     * every word item found. Null items and {@link NullItem}s are skipped;
     * composite items are traversed child by child, in index order; any other
     * item type is left as it is.
     *
     * @param root the item to process, possibly null
     */
    private void setIDF(Item root) {
        if (root == null) return;
        if (root instanceof NullItem) return;

        if (root instanceof WordItem) {
            WordItem wordItem = (WordItem) root;
            // The model is chosen by the language carried by the item itself.
            SignificanceModel model = significanceModelRegistry.getModel(wordItem.getLanguage());
            var stats = model.documentFrequency(wordItem.getWord());
            long wordFrequency = stats.frequency();
            long corpusSize = stats.corpusSize();
            wordItem.setSignificance(calculateIDF(corpusSize, wordFrequency));
            return;
        }

        if (root instanceof CompositeItem) {
            CompositeItem composite = (CompositeItem) root;
            for (int childIndex = 0; childIndex < composite.getItemCount(); childIndex++) {
                setIDF(composite.getItem(childIndex));
            }
        }
    }

    /**
     * Computes {@code ln(1 + (N - n + 0.5) / (n + 0.5))}.
     *
     * @param N    the corpus size
     * @param nq_i the document frequency of the word
     * @return the resulting inverse document frequency value
     */
    private static double calculateIDF(long N, long nq_i) {
        double numerator = N - nq_i + 0.5;
        double denominator = nq_i + 0.5;
        return Math.log(1 + numerator / denominator);
    }

}
|