blob: 0a42bf8a2598cd6b4195b751164c1f77c3f76287 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
|
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.significance;
import com.yahoo.component.annotation.Inject;
import com.yahoo.component.chain.dependencies.Before;
import com.yahoo.component.chain.dependencies.Provides;
import com.yahoo.language.Language;
import com.yahoo.language.significance.SignificanceModel;
import com.yahoo.language.significance.SignificanceModelRegistry;
import com.yahoo.prelude.query.CompositeItem;
import com.yahoo.prelude.query.Item;
import com.yahoo.prelude.query.NullItem;
import com.yahoo.prelude.query.WordItem;
import com.yahoo.search.Query;
import com.yahoo.search.Result;
import com.yahoo.search.Searcher;
import com.yahoo.search.searchchain.Execution;
import java.util.Optional;
import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING;
/**
 * Searcher that walks the query tree and assigns a significance value to every
 * {@link WordItem} it finds, using the {@link SignificanceModel} registered for
 * the query's parsing language. When no model exists for that language the query
 * is passed on unchanged.
 *
 * Provides {@link #SIGNIFICANCE} and is ordered before stemming in the search chain.
 *
 * @author MariusArhaug
 */
@Provides(SignificanceSearcher.SIGNIFICANCE)
@Before(STEMMING)
public class SignificanceSearcher extends Searcher {

    /** The name this searcher provides to the search chain. */
    public static final String SIGNIFICANCE = "Significance";

    /** Source of per-language significance models. */
    private final SignificanceModelRegistry significanceModelRegistry;

    /**
     * Creates a searcher backed by the given registry.
     *
     * @param significanceModelRegistry registry consulted for a model on each search
     */
    @Inject
    public SignificanceSearcher(SignificanceModelRegistry significanceModelRegistry) {
        this.significanceModelRegistry = significanceModelRegistry;
    }

    /**
     * Annotates the word items of the query with significance (if a model is available
     * for the query's parsing language) and then forwards the query down the chain.
     *
     * @param query     the query whose tree is annotated in place
     * @param execution the execution used to invoke the rest of the chain
     * @return the result produced by the rest of the chain
     */
    @Override
    public Result search(Query query, Execution execution) {
        Language parsingLanguage = query.getModel().getParsingLanguage();
        Optional<SignificanceModel> candidate = significanceModelRegistry.getModel(parsingLanguage);

        // Only touch the query tree when there is a model to take frequencies from.
        if (candidate.isPresent()) {
            setIDF(query.getModel().getQueryTree().getRoot(), candidate.get());
        }
        return execution.search(query);
    }

    /**
     * Recursively visits {@code root} and everything below it, setting significance on
     * each word item. Items that are neither words nor composites are left untouched.
     *
     * @param root              the subtree to process; may be null or a {@link NullItem}
     * @param significanceModel the model supplying document frequencies
     */
    private void setIDF(Item root, SignificanceModel significanceModel) {
        if (root == null || root instanceof NullItem) {
            return;
        }
        if (root instanceof WordItem) {
            assignSignificance((WordItem) root, significanceModel);
            return;
        }
        if (root instanceof CompositeItem) {
            descend((CompositeItem) root, significanceModel);
        }
    }

    /** Looks up the document frequency of the word and stores the resulting IDF on the item. */
    private void assignSignificance(WordItem wordItem, SignificanceModel significanceModel) {
        var frequencyInfo = significanceModel.documentFrequency(wordItem.getWord());
        long corpusSize = frequencyInfo.corpusSize();
        long matchingDocuments = frequencyInfo.frequency();
        wordItem.setSignificance(calculateIDF(corpusSize, matchingDocuments));
    }

    /** Applies {@link #setIDF} to each child of the composite, in index order. */
    private void descend(CompositeItem composite, SignificanceModel significanceModel) {
        for (int index = 0; index < composite.getItemCount(); index++) {
            Item child = composite.getItem(index);
            setIDF(child, significanceModel);
        }
    }

    /**
     * Computes the inverse document frequency
     * {@code ln(1 + (N - nq_i + 0.5) / (nq_i + 0.5))}.
     *
     * @param N    the number of documents in the corpus
     * @param nq_i the number of documents containing the term
     * @return the natural logarithm described above
     */
    public static double calculateIDF(long N, long nq_i) {
        double documentsWithoutTerm = N - nq_i + 0.5;
        double documentsWithTerm = nq_i + 0.5;
        return Math.log(1 + documentsWithoutTerm / documentsWithTerm);
    }

}
|