aboutsummaryrefslogtreecommitdiffstats
path: root/container-search/src/main/java/com/yahoo/search/query/rewrite/rewriters/NameRewriter.java
blob: afa984bc67b3d0a6656df643090165d6e4cf4183 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.query.rewrite.rewriters;

import java.io.*;
import java.util.*;
import java.util.logging.Logger;

import com.yahoo.component.annotation.Inject;
import com.yahoo.component.chain.dependencies.Provides;
import com.yahoo.search.query.rewrite.*;
import com.yahoo.search.*;
import com.yahoo.component.ComponentId;
import com.yahoo.filedistribution.fileacquirer.FileAcquirer;
import com.yahoo.search.query.rewrite.RewritesConfig;

/**
 * This rewriter would add rewrites to name entities to boost precision<br>
 * - FSA dict: [normalized original query]\t[rewrite 1]\t[rewrite 2]\t[etc]<br>
 * - Features:<br>
 *   OriginalAsUnitEquiv flag: add proximity boosting to original query while
 *                             keeping the original query
 *                             (takes precedence over OriginalAsUnit)<br>
 *   OriginalAsUnit flag: add proximity boosting to original query<br>
 *   RewritesAsUnitEquiv flag: add proximity boosted rewrites to original query
 *                             (takes precedence over RewritesAsEquiv)<br>
 *   RewritesAsEquiv flag: add rewrites to original query<br>
 *
 * @author Karen Sze Wing Lee
 */
@Provides("NameRewriter")
public class NameRewriter extends QueryRewriteSearcher {

    // Flag for skipping this rewriter if the query has been rewritten
    private static final boolean SKIP_REWRITER_IF_REWRITTEN = false;

    // Name of the rewriter
    public static final String REWRITER_NAME = "NameRewriter";

    // Name entity expansion dictionary name
    public static final String NAME_ENTITY_EXPAND_DICT = "NameEntityExpansion";

    // Default Name entity expansion dictionary file name
    public static final String NAME_ENTITY_EXPAND_DICT_FILENAME = "NameRewriter.fsa";

    // Created at class initialization so logging never depends on
    // configure() having been invoked first
    private static final Logger logger = Logger.getLogger(NameRewriter.class.getName());

    /**
     * Constructor for NameRewriter<br>
     * Load configs using default format
     */
    @Inject
    public NameRewriter(ComponentId id,
                        FileAcquirer fileAcquirer,
                        RewritesConfig config) {
        super(id, fileAcquirer, config);
    }

    /**
     * Constructor for NameRewriter unit test<br>
     * Load configs using default format
     */
    public NameRewriter(RewritesConfig config,
                        HashMap<String, File> fileList) {
        super(config, fileList);
    }

    /**
     * Instance creation time config loading besides FSA<br>
     * Empty for this rewriter: none of the arguments are used
     *
     * @return always true
     */
    public boolean configure(FileAcquirer fileAcquirer,
                             RewritesConfig config,
                             HashMap<String, File> fileList) {
        return true;
    }

    /**
     * Main logic of rewriter<br>
     * - Retrieve rewrites from FSA dict<br>
     * - rewrite query using features that are enabled by user<br>
     * At most one of OriginalAsUnitEquiv / OriginalAsUnit is applied, and
     * at most one of RewritesAsUnitEquiv / RewritesAsEquiv is applied;
     * in both pairs the first one listed wins when both are enabled.
     *
     * @param query   the query to rewrite
     * @param dictKey the key used for the FSA dict lookup; it is passed on
     *                unchanged in the returned map
     * @return a map holding RewriterConstants.REWRITTEN (Boolean, true if any
     *         feature was applied) and RewriterConstants.DICT_KEY (the
     *         incoming dictKey)
     * @throws NumberFormatException if the max rewrites setting is present
     *                               but is not a parsable integer
     */
    public HashMap<String, Object> rewrite(Query query,
                                           String dictKey) throws RuntimeException {

        boolean rewritten = false;

        // Pass the original dict key to the next rewriter
        HashMap<String, Object> result = new HashMap<>();
        result.put(RewriterConstants.REWRITTEN, rewritten);
        result.put(RewriterConstants.DICT_KEY, dictKey);

        RewriterUtils.log(logger, query,
                         "In NameRewriter, query used for dict retrieval=[" + dictKey + "]");

        // Retrieve rewrite from FSA dict using normalized query
        String rewrites = super.getRewriteFromFSA(query, NAME_ENTITY_EXPAND_DICT, dictKey);
        RewriterUtils.log(logger, query, "Retrieved rewrites: " + rewrites);

        // No rewrites: report the query as not rewritten
        if (rewrites == null) {
            RewriterUtils.log(logger, query, "No rewrite is retrieved");
            return result;
        }

        // Retrieve max number of rewrites allowed
        int maxNumRewrites = readMaxRewritesLimit(query);

        // Retrieve flags for enabling the features
        String originalAsUnit = getQPConfig(query, RewriterConstants.ORIGINAL_AS_UNIT);
        String originalAsUnitEquiv = getQPConfig(query, RewriterConstants.ORIGINAL_AS_UNIT_EQUIV);
        String rewritesAsUnitEquiv = getQPConfig(query, RewriterConstants.REWRITES_AS_UNIT_EQUIV);
        String rewritesAsEquiv = getQPConfig(query, RewriterConstants.REWRITES_AS_EQUIV);

        if (isTrueFlag(originalAsUnitEquiv)) {
            // Add proximity boosting to original query and keep
            // the original query
            RewriterUtils.log(logger, query, "OriginalAsUnitEquiv is enabled");
            query = RewriterFeatures.addUnitToOriginalQuery(query, dictKey, true);
            RewriterUtils.log(logger, query,
                              "Query after OriginalAsUnitEquiv: " + query.toDetailString());
            rewritten = true;
        } else if (isTrueFlag(originalAsUnit)) {
            // Add proximity boosting to original query
            RewriterUtils.log(logger, query, "OriginalAsUnit is enabled");
            query = RewriterFeatures.addUnitToOriginalQuery(query, dictKey, false);
            RewriterUtils.log(logger, query,
                              "Query after OriginalAsUnit: " + query.toDetailString());
            rewritten = true;
        }

        if (isTrueFlag(rewritesAsUnitEquiv)) {
            // Add rewrites as unit equiv
            RewriterUtils.log(logger, query, "RewritesAsUnitEquiv is enabled");
            query = RewriterFeatures.addRewritesAsEquiv(query, dictKey, rewrites, true, maxNumRewrites);
            RewriterUtils.log(logger, query,
                              "Query after RewritesAsUnitEquiv: " + query.toDetailString());
            rewritten = true;
        } else if (isTrueFlag(rewritesAsEquiv)) {
            // Add rewrites as equiv
            RewriterUtils.log(logger, query, "RewritesAsEquiv is enabled");
            query = RewriterFeatures.addRewritesAsEquiv(query, dictKey, rewrites, false, maxNumRewrites);
            RewriterUtils.log(logger, query,
                              "Query after RewritesAsEquiv: " + query.toDetailString());
            rewritten = true;
        }

        RewriterUtils.log(logger, query, "NameRewriter final query: " + query.toDetailString());

        result.put(RewriterConstants.REWRITTEN, rewritten);

        return result;
    }

    /**
     * Read the max number of rewrites setting for this query and log the outcome.
     *
     * @return the parsed setting, or 0 when the setting is absent
     *         (logged as "no limit")
     * @throws NumberFormatException if the setting is present but not an integer
     */
    private int readMaxRewritesLimit(Query query) {
        String maxNumRewritesStr = getQPConfig(query, RewriterConstants.MAX_REWRITES);
        if (maxNumRewritesStr == null) {
            RewriterUtils.log(logger, query, "No limit on number of rewrites");
            return 0;
        }
        int maxNumRewrites = Integer.parseInt(maxNumRewritesStr);
        RewriterUtils.log(logger, query,
                          "Limiting max number of rewrites to: " + maxNumRewrites);
        return maxNumRewrites;
    }

    /**
     * Check whether a feature flag value is "true", ignoring case.
     *
     * @return true only if the value is non-null and equals "true" ignoring case
     */
    private static boolean isTrueFlag(String flagValue) {
        return "true".equalsIgnoreCase(flagValue);
    }

    /**
     * Get the flag which specifies whether this rewriter
     * should be skipped if the query has been rewritten
     *
     * @return true if rewriter should be skipped, false
     *         otherwise
     */
    public boolean getSkipRewriterIfRewritten() {
        return SKIP_REWRITER_IF_REWRITTEN;
    }

    /**
     * Get the name of the rewriter
     *
     * @return Name of the rewriter
     */
    public String getRewriterName() {
        return REWRITER_NAME;
    }

    /**
     * Get default FSA dictionary names
     *
     * @return Pair of FSA dictionary name and filename
     */
    public HashMap<String, String> getDefaultFSAs() {
        HashMap<String, String> defaultDicts = new HashMap<>();
        defaultDicts.put(NAME_ENTITY_EXPAND_DICT, NAME_ENTITY_EXPAND_DICT_FILENAME);
        return defaultDicts;
    }
}