aboutsummaryrefslogtreecommitdiffstats
path: root/container-search/src/main/java/com/yahoo/prelude/querytransform/QueryRewrite.java
blob: bec9756c9db14de3ffd38b0d7b7d2a835ccc728e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.querytransform;

import com.yahoo.prelude.query.AndItem;
import com.yahoo.prelude.query.CompositeItem;
import com.yahoo.prelude.query.EquivItem;
import com.yahoo.prelude.query.HasIndexItem;
import com.yahoo.prelude.query.IndexedItem;
import com.yahoo.prelude.query.Item;
import com.yahoo.prelude.query.NearItem;
import com.yahoo.prelude.query.NotItem;
import com.yahoo.prelude.query.NullItem;
import com.yahoo.prelude.query.OrItem;
import com.yahoo.prelude.query.RankItem;
import com.yahoo.prelude.query.SimpleIndexedItem;
import com.yahoo.prelude.query.SubstringItem;
import com.yahoo.prelude.query.WeakAndItem;
import com.yahoo.search.Query;
import com.yahoo.search.query.Model;
import com.yahoo.search.result.Hit;

/**
 * @author baldersheim
 */
// TODO: This overlaps with QueryCanonicalizer
public class QueryRewrite {

    private enum Recall { RECALLS_EVERYTHING, RECALLS_NOTHING, UNKNOWN_RECALL }

    /**
     * Optimize multiple NotItems under and or by collapsing them in to one and leaving
     * the positive ones behind in its place and moving itself with the original and as its positive item
     * and the union of all the negative items of all the original NotItems as its negative items.
     */
    public static void optimizeAndNot(Query query) {
        Item root = query.getModel().getQueryTree().getRoot();
        Item possibleNewRoot = optimizeAndNot(root);
        if (root != possibleNewRoot) {
            query.getModel().getQueryTree().setRoot(possibleNewRoot);
        }
    }

    /**
     * Optimizes the given query tree based on its {@link Model#getRestrict()} parameter, if any.
     */
    public static void optimizeByRestrict(Query query) {
        if (query.getModel().getRestrict().size() != 1) {
            return;
        }
        Item root = query.getModel().getQueryTree().getRoot();

        if (optimizeByRestrict(root, query.getModel().getRestrict().iterator().next()) == Recall.RECALLS_NOTHING) {
            query.getModel().getQueryTree().setRoot(new NullItem());
        }
    }

    /**
     * Collapses all single-child {@link CompositeItem}s into their parent item.
     */
    public static void collapseSingleComposites(Query query) {
        Item oldRoot = query.getModel().getQueryTree().getRoot();
        Item newRoot = collapseSingleComposites(oldRoot);
        if (oldRoot != newRoot) {
            query.getModel().getQueryTree().setRoot(newRoot);
        }
    }

    /**
     * Replaces and {@link SimpleIndexedItem} searching in the {@link Hit#SDDOCNAME_FIELD} with an item
     * appropriate for the search node.
     */
    public static void rewriteSddocname(Query query) {
        Item oldRoot = query.getModel().getQueryTree().getRoot();
        Item newRoot = rewriteSddocname(oldRoot);
        if (oldRoot != newRoot) {
            query.getModel().getQueryTree().setRoot(newRoot);
        }
    }

    // ------------------- End public API

    private static Item optimizeAndNot(Item node) {
        if (node instanceof CompositeItem) {
            return extractAndNotRecursively((CompositeItem) node);
        }
        return node;
    }

    private static CompositeItem extractAndNotRecursively(CompositeItem parent) {
        for (int i = 0; i < parent.getItemCount(); i++) {
            Item child = parent.getItem(i);
            Item possibleNewChild = optimizeAndNot(child);
            if (child != possibleNewChild) {
                parent.setItem(i, possibleNewChild);
            }
        }
        if (parent instanceof AndItem) {
            return extractAndNot((AndItem) parent);
        }
        return parent;
    }

    private static CompositeItem extractAndNot(AndItem parent) {
        NotItem theOnlyNot = null;
        for (int i = 0; i < parent.getItemCount(); i++) {
            Item child = parent.getItem(i);
            if (child instanceof NotItem) {
                NotItem thisNot = (NotItem) child;
                parent.setItem(i, thisNot.getPositiveItem());
                if (theOnlyNot == null) {
                    theOnlyNot = thisNot;
                    theOnlyNot.setPositiveItem(parent);
                } else {
                    for (int j=1; j < thisNot.getItemCount(); j++) {
                        theOnlyNot.addNegativeItem(thisNot.getItem(j));
                    }
                }
            }
        }
        return (theOnlyNot != null) ? theOnlyNot : parent;
    }

    private static Recall optimizeByRestrict(Item item, String restrictParam) {
        if (item instanceof SimpleIndexedItem) {
            return optimizeIndexedItemByRestrict((SimpleIndexedItem)item, restrictParam);
        } else if (item instanceof NotItem) {
            return optimizeNotItemByRestrict((NotItem)item, restrictParam);
        } else if (item instanceof CompositeItem) {
            return optimizeCompositeItemByRestrict((CompositeItem)item, restrictParam);
        } else {
            return Recall.UNKNOWN_RECALL;
        }
    }

    private static Recall optimizeIndexedItemByRestrict(SimpleIndexedItem item, String restrictParam) {
        if (!Hit.SDDOCNAME_FIELD.equals(item.getIndexName())) {
            return Recall.UNKNOWN_RECALL;
        }
        // a query term searching for sddocname will either recall everything or nothing, depending on whether
        // the term matches the restrict parameter or not
        return restrictParam.equals(item.getIndexedString())
               ? Recall.RECALLS_EVERYTHING
               : Recall.RECALLS_NOTHING;
    }

    private static Recall optimizeNotItemByRestrict(NotItem item, String restrictParam) {
        // first item is the positive one
        if (optimizeByRestrict(item.getItem(0), restrictParam) == Recall.RECALLS_NOTHING) {
            return Recall.RECALLS_NOTHING;
        }
        // all the remaining items are negative ones
        for (int i = item.getItemCount(); --i >= 1; ) {
            Item child = item.getItem(i);
            switch (optimizeByRestrict(child, restrictParam)) {
                case RECALLS_EVERYTHING:
                    return Recall.RECALLS_NOTHING;
                case RECALLS_NOTHING:
                    item.removeItem(i);
                    break;
            }
        }
        return Recall.UNKNOWN_RECALL;
    }

    private static Recall optimizeCompositeItemByRestrict(CompositeItem item, String restrictParam) {
        Recall recall = Recall.UNKNOWN_RECALL;
        for (int i = item.getItemCount(); --i >= 0; ) {
            switch (optimizeByRestrict(item.getItem(i), restrictParam)) {
                case RECALLS_EVERYTHING:
                    if ((item instanceof OrItem) || (item instanceof EquivItem)) {
                        removeOtherNonrankedChildren(item, i);
                        recall = Recall.RECALLS_EVERYTHING;
                    } else if ((item instanceof AndItem) || (item instanceof NearItem) || (item instanceof WeakAndItem)) {
                        if ( ! isRanked(item.getItem(i)) && item.items().size() > 1) {
                            item.removeItem(i);
                        }
                    } else if (item instanceof RankItem) {
                        // empty
                    } else {
                        throw new UnsupportedOperationException(item.getClass().getName());
                    }
                    break;
                case RECALLS_NOTHING:
                    if ((item instanceof OrItem) || (item instanceof EquivItem) && item.items().size() > 1) {
                        item.removeItem(i);
                    } else if ((item instanceof AndItem) || (item instanceof NearItem) || (item instanceof WeakAndItem)) {
                        return Recall.RECALLS_NOTHING;
                    } else if (item instanceof RankItem) {
                        if (i == 0) return Recall.RECALLS_NOTHING;
                        item.removeItem(i);
                    } else {
                        throw new UnsupportedOperationException(item.getClass().getName());
                    }
                    break;
            }
        }
        return recall;
    }

    private static void removeOtherNonrankedChildren(CompositeItem parent, int indexOfChildToKeep) {
        Item childToKeep = parent.getItem(indexOfChildToKeep);
        for (int i = parent.getItemCount(); --i >= 0; ) {
            Item child = parent.getItem(i);
            if ( child != childToKeep && ! parent.getItem(i).isRanked())
                parent.removeItem(i);
        }
    }

    private static boolean isRanked(Item item) {
        if (item instanceof CompositeItem) {
            for (Item child : ((CompositeItem)item).items())
                if (isRanked(child)) return true;
            return false;
        }
        else if (item instanceof HasIndexItem && Hit.SDDOCNAME_FIELD.equals(((HasIndexItem)item).getIndexName())) {
            return false; // No point in ranking by sddocname
        }
        else {
            return item.isRanked();
        }
    }
    
    private static Item collapseSingleComposites(Item item) {
        if (!(item instanceof CompositeItem)) {
            return item;
        }
        CompositeItem parent = (CompositeItem)item;
        int numChildren = parent.getItemCount();
        for (int i = 0; i < numChildren; ++i) {
            Item oldChild = parent.getItem(i);
            Item newChild = collapseSingleComposites(oldChild);
            if (oldChild != newChild) {
                parent.setItem(i, newChild);
            }
        }
        return parent.extractSingleChild().orElse(item);
    }

    private static Item rewriteSddocname(Item item) {
        if (item instanceof CompositeItem) {
            CompositeItem parent = (CompositeItem)item;
            for (int i = 0, len = parent.getItemCount(); i < len; ++i) {
                Item oldChild = parent.getItem(i);
                Item newChild = rewriteSddocname(oldChild);
                if (oldChild != newChild) {
                    parent.setItem(i, newChild);
                }
            }
        } else if (item instanceof SimpleIndexedItem) {
            SimpleIndexedItem oldItem = (SimpleIndexedItem)item;
            if (Hit.SDDOCNAME_FIELD.equals(oldItem.getIndexName())) {
                SubstringItem newItem = new SubstringItem(oldItem.getIndexedString());
                newItem.setIndexName("[documentmetastore]");
                return newItem;
            }
        }
        return item;
    }

}