aboutsummaryrefslogtreecommitdiffstats
path: root/container-search/src/main/java/com/yahoo/prelude/query/QueryCanonicalizer.java
blob: a1841d6ec440f18981a22ba128f0219db40a9e20 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.query;

import com.yahoo.search.Query;
import com.yahoo.search.query.QueryTree;
import com.yahoo.search.query.properties.DefaultProperties;

import java.util.ListIterator;
import java.util.Optional;

/**
 * Query normalizer and sanity checker.
 *
 * @author bratseth
 */
public class QueryCanonicalizer {

    /** The name of the operation performed by this, for use in search chain ordering */
    public static final String queryCanonicalization = "queryCanonicalization";

    /**
     * Validates this query and carries out possible operations on this query
     * which simplifies it without changing its semantics.
     *
     * @return null if the query is valid, an error message if it is invalid
     */
    public static String canonicalize(Query query) {
        return canonicalize(query.getModel().getQueryTree(),
                            query.properties().getInteger(DefaultProperties.MAX_QUERY_ITEMS));
    }

    /**
     * Canonicalizes this query, allowing any query tree size
     *
     * @return null if the query is valid, an error message if it is invalid
     */
    public static String canonicalize(QueryTree queryTree) {
        return canonicalize(queryTree, Integer.MAX_VALUE);
    }

    /**
     * Canonicalizes this query
     * 
     * @return null if the query is valid, an error message if it is invalid
     */
    private static String canonicalize(QueryTree query, Integer maxQueryItems) {
        ListIterator<Item> rootItemIterator = query.getItemIterator();
        CanonicalizationResult result = recursivelyCanonicalize(rootItemIterator.next(), rootItemIterator);
        if (query.isEmpty() && ! result.isError()) result = CanonicalizationResult.error("No query");
        int itemCount = query.treeSize();
        if (itemCount > maxQueryItems)
            result = CanonicalizationResult.error(String.format("Query tree exceeds allowed item count. Configured limit: %d - Item count: %d", maxQueryItems, itemCount));
        return result.error().orElse(null); // preserve old API, unfortunately
    }

    /**
     * Canonicalize this query
     * 
     * @param item the item to canonicalize
     * @param parentIterator iterator for the parent of this item, never null
     * @return result of canonicalization
     */
    private static CanonicalizationResult recursivelyCanonicalize(Item item, ListIterator<Item> parentIterator) {
        // children first as they may be removed
        if (item instanceof CompositeItem composite) {
            for (ListIterator<Item> i = composite.getItemIterator(); i.hasNext(); ) {
                CanonicalizationResult childResult = recursivelyCanonicalize(i.next(), i);
                if (childResult.isError()) return childResult;
            }
        }

        return canonicalizeThis(item, parentIterator);
    }
    
    private static CanonicalizationResult canonicalizeThis(Item item, ListIterator<Item> parentIterator) {
        if (item instanceof NullItem) parentIterator.remove();
        if ( ! (item instanceof CompositeItem composite)) return CanonicalizationResult.success();

        boolean replacedByFalse = collapseFalse(composite, parentIterator);
        if (replacedByFalse) return CanonicalizationResult.success();

        collapseLevels(composite);

        if (composite instanceof EquivItem) {
            removeDuplicates((EquivItem) composite);
        }
        if (composite.getItemCount() == 0)
            parentIterator.remove();

        composite.extractSingleChild().ifPresent(parentIterator::set);

        return CanonicalizationResult.success();
    }

    private static void collapseLevels(CompositeItem composite) {
        if (composite instanceof RankItem || composite instanceof NotItem) {
            collapseLevels(composite, composite.getItemIterator()); // collapse the first item only
        }
        else if (composite instanceof AndItem || composite instanceof OrItem || composite instanceof WeakAndItem) {
            for (ListIterator<Item> i = composite.getItemIterator(); i.hasNext(); )
                collapseLevels(composite, i);
        }
    }
    
    /** Collapse the next item of this iterator into the given parent, if appropriate */
    private static void collapseLevels(CompositeItem composite, ListIterator<Item> i) {
        if ( ! i.hasNext()) return;
        Item child = i.next();
        if (child == null) return;
        if (child.getClass() != composite.getClass()) return;
        if (child instanceof WeakAndItem && !equalWeakAndSettings((WeakAndItem)child, (WeakAndItem)composite)) return;
        i.remove();
        moveChildren((CompositeItem) child, i);
    }

    private static boolean equalWeakAndSettings(WeakAndItem a, WeakAndItem b) {
        if ( ! a.getIndexName().equals(b.getIndexName())) return false;
        if (a.getN() != b.getN()) return false;
        return true;
    }

    private static void moveChildren(CompositeItem from, ListIterator<Item> toIterator) {
        for (ListIterator<Item> i = from.getItemIterator(); i.hasNext(); )
            toIterator.add(i.next());
    }

    /** 
     * Handle FALSE items in the immediate children of this
     * 
     * @return true if this composite was replaced by FALSE
     */
    private static boolean collapseFalse(CompositeItem composite, ListIterator<Item> parentIterator) {
        if ( ! containsFalse(composite)) return false;

        if (composite instanceof AndItem) { // AND false is always false
            parentIterator.set(new FalseItem());
            return true;
        }
        else if (composite instanceof OrItem) { // OR false is unnecessary
            removeFalseIn(composite.getItemIterator());
            return false;
        }
        else if (composite instanceof NotItem || composite instanceof RankItem) { // collapse if first, remove otherwise
            ListIterator<Item> i = composite.getItemIterator();
            if (i.next() instanceof FalseItem) {
                parentIterator.set(new FalseItem());
                return true;
            }
            else {
                removeFalseIn(i);
                return false;
            }
        }
        else { // other composites not handled
            return false;
        }
    }
    
    private static boolean containsFalse(CompositeItem composite) {
        for (ListIterator<Item> i = composite.getItemIterator(); i.hasNext(); )
            if (i.next() instanceof FalseItem) return true;
        return false;
    }

    private static void removeFalseIn(ListIterator<Item> iterator) {
        while (iterator.hasNext())
            if (iterator.next() instanceof FalseItem)
                iterator.remove();
    }
    
    private static void removeDuplicates(EquivItem composite) {
        int origSize = composite.getItemCount();
        for (int i = origSize - 1; i >= 1; --i) {
            Item deleteCandidate = composite.getItem(i);
            for (int j = 0; j < i; ++j) {
                Item check = composite.getItem(j);
                if (deleteCandidate.getClass() == check.getClass()) {
                    if (deleteCandidate instanceof PhraseItem phraseDeletionCandidate) {
                        PhraseItem phraseToCheck = (PhraseItem) check;
                        if (phraseDeletionCandidate.getIndexedString().equals(phraseToCheck.getIndexedString())) {
                            composite.removeItem(i);
                            break;
                        }
                    } else if (deleteCandidate instanceof PhraseSegmentItem phraseSegmentDeletionCandidate) {
                        PhraseSegmentItem phraseSegmentToCheck = (PhraseSegmentItem) check;
                        if (phraseSegmentDeletionCandidate.getIndexedString().equals(phraseSegmentToCheck.getIndexedString())) {
                            composite.removeItem(i);
                            break;
                        }
                    } else if (deleteCandidate instanceof BlockItem blockDeletionCandidate) {
                        BlockItem blockToCheck = (BlockItem) check;
                        if (blockDeletionCandidate.stringValue().equals(blockToCheck.stringValue())) {
                            composite.removeItem(i);
                            break;
                        }
                    }
                }
            }
        }
    }

    public static class CanonicalizationResult {

        private final Optional<String> error;

        private CanonicalizationResult(Optional<String> error) {
            this.error = error;
        }
        
        /** Returns the error of this query, or empty if it is a valid query */
        public Optional<String> error() {
            return error;
        }
    
        public static CanonicalizationResult error(String error) {
            return new CanonicalizationResult(Optional.of(error));
        }

        public static CanonicalizationResult success() {
            return new CanonicalizationResult(Optional.empty());
        }
        
        public boolean isError() { return error.isPresent(); }

    }

}