summaryrefslogtreecommitdiffstats
path: root/container-search/src/main/java/com/yahoo/prelude/IndexFacts.java
blob: c9a855c2f3487ecd653ca62309f2e94aacd605d1 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude;

import com.google.common.collect.ImmutableList;
import com.yahoo.search.Query;

import java.util.*;

import static com.yahoo.text.Lowercase.toLowerCase;

/**
 * A central repository for information about indices. Standard usage is
 *
 * <pre><code>
 * IndexFacts.Session session = indexFacts.newSession(query); // once when starting to process a query
 * session.getIndex(indexName).[get index info]
 * </code></pre>
 *
 * @author Steinar Knutsen
 */
// TODO: We should replace this with a better representation of search definitions
//       which is immutable, models clusters and search definitions inside clusters properly,
//       and uses better names. -bratseth
public class IndexFacts {

    /**
     * The inverse of the cluster map: the names of the clusters containing each document type,
     * indexed by document type name. This stays null unless master clusters are set.
     */
    private Map<String, List<String>> clusterByDocument;

    /**
     * A search definition found while scanning a list of document types, paired with
     * the position in that list from which a later scan should continue.
     */
    private static class DocumentTypeListOffset {

        /** The search definition which was found */
        public final SearchDefinition searchDefinition;

        /** The list position to continue scanning from */
        public final int offset;

        public DocumentTypeListOffset(int offset, SearchDefinition searchDefinition) {
            this.searchDefinition = searchDefinition;
            this.offset = offset;
        }

    }

    /**
     * A map of all known search definitions indexed by name.
     * Empty by default; replaced by the map of the index model when one is given at construction.
     */
    private Map<String, SearchDefinition> searchDefinitions = new LinkedHashMap<>();

    /**
     * A map of document types contained in each cluster indexed by cluster name.
     * Empty by default; replaced when master clusters are set.
     */
    private Map<String, List<String>> clusters = new LinkedHashMap<>();

    /**
     * The name of the default search definition, which is the union of all
     * known document types.
     */
    static final String unionName = "unionOfAllKnown";

    /** A search definition which contains the union of all settings. */
    private SearchDefinition unionSearchDefinition = new SearchDefinition(unionName);

    /** Whether freeze() has been called on this. */
    private boolean frozen;

    /** Whether this has (any) NGram indexes. Calculated at freeze time. */
    private boolean hasNGramIndices;

    /** Creates an instance which knows no search definitions and no clusters. */
    public IndexFacts() {}

    /**
     * Creates an instance which takes its content from the given index model.
     * Any part which is null in the model leaves the corresponding empty default of this in place.
     *
     * @param indexModel the model supplying the search definitions, the union search definition
     *                   and the mapping from clusters to document types
     */
    @SuppressWarnings({"deprecation"})
    public IndexFacts(IndexModel indexModel) {
        Map<String, SearchDefinition> modelDefinitions = indexModel.getSearchDefinitions();
        if (modelDefinitions != null) {
            // The union definition is only taken from the model together with the definitions it summarizes
            this.searchDefinitions = modelDefinitions;
            this.unionSearchDefinition = indexModel.getUnionSearchDefinition();
        }

        Map<String, List<String>> modelClusters = indexModel.getMasterClusters();
        if (modelClusters != null) {
            setMasterClusters(modelClusters);
        }
    }

    /**
     * Installs the mapping from clusters to document types, and derives the
     * inverse mapping from document types to clusters from it.
     *
     * @param clusters the document types of each cluster, indexed by cluster name
     */
    private void setMasterClusters(Map<String, List<String>> clusters) {
        // TODO: clusters should probably be a separate class
        this.clusters = clusters;
        this.clusterByDocument = invert(this.clusters);
    }

    /**
     * Inverts a map of lists: each value found in any list becomes a key of the result,
     * mapped to the list of keys it was listed under in the input.
     *
     * @param clusters document type names indexed by cluster name
     * @return a new map of cluster names indexed by document type name
     */
    private static Map<String, List<String>> invert(Map<String, List<String>> clusters) {
        Map<String, List<String>> clustersByDocumentType = new HashMap<>();
        clusters.forEach((cluster, documentTypes) ->
                documentTypes.forEach(documentType -> addEntry(clustersByDocumentType, documentType, cluster)));
        return clustersByDocumentType;
    }

    /**
     * Appends a value to the list stored under the given key,
     * creating and storing a new list first if the key has none.
     *
     * @param result the map of lists to add to
     * @param key the key whose list should receive the value
     * @param value the value to append
     */
    private static void addEntry(Map<String, List<String>> result, String key, String value) {
        result.computeIfAbsent(key, k -> new ArrayList<>()).add(value);
    }

    /**
     * Returns the names of the clusters containing the given search definition.
     * Assumes that document names are equal to the search definition that contain them.
     *
     * @param searchDefinitionName the name of the search definition to look up
     * @return the cluster names, or an empty list if no clusters are set
     *         or none of them contain the search definition
     */
    public List<String> clustersHavingSearchDefinition(String searchDefinitionName) {
        if (clusterByDocument != null) {
            List<String> owners = clusterByDocument.get(searchDefinitionName);
            if (owners != null) return owners;
        }
        return Collections.emptyList();
    }

    /** Returns whether this knows at least one search definition */
    private boolean isInitialized() {
        return ! searchDefinitions.isEmpty();
    }

    /**
     * Returns whether the given name is an index in any of the given document types.
     * While no search definitions are known every name is accepted, and with no
     * document types given the union search definition decides.
     *
     * @param indexName the index name candidate
     * @param documentTypes the document types to look for the index in
     * @return true if the name is considered an index
     */
    private boolean isIndexFromDocumentTypes(String indexName, List<String> documentTypes) {
        if ( ! isInitialized()) return true;
        if (documentTypes.isEmpty()) return unionSearchDefinition.getIndex(indexName) != null;

        // Visit each known search definition among the document types, in list order
        for (DocumentTypeListOffset found = chooseSearchDefinition(documentTypes, 0);
             found != null;
             found = chooseSearchDefinition(documentTypes, found.offset)) {
            if (found.searchDefinition.getIndex(indexName) != null) return true;
        }
        return false;
    }

    /**
     * Returns the name of the index found by a lowercased lookup of the given name,
     * or the given name unchanged if no such index is found or no search definitions are known.
     * With no document types given the union search definition is consulted; otherwise
     * the first known document type having a matching index decides.
     *
     * @param indexName the index name or alias to resolve
     * @param documentTypes the document types to look for the index in
     * @return the name of the matching index, or indexName if there is no match
     */
    private String getCanonicNameFromDocumentTypes(String indexName, List<String> documentTypes) {
        if ( ! isInitialized()) return indexName;

        if (documentTypes.isEmpty()) {
            Index unionIndex = unionSearchDefinition.getIndexByLowerCase(toLowerCase(indexName));
            if (unionIndex != null) return unionIndex.getName();
            return indexName;
        }

        for (DocumentTypeListOffset found = chooseSearchDefinition(documentTypes, 0);
             found != null;
             found = chooseSearchDefinition(documentTypes, found.offset)) {
            Index match = found.searchDefinition.getIndexByLowerCase(toLowerCase(indexName));
            if (match != null) return match.getName();
        }
        return indexName;
    }

    /**
     * Returns the index with the given name from the given document types,
     * looking up the name "default" when the given name is null or empty.
     *
     * @param indexName the name of the index, may be null or empty
     * @param documentTypes the document types to look for the index in
     * @return the index found, or the null index if there is none
     */
    private Index getIndexFromDocumentTypes(String indexName, List<String> documentTypes) {
        String effectiveName = (indexName == null || indexName.isEmpty()) ? "default" : indexName;
        return getIndexByCanonicNameFromDocumentTypes(effectiveName, documentTypes);
    }

    /**
     * Returns the index with exactly the given name. With no document types given
     * the union search definition is consulted; otherwise the first known document type
     * having the index decides.
     *
     * @param canonicName the name of the index to find
     * @param documentTypes the document types to look for the index in
     * @return the index found, or Index.nullIndex if there is none or no search definitions are known
     */
    private Index getIndexByCanonicNameFromDocumentTypes(String canonicName, List<String> documentTypes) {
        if ( ! isInitialized()) return Index.nullIndex;

        if (documentTypes.isEmpty()) {
            Index unionIndex = unionSearchDefinition.getIndex(canonicName);
            return unionIndex != null ? unionIndex : Index.nullIndex;
        }

        for (DocumentTypeListOffset found = chooseSearchDefinition(documentTypes, 0);
             found != null;
             found = chooseSearchDefinition(documentTypes, found.offset)) {
            Index match = found.searchDefinition.getIndex(canonicName);
            if (match != null) return match;
        }
        return Index.nullIndex;
    }

    /**
     * Returns all the indexes of the search definition with the given name.
     *
     * @param documentType the name of the search definition
     * @return its indexes, or an empty collection if the search definition is unknown
     */
    private Collection<Index> getIndexes(String documentType) {
        if (isInitialized()) {
            SearchDefinition definition = searchDefinitions.get(documentType);
            if (definition != null) return definition.indices().values();
        }
        return Collections.emptyList();
    }

    /**
     * Resolves the document types addressed by the sources and restrict settings of the
     * given query's model, using all known search definition names as candidates.
     *
     * @param query the query whose model supplies the sources and restrict lists
     * @return a (possibly empty) set of document type names
     */
    private Set<String> resolveDocumentTypes(Query query) {
        Collection<String> sources = query.getModel().getSources();
        Collection<String> restrict = query.getModel().getRestrict();
        // Assumption: Search definition name equals document name.
        return resolveDocumentTypes(sources, restrict, searchDefinitions.keySet());
    }

    /**
     * Given a search list which is a mixture of document types and cluster
     * names, and a restrict list which is a list of document types, returns the
     * set of document types this combination resolves to.
     * <ul>
     *   <li>With no sources, this is the restrict list if it is non-empty
     *       and otherwise the candidate set itself.</li>
     *   <li>Otherwise each source naming a cluster contributes the document types of that
     *       cluster, and any other source is taken as a document type. Only candidate document
     *       types are kept, and a non-empty restrict list narrows the result further.</li>
     * </ul>
     * Typical input is the incoming query's
     * {@link com.yahoo.search.query.Model#getSources()} and
     * {@link com.yahoo.search.query.Model#getRestrict()}.
     *
     * @param sources the search list for a query, may be null
     * @param restrict the restrict list for a query, may be null
     * @param candidateDocumentTypes the document types which may be included in the result
     * @return a (possibly empty) set of valid document types
     */
    private Set<String> resolveDocumentTypes(Collection<String> sources, Collection<String> restrict,
                                             Set<String> candidateDocumentTypes) {
        Collection<String> sourceList = emptyCollectionIfNull(sources);
        Collection<String> restrictList = emptyCollectionIfNull(restrict);

        if (sourceList.isEmpty())
            return restrictList.isEmpty() ? candidateDocumentTypes : new TreeSet<String>(restrictList);

        Set<String> resolved = new TreeSet<>();
        for (String source : sourceList) {
            List<String> clusterMembers = clusters.get(source);
            // A source which does not name a cluster is taken to be a document type itself
            List<String> named = clusterMembers != null ? clusterMembers : Collections.singletonList(source);
            for (String documentType : named) {
                if (candidateDocumentTypes.contains(documentType))
                    resolved.add(documentType);
            }
        }

        if ( ! restrictList.isEmpty())
            resolved.retainAll(restrictList);
        return resolved;
    }

    /** Returns the given collection, or an empty collection if it is null */
    private Collection<String> emptyCollectionIfNull(Collection<String> collection) {
        if (collection != null) return collection;
        return Collections.emptyList();
    }

    /**
     * Finds the first document type, at or after the given position in the list,
     * which names a known search definition.
     *
     * @param documentTypes the document type names to scan
     * @param index the list position to start scanning from
     * @return the search definition found together with the position following it,
     *         or null if no remaining document type names a known search definition
     */
    private DocumentTypeListOffset chooseSearchDefinition(List<String> documentTypes, int index) {
        for (int position = index; position < documentTypes.size(); position++) {
            SearchDefinition candidate = searchDefinitions.get(documentTypes.get(position));
            if (candidate != null)
                return new DocumentTypeListOffset(position + 1, candidate);
        }
        return null;
    }

    /**
     * Freeze this to prevent further changes.
     * This caches whether any index is an NGram index and marks this as frozen.
     * Note that the content itself is not currently made unmodifiable (see the TODO below).
     *
     * @return this for chaining
     */
    public IndexFacts freeze() {
        // Must run before the frozen flag is set: hasNGramIndices() returns the cached field once frozen
        hasNGramIndices = hasNGramIndices();
        // TODO: Freeze content!
        frozen = true;
        return this;
    }

    /**
     * Whether this contains any index which has isNGram()==true.
     * This is free to ask on a frozen instance, as the answer is then read from a
     * value computed at freeze time; otherwise all indexes of all search definitions are scanned.
     */
    public boolean hasNGramIndices() {
        if (frozen) return hasNGramIndices;

        for (SearchDefinition definition : searchDefinitions.values()) {
            for (Index index : definition.indices().values()) {
                if (index.isNGram()) return true;
            }
        }
        return false;
    }

    /**
     * Returns whether this is frozen, i.e. whether freeze() has been called on it.
     * Note that true means this should <i>not</i> be updated.
     */
    public boolean isFrozen() {
        return frozen;
    }

    /**
     * Guard for mutating operations.
     *
     * @throws IllegalStateException if this is frozen
     */
    private void ensureNotFrozen() {
        if ( ! frozen) return;
        throw new IllegalStateException("Tried to modify frozen IndexFacts instance.");
    }

    /**
     * Returns the default position of a search definition.
     *
     * @param sdName the name of the search definition, or null to ask the union search definition
     * @return the default position of the chosen search definition,
     *         or null if no search definition has the given name
     */
    public String getDefaultPosition(String sdName) {
        if (sdName == null) return unionSearchDefinition.getDefaultPosition();
        if ( ! searchDefinitions.containsKey(sdName)) return null;
        return searchDefinitions.get(sdName).getDefaultPosition();
    }

    /**
     * Creates a session for looking up index facts in the context of the given query.
     *
     * @param query the query whose model sources and restrict settings decide the document types of the session
     * @return a new session
     */
    public Session newSession(Query query) {
        return new Session(query);
    }

    /**
     * Creates a session from explicit sources and restrict lists,
     * using all known search definition names as candidate document types.
     *
     * @param sources the search list: a mixture of document type and cluster names
     * @param restrict the restrict list: document type names
     * @return a new session
     */
    public Session newSession(Collection<String> sources, Collection<String> restrict) {
        return new Session(sources, restrict);
    }

    /**
     * Creates a session from explicit sources and restrict lists,
     * considering only the given candidate document types when resolving sources.
     *
     * @param sources the search list: a mixture of document type and cluster names
     * @param restrict the restrict list: document type names
     * @param candidateDocumentTypes the document types which may be resolved to
     * @return a new session
     */
    public Session newSession(Collection<String> sources,
                              Collection<String> restrict,
                              Set<String> candidateDocumentTypes) {
        return new Session(sources, restrict, candidateDocumentTypes);
    }

    /**
     * Create an instance of this to look up index facts with a given query.
     * Note that if the model.source or model.restrict parameters of the query
     * are changed, another session should be created. This is immutable.
     */
    public class Session {

        /** The document types this session has resolved to, as an unmodifiable list */
        private final List<String> documentTypes;

        // All constructors snapshot the resolved set with List.copyOf, such that the
        // session is unaffected by later changes to the set it was resolved from.

        private Session(Query query) {
            documentTypes = List.copyOf(resolveDocumentTypes(query));
        }

        private Session(Collection<String> sources, Collection<String> restrict) {
            // Assumption: Search definition name equals document name.
            documentTypes = List.copyOf(resolveDocumentTypes(sources, restrict, searchDefinitions.keySet()));
        }

        private Session(Collection<String> sources, Collection<String> restrict, Set<String> candidateDocumentTypes) {
            documentTypes = List.copyOf(resolveDocumentTypes(sources, restrict, candidateDocumentTypes));
        }

        /**
         * Returns the index for this name.
         *
         * @param indexName the name of the index. If this is null or empty the index named "default" is returned
         * @return the index best matching the input parameters or the null Index (never null) if none is found
         */
        public Index getIndex(String indexName) {
            return IndexFacts.this.getIndexFromDocumentTypes(indexName, documentTypes);
        }

        /**
         * Returns the index with the given name from the given search definition.
         *
         * @param indexName the name of the index. If this is null or empty the index named "default" is returned
         * @param documentType the name of the search definition to look in
         * @return the index found, or the null Index (never null) if none is found
         */
        // Note: This does not take the context into account currently.
        // Ideally, we should be able to resolve the right search definition name
        // in the context of the searched clusters, but this cannot be modelled
        // currently by the flat structure in IndexFacts.
        // That can be fixed without changing this API.
        public Index getIndex(String indexName, String documentType) {
            return IndexFacts.this.getIndexFromDocumentTypes(indexName, Collections.singletonList(documentType));
        }

        /**
         * Returns all the indexes of a given search definition.
         *
         * @param documentType the name of the search definition
         * @return its indexes, or an empty collection if the search definition is unknown
         */
        public Collection<Index> getIndexes(String documentType) {
            return IndexFacts.this.getIndexes(documentType);
        }

        /**
         * Returns the canonical form of the index name (which may be the same as
         * the input).
         *
         * @param indexName index name or alias
         * @return the canonical name, or the input if no matching index is found
         */
        public String getCanonicName(String indexName) {
            return IndexFacts.this.getCanonicNameFromDocumentTypes(indexName, documentTypes);
        }

        /**
         * Returns whether the given name is an index.
         *
         * @param indexName index name candidate
         * @return true if the name is an index in the document types of this session
         */
        public boolean isIndex(String indexName) {
            return IndexFacts.this.isIndexFromDocumentTypes(indexName, documentTypes);
        }

        /** Returns an immutable list of the document types this has resolved to */
        public List<String> documentTypes() { return documentTypes; }

        @Override
        public String toString() {
            return "index facts for search definitions " + documentTypes;
        }

    }

}