aboutsummaryrefslogtreecommitdiffstats
path: root/container-search/src/main/java/com/yahoo/search/schema/SchemaInfo.java
blob: 2d1d391640fc79a1ff8c4cf7c1d487cba76dffad (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.search.schema;

import com.yahoo.api.annotations.Beta;
import com.yahoo.container.QrSearchersConfig;
import com.yahoo.search.Query;
import com.yahoo.search.config.IndexInfoConfig;
import com.yahoo.search.config.SchemaInfoConfig;
import com.yahoo.tensor.TensorType;

import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Information about all the schemas configured in the application this container is a part of.
 *
 * Usage:
 * <code>
 *   SchemaInfo.Session session = schemaInfo.newSession(query); // once when starting to process a query
 *   session.get(...) // access information about the schema(s) relevant to the query
 * </code>
 *
 * This is immutable.
 *
 * @author bratseth
 */
// NOTES:
// This should replace IndexFacts, and probably DocumentDatabase.
// It replicates the schema resolution mechanism in IndexFacts, but does not yet contain any field information.
// To replace IndexFacts, this must accept IndexInfo and expose that information, as well as consolidation
// given a set of possible schemas: The session mechanism is present here to make that efficient when added
// (resolving schema subsets for every field lookup is too expensive).
@Beta
public class SchemaInfo {

    private static final SchemaInfo empty = new SchemaInfo(List.of(), Map.of());

    private final List<Schema> schemas;

    /** The schemas contained in each content cluster indexed by cluster name */
    private final Map<String, List<String>> clusters;

    public SchemaInfo(IndexInfoConfig indexInfo, // will be used in the future
                      SchemaInfoConfig schemaInfoConfig,
                      QrSearchersConfig qrSearchersConfig) {
        this(SchemaInfoConfigurer.toSchemas(schemaInfoConfig), SchemaInfoConfigurer.toClusters(qrSearchersConfig));
    }

    public SchemaInfo(List<Schema> schemas, Map<String, List<String>> clusters) {
        this.schemas = List.copyOf(schemas);
        this.clusters = Map.copyOf(clusters);
    }

    public Session newSession(Query query) {
        return new Session(query.getModel().getSources(), query.getModel().getRestrict(), clusters, schemas);
    }

    public static SchemaInfo empty() { return empty; }

    @Override
    public boolean equals(Object o) {
        if (o == this) return true;
        if ( ! (o instanceof SchemaInfo)) return false;
        SchemaInfo other = (SchemaInfo)o;
        if ( ! other.schemas.equals(this.schemas)) return false;
        if ( ! other.clusters.equals(this.clusters)) return false;
        return true;
    }

    @Override
    public int hashCode() { return Objects.hash(schemas, clusters); }

    /** The schema information resolved to be relevant to this session. */
    public static class Session {

        private final List<Schema> schemas;

        private Session(Set<String> sources,
                        Set<String> restrict,
                        Map<String, List<String>> clusters,
                        List<Schema> candidates) {
            this.schemas = resolveSchemas(sources, restrict, clusters, candidates);
        }

        /**
         * Given a search list which is a mixture of schemas and cluster
         * names, and a restrict list which is a list of schemas, return a
         * set of all valid schemas for this combination.
         *
         * @return the possibly empty list of schemas matching the arguments
         */
        private static List<Schema> resolveSchemas(Set<String> sources,
                                                   Set<String> restrict,
                                                   Map<String, List<String>> clusters,
                                                   List<Schema> candidates) {
            if (sources.isEmpty())
                return restrict.isEmpty() ? candidates : keep(restrict, candidates);

            Set<String> schemaNames = new HashSet<>();
            for (String source : sources) {
                if (clusters.containsKey(source)) // source is a cluster
                    schemaNames.addAll(clusters.get(source));
                else // source is a schema
                    schemaNames.add(source);
            }
            candidates = keep(schemaNames, candidates);
            return restrict.isEmpty() ? candidates : keep(restrict, candidates);
        }

        private static List<Schema> keep(Set<String> names, List<Schema> schemas) {
            return schemas.stream().filter(schema -> names.contains(schema.name())).collect(Collectors.toList());
        }

        /**
         * Returns the type of the given rank feature name in the given profile,
         * if it can be uniquely determined.
         *
         * @param rankFeature the rank feature name, a string on the form "query(name)"
         * @param rankProfile the name of the rank profile in which to locate the input declaration
         * @return the type of the declared input, or null if it is not declared or the rank profile is not found
         * @throws IllegalArgumentException if the feature is declared in this rank profile in multiple schemas
         *         of this session with conflicting types
         */
        public TensorType rankProfileInput(String rankFeature, String rankProfile) {
            TensorType foundType = null;
            Schema declaringSchema = null;
            for (Schema schema : schemas) {
                RankProfile profile = schema.rankProfiles().get(rankProfile);
                if (profile == null) continue;
                TensorType newlyFoundType = profile.inputs().get(rankFeature);
                if (newlyFoundType == null) continue;
                if (foundType != null && ! newlyFoundType.equals(foundType))
                    throw new IllegalArgumentException("Conflicting input type declarations for '" + rankFeature + "': " +
                                                       "Declared as " + foundType + " in " + profile + " in " + declaringSchema +
                                                       ", and as " + newlyFoundType + " in " + profile + " in " + schema);
                foundType = newlyFoundType;
                declaringSchema = schema;
            }
            return foundType;
        }

    }

}