summaryrefslogtreecommitdiffstats
path: root/model-integration/src/main/java/ai/vespa/rankingexpression/importer/ImportedModel.java
blob: 0c5866b87fa6ee24feeec4fdf623b384af5a9ccb (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package ai.vespa.rankingexpression.importer;

import ai.vespa.rankingexpression.importer.configmodelview.ImportedMlFunction;
import ai.vespa.rankingexpression.importer.configmodelview.ImportedMlModel;
import com.google.common.collect.ImmutableMap;
import com.yahoo.searchlib.rankingexpression.ExpressionFunction;
import com.yahoo.searchlib.rankingexpression.RankingExpression;
import com.yahoo.tensor.Tensor;
import com.yahoo.tensor.TensorType;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.regex.Pattern;

/**
 * The result of importing an ML model into Vespa.
 *
 * @author bratseth
 */
public class ImportedModel implements ImportedMlModel {

    private static final String defaultSignatureName = "default";

    private static final Pattern nameRegexp = Pattern.compile("[A-Za-z0-9_]*");
    private final String name;
    private final String source;

    private final Map<String, Signature> signatures = new HashMap<>();
    private final Map<String, TensorType> inputs = new HashMap<>();
    private final Map<String, Tensor> smallConstants = new HashMap<>();
    private final Map<String, Tensor> largeConstants = new HashMap<>();
    private final Map<String, RankingExpression> expressions = new HashMap<>();
    private final Map<String, RankingExpression> functions = new HashMap<>();

    /**
     * Creates a new imported model.
     *
     * @param name the name of this mode, containing only characters in [A-Za-z0-9_]
     * @param source the source path (directory or file) of this model
     */
    public ImportedModel(String name, String source) {
        if ( ! nameRegexp.matcher(name).matches())
            throw new IllegalArgumentException("An imported model name can only contain [A-Za-z0-9_], but is '" + name + "'");
        this.name = name;
        this.source = source;
    }

    /** Returns the name of this model, which can only contain the characters in [A-Za-z0-9_] */
    @Override
    public String name() { return name; }

    /** Returns the source path (directory or file) of this model */
    @Override
    public String source() { return source; }

    /** Returns an immutable map of the inputs of this */
    public Map<String, TensorType> inputs() { return Collections.unmodifiableMap(inputs); }

    @Override
    public Optional<String> inputTypeSpec(String input) {
        return Optional.ofNullable(inputs.get(input)).map(TensorType::toString);
    }

    /**
     * Returns an immutable map of the small constants of this, represented as strings on the standard tensor form.
     * These should have sizes up to a few kb at most, and correspond to constant values given in the source model.
     */
    @Override
    public Map<String, String> smallConstants() { return asTensorStrings(smallConstants); }

    boolean hasSmallConstant(String name) { return smallConstants.containsKey(name); }

    /**
     * Returns an immutable map of the large constants of this.
     * These can have sizes in gigabytes and must be distributed to nodes separately from configuration.
     * For TensorFlow this corresponds to Variable files stored separately.
     */
    @Override
    public Map<String, String> largeConstants() { return asTensorStrings(largeConstants); }

    boolean hasLargeConstant(String name) { return largeConstants.containsKey(name); }

    /**
     * Returns an immutable map of the expressions of this - corresponding to graph nodes
     * which are not Inputs/Placeholders or Variables (which instead become respectively inputs and constants).
     * Note that only nodes recursively referenced by a placeholder/input are added.
     */
    public Map<String, RankingExpression> expressions() { return Collections.unmodifiableMap(expressions); }

    // TODO: Most of the usage of the above can be replaced by a faster expressionNames method

    /**
     * Returns an immutable map of the functions that are part of this model.
     * Note that the functions themselves are *not* copies and *not* immutable - they must be copied before modification.
     */
    @Override
    public Map<String, String> functions() { return asExpressionStrings(functions); }

    /** Returns an immutable map of the signatures of this */
    public Map<String, Signature> signatures() { return Collections.unmodifiableMap(signatures); }

    /** Returns the given signature. If it does not already exist it is added to this. */
    public Signature signature(String name) {
        return signatures.computeIfAbsent(name, Signature::new);
    }

    /** Convenience method for returning a default signature */
    public Signature defaultSignature() { return signature(defaultSignatureName); }

    public void input(String name, TensorType argumentType) { inputs.put(name, argumentType); }
    public void smallConstant(String name, Tensor constant) { smallConstants.put(name, constant); }
    public void largeConstant(String name, Tensor constant) { largeConstants.put(name, constant); }
    public void expression(String name, RankingExpression expression) { expressions.put(name, expression); }
    public void function(String name, RankingExpression expression) { functions.put(name, expression); }

    /**
     * Returns all the output expressions of this indexed by name. The names consist of one or two parts
     * separated by dot, where the first part is the signature name
     * if signatures are used, or the expression name if signatures are not used and there are multiple
     * expressions, and the second is the output name if signature names are used.
     */
    @Override
    public List<ImportedMlFunction> outputExpressions() {
        List<ImportedMlFunction> functions = new ArrayList<>();
        for (Map.Entry<String, Signature> signatureEntry : signatures().entrySet()) {
            for (Map.Entry<String, String> outputEntry : signatureEntry.getValue().outputs().entrySet())
                functions.add(signatureEntry.getValue().outputFunction(outputEntry.getKey(),
                                                                       signatureEntry.getKey() + "." + outputEntry.getKey()));
            if (signatureEntry.getValue().outputs().isEmpty()) // fallback: Signature without outputs
                functions.add(new ImportedMlFunction(signatureEntry.getKey(),
                                                     new ArrayList<>(signatureEntry.getValue().inputs().values()),
                                                     expressions().get(signatureEntry.getKey()).getRoot().toString(),
                                                     asTensorTypeStrings(signatureEntry.getValue().inputMap()),
                                                     Optional.empty()));
        }
        if (signatures().isEmpty()) { // fallback for models without signatures
            if (expressions().size() == 1) {
                Map.Entry<String, RankingExpression> singleEntry = this.expressions.entrySet().iterator().next();
                functions.add(new ImportedMlFunction(singleEntry.getKey(),
                                                     new ArrayList<>(inputs.keySet()),
                                                     singleEntry.getValue().getRoot().toString(),
                                                     asTensorTypeStrings(inputs),
                                                     Optional.empty()));
            }
            else {
                for (Map.Entry<String, RankingExpression> expressionEntry : expressions().entrySet()) {
                    functions.add(new ImportedMlFunction(expressionEntry.getKey(),
                                                         new ArrayList<>(inputs.keySet()),
                                                         expressionEntry.getValue().getRoot().toString(),
                                                         asTensorTypeStrings(inputs),
                                                         Optional.empty()));
                }
            }
        }
        return functions;
    }

    private Map<String, String> asTensorStrings(Map<String, Tensor> map) {
        HashMap<String, String> values = new HashMap<>();
        for (Map.Entry<String, Tensor> entry : map.entrySet()) {
            Tensor tensor = entry.getValue();
            // TODO: See Tensor.toStandardString
            if (tensor.isEmpty() && ! tensor.type().dimensions().isEmpty())
                values.put(entry.getKey(), tensor.toString());
            else
                values.put(entry.getKey(), tensor.type() + ":" + tensor);
        }
        return values;
    }

    private static Map<String, String> asTensorTypeStrings(Map<String, TensorType> map) {
        Map<String, String> stringMap = new HashMap<>();
        for (Map.Entry<String, TensorType> entry : map.entrySet())
            stringMap.put(entry.getKey(), entry.getValue().toString());
        return stringMap;
    }

    private Map<String, String> asExpressionStrings(Map<String, RankingExpression> map) {
        HashMap<String, String> values = new HashMap<>();
        for (Map.Entry<String, RankingExpression> entry : map.entrySet())
            values.put(entry.getKey(), entry.getValue().getRoot().toString());
        return values;
    }

    /**
     * A signature is a set of named inputs and outputs, where the inputs maps to input
     * ("placeholder") names+types, and outputs maps to expressions nodes.
     * Note that TensorFlow supports multiple signatures in their format, but ONNX has no explicit
     * concept of signatures. For now, we handle ONNX models as having a single signature.
     */
    public class Signature {

        private final String name;
        private final Map<String, String> inputs = new LinkedHashMap<>();
        private final Map<String, String> outputs = new LinkedHashMap<>();
        private final Map<String, String> skippedOutputs = new HashMap<>();
        private final List<String> importWarnings = new ArrayList<>();

        Signature(String name) {
            this.name = name;
        }

        public String name() { return name; }

        /** Returns the result this is part of */
        ImportedModel owner() { return ImportedModel.this; }

        /**
         * Returns an immutable map of the inputs (evaluation context) of this. This is a map from input name
         * in this signature to input name in the owning model
         */
        public Map<String, String> inputs() { return Collections.unmodifiableMap(inputs); }

        /** Returns the name and type of all inputs in this signature as an immutable map */
        Map<String, TensorType> inputMap() {
            ImmutableMap.Builder<String, TensorType> inputs = new ImmutableMap.Builder<>();
            // Note: We're naming inputs by their actual name (used in the expression, given by what the input maps *to*
            // in the model, as these are the names which must actually be bound, if we are to avoid creating an
            // "input mapping" to accomodate this complexity in
            for (Map.Entry<String, String> inputEntry : inputs().entrySet())
                inputs.put(inputEntry.getValue(), owner().inputs().get(inputEntry.getValue()));
            return inputs.build();
        }

        /** Returns the type of the input this input references */
        public TensorType inputArgument(String inputName) { return owner().inputs().get(inputs.get(inputName)); }

        /** Returns an immutable list of the expression names of this */
        public Map<String, String> outputs() { return Collections.unmodifiableMap(outputs); }

        /**
         * Returns an immutable list of the outputs of this which could not be imported,
         * with a string detailing the reason for each
         */
        public Map<String, String> skippedOutputs() { return Collections.unmodifiableMap(skippedOutputs); }

        /**
         * Returns an immutable list of possibly non-fatal warnings encountered during import.
         */
        public List<String> importWarnings() { return Collections.unmodifiableList(importWarnings); }

        /** Returns the expression this output references */
        public ExpressionFunction outputExpression(String outputName) {
            return new ExpressionFunction(outputName,
                                          new ArrayList<>(inputs.values()),
                                          owner().expressions().get(outputs.get(outputName)),
                                          inputMap(),
                                          Optional.empty());
        }

        /** Returns the expression this output references as an imported function */
        public ImportedMlFunction outputFunction(String outputName, String functionName) {
            return new ImportedMlFunction(functionName,
                                          new ArrayList<>(inputs.values()),
                                          owner().expressions().get(outputs.get(outputName)).getRoot().toString(),
                                          asTensorTypeStrings(inputMap()),
                                          Optional.empty());
        }

        @Override
        public String toString() { return "signature '" + name + "'"; }

        void input(String inputName, String argumentName) { inputs.put(inputName, argumentName); }
        void output(String name, String expressionName) { outputs.put(name, expressionName); }
        void skippedOutput(String name, String reason) { skippedOutputs.put(name, reason); }
        void importWarning(String warning) { importWarnings.add(warning); }

    }

}