aboutsummaryrefslogtreecommitdiffstats
path: root/config-model/src/main/java/com/yahoo/schema/processing/IndexingOutputs.java
blob: be129b53c5ecbf9524820107e31edf92bef053c6 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.processing;

import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.schema.RankProfileRegistry;
import com.yahoo.document.DataType;
import com.yahoo.document.Field;
import com.yahoo.schema.Schema;
import com.yahoo.schema.document.SDField;
import com.yahoo.vespa.documentmodel.SummaryField;
import com.yahoo.vespa.documentmodel.SummaryTransform;
import com.yahoo.vespa.indexinglanguage.ExpressionConverter;
import com.yahoo.vespa.indexinglanguage.expressions.*;
import com.yahoo.vespa.model.container.search.QueryProfiles;

import java.util.*;

/**
 * Schema processor that rewrites the indexing script of every concrete field so that output expressions
 * without an explicit target write to the field owning the script. When validation is enabled, an output
 * expression that names any field other than the owner is reported as an error. A target-less
 * <code>SummaryExpression</code> is expanded into one summary output per summary field collected for the
 * owning field.
 *
 * @author Simon Thoresen Hult
 */
public class IndexingOutputs extends Processor {

    public IndexingOutputs(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) {
        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
    }

    /**
     * Replaces the indexing script of each concrete field that has one with a converted copy.
     * Fields without an indexing script are left untouched.
     *
     * @param validate      whether output expressions naming a foreign field should be reported
     * @param documentsOnly not consulted by this processor
     */
    @Override
    public void process(boolean validate, boolean documentsOnly) {
        for (SDField field : schema.allConcreteFields()) {
            ScriptExpression original = field.getIndexingScript();
            if (original != null) {
                // The same sorted set is handed in for both the dynamic and the static targets,
                // so the converter sees a single merged collection of summary field names.
                Set<String> targets = new TreeSet<>();
                findSummaryTo(schema, field, targets, targets);
                var converter = new MyConverter(schema, field, targets, validate);
                field.setIndexingScript(schema.getName(), (ScriptExpression) converter.convert(original));
            }
        }
    }

    /**
     * Collects the names of the summary fields that the given field should write to.
     * The summary fields the schema reports for the field are used when there are any;
     * otherwise the summary fields declared on the field itself are used.
     *
     * @param schema         the schema to look up summary fields in
     * @param field          the field whose summary targets are collected
     * @param dynamicSummary receives the names of summary fields with a dynamic transform
     * @param staticSummary  receives the names of the remaining accepted summary fields
     */
    public void findSummaryTo(Schema schema, SDField field, Set<String> dynamicSummary, Set<String> staticSummary) {
        List<SummaryField> fromSchema = schema.getSummaryFields(field);
        if (fromSchema.isEmpty()) {
            fillSummaryToFromField(field, dynamicSummary, staticSummary);
            return;
        }
        for (SummaryField candidate : fromSchema) {
            fillSummaryToFromSummaryField(schema, field, candidate, dynamicSummary, staticSummary);
        }
    }

    /**
     * Files a single schema-level summary field under the dynamic or the static targets, or skips it.
     * Non-dynamic fields are skipped when their transform is ATTRIBUTE, TOKENS or ATTRIBUTE_TOKENS.
     * Dynamic fields are skipped when they have more than two sources, or when the source field's
     * data type does not populate the summary field; in the latter case a warning is emitted if the
     * type is not a supported one.
     */
    private void fillSummaryToFromSummaryField(Schema schema, SDField field, SummaryField summaryField,
                                               Set<String> dynamicSummary, Set<String> staticSummary) {
        SummaryTransform transform = summaryField.getTransform();
        String targetName = summaryField.getName();

        if (!transform.isDynamic()) {
            boolean excludedTransform = transform == SummaryTransform.ATTRIBUTE
                                        || transform == SummaryTransform.TOKENS
                                        || transform == SummaryTransform.ATTRIBUTE_TOKENS;
            if (!excludedTransform) {
                staticSummary.add(targetName);
            }
            return;
        }

        // Summary fields with several input fields are not written to here, as that is handled by the
        // summary rewriter in the search core.
        // NOTE(review): the threshold is "> 2" although the rationale speaks of more than a single
        // input field - confirm which one is intended.
        if (summaryField.getSourceCount() > 2) {
            return;
        }

        DataType fieldType = field.getDataType();
        if (DynamicSummaryTransformUtils.summaryFieldIsPopulatedBySourceField(fieldType)) {
            dynamicSummary.add(targetName);
            return;
        }
        if (!DynamicSummaryTransformUtils.isSupportedType(fieldType)) {
            warn(schema, field, "Dynamic summaries are only supported for fields of type " +
                    "string and array<string>, ignoring summary field '" + summaryField.getName() +
                    "' for sd field '" + field.getName() + "' of type " +
                    fieldType.getName() + ".");
        }
    }

    /**
     * Files every summary field declared directly on the field under the dynamic targets when its
     * transform is dynamic, and under the static targets otherwise.
     */
    private static void fillSummaryToFromField(SDField field, Set<String> dynamicSummary, Set<String> staticSummary) {
        for (SummaryField declared : field.getSummaryFields().values()) {
            String targetName = declared.getName();
            Set<String> bucket = declared.getTransform().isDynamic() ? dynamicSummary : staticSummary;
            bucket.add(targetName);
        }
    }

    /**
     * Expression converter that fills in the owning field's name on output expressions that lack a target.
     */
    private class MyConverter extends ExpressionConverter {

        final Schema schema;
        final Field field;
        final Set<String> summaryFields;
        final boolean validate;

        MyConverter(Schema schema, Field field, Set<String> summaryFields, boolean validate) {
            this.schema = schema;
            this.field = field;
            // With no collected summary targets, the owning field itself is the only summary target.
            if (summaryFields.isEmpty()) {
                this.summaryFields = Collections.singleton(field.getName());
            } else {
                this.summaryFields = summaryFields;
            }
            this.validate = validate;
        }

        /**
         * Selects output expressions that have no field name for conversion. Output expressions that
         * name a field are never converted; when validating, one that names a field other than the
         * owner is reported through <code>fail</code>.
         */
        @Override
        protected boolean shouldConvert(Expression exp) {
            if (exp instanceof OutputExpression) {
                String target = ((OutputExpression) exp).getFieldName();
                if (target == null) {
                    return true; // the owning field's name gets injected by doConvert
                }
                if (validate && !target.equals(field.getName())) {
                    fail(schema, field, "Indexing expression '" + exp + "' attempts to write to a field other than '" +
                                        field.getName() + "'.");
                }
            }
            return false;
        }

        /**
         * Builds the replacement for a target-less output expression: attribute and index outputs are
         * bound to the owning field, while a summary output is expanded to all summary targets.
         *
         * @throws UnsupportedOperationException if the expression is not an attribute, index or summary output
         */
        @Override
        protected Expression doConvert(Expression exp) {
            List<Expression> outputs = new ArrayList<>();
            if (exp instanceof AttributeExpression) {
                outputs.add(new AttributeExpression(field.getName()));
            } else if (exp instanceof IndexExpression) {
                outputs.add(new IndexExpression(field.getName()));
            } else if (exp instanceof SummaryExpression) {
                summaryFields.forEach(target -> outputs.add(new SummaryExpression(target)));
                /*
                 * Also write to the summary field source. The AddExtraFieldsToDocument processor adds
                 * the "copy" summary transform to summary fields without a corresponding explicitly
                 * declared document field (2023-11-01). Future vespa versions will stop adding
                 * document fields for those summary fields.
                 */
                if (!summaryFields.contains(field.getName())) {
                    outputs.add(new SummaryExpression(field.getName()));
                }
            } else {
                throw new UnsupportedOperationException(exp.getClass().getName());
            }
            return new StatementExpression(outputs);
        }

    }

}