aboutsummaryrefslogtreecommitdiffstats
path: root/config-model/src/main/java/com/yahoo/schema/processing/PredicateProcessor.java
blob: 26107ad2dca6f2862aeb86230909a38cce8ab6ed (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.processing;

import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.schema.RankProfileRegistry;
import com.yahoo.document.DataType;
import com.yahoo.document.datatypes.IntegerFieldValue;
import com.yahoo.document.datatypes.LongFieldValue;
import com.yahoo.schema.Index;
import com.yahoo.schema.Schema;
import com.yahoo.schema.document.Attribute;
import com.yahoo.schema.document.BooleanIndexDefinition;
import com.yahoo.schema.document.SDField;
import com.yahoo.vespa.documentmodel.DocumentSummary;
import com.yahoo.vespa.documentmodel.SummaryField;
import com.yahoo.vespa.documentmodel.SummaryTransform;
import com.yahoo.vespa.indexinglanguage.ExpressionConverter;
import com.yahoo.vespa.indexinglanguage.expressions.ConstantExpression;
import com.yahoo.vespa.indexinglanguage.expressions.Expression;
import com.yahoo.vespa.indexinglanguage.expressions.OptimizePredicateExpression;
import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression;
import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression;
import com.yahoo.vespa.indexinglanguage.expressions.SetVarExpression;
import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression;
import com.yahoo.vespa.model.container.search.QueryProfiles;

import static com.yahoo.prelude.fastsearch.VespaBackend.SORTABLE_ATTRIBUTES_SUMMARY_CLASS;

import java.util.ArrayList;
import java.util.List;

/**
 * Validates predicate fields and transfers their index settings to the backing attribute.
 *
 * <p>For every concrete field in the schema, {@link #process(boolean, boolean)} does one of the following:</p>
 * <ul>
 *   <li>Field of type predicate: copies arity, lower/upper bound and dense-posting-list-threshold from each
 *       boolean index definition to the attribute named after the field, prepends statements setting the
 *       predicate variables to the field's indexing script, removes the attribute from the
 *       {@code SORTABLE_ATTRIBUTES_SUMMARY_CLASS} summary (if present) and sets the transform of the field's
 *       summary fields to {@link SummaryTransform#NONE}.</li>
 *   <li>Any other field (only when validating): rejects collections of predicates, indexing of raw fields,
 *       and predicate-only index parameters.</li>
 * </ul>
 *
 * <p>NOTE(review): the comments below assume the inherited {@code fail(...)} aborts processing by throwing;
 * confirm against {@code Processor}.</p>
 *
 * @author Lester Solbakken
 */
public class PredicateProcessor extends Processor {

    public PredicateProcessor(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) {
        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
    }

    /**
     * Processes all concrete fields of the schema.
     *
     * @param validate      whether invalid configuration should be reported through {@code fail(...)}
     * @param documentsOnly not used by this processor
     */
    @Override
    public void process(boolean validate, boolean documentsOnly) {
        for (SDField field : schema.allConcreteFields()) {
            if (field.getDataType() == DataType.PREDICATE) {
                processPredicateField(field, validate);
            } else if (validate && field.getDataType().getPrimitiveType() == DataType.PREDICATE) {
                fail(schema, field, "Collections of predicates are not allowed.");
            } else if (validate && field.getDataType() == DataType.RAW && field.doesIndexing()) {
                fail(schema, field, "Indexing of RAW fields is not supported.");
            } else if (validate) {
                // if field is not a predicate, disallow predicate-related index parameters
                rejectPredicateIndexParameters(field);
            }
        }
    }

    /** Validates a predicate typed field and configures its attribute, indexing script and summary fields. */
    private void processPredicateField(SDField field, boolean validate) {
        if (validate && field.doesIndexing()) {
            fail(schema, field, "Use 'attribute' instead of 'index'. This will require a refeed if you have upgraded.");
        }
        if ( ! field.doesAttributing()) return;

        Attribute attribute = field.getAttributes().get(field.getName());
        for (Index index : field.getIndices().values()) {
            BooleanIndexDefinition booleanDefinition = index.getBooleanIndexDefiniton();
            if (booleanDefinition == null) {
                // An index without a boolean index definition carries no predicate settings.
                // Report it when validating; otherwise skip it, since every statement below
                // dereferences the definition and would throw a NullPointerException.
                if (validate) {
                    fail(schema, field, "Missing arity value in predicate field.");
                }
                continue;
            }
            if (validate && ! booleanDefinition.hasArity()) {
                fail(schema, field, "Missing arity value in predicate field.");
            }
            if (validate && (booleanDefinition.getArity() < 2)) {
                fail(schema, field, "Invalid arity value in predicate field, must be greater than 1.");
            }
            double threshold = booleanDefinition.getDensePostingListThreshold();
            if (validate && (threshold <= 0 || threshold > 1)) {
                fail(schema, field, "Invalid dense-posting-list-threshold value in predicate field. " +
                                    "Value must be in range (0..1].");
            }

            attribute.setArity(booleanDefinition.getArity());
            attribute.setLowerBound(booleanDefinition.getLowerBound());
            attribute.setUpperBound(booleanDefinition.getUpperBound());

            attribute.setDensePostingListThreshold(threshold);
            addPredicateOptimizationIlScript(field, booleanDefinition);
        }

        // Drop the predicate attribute from the sortable-attributes summary class, if that class exists
        DocumentSummary summary = schema.getSummariesInThis().get(SORTABLE_ATTRIBUTES_SUMMARY_CLASS);
        if (summary != null) {
            summary.remove(attribute.getName());
        }
        for (SummaryField summaryField : schema.getSummaryFields(field)) {
            summaryField.setTransform(SummaryTransform.NONE);
        }
    }

    /** Fails if any index of the given non-predicate field sets a parameter that only applies to predicate fields. */
    private void rejectPredicateIndexParameters(SDField field) {
        for (Index index : field.getIndices().values()) {
            BooleanIndexDefinition def = index.getBooleanIndexDefiniton();
            if (def == null) continue;

            if (def.hasArity()) {
                fail(schema, field, "Arity parameter is used only for predicate type fields.");
            } else if (def.hasLowerBound() || def.hasUpperBound()) {
                fail(schema, field, "Parameters lower-bound and upper-bound are used only for predicate type fields.");
            } else if (def.hasDensePostingListThreshold()) {
                fail(schema, field, "Parameter dense-posting-list-threshold is used only for predicate type fields.");
            }
        }
    }

    /**
     * Prepends statements setting the predicate variables to the field's indexing script and runs the
     * result through a {@link PredicateOutputTransformer}. Does nothing if the field has no indexing script.
     */
    private void addPredicateOptimizationIlScript(SDField field, BooleanIndexDefinition booleanIndexDefinition) {
        Expression script = field.getIndexingScript();
        if (script == null) return;

        script = new StatementExpression(makeSetPredicateVariablesScript(booleanIndexDefinition), script);

        ExpressionConverter converter = new PredicateOutputTransformer(schema);
        field.setIndexingScript(new ScriptExpression((StatementExpression)converter.convert(script)));
    }

    /**
     * Builds a statement which sets the variable "arity", and "lower_bound"/"upper_bound" when the
     * given definition has them, to the values of the definition.
     */
    private Expression makeSetPredicateVariablesScript(BooleanIndexDefinition options) {
        List<Expression> expressions = new ArrayList<>();
        expressions.add(new ConstantExpression(new IntegerFieldValue(options.getArity())));
        expressions.add(new SetVarExpression("arity"));
        if (options.hasLowerBound()) {
            expressions.add(new ConstantExpression(new LongFieldValue(options.getLowerBound())));
            expressions.add(new SetVarExpression("lower_bound"));
        }
        if (options.hasUpperBound()) {
            expressions.add(new ConstantExpression(new LongFieldValue(options.getUpperBound())));
            expressions.add(new SetVarExpression("upper_bound"));
        }
        return new StatementExpression(expressions);
    }

    /**
     * Transform provider which requests an {@link OptimizePredicateExpression} for output expressions
     * on predicate fields. How the transform is inserted is decided by {@code TypedTransformProvider}.
     */
    private static class PredicateOutputTransformer extends TypedTransformProvider {

        PredicateOutputTransformer(Schema schema) {
            super(OptimizePredicateExpression.class, schema);
        }

        @Override
        protected boolean requiresTransform(Expression exp, DataType fieldType) {
            return exp instanceof OutputExpression && fieldType == DataType.PREDICATE;
        }

        @Override
        protected Expression newTransform(DataType fieldType) {
            return new OptimizePredicateExpression();
        }

    }

}