aboutsummaryrefslogtreecommitdiffstats
path: root/config-model/src/test/java/com/yahoo/schema/processing/IndexingScriptRewriterTestCase.java
blob: 355a810f5ff69686a5bd1265d660ef69af435ab8 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.processing;

import com.yahoo.config.model.application.provider.BaseDeployLogger;
import com.yahoo.config.model.test.MockApplicationPackage;
import com.yahoo.document.DataType;
import com.yahoo.schema.Index;
import com.yahoo.schema.RankProfileRegistry;
import com.yahoo.schema.Schema;
import com.yahoo.schema.ApplicationBuilder;
import com.yahoo.schema.AbstractSchemaTestCase;
import com.yahoo.schema.document.BooleanIndexDefinition;
import com.yahoo.schema.document.MatchType;
import com.yahoo.schema.document.SDDocumentType;
import com.yahoo.schema.document.SDField;
import com.yahoo.vespa.documentmodel.SummaryField;
import com.yahoo.vespa.documentmodel.SummaryTransform;
import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression;
import com.yahoo.vespa.model.container.search.QueryProfiles;
import org.junit.jupiter.api.Test;

import java.util.List;
import java.util.OptionalDouble;
import java.util.OptionalInt;
import java.util.OptionalLong;
import java.util.Set;

import static com.yahoo.schema.processing.AssertIndexingScript.assertIndexing;
import static org.junit.jupiter.api.Assertions.assertEquals;

/**
 * Tests that compare the string form of a field's indexing script, as it looks after
 * schema processing has run, against an expected script.
 *
 * @author Simon Thoresen Hult
 */
public class IndexingScriptRewriterTestCase extends AbstractSchemaTestCase {

    @Test
    void testSetLanguageRewriting() {
        SDField field = createField("test", DataType.STRING, "{ set_language }");
        assertIndexingScript("{ input test | set_language; }", field);
    }

    @Test
    void testSummaryRewriting() {
        SDField field = createField("test", DataType.STRING, "{ summary }");
        assertIndexingScript("{ input test | summary test; }", field);
    }

    @Test
    void testDynamicSummaryRewriting() {
        SDField field = createField("test", DataType.STRING, "{ summary }");
        SummaryField dynamicSummary = createDynamicSummaryField(field, "dyn");
        field.addSummaryField(dynamicSummary);
        assertIndexingScript("{ input test | summary test; }", field);
    }

    @Test
    void testSummaryRewritingWithIndexing() {
        SDField field = createField("test", DataType.STRING, "{ summary | index }");
        assertIndexingScript("{ input test | tokenize normalize stem:\"BEST\" | summary test | index test; }",
                             field);
    }

    @Test
    void testDynamicAndStaticSummariesRewritingWithIndexing() {
        SDField field = createField("test", DataType.STRING, "{ summary | index }");
        // Mix dynamic and static summary fields, added in this exact order.
        field.addSummaryField(createDynamicSummaryField(field, "dyn"));
        field.addSummaryField(createStaticSummaryField(field, "test"));
        field.addSummaryField(createStaticSummaryField(field, "other"));
        field.addSummaryField(createDynamicSummaryField(field, "dyn2"));
        String expected = "{ input test | tokenize normalize stem:\"BEST\" | summary test | index test; }";
        assertIndexingScript(expected, field);
    }

    @Test
    void testIntSummaryRewriting() {
        SDField field = createField("test", DataType.INT, "{ summary | index }");
        assertIndexingScript("{ input test | summary test | attribute test; }", field);
    }

    @Test
    void testStringAttributeSummaryRewriting() {
        SDField field = createField("test", DataType.STRING, "{ summary | attribute }");
        assertIndexingScript("{ input test | summary test | attribute test; }", field);
    }

    @Test
    void testMultiblockTokenize() {
        // The script given to the field and the expected processed script are the same text.
        SDField field = createField("test", DataType.STRING, "{ input test | tokenize | { summary test; }; }");
        assertIndexingScript("{ input test | tokenize | { summary test; }; }", field);
    }

    @Test
    void requireThatOutputDefaultsToCurrentField() {
        SDField attributeField = createField("test", DataType.STRING, "{ attribute; }");
        assertIndexingScript("{ input test | attribute test; }", attributeField);

        SDField indexField = createField("test", DataType.STRING, "{ index; }");
        assertIndexingScript("{ input test | tokenize normalize stem:\"BEST\" | index test; }", indexField);

        SDField summaryField = createField("test", DataType.STRING, "{ summary; }");
        assertIndexingScript("{ input test | summary test; }", summaryField);
    }

    @Test
    void testTokenizeComparisonDisregardsConfig() {
        SDField field = createField("test", DataType.STRING, "{ summary | tokenize | index; }");
        assertIndexingScript("{ input test | tokenize normalize stem:\"BEST\" | summary test | index test; }",
                             field);
    }

    @Test
    void testDerivingFromSimple() throws Exception {
        List<String> expected = List.of(
                "clear_state | guard { input access | attribute access; }",
                "clear_state | guard { input category | split \";\" | attribute category_arr; }",
                "clear_state | guard { input category | tokenize | index category; }",
                "clear_state | guard { input categories_src | lowercase | normalize | tokenize normalize stem:\"BEST\" | index categories; }",
                "clear_state | guard { input categoriesagain_src | lowercase | normalize | tokenize normalize stem:\"BEST\" | index categoriesagain; }",
                "clear_state | guard { input chatter | tokenize normalize stem:\"BEST\" | index chatter; }",
                "clear_state | guard { input description | tokenize normalize stem:\"BEST\" | summary description | index description; }",
                "clear_state | guard { input exactemento_src | lowercase | tokenize normalize stem:\"BEST\" | index exactemento | summary exactemento; }",
                "clear_state | guard { input longdesc | summary longdesc; }",
                "clear_state | guard { input measurement | attribute measurement | summary measurement; }",
                "clear_state | guard { input measurement | to_array | attribute measurement_arr; }",
                "clear_state | guard { input popularity | attribute popularity; }",
                "clear_state | guard { input popularity * input measurement | attribute popsiness; }",
                "clear_state | guard { input smallattribute | attribute smallattribute; }",
                "clear_state | guard { input title | tokenize normalize stem:\"BEST\" | summary title | index title; }",
                "clear_state | guard { input title . \" \" . input category | tokenize | summary exact | index exact; }");
        assertIndexing(expected, ApplicationBuilder.buildFromFile("src/test/examples/simple.sd"));
    }

    @Test
    void testIndexRewrite() throws Exception {
        List<String> expected = List.of(
                "clear_state | guard { input title_src | lowercase | normalize | tokenize | index title; }",
                "clear_state | guard { input title_src | summary title_s; }");
        assertIndexing(expected, ApplicationBuilder.buildFromFile("src/test/examples/indexrewrite.sd"));
    }

    @Test
    void requireThatPredicateFieldsGetOptimization() {
        // Arity only, attribute output.
        SDField attributeOnly = createPredicateField(
                "test", DataType.PREDICATE, "{ attribute; }", 10, OptionalLong.empty(), OptionalLong.empty());
        assertIndexingScript("{ 10 | set_var arity | { input test | optimize_predicate | attribute test; }; }",
                             attributeOnly);

        // Arity only, summary and attribute output.
        SDField summaryAndAttribute = createPredicateField(
                "test", DataType.PREDICATE, "{ summary | attribute ; }", 10, OptionalLong.empty(), OptionalLong.empty());
        assertIndexingScript("{ 10 | set_var arity | { input test | optimize_predicate | summary test | attribute test; }; }",
                             summaryAndAttribute);

        // Arity together with explicit lower and upper bounds.
        SDField bounded = createPredicateField(
                "test", DataType.PREDICATE, "{ attribute; }", 2, OptionalLong.of(0L), OptionalLong.of(1023L));
        String expectedBounded = "{ 2 | set_var arity | 0L | set_var lower_bound | 1023L | set_var upper_bound | { input test | optimize_predicate | attribute test; }; }";
        assertIndexingScript(expectedBounded, bounded);
    }

    @Test
    void requireThatMaxTermOccurrencesIsPropagated() {
        SDField field = new SDField("test", DataType.STRING);
        var matching = field.getMatching();
        matching.maxTermOccurrences(10);
        field.parseIndexingScript("test", "{ summary | index }");
        String expected = "{ input test | tokenize normalize stem:\"BEST\" max-occurrences:10 | summary test | index test; }";
        assertIndexingScript(expected, field);
    }

    @Test
    void requireThatMaxTokenLengthIsPropagated() {
        SDField field = new SDField("test", DataType.STRING);
        var matching = field.getMatching();
        matching.maxTokenLength(10);
        field.parseIndexingScript("test", "{ summary | index }");
        String expected = "{ input test | tokenize normalize stem:\"BEST\" max-token-length:10 | summary test | index test; }";
        assertIndexingScript(expected, field);
    }

    @Test
    void requireThatMaxTokenLengthIsPropagatedForWordMatch() {
        SDField field = new SDField("test", DataType.STRING);
        var matching = field.getMatching();
        matching.maxTokenLength(10).setType(MatchType.WORD);
        field.parseIndexingScript("test", "{ summary | index }");
        String expected = "{ input test | exact max-token-length:10 | summary test | index test; }";
        assertIndexingScript(expected, field);
    }

    /**
     * Processes the given field and asserts that the string form of its resulting
     * indexing script equals the expected script.
     *
     * @param expectedScript   the expected string form of the indexing script
     * @param unprocessedField the field to run through schema processing
     */
    private static void assertIndexingScript(String expectedScript, SDField unprocessedField) {
        ScriptExpression actual = processField(unprocessedField);
        assertEquals(expectedScript, actual.toString());
    }

    /**
     * Adds the field to a new document type "test" in a new schema "test", runs
     * {@link Processing} on that schema, and returns the field's indexing script afterwards.
     *
     * @param unprocessedField the field to process
     * @return the indexing script read from the field after processing
     */
    private static ScriptExpression processField(SDField unprocessedField) {
        SDDocumentType documentType = new SDDocumentType("test");
        documentType.addField(unprocessedField);

        Schema schema = new Schema("test", MockApplicationPackage.createEmpty());
        schema.addDocument(documentType);

        Processing processing = new Processing();
        processing.process(schema, new BaseDeployLogger(), new RankProfileRegistry(),
                           new QueryProfiles(), true, false, Set.of());
        return unprocessedField.getIndexingScript();
    }

    /**
     * Creates a field and parses the given indexing script into it.
     *
     * @param name   the field name
     * @param type   the field data type
     * @param script the indexing script text to parse
     * @return the new field
     */
    private static SDField createField(String name, DataType type, String script) {
        SDField created = new SDField(null, name, type);
        created.parseIndexingScript("test", script);
        return created;
    }

    /**
     * Creates a field like {@link #createField} and adds an index named "foo" carrying a
     * boolean index definition built from the given arity and bounds.
     *
     * @param name       the field name
     * @param type       the field data type
     * @param script     the indexing script text to parse
     * @param arity      the arity of the boolean index definition
     * @param lowerBound the lower bound of the boolean index definition, possibly empty
     * @param upperBound the upper bound of the boolean index definition, possibly empty
     * @return the new field
     */
    private static SDField createPredicateField(
            String name, DataType type, String script, int arity, OptionalLong lowerBound, OptionalLong upperBound) {
        SDField created = createField(name, type, script);
        BooleanIndexDefinition definition = new BooleanIndexDefinition(
                OptionalInt.of(arity), lowerBound, upperBound, OptionalDouble.empty());
        Index index = new Index("foo");
        index.setBooleanIndexDefiniton(definition);
        created.addIndex(index);
        return created;
    }

    /** Creates a summary field with the {@code DYNAMICTEASER} transform set. */
    private static SummaryField createDynamicSummaryField(SDField field, String name) {
        return createSummaryField(field, name, true);
    }

    /** Creates a summary field without setting any transform. */
    private static SummaryField createStaticSummaryField(SDField field, String name) {
        return createSummaryField(field, name, false);
    }

    /**
     * Creates a summary field with the data type of the given field, destination "default",
     * and the given field's name as its source.
     *
     * @param field   the field supplying the data type and the source name
     * @param name    the name of the summary field
     * @param dynamic whether to set the {@code DYNAMICTEASER} transform
     * @return the new summary field
     */
    private static SummaryField createSummaryField(SDField field, String name, boolean dynamic) {
        SummaryField result = new SummaryField(name, field.getDataType());
        if (dynamic) {
            result.setTransform(SummaryTransform.DYNAMICTEASER);
        }
        result.addDestination("default");
        result.addSource(field.getName());
        return result;
    }

}