summary | refs | log | tree | commit | diff | stats
path: root/config-model
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2024-01-13 01:02:48 +0100
committer GitHub <noreply@github.com> 2024-01-13 01:02:48 +0100
commit a393f0a47e0317bf58f82a1a5158bd3e75499904 (patch)
tree 888084f76308f40227fd83b94ec8727a463670c1 /config-model
parent a0806fb2ab42a8b774298cd993b50d72e3cab345 (diff)
parent 77d69db1e2c28da264997e8b7271a1b0c0a2afb7 (diff)
Merge pull request #29886 from vespa-engine/revert-29885-revert-29883-balder/drop-tokenize-for-streaming
Revert "Revert "Drop tokenize expressions from ilscript for streaming mode.""
Diffstat (limited to 'config-model')
-rw-r--r-- config-model/src/main/java/com/yahoo/schema/Schema.java 1
-rw-r--r-- config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java 2
-rw-r--r-- config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java 36
-rw-r--r-- config-model/src/main/java/com/yahoo/schema/processing/TypedTransformProvider.java 6
-rw-r--r-- config-model/src/test/java/com/yahoo/schema/AttributeSettingsTestCase.java 2
-rw-r--r-- config-model/src/test/java/com/yahoo/schema/derived/IndexInfoTestCase.java 2
-rw-r--r-- config-model/src/test/java/com/yahoo/schema/derived/IndexingScriptTestCase.java 43
-rw-r--r-- config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java 90
-rw-r--r-- config-model/src/test/java/com/yahoo/schema/processing/AssertIndexingScript.java 2
-rw-r--r-- config-model/src/test/java/com/yahoo/vespa/model/content/StorageContentTest.java 3
10 files changed, 127 insertions, 60 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/Schema.java b/config-model/src/main/java/com/yahoo/schema/Schema.java
index 279b5729ea1..3402ba31be9 100644
--- a/config-model/src/main/java/com/yahoo/schema/Schema.java
+++ b/config-model/src/main/java/com/yahoo/schema/Schema.java
@@ -18,7 +18,6 @@ import com.yahoo.schema.document.SDDocumentType;
import com.yahoo.schema.document.SDField;
import com.yahoo.schema.document.Stemming;
import com.yahoo.schema.document.TemporaryImportedFields;
-import com.yahoo.schema.document.annotation.SDAnnotationType;
import com.yahoo.searchlib.rankingexpression.Reference;
import com.yahoo.vespa.documentmodel.DocumentSummary;
import com.yahoo.vespa.documentmodel.SummaryField;
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java b/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java
index 7f874d04f13..575b7264628 100644
--- a/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java
+++ b/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java
@@ -80,7 +80,7 @@ public class DerivedConfiguration implements AttributesConfig.Producer {
summaries = new Summaries(schema, deployState.getDeployLogger(), deployState.getProperties().featureFlags());
juniperrc = new Juniperrc(schema);
rankProfileList = new RankProfileList(schema, schema.rankExpressionFiles(), attributeFields, deployState);
- indexingScript = new IndexingScript(schema);
+ indexingScript = new IndexingScript(schema, isStreaming);
indexInfo = new IndexInfo(schema, isStreaming);
schemaInfo = new SchemaInfo(schema, deployState.rankProfileRegistry(), summaries);
indexSchema = new IndexSchema(schema);
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java b/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java
index 73f3507ab00..375a0fa3700 100644
--- a/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java
+++ b/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java
@@ -6,9 +6,11 @@ import com.yahoo.schema.document.GeoPos;
import com.yahoo.schema.document.ImmutableSDField;
import com.yahoo.vespa.configdefinition.IlscriptsConfig;
import com.yahoo.vespa.configdefinition.IlscriptsConfig.Ilscript.Builder;
+import com.yahoo.vespa.indexinglanguage.ExpressionConverter;
import com.yahoo.vespa.indexinglanguage.ExpressionVisitor;
import com.yahoo.vespa.indexinglanguage.expressions.ClearStateExpression;
import com.yahoo.vespa.indexinglanguage.expressions.Expression;
+import com.yahoo.vespa.indexinglanguage.expressions.ForEachExpression;
import com.yahoo.vespa.indexinglanguage.expressions.GuardExpression;
import com.yahoo.vespa.indexinglanguage.expressions.InputExpression;
import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression;
@@ -16,6 +18,7 @@ import com.yahoo.vespa.indexinglanguage.expressions.PassthroughExpression;
import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression;
import com.yahoo.vespa.indexinglanguage.expressions.SetLanguageExpression;
import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression;
+import com.yahoo.vespa.indexinglanguage.expressions.TokenizeExpression;
import com.yahoo.vespa.indexinglanguage.expressions.ZCurveExpression;
import java.util.ArrayList;
@@ -36,8 +39,10 @@ public final class IndexingScript extends Derived implements IlscriptsConfig.Pro
private final List<String> docFields = new ArrayList<>();
private final List<Expression> expressions = new ArrayList<>();
private List<ImmutableSDField> fieldsSettingLanguage;
+ private final boolean isStreaming;
- public IndexingScript(Schema schema) {
+ public IndexingScript(Schema schema, boolean isStreaming) {
+ this.isStreaming = isStreaming;
derive(schema);
}
@@ -96,21 +101,40 @@ public final class IndexingScript extends Derived implements IlscriptsConfig.Pro
configBuilder.ilscript(ilscriptBuilder);
}
+ private static class DropTokenize extends ExpressionConverter {
+ @Override
+ protected boolean shouldConvert(Expression exp) {
+ // Handle both string and array<string>
+ return (exp instanceof TokenizeExpression) ||
+ ((exp instanceof ForEachExpression foreach) && (foreach.getInnerExpression() instanceof TokenizeExpression));
+ }
+
+ @Override
+ protected Expression doConvert(Expression exp) {
+ return null;
+ }
+ }
+
private void addContentInOrder(IlscriptsConfig.Ilscript.Builder ilscriptBuilder) {
ArrayList<Expression> later = new ArrayList<>();
Set<String> touchedFields = new HashSet<>();
for (Expression expression : expressions) {
- if (modifiesSelf(expression) && ! setsLanguage(expression))
+ if (isStreaming) {
+ expression = expression.convertChildren(new DropTokenize());
+ }
+ if (modifiesSelf(expression) && ! setsLanguage(expression)) {
later.add(expression);
- else
+ } else {
ilscriptBuilder.content(expression.toString());
+ }
FieldScanVisitor fieldFetcher = new FieldScanVisitor();
fieldFetcher.visit(expression);
touchedFields.addAll(fieldFetcher.touchedFields());
}
- for (Expression exp : later)
+ for (Expression exp : later) {
ilscriptBuilder.content(exp.toString());
+ }
generateSyntheticStatementsForUntouchedFields(ilscriptBuilder, touchedFields);
}
@@ -171,8 +195,8 @@ public final class IndexingScript extends Derived implements IlscriptsConfig.Pro
}
private static class FieldScanVisitor extends ExpressionVisitor {
- List<String> touchedFields = new ArrayList<String>();
- List<String> candidates = new ArrayList<String>();
+ List<String> touchedFields = new ArrayList<>();
+ List<String> candidates = new ArrayList<>();
@Override
protected void doVisit(Expression exp) {
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/TypedTransformProvider.java b/config-model/src/main/java/com/yahoo/schema/processing/TypedTransformProvider.java
index 34dcc9139b3..8ccc8870419 100644
--- a/config-model/src/main/java/com/yahoo/schema/processing/TypedTransformProvider.java
+++ b/config-model/src/main/java/com/yahoo/schema/processing/TypedTransformProvider.java
@@ -6,7 +6,11 @@ import com.yahoo.document.Field;
import com.yahoo.schema.Schema;
import com.yahoo.schema.document.Attribute;
import com.yahoo.vespa.indexinglanguage.ValueTransformProvider;
-import com.yahoo.vespa.indexinglanguage.expressions.*;
+import com.yahoo.vespa.indexinglanguage.expressions.AttributeExpression;
+import com.yahoo.vespa.indexinglanguage.expressions.Expression;
+import com.yahoo.vespa.indexinglanguage.expressions.IndexExpression;
+import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression;
+import com.yahoo.vespa.indexinglanguage.expressions.SummaryExpression;
/**
* @author Simon Thoresen Hult
diff --git a/config-model/src/test/java/com/yahoo/schema/AttributeSettingsTestCase.java b/config-model/src/test/java/com/yahoo/schema/AttributeSettingsTestCase.java
index ceac2b94997..6624a7d3b32 100644
--- a/config-model/src/test/java/com/yahoo/schema/AttributeSettingsTestCase.java
+++ b/config-model/src/test/java/com/yahoo/schema/AttributeSettingsTestCase.java
@@ -280,7 +280,7 @@ public class AttributeSettingsTestCase extends AbstractSchemaTestCase {
@Test
void requireThatMutableIsAllowedThroughIndexing() throws ParseException {
- IndexingScript script = new IndexingScript(getSearchWithMutables());
+ IndexingScript script = new IndexingScript(getSearchWithMutables(), false);
IlscriptsConfig.Builder builder = new IlscriptsConfig.Builder();
script.getConfig(builder);
IlscriptsConfig cfg = builder.build();
diff --git a/config-model/src/test/java/com/yahoo/schema/derived/IndexInfoTestCase.java b/config-model/src/test/java/com/yahoo/schema/derived/IndexInfoTestCase.java
index 09450fa8023..1643cf4d541 100644
--- a/config-model/src/test/java/com/yahoo/schema/derived/IndexInfoTestCase.java
+++ b/config-model/src/test/java/com/yahoo/schema/derived/IndexInfoTestCase.java
@@ -45,7 +45,7 @@ public class IndexInfoTestCase {
return builder.build();
}
- private static VespaModel createModel(String schemaName, String sdContent) {
+ static VespaModel createModel(String schemaName, String sdContent) {
var builder = new DeployState.Builder();
return new ApplicationPackageBuilder()
.addCluster(new ContentClusterBuilder().name("content").docTypes(List.of(DocType.index(schemaName))))
diff --git a/config-model/src/test/java/com/yahoo/schema/derived/IndexingScriptTestCase.java b/config-model/src/test/java/com/yahoo/schema/derived/IndexingScriptTestCase.java
new file mode 100644
index 00000000000..6bfb67b3011
--- /dev/null
+++ b/config-model/src/test/java/com/yahoo/schema/derived/IndexingScriptTestCase.java
@@ -0,0 +1,43 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.derived;
+
+import com.yahoo.schema.Schema;
+import com.yahoo.vespa.configdefinition.IlscriptsConfig;
+import com.yahoo.vespa.model.VespaModel;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+public class IndexingScriptTestCase {
+
+ private static final String TEST = "test";
+
+ private static IlscriptsConfig ilscriptsConfig(Schema schema, boolean isStreaming) {
+ IndexingScript script = new IndexingScript(schema, isStreaming);
+ IlscriptsConfig.Builder cfgBuilder = new IlscriptsConfig.Builder();
+ script.getConfig(cfgBuilder);
+ return cfgBuilder.build();
+ }
+
+ private void verifyIndexingScript(boolean isStreaming) {
+ VespaModel model = IndexInfoTestCase.createModel(TEST,
+ """
+ field f type string { indexing: index }
+ field fa type array<string> { indexing: index }
+ """);
+ Schema schema = model.getSearchClusters().get(0).schemas().get(TEST).fullSchema();
+ IlscriptsConfig cfg = ilscriptsConfig(schema, isStreaming);
+ assertEquals(1, cfg.ilscript().size());
+ assertEquals(2, cfg.ilscript(0).content().size());
+ String exp_f = isStreaming ? "" : "tokenize normalize stem:\"BEST\" | ";
+ String exp_fa = isStreaming ? "" : "for_each { tokenize normalize stem:\"BEST\" } | ";
+ assertEquals("clear_state | guard { input f | " + exp_f + "index f; }", cfg.ilscript(0).content(0));
+ assertEquals("clear_state | guard { input fa | " + exp_fa + "index fa; }", cfg.ilscript(0).content(1));
+ }
+
+ @Test
+ void testThatTokenizeIsIgnoredFromStreaming() {
+ verifyIndexingScript(false);
+ verifyIndexingScript(true);
+ }
+}
diff --git a/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java b/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java
index e469b80ab40..423bc0b1798 100644
--- a/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java
+++ b/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java
@@ -23,59 +23,57 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
*/
public class VsmFieldsTestCase {
- private static Schema createSchema() {
+ static Schema createSchema() {
Schema schema = new Schema("test", MockApplicationPackage.createEmpty(), new MockFileRegistry(), new TestableDeployLogger(), new TestProperties());
var sdoc = new SDDocumentType("test");
schema.addDocument(sdoc);
return schema;
- }
-
- private static VsmfieldsConfig vsmfieldsConfig(Schema schema) {
- VsmFields vsmFields = new VsmFields(schema);
- VsmfieldsConfig.Builder cfgBuilder = new VsmfieldsConfig.Builder();
- vsmFields.getConfig(cfgBuilder);
- return cfgBuilder.build();
- }
+ }
+ private static VsmfieldsConfig vsmfieldsConfig(Schema schema) {
+ VsmFields vsmFields = new VsmFields(schema);
+ VsmfieldsConfig.Builder cfgBuilder = new VsmfieldsConfig.Builder();
+ vsmFields.getConfig(cfgBuilder);return cfgBuilder.build();
+ }
@Test
- void reference_type_field_is_unsearchable() {
- Schema schema = createSchema();
- SDField field = new TemporarySDField(schema.getDocument(), "ref_field", NewDocumentReferenceDataType.forDocumentName("parent_type"));
- field.parseIndexingScript("{ summary }");
- schema.getDocument().addField(field);
- VsmfieldsConfig cfg = vsmfieldsConfig(schema);
+ void reference_type_field_is_unsearchable() {
+ Schema schema = createSchema();
+ SDField field = new TemporarySDField(schema.getDocument(), "ref_field", NewDocumentReferenceDataType.forDocumentName("parent_type"));
+ field.parseIndexingScript("{ summary }");
+ schema.getDocument().addField(field);
+ VsmfieldsConfig cfg = vsmfieldsConfig(schema);
- assertEquals(1, cfg.fieldspec().size());
- VsmfieldsConfig.Fieldspec fieldSpec = cfg.fieldspec().get(0);
- assertEquals("ref_field", fieldSpec.name());
- assertEquals(VsmfieldsConfig.Fieldspec.Searchmethod.NONE, fieldSpec.searchmethod());
- }
+ assertEquals(1, cfg.fieldspec().size());
+ VsmfieldsConfig.Fieldspec fieldSpec = cfg.fieldspec().get(0);
+ assertEquals("ref_field", fieldSpec.name());
+ assertEquals(VsmfieldsConfig.Fieldspec.Searchmethod.NONE, fieldSpec.searchmethod());
+ }
- private void testIndexMatching(Matching matching, VsmfieldsConfig.Fieldspec.Normalize.Enum normalize, String arg1) {
- Schema schema = createSchema();
- SDField field = new TemporarySDField(schema.getDocument(), "f", DataType.STRING);
- field.parseIndexingScript("{ index }");
- field.setMatching(matching);
- schema.getDocument().addField(field);
- VsmfieldsConfig cfg = vsmfieldsConfig(schema);
- VsmfieldsConfig.Fieldspec fieldSpec = cfg.fieldspec().get(0);
- assertEquals("f", fieldSpec.name());
- assertEquals(VsmfieldsConfig.Fieldspec.Searchmethod.AUTOUTF8, fieldSpec.searchmethod());
- assertEquals(normalize, fieldSpec.normalize());
- assertEquals(arg1, fieldSpec.arg1());
- }
+ private void testIndexMatching(Matching matching, VsmfieldsConfig.Fieldspec.Normalize.Enum normalize, String arg1) {
+ Schema schema = createSchema();
+ SDField field = new TemporarySDField(schema.getDocument(), "f", DataType.STRING);
+ field.parseIndexingScript("{ index }");
+ field.setMatching(matching);
+ schema.getDocument().addField(field);
+ VsmfieldsConfig cfg = vsmfieldsConfig(schema);
+ VsmfieldsConfig.Fieldspec fieldSpec = cfg.fieldspec().get(0);
+ assertEquals("f", fieldSpec.name());
+ assertEquals(VsmfieldsConfig.Fieldspec.Searchmethod.AUTOUTF8, fieldSpec.searchmethod());
+ assertEquals(normalize, fieldSpec.normalize());
+ assertEquals(arg1, fieldSpec.arg1());
+ }
- @Test
- void test_exact_string() {
- testIndexMatching(new Matching(MatchType.TEXT),
- VsmfieldsConfig.Fieldspec.Normalize.LOWERCASE_AND_FOLD, "");
- testIndexMatching(new Matching(MatchType.TEXT).setCase(Case.CASED),
- VsmfieldsConfig.Fieldspec.Normalize.NONE, "");
- testIndexMatching(new Matching(MatchType.EXACT).setCase(Case.CASED),
- VsmfieldsConfig.Fieldspec.Normalize.LOWERCASE, "exact");
- testIndexMatching(new Matching(MatchType.WORD),
- VsmfieldsConfig.Fieldspec.Normalize.LOWERCASE, "word");
- testIndexMatching(new Matching(MatchType.WORD).setCase(Case.CASED),
- VsmfieldsConfig.Fieldspec.Normalize.NONE, "word");
- }
+ @Test
+ void test_exact_string() {
+ testIndexMatching(new Matching(MatchType.TEXT),
+ VsmfieldsConfig.Fieldspec.Normalize.LOWERCASE_AND_FOLD, "");
+ testIndexMatching(new Matching(MatchType.TEXT).setCase(Case.CASED),
+ VsmfieldsConfig.Fieldspec.Normalize.NONE, "");
+ testIndexMatching(new Matching(MatchType.EXACT).setCase(Case.CASED),
+ VsmfieldsConfig.Fieldspec.Normalize.LOWERCASE, "exact");
+ testIndexMatching(new Matching(MatchType.WORD),
+ VsmfieldsConfig.Fieldspec.Normalize.LOWERCASE, "word");
+ testIndexMatching(new Matching(MatchType.WORD).setCase(Case.CASED),
+ VsmfieldsConfig.Fieldspec.Normalize.NONE, "word");
+ }
}
diff --git a/config-model/src/test/java/com/yahoo/schema/processing/AssertIndexingScript.java b/config-model/src/test/java/com/yahoo/schema/processing/AssertIndexingScript.java
index 8aeb11aaa2e..b82dc197838 100644
--- a/config-model/src/test/java/com/yahoo/schema/processing/AssertIndexingScript.java
+++ b/config-model/src/test/java/com/yahoo/schema/processing/AssertIndexingScript.java
@@ -18,7 +18,7 @@ import static org.junit.jupiter.api.Assertions.fail;
public abstract class AssertIndexingScript {
public static void assertIndexing(List<String> expected, Schema schema) {
- assertIndexing(expected, new IndexingScript(schema).expressions());
+ assertIndexing(expected, new IndexingScript(schema, false).expressions());
}
public static void assertIndexing(List<String> expected, IndexingScript script) {
diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/StorageContentTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/StorageContentTest.java
index 49ac370e763..e37999ded12 100644
--- a/config-model/src/test/java/com/yahoo/vespa/model/content/StorageContentTest.java
+++ b/config-model/src/test/java/com/yahoo/vespa/model/content/StorageContentTest.java
@@ -50,7 +50,7 @@ public class StorageContentTest extends ContentBaseTest {
return new VespaModelCreatorWithMockPkg(getHosts(), createStorageVespaServices(cluster1docs, cluster2docs), sds).create();
}
- public void doTestRouting(String cluster1docs, String cluster2docs, String expectedRoutes) throws Exception {
+ public void doTestRouting(String cluster1docs, String cluster2docs, String expectedRoutes) {
VespaModel model = getStorageVespaModel(cluster1docs, cluster2docs);
if (expectedRoutes == null) {
@@ -73,7 +73,6 @@ public class StorageContentTest extends ContentBaseTest {
for (int i = 0; i < spec.getNumRoutes(); ++i) {
RouteSpec r = spec.getRoute(i);
-
routes.put(r.getName(), r);
}