diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2024-01-13 01:02:48 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-13 01:02:48 +0100 |
commit | a393f0a47e0317bf58f82a1a5158bd3e75499904 (patch) | |
tree | 888084f76308f40227fd83b94ec8727a463670c1 /config-model | |
parent | a0806fb2ab42a8b774298cd993b50d72e3cab345 (diff) | |
parent | 77d69db1e2c28da264997e8b7271a1b0c0a2afb7 (diff) |
Merge pull request #29886 from vespa-engine/revert-29885-revert-29883-balder/drop-tokenize-for-streaming
Revert "Revert "Drop tokenize expressions from ilscript for streaming mode.""
Diffstat (limited to 'config-model')
10 files changed, 127 insertions, 60 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/Schema.java b/config-model/src/main/java/com/yahoo/schema/Schema.java index 279b5729ea1..3402ba31be9 100644 --- a/config-model/src/main/java/com/yahoo/schema/Schema.java +++ b/config-model/src/main/java/com/yahoo/schema/Schema.java @@ -18,7 +18,6 @@ import com.yahoo.schema.document.SDDocumentType; import com.yahoo.schema.document.SDField; import com.yahoo.schema.document.Stemming; import com.yahoo.schema.document.TemporaryImportedFields; -import com.yahoo.schema.document.annotation.SDAnnotationType; import com.yahoo.searchlib.rankingexpression.Reference; import com.yahoo.vespa.documentmodel.DocumentSummary; import com.yahoo.vespa.documentmodel.SummaryField; diff --git a/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java b/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java index 7f874d04f13..575b7264628 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java @@ -80,7 +80,7 @@ public class DerivedConfiguration implements AttributesConfig.Producer { summaries = new Summaries(schema, deployState.getDeployLogger(), deployState.getProperties().featureFlags()); juniperrc = new Juniperrc(schema); rankProfileList = new RankProfileList(schema, schema.rankExpressionFiles(), attributeFields, deployState); - indexingScript = new IndexingScript(schema); + indexingScript = new IndexingScript(schema, isStreaming); indexInfo = new IndexInfo(schema, isStreaming); schemaInfo = new SchemaInfo(schema, deployState.rankProfileRegistry(), summaries); indexSchema = new IndexSchema(schema); diff --git a/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java b/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java index 73f3507ab00..375a0fa3700 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java @@ -6,9 +6,11 @@ import com.yahoo.schema.document.GeoPos; import com.yahoo.schema.document.ImmutableSDField; import com.yahoo.vespa.configdefinition.IlscriptsConfig; import com.yahoo.vespa.configdefinition.IlscriptsConfig.Ilscript.Builder; +import com.yahoo.vespa.indexinglanguage.ExpressionConverter; import com.yahoo.vespa.indexinglanguage.ExpressionVisitor; import com.yahoo.vespa.indexinglanguage.expressions.ClearStateExpression; import com.yahoo.vespa.indexinglanguage.expressions.Expression; +import com.yahoo.vespa.indexinglanguage.expressions.ForEachExpression; import com.yahoo.vespa.indexinglanguage.expressions.GuardExpression; import com.yahoo.vespa.indexinglanguage.expressions.InputExpression; import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression; @@ -16,6 +18,7 @@ import com.yahoo.vespa.indexinglanguage.expressions.PassthroughExpression; import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; import com.yahoo.vespa.indexinglanguage.expressions.SetLanguageExpression; import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression; +import com.yahoo.vespa.indexinglanguage.expressions.TokenizeExpression; import com.yahoo.vespa.indexinglanguage.expressions.ZCurveExpression; import java.util.ArrayList; @@ -36,8 +39,10 @@ public final class IndexingScript extends Derived implements IlscriptsConfig.Pro private final List<String> docFields = new ArrayList<>(); private final List<Expression> expressions = new ArrayList<>(); private List<ImmutableSDField> fieldsSettingLanguage; + private final boolean isStreaming; - public IndexingScript(Schema schema) { + public IndexingScript(Schema schema, boolean isStreaming) { + this.isStreaming = isStreaming; derive(schema); } @@ -96,21 +101,40 @@ public final class IndexingScript extends Derived implements IlscriptsConfig.Pro configBuilder.ilscript(ilscriptBuilder); } + private static class DropTokenize extends ExpressionConverter { + @Override + protected boolean shouldConvert(Expression exp) { + // Handle both string and array<string> + return (exp instanceof TokenizeExpression) || + ((exp instanceof ForEachExpression foreach) && (foreach.getInnerExpression() instanceof TokenizeExpression)); + } + + @Override + protected Expression doConvert(Expression exp) { + return null; + } + } + private void addContentInOrder(IlscriptsConfig.Ilscript.Builder ilscriptBuilder) { ArrayList<Expression> later = new ArrayList<>(); Set<String> touchedFields = new HashSet<>(); for (Expression expression : expressions) { - if (modifiesSelf(expression) && ! setsLanguage(expression)) + if (isStreaming) { + expression = expression.convertChildren(new DropTokenize()); + } + if (modifiesSelf(expression) && ! setsLanguage(expression)) { later.add(expression); - else + } else { ilscriptBuilder.content(expression.toString()); + } FieldScanVisitor fieldFetcher = new FieldScanVisitor(); fieldFetcher.visit(expression); touchedFields.addAll(fieldFetcher.touchedFields()); } - for (Expression exp : later) + for (Expression exp : later) { ilscriptBuilder.content(exp.toString()); + } generateSyntheticStatementsForUntouchedFields(ilscriptBuilder, touchedFields); } @@ -171,8 +195,8 @@ public final class IndexingScript extends Derived implements IlscriptsConfig.Pro } private static class FieldScanVisitor extends ExpressionVisitor { - List<String> touchedFields = new ArrayList<String>(); - List<String> candidates = new ArrayList<String>(); + List<String> touchedFields = new ArrayList<>(); + List<String> candidates = new ArrayList<>(); @Override protected void doVisit(Expression exp) { diff --git a/config-model/src/main/java/com/yahoo/schema/processing/TypedTransformProvider.java b/config-model/src/main/java/com/yahoo/schema/processing/TypedTransformProvider.java index 34dcc9139b3..8ccc8870419 100644 --- a/config-model/src/main/java/com/yahoo/schema/processing/TypedTransformProvider.java +++ b/config-model/src/main/java/com/yahoo/schema/processing/TypedTransformProvider.java @@ -6,7 +6,11 @@ import com.yahoo.document.Field; import com.yahoo.schema.Schema; import com.yahoo.schema.document.Attribute; import com.yahoo.vespa.indexinglanguage.ValueTransformProvider; -import com.yahoo.vespa.indexinglanguage.expressions.*; +import com.yahoo.vespa.indexinglanguage.expressions.AttributeExpression; +import com.yahoo.vespa.indexinglanguage.expressions.Expression; +import com.yahoo.vespa.indexinglanguage.expressions.IndexExpression; +import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression; +import com.yahoo.vespa.indexinglanguage.expressions.SummaryExpression; /** * @author Simon Thoresen Hult diff --git a/config-model/src/test/java/com/yahoo/schema/AttributeSettingsTestCase.java b/config-model/src/test/java/com/yahoo/schema/AttributeSettingsTestCase.java index ceac2b94997..6624a7d3b32 100644 --- a/config-model/src/test/java/com/yahoo/schema/AttributeSettingsTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/AttributeSettingsTestCase.java @@ -280,7 +280,7 @@ public class AttributeSettingsTestCase extends AbstractSchemaTestCase { @Test void requireThatMutableIsAllowedThroughIndexing() throws ParseException { - IndexingScript script = new IndexingScript(getSearchWithMutables()); + IndexingScript script = new IndexingScript(getSearchWithMutables(), false); IlscriptsConfig.Builder builder = new IlscriptsConfig.Builder(); script.getConfig(builder); IlscriptsConfig cfg = builder.build(); diff --git a/config-model/src/test/java/com/yahoo/schema/derived/IndexInfoTestCase.java b/config-model/src/test/java/com/yahoo/schema/derived/IndexInfoTestCase.java index 09450fa8023..1643cf4d541 100644 --- a/config-model/src/test/java/com/yahoo/schema/derived/IndexInfoTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/derived/IndexInfoTestCase.java @@ -45,7 +45,7 @@ public class IndexInfoTestCase { return builder.build(); } - private static VespaModel createModel(String schemaName, String sdContent) { + static VespaModel createModel(String schemaName, String sdContent) { var builder = new DeployState.Builder(); return new ApplicationPackageBuilder() .addCluster(new ContentClusterBuilder().name("content").docTypes(List.of(DocType.index(schemaName)))) diff --git a/config-model/src/test/java/com/yahoo/schema/derived/IndexingScriptTestCase.java b/config-model/src/test/java/com/yahoo/schema/derived/IndexingScriptTestCase.java new file mode 100644 index 00000000000..6bfb67b3011 --- /dev/null +++ b/config-model/src/test/java/com/yahoo/schema/derived/IndexingScriptTestCase.java @@ -0,0 +1,43 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.schema.Schema; +import com.yahoo.vespa.configdefinition.IlscriptsConfig; +import com.yahoo.vespa.model.VespaModel; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class IndexingScriptTestCase { + + private static final String TEST = "test"; + + private static IlscriptsConfig ilscriptsConfig(Schema schema, boolean isStreaming) { + IndexingScript script = new IndexingScript(schema, isStreaming); + IlscriptsConfig.Builder cfgBuilder = new IlscriptsConfig.Builder(); + script.getConfig(cfgBuilder); + return cfgBuilder.build(); + } + + private void verifyIndexingScript(boolean isStreaming) { + VespaModel model = IndexInfoTestCase.createModel(TEST, + """ + field f type string { indexing: index } + field fa type array<string> { indexing: index } + """); + Schema schema = model.getSearchClusters().get(0).schemas().get(TEST).fullSchema(); + IlscriptsConfig cfg = ilscriptsConfig(schema, isStreaming); + assertEquals(1, cfg.ilscript().size()); + assertEquals(2, cfg.ilscript(0).content().size()); + String exp_f = isStreaming ? "" : "tokenize normalize stem:\"BEST\" | "; + String exp_fa = isStreaming ? "" : "for_each { tokenize normalize stem:\"BEST\" } | "; + assertEquals("clear_state | guard { input f | " + exp_f + "index f; }", cfg.ilscript(0).content(0)); + assertEquals("clear_state | guard { input fa | " + exp_fa + "index fa; }", cfg.ilscript(0).content(1)); + } + + @Test + void testThatTokenizeIsIgnoredFromStreaming() { + verifyIndexingScript(false); + verifyIndexingScript(true); + } +} diff --git a/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java b/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java index e469b80ab40..423bc0b1798 100644 --- a/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java @@ -23,59 +23,57 @@ import static org.junit.jupiter.api.Assertions.assertEquals; */ public class VsmFieldsTestCase { - private static Schema createSchema() { + static Schema createSchema() { Schema schema = new Schema("test", MockApplicationPackage.createEmpty(), new MockFileRegistry(), new TestableDeployLogger(), new TestProperties()); var sdoc = new SDDocumentType("test"); schema.addDocument(sdoc); return schema; - } - - private static VsmfieldsConfig vsmfieldsConfig(Schema schema) { - VsmFields vsmFields = new VsmFields(schema); - VsmfieldsConfig.Builder cfgBuilder = new VsmfieldsConfig.Builder(); - vsmFields.getConfig(cfgBuilder); - return cfgBuilder.build(); - } + } + private static VsmfieldsConfig vsmfieldsConfig(Schema schema) { + VsmFields vsmFields = new VsmFields(schema); + VsmfieldsConfig.Builder cfgBuilder = new VsmfieldsConfig.Builder(); + vsmFields.getConfig(cfgBuilder);return cfgBuilder.build(); + } @Test - void reference_type_field_is_unsearchable() { - Schema schema = createSchema(); - SDField field = new TemporarySDField(schema.getDocument(), "ref_field", NewDocumentReferenceDataType.forDocumentName("parent_type")); - field.parseIndexingScript("{ summary }"); - schema.getDocument().addField(field); - VsmfieldsConfig cfg = vsmfieldsConfig(schema); + void reference_type_field_is_unsearchable() { + Schema schema = createSchema(); + SDField field = new TemporarySDField(schema.getDocument(), "ref_field", NewDocumentReferenceDataType.forDocumentName("parent_type")); + field.parseIndexingScript("{ summary }"); + schema.getDocument().addField(field); + VsmfieldsConfig cfg = vsmfieldsConfig(schema); - assertEquals(1, cfg.fieldspec().size()); - VsmfieldsConfig.Fieldspec fieldSpec = cfg.fieldspec().get(0); - assertEquals("ref_field", fieldSpec.name()); - assertEquals(VsmfieldsConfig.Fieldspec.Searchmethod.NONE, fieldSpec.searchmethod()); - } + assertEquals(1, cfg.fieldspec().size()); + VsmfieldsConfig.Fieldspec fieldSpec = cfg.fieldspec().get(0); + assertEquals("ref_field", fieldSpec.name()); + assertEquals(VsmfieldsConfig.Fieldspec.Searchmethod.NONE, fieldSpec.searchmethod()); + } - private void testIndexMatching(Matching matching, VsmfieldsConfig.Fieldspec.Normalize.Enum normalize, String arg1) { - Schema schema = createSchema(); - SDField field = new TemporarySDField(schema.getDocument(), "f", DataType.STRING); - field.parseIndexingScript("{ index }"); - field.setMatching(matching); - schema.getDocument().addField(field); - VsmfieldsConfig cfg = vsmfieldsConfig(schema); - VsmfieldsConfig.Fieldspec fieldSpec = cfg.fieldspec().get(0); - assertEquals("f", fieldSpec.name()); - assertEquals(VsmfieldsConfig.Fieldspec.Searchmethod.AUTOUTF8, fieldSpec.searchmethod()); - assertEquals(normalize, fieldSpec.normalize()); - assertEquals(arg1, fieldSpec.arg1()); - } + private void testIndexMatching(Matching matching, VsmfieldsConfig.Fieldspec.Normalize.Enum normalize, String arg1) { + Schema schema = createSchema(); + SDField field = new TemporarySDField(schema.getDocument(), "f", DataType.STRING); + field.parseIndexingScript("{ index }"); + field.setMatching(matching); + schema.getDocument().addField(field); + VsmfieldsConfig cfg = vsmfieldsConfig(schema); + VsmfieldsConfig.Fieldspec fieldSpec = cfg.fieldspec().get(0); + assertEquals("f", fieldSpec.name()); + assertEquals(VsmfieldsConfig.Fieldspec.Searchmethod.AUTOUTF8, fieldSpec.searchmethod()); + assertEquals(normalize, fieldSpec.normalize()); + assertEquals(arg1, fieldSpec.arg1()); + } - @Test - void test_exact_string() { - testIndexMatching(new Matching(MatchType.TEXT), - VsmfieldsConfig.Fieldspec.Normalize.LOWERCASE_AND_FOLD, ""); - testIndexMatching(new Matching(MatchType.TEXT).setCase(Case.CASED), - VsmfieldsConfig.Fieldspec.Normalize.NONE, ""); - testIndexMatching(new Matching(MatchType.EXACT).setCase(Case.CASED), - VsmfieldsConfig.Fieldspec.Normalize.LOWERCASE, "exact"); - testIndexMatching(new Matching(MatchType.WORD), - VsmfieldsConfig.Fieldspec.Normalize.LOWERCASE, "word"); - testIndexMatching(new Matching(MatchType.WORD).setCase(Case.CASED), - VsmfieldsConfig.Fieldspec.Normalize.NONE, "word"); - } + @Test + void test_exact_string() { + testIndexMatching(new Matching(MatchType.TEXT), + VsmfieldsConfig.Fieldspec.Normalize.LOWERCASE_AND_FOLD, ""); + testIndexMatching(new Matching(MatchType.TEXT).setCase(Case.CASED), + VsmfieldsConfig.Fieldspec.Normalize.NONE, ""); + testIndexMatching(new Matching(MatchType.EXACT).setCase(Case.CASED), + VsmfieldsConfig.Fieldspec.Normalize.LOWERCASE, "exact"); + testIndexMatching(new Matching(MatchType.WORD), + VsmfieldsConfig.Fieldspec.Normalize.LOWERCASE, "word"); + testIndexMatching(new Matching(MatchType.WORD).setCase(Case.CASED), + VsmfieldsConfig.Fieldspec.Normalize.NONE, "word"); + } } diff --git a/config-model/src/test/java/com/yahoo/schema/processing/AssertIndexingScript.java b/config-model/src/test/java/com/yahoo/schema/processing/AssertIndexingScript.java index 8aeb11aaa2e..b82dc197838 100644 --- a/config-model/src/test/java/com/yahoo/schema/processing/AssertIndexingScript.java +++ b/config-model/src/test/java/com/yahoo/schema/processing/AssertIndexingScript.java @@ -18,7 +18,7 @@ import static org.junit.jupiter.api.Assertions.fail; public abstract class AssertIndexingScript { public static void assertIndexing(List<String> expected, Schema schema) { - assertIndexing(expected, new IndexingScript(schema).expressions()); + assertIndexing(expected, new IndexingScript(schema, false).expressions()); } public static void assertIndexing(List<String> expected, IndexingScript script) { diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/StorageContentTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/StorageContentTest.java index 49ac370e763..e37999ded12 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/content/StorageContentTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/content/StorageContentTest.java @@ -50,7 +50,7 @@ public class StorageContentTest extends ContentBaseTest { return new VespaModelCreatorWithMockPkg(getHosts(), createStorageVespaServices(cluster1docs, cluster2docs), sds).create(); } - public void doTestRouting(String cluster1docs, String cluster2docs, String expectedRoutes) throws Exception { + public void doTestRouting(String cluster1docs, String cluster2docs, String expectedRoutes) { VespaModel model = getStorageVespaModel(cluster1docs, cluster2docs); if (expectedRoutes == null) { @@ -73,7 +73,6 @@ public class StorageContentTest extends ContentBaseTest { for (int i = 0; i < spec.getNumRoutes(); ++i) { RouteSpec r = spec.getRoute(i); - routes.put(r.getName(), r); } |