From 4825303a9cc973984825e201a8a470b055853d4c Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Fri, 12 Jan 2024 16:49:02 +0100 Subject: Drop tokenize expressions from ilscript for streaming mode. --- .../src/main/java/com/yahoo/schema/Schema.java | 1 - .../yahoo/schema/derived/DerivedConfiguration.java | 2 +- .../com/yahoo/schema/derived/IndexingScript.java | 33 ++++++++++++++---- .../yahoo/schema/AttributeSettingsTestCase.java | 2 +- .../schema/derived/IndexingScriptTestCase.java | 40 ++++++++++++++++++++++ .../yahoo/schema/derived/VsmFieldsTestCase.java | 2 +- .../schema/processing/AssertIndexingScript.java | 2 +- 7 files changed, 71 insertions(+), 11 deletions(-) create mode 100644 config-model/src/test/java/com/yahoo/schema/derived/IndexingScriptTestCase.java (limited to 'config-model') diff --git a/config-model/src/main/java/com/yahoo/schema/Schema.java b/config-model/src/main/java/com/yahoo/schema/Schema.java index 279b5729ea1..3402ba31be9 100644 --- a/config-model/src/main/java/com/yahoo/schema/Schema.java +++ b/config-model/src/main/java/com/yahoo/schema/Schema.java @@ -18,7 +18,6 @@ import com.yahoo.schema.document.SDDocumentType; import com.yahoo.schema.document.SDField; import com.yahoo.schema.document.Stemming; import com.yahoo.schema.document.TemporaryImportedFields; -import com.yahoo.schema.document.annotation.SDAnnotationType; import com.yahoo.searchlib.rankingexpression.Reference; import com.yahoo.vespa.documentmodel.DocumentSummary; import com.yahoo.vespa.documentmodel.SummaryField; diff --git a/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java b/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java index 7f874d04f13..575b7264628 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java @@ -80,7 +80,7 @@ public class DerivedConfiguration implements AttributesConfig.Producer { summaries = new Summaries(schema, deployState.getDeployLogger(), deployState.getProperties().featureFlags()); juniperrc = new Juniperrc(schema); rankProfileList = new RankProfileList(schema, schema.rankExpressionFiles(), attributeFields, deployState); - indexingScript = new IndexingScript(schema); + indexingScript = new IndexingScript(schema, isStreaming); indexInfo = new IndexInfo(schema, isStreaming); schemaInfo = new SchemaInfo(schema, deployState.rankProfileRegistry(), summaries); indexSchema = new IndexSchema(schema); diff --git a/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java b/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java index 73f3507ab00..6f0420f5203 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java @@ -6,6 +6,7 @@ import com.yahoo.schema.document.GeoPos; import com.yahoo.schema.document.ImmutableSDField; import com.yahoo.vespa.configdefinition.IlscriptsConfig; import com.yahoo.vespa.configdefinition.IlscriptsConfig.Ilscript.Builder; +import com.yahoo.vespa.indexinglanguage.ExpressionConverter; import com.yahoo.vespa.indexinglanguage.ExpressionVisitor; import com.yahoo.vespa.indexinglanguage.expressions.ClearStateExpression; import com.yahoo.vespa.indexinglanguage.expressions.Expression; @@ -16,6 +17,7 @@ import com.yahoo.vespa.indexinglanguage.expressions.PassthroughExpression; import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; import com.yahoo.vespa.indexinglanguage.expressions.SetLanguageExpression; import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression; +import com.yahoo.vespa.indexinglanguage.expressions.TokenizeExpression; import com.yahoo.vespa.indexinglanguage.expressions.ZCurveExpression; import java.util.ArrayList; @@ -36,8 +38,10 @@ public final class IndexingScript extends Derived implements IlscriptsConfig.Pro private final List docFields = new ArrayList<>(); private final List expressions = new ArrayList<>(); private List fieldsSettingLanguage; + private final boolean isStreaming; - public IndexingScript(Schema schema) { + public IndexingScript(Schema schema, boolean isStreaming) { + this.isStreaming = isStreaming; derive(schema); } @@ -96,21 +100,38 @@ public final class IndexingScript extends Derived implements IlscriptsConfig.Pro configBuilder.ilscript(ilscriptBuilder); } + private static class DropTokenize extends ExpressionConverter { + @Override + protected boolean shouldConvert(Expression exp) { + return exp instanceof TokenizeExpression; + } + + @Override + protected Expression doConvert(Expression exp) { + return null; + } + } + private void addContentInOrder(IlscriptsConfig.Ilscript.Builder ilscriptBuilder) { ArrayList later = new ArrayList<>(); Set touchedFields = new HashSet<>(); for (Expression expression : expressions) { - if (modifiesSelf(expression) && ! setsLanguage(expression)) + if (isStreaming) { + expression = expression.convertChildren(new DropTokenize()); + } + if (modifiesSelf(expression) && ! setsLanguage(expression)) { later.add(expression); - else + } else { ilscriptBuilder.content(expression.toString()); + } FieldScanVisitor fieldFetcher = new FieldScanVisitor(); fieldFetcher.visit(expression); touchedFields.addAll(fieldFetcher.touchedFields()); } - for (Expression exp : later) + for (Expression exp : later) { ilscriptBuilder.content(exp.toString()); + } generateSyntheticStatementsForUntouchedFields(ilscriptBuilder, touchedFields); } @@ -171,8 +192,8 @@ public final class IndexingScript extends Derived implements IlscriptsConfig.Pro } private static class FieldScanVisitor extends ExpressionVisitor { - List touchedFields = new ArrayList(); - List candidates = new ArrayList(); + List touchedFields = new ArrayList<>(); + List candidates = new ArrayList<>(); @Override protected void doVisit(Expression exp) { diff --git a/config-model/src/test/java/com/yahoo/schema/AttributeSettingsTestCase.java b/config-model/src/test/java/com/yahoo/schema/AttributeSettingsTestCase.java index ceac2b94997..6624a7d3b32 100644 --- a/config-model/src/test/java/com/yahoo/schema/AttributeSettingsTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/AttributeSettingsTestCase.java @@ -280,7 +280,7 @@ public class AttributeSettingsTestCase extends AbstractSchemaTestCase { @Test void requireThatMutableIsAllowedThroughIndexing() throws ParseException { - IndexingScript script = new IndexingScript(getSearchWithMutables()); + IndexingScript script = new IndexingScript(getSearchWithMutables(), false); IlscriptsConfig.Builder builder = new IlscriptsConfig.Builder(); script.getConfig(builder); IlscriptsConfig cfg = builder.build(); diff --git a/config-model/src/test/java/com/yahoo/schema/derived/IndexingScriptTestCase.java b/config-model/src/test/java/com/yahoo/schema/derived/IndexingScriptTestCase.java new file mode 100644 index 00000000000..6e86a567e68 --- /dev/null +++ b/config-model/src/test/java/com/yahoo/schema/derived/IndexingScriptTestCase.java @@ -0,0 +1,40 @@ +package com.yahoo.schema.derived; + +import com.yahoo.document.DataType; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.MatchType; +import com.yahoo.schema.document.Matching; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.document.TemporarySDField; +import com.yahoo.vespa.configdefinition.IlscriptsConfig; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class IndexingScriptTestCase { + + private static IlscriptsConfig ilscriptsConfig(Schema schema, boolean isStreaming) { + IndexingScript script = new IndexingScript(schema, isStreaming); + IlscriptsConfig.Builder cfgBuilder = new IlscriptsConfig.Builder(); + script.getConfig(cfgBuilder); + return cfgBuilder.build(); + } + + private void verifyIndexingScript(boolean isStreaming) { + Schema schema = VsmFieldsTestCase.createSchema(); + SDField field = new TemporarySDField(schema.getDocument(), "f", DataType.STRING); + field.parseIndexingScript("{ tokenize | index }"); + field.setMatching(new Matching(MatchType.TEXT)); + schema.getDocument().addField(field); + IlscriptsConfig cfg = ilscriptsConfig(schema, isStreaming); + assertEquals(1, cfg.ilscript().size()); + assertEquals(2, cfg.ilscript(0).content().size()); + String indexing = isStreaming ? "index" : "tokenize | index"; + assertEquals("clear_state | guard { " + indexing + "; }", cfg.ilscript(0).content(0)); + } + @Test + void testThatTokenizeIsIgnoredFromStreaming() { + verifyIndexingScript(false); + verifyIndexingScript(true); + } +} diff --git a/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java b/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java index e469b80ab40..042feeb5f46 100644 --- a/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java @@ -23,7 +23,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; */ public class VsmFieldsTestCase { - private static Schema createSchema() { + static Schema createSchema() { Schema schema = new Schema("test", MockApplicationPackage.createEmpty(), new MockFileRegistry(), new TestableDeployLogger(), new TestProperties()); var sdoc = new SDDocumentType("test"); schema.addDocument(sdoc); diff --git a/config-model/src/test/java/com/yahoo/schema/processing/AssertIndexingScript.java b/config-model/src/test/java/com/yahoo/schema/processing/AssertIndexingScript.java index 8aeb11aaa2e..b82dc197838 100644 --- a/config-model/src/test/java/com/yahoo/schema/processing/AssertIndexingScript.java +++ b/config-model/src/test/java/com/yahoo/schema/processing/AssertIndexingScript.java @@ -18,7 +18,7 @@ import static org.junit.jupiter.api.Assertions.fail; public abstract class AssertIndexingScript { public static void assertIndexing(List expected, Schema schema) { - assertIndexing(expected, new IndexingScript(schema).expressions()); + assertIndexing(expected, new IndexingScript(schema, false).expressions()); } public static void assertIndexing(List expected, IndexingScript script) { -- cgit v1.2.3