diff options
field | value | date |
---|---|---|
author | Henning Baldersheim <balder@yahoo-inc.com> | 2024-01-12 18:29:29 +0100 |
committer | GitHub <noreply@github.com> | 2024-01-12 18:29:29 +0100 |
commit | 477af2e4a8ef20b5c8eb8403c97bbaa50af2688f (patch) | |
tree | 8993208169aa39fcccdaec20268c4b10d0412516 | |
parent | 0213cea0df0706a1f1a70c44e8c2b5906745a6ab (diff) | |
parent | 8498e8b0c30267cae2b076df862e44c53633c922 (diff) |
Merge pull request #29883 from vespa-engine/balder/drop-tokenize-for-streaming
Drop tokenize expressions from ilscript for streaming mode.
8 files changed, 73 insertions, 13 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/Schema.java b/config-model/src/main/java/com/yahoo/schema/Schema.java index 279b5729ea1..3402ba31be9 100644 --- a/config-model/src/main/java/com/yahoo/schema/Schema.java +++ b/config-model/src/main/java/com/yahoo/schema/Schema.java @@ -18,7 +18,6 @@ import com.yahoo.schema.document.SDDocumentType; import com.yahoo.schema.document.SDField; import com.yahoo.schema.document.Stemming; import com.yahoo.schema.document.TemporaryImportedFields; -import com.yahoo.schema.document.annotation.SDAnnotationType; import com.yahoo.searchlib.rankingexpression.Reference; import com.yahoo.vespa.documentmodel.DocumentSummary; import com.yahoo.vespa.documentmodel.SummaryField; diff --git a/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java b/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java index 7f874d04f13..575b7264628 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java @@ -80,7 +80,7 @@ public class DerivedConfiguration implements AttributesConfig.Producer { summaries = new Summaries(schema, deployState.getDeployLogger(), deployState.getProperties().featureFlags()); juniperrc = new Juniperrc(schema); rankProfileList = new RankProfileList(schema, schema.rankExpressionFiles(), attributeFields, deployState); - indexingScript = new IndexingScript(schema); + indexingScript = new IndexingScript(schema, isStreaming); indexInfo = new IndexInfo(schema, isStreaming); schemaInfo = new SchemaInfo(schema, deployState.rankProfileRegistry(), summaries); indexSchema = new IndexSchema(schema); diff --git a/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java b/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java index 73f3507ab00..6f0420f5203 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java +++ 
b/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java @@ -6,6 +6,7 @@ import com.yahoo.schema.document.GeoPos; import com.yahoo.schema.document.ImmutableSDField; import com.yahoo.vespa.configdefinition.IlscriptsConfig; import com.yahoo.vespa.configdefinition.IlscriptsConfig.Ilscript.Builder; +import com.yahoo.vespa.indexinglanguage.ExpressionConverter; import com.yahoo.vespa.indexinglanguage.ExpressionVisitor; import com.yahoo.vespa.indexinglanguage.expressions.ClearStateExpression; import com.yahoo.vespa.indexinglanguage.expressions.Expression; @@ -16,6 +17,7 @@ import com.yahoo.vespa.indexinglanguage.expressions.PassthroughExpression; import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; import com.yahoo.vespa.indexinglanguage.expressions.SetLanguageExpression; import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression; +import com.yahoo.vespa.indexinglanguage.expressions.TokenizeExpression; import com.yahoo.vespa.indexinglanguage.expressions.ZCurveExpression; import java.util.ArrayList; @@ -36,8 +38,10 @@ public final class IndexingScript extends Derived implements IlscriptsConfig.Pro private final List<String> docFields = new ArrayList<>(); private final List<Expression> expressions = new ArrayList<>(); private List<ImmutableSDField> fieldsSettingLanguage; + private final boolean isStreaming; - public IndexingScript(Schema schema) { + public IndexingScript(Schema schema, boolean isStreaming) { + this.isStreaming = isStreaming; derive(schema); } @@ -96,21 +100,38 @@ public final class IndexingScript extends Derived implements IlscriptsConfig.Pro configBuilder.ilscript(ilscriptBuilder); } + private static class DropTokenize extends ExpressionConverter { + @Override + protected boolean shouldConvert(Expression exp) { + return exp instanceof TokenizeExpression; + } + + @Override + protected Expression doConvert(Expression exp) { + return null; + } + } + private void addContentInOrder(IlscriptsConfig.Ilscript.Builder 
ilscriptBuilder) { ArrayList<Expression> later = new ArrayList<>(); Set<String> touchedFields = new HashSet<>(); for (Expression expression : expressions) { - if (modifiesSelf(expression) && ! setsLanguage(expression)) + if (isStreaming) { + expression = expression.convertChildren(new DropTokenize()); + } + if (modifiesSelf(expression) && ! setsLanguage(expression)) { later.add(expression); - else + } else { ilscriptBuilder.content(expression.toString()); + } FieldScanVisitor fieldFetcher = new FieldScanVisitor(); fieldFetcher.visit(expression); touchedFields.addAll(fieldFetcher.touchedFields()); } - for (Expression exp : later) + for (Expression exp : later) { ilscriptBuilder.content(exp.toString()); + } generateSyntheticStatementsForUntouchedFields(ilscriptBuilder, touchedFields); } @@ -171,8 +192,8 @@ public final class IndexingScript extends Derived implements IlscriptsConfig.Pro } private static class FieldScanVisitor extends ExpressionVisitor { - List<String> touchedFields = new ArrayList<String>(); - List<String> candidates = new ArrayList<String>(); + List<String> touchedFields = new ArrayList<>(); + List<String> candidates = new ArrayList<>(); @Override protected void doVisit(Expression exp) { diff --git a/config-model/src/test/java/com/yahoo/schema/AttributeSettingsTestCase.java b/config-model/src/test/java/com/yahoo/schema/AttributeSettingsTestCase.java index ceac2b94997..6624a7d3b32 100644 --- a/config-model/src/test/java/com/yahoo/schema/AttributeSettingsTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/AttributeSettingsTestCase.java @@ -280,7 +280,7 @@ public class AttributeSettingsTestCase extends AbstractSchemaTestCase { @Test void requireThatMutableIsAllowedThroughIndexing() throws ParseException { - IndexingScript script = new IndexingScript(getSearchWithMutables()); + IndexingScript script = new IndexingScript(getSearchWithMutables(), false); IlscriptsConfig.Builder builder = new IlscriptsConfig.Builder(); script.getConfig(builder); 
IlscriptsConfig cfg = builder.build(); diff --git a/config-model/src/test/java/com/yahoo/schema/derived/IndexingScriptTestCase.java b/config-model/src/test/java/com/yahoo/schema/derived/IndexingScriptTestCase.java new file mode 100644 index 00000000000..81536bf99fb --- /dev/null +++ b/config-model/src/test/java/com/yahoo/schema/derived/IndexingScriptTestCase.java @@ -0,0 +1,41 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.document.DataType; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.MatchType; +import com.yahoo.schema.document.Matching; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.document.TemporarySDField; +import com.yahoo.vespa.configdefinition.IlscriptsConfig; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class IndexingScriptTestCase { + + private static IlscriptsConfig ilscriptsConfig(Schema schema, boolean isStreaming) { + IndexingScript script = new IndexingScript(schema, isStreaming); + IlscriptsConfig.Builder cfgBuilder = new IlscriptsConfig.Builder(); + script.getConfig(cfgBuilder); + return cfgBuilder.build(); + } + + private void verifyIndexingScript(boolean isStreaming) { + Schema schema = VsmFieldsTestCase.createSchema(); + SDField field = new TemporarySDField(schema.getDocument(), "f", DataType.STRING); + field.parseIndexingScript("{ tokenize | index }"); + field.setMatching(new Matching(MatchType.TEXT)); + schema.getDocument().addField(field); + IlscriptsConfig cfg = ilscriptsConfig(schema, isStreaming); + assertEquals(1, cfg.ilscript().size()); + assertEquals(2, cfg.ilscript(0).content().size()); + String indexing = isStreaming ? 
"index" : "tokenize | index"; + assertEquals("clear_state | guard { " + indexing + "; }", cfg.ilscript(0).content(0)); + } + @Test + void testThatTokenizeIsIgnoredFromStreaming() { + verifyIndexingScript(false); + verifyIndexingScript(true); + } +} diff --git a/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java b/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java index e469b80ab40..042feeb5f46 100644 --- a/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/derived/VsmFieldsTestCase.java @@ -23,7 +23,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; */ public class VsmFieldsTestCase { - private static Schema createSchema() { + static Schema createSchema() { Schema schema = new Schema("test", MockApplicationPackage.createEmpty(), new MockFileRegistry(), new TestableDeployLogger(), new TestProperties()); var sdoc = new SDDocumentType("test"); schema.addDocument(sdoc); diff --git a/config-model/src/test/java/com/yahoo/schema/processing/AssertIndexingScript.java b/config-model/src/test/java/com/yahoo/schema/processing/AssertIndexingScript.java index 8aeb11aaa2e..b82dc197838 100644 --- a/config-model/src/test/java/com/yahoo/schema/processing/AssertIndexingScript.java +++ b/config-model/src/test/java/com/yahoo/schema/processing/AssertIndexingScript.java @@ -18,7 +18,7 @@ import static org.junit.jupiter.api.Assertions.fail; public abstract class AssertIndexingScript { public static void assertIndexing(List<String> expected, Schema schema) { - assertIndexing(expected, new IndexingScript(schema).expressions()); + assertIndexing(expected, new IndexingScript(schema, false).expressions()); } public static void assertIndexing(List<String> expected, IndexingScript script) { diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/StatementExpression.java 
b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/StatementExpression.java index 66a45cb75e2..2db6c760380 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/StatementExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/StatementExpression.java @@ -12,7 +12,6 @@ import com.yahoo.vespa.indexinglanguage.parser.IndexingInput; import com.yahoo.vespa.indexinglanguage.parser.ParseException; import java.util.Arrays; -import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -49,7 +48,7 @@ public final class StatementExpression extends ExpressionList<Expression> { @Override public StatementExpression convertChildren(ExpressionConverter converter) { return new StatementExpression(asList().stream() - .map(child -> converter.convert(child)) + .map(converter::convert) .filter(Objects::nonNull) .toList()); } |