diff options
author | Geir Storli <geirst@yahooinc.com> | 2023-10-27 19:36:39 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-27 19:36:39 +0200 |
commit | d1cff84a0ebb690055faac2af20574a8f5bd7c9e (patch) | |
tree | b80a77305dedcaf004b2e431070dc51717afb11e | |
parent | 676a932c235032e465c55da6140847d60da4e1b5 (diff) | |
parent | 5b9b96b83d4f1f4c6802550559a829d0be05b809 (diff) |
Merge pull request #29141 from vespa-engine/toregge/validate-data-type-for-tokens-summary-transform
Validate data type for tokens summary transform.
4 files changed, 112 insertions, 0 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/Processing.java b/config-model/src/main/java/com/yahoo/schema/processing/Processing.java index 2d4b4824310..c23d87e9eba 100644 --- a/config-model/src/main/java/com/yahoo/schema/processing/Processing.java +++ b/config-model/src/main/java/com/yahoo/schema/processing/Processing.java @@ -57,6 +57,7 @@ public class Processing { AdjustSummaryTransforms::new, SummaryNamesFieldCollisions::new, SummaryFieldsMustHaveValidSource::new, + TokensTransformValidator::new, MatchedElementsOnlyResolver::new, MakeDefaultSummaryTheSuperSet::new, Bolding::new, diff --git a/config-model/src/main/java/com/yahoo/schema/processing/TokensTransformValidator.java b/config-model/src/main/java/com/yahoo/schema/processing/TokensTransformValidator.java new file mode 100644 index 00000000000..7988a0b9ceb --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/TokensTransformValidator.java @@ -0,0 +1,50 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.DataType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/* + * Check that summary fields with summary transform 'tokens' have a source field with a data type that is one of + * string, array<string> or weightedset<string>. + */ +public class TokensTransformValidator extends Processor { + public TokensTransformValidator(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if (!validate || documentsOnly) { + return; + } + for (var summary : schema.getSummaries().values()) { + for (var summaryField : summary.getSummaryFields().values()) { + if (summaryField.getTransform().isTokens()) { + var source = summaryField.getSingleSource(); + if (source != null) { + var field = schema.getField(source); + if (field != null) { + var type = field.getDataType(); + var innerType = type.getPrimitiveType(); + if (innerType != DataType.STRING) { + throw new IllegalArgumentException("For schema '" + schema.getName() + + "', document-summary '" + summary.getName() + + "', summary field '" + summaryField.getName() + + "', source field '" + field.getName() + + "', source field type '" + type.getName() + + "': transform '" + SummaryTransform.TOKENS.getName() + + "' is only allowed for fields of type" + + " string, array<string> or weightedset<string>"); + } + } + } + } + } + } + } +} diff --git a/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryTransform.java b/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryTransform.java index 50be01db04b..58f47680f9f 100644 --- a/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryTransform.java +++ b/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryTransform.java @@ -69,6 +69,8 @@ public enum SummaryTransform { return this==DYNAMICBOLDED || this==DYNAMICTEASER; } + public boolean isTokens() { return this == TOKENS; } + /** Returns whether this transform always gets its value by accessing memory only */ public boolean isInMemory() { return switch (this) { diff --git a/config-model/src/test/java/com/yahoo/schema/processing/TokensTransformValidatorTest.java b/config-model/src/test/java/com/yahoo/schema/processing/TokensTransformValidatorTest.java new file mode 100644 index 00000000000..6ca62321617 --- /dev/null +++ b/config-model/src/test/java/com/yahoo/schema/processing/TokensTransformValidatorTest.java @@ -0,0 +1,59 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.schema.ApplicationBuilder; +import com.yahoo.schema.Schema; +import com.yahoo.schema.parser.ParseException; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import org.junit.jupiter.api.Test; + +import static com.yahoo.config.model.test.TestUtil.joinLines; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.fail; + +public class TokensTransformValidatorTest { + private void buildSchema(String fieldType) throws ParseException { + String sd = joinLines( + "search test {", + " document test {", + " field f type " + fieldType + " {", + " indexing: summary", + " summary: tokens", + " }", + " }", + "}" + ); + Schema schema = ApplicationBuilder.createFromString(sd).getSchema(); + } + + void buildSchemaShouldFail(String fieldType, String expFail) throws ParseException { + try { + buildSchema(fieldType); + fail("expected IllegalArgumentException with message '" + expFail + "'"); + } catch (IllegalArgumentException e) { + assertEquals(expFail, e.getMessage()); + } + } + + @Test + void testTokensTransformWithPlainString() throws ParseException { + buildSchema("string"); + } + + @Test + void testTokensTransformWithArrayOfString() throws ParseException { + buildSchema("array<string>"); + } + + @Test + void testTokensTransformWithWeightedSetOfString() throws ParseException { + buildSchema("weightedset<string>"); + } + + @Test + void testTokensTransformWithWeightedSetOfInteger() throws ParseException { + buildSchemaShouldFail("weightedset<int>", "For schema 'test', document-summary 'default'" + + ", summary field 'f', source field 'f', source field type 'WeightedSet<int>'" + + ": transform 'tokens' is only allowed for fields of type string, array<string> or weightedset<string>"); + } +} |