summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Geir Storli <geirst@yahooinc.com> 2023-10-27 19:36:39 +0200
committer: GitHub <noreply@github.com> 2023-10-27 19:36:39 +0200
commit: d1cff84a0ebb690055faac2af20574a8f5bd7c9e (patch)
tree: b80a77305dedcaf004b2e431070dc51717afb11e
parent: 676a932c235032e465c55da6140847d60da4e1b5 (diff)
parent: 5b9b96b83d4f1f4c6802550559a829d0be05b809 (diff)
Merge pull request #29141 from vespa-engine/toregge/validate-data-type-for-tokens-summary-transform
Validate data type for tokens summary transform.
-rw-r--r-- config-model/src/main/java/com/yahoo/schema/processing/Processing.java 1
-rw-r--r-- config-model/src/main/java/com/yahoo/schema/processing/TokensTransformValidator.java 50
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryTransform.java 2
-rw-r--r-- config-model/src/test/java/com/yahoo/schema/processing/TokensTransformValidatorTest.java 59
4 files changed, 112 insertions, 0 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/Processing.java b/config-model/src/main/java/com/yahoo/schema/processing/Processing.java
index 2d4b4824310..c23d87e9eba 100644
--- a/config-model/src/main/java/com/yahoo/schema/processing/Processing.java
+++ b/config-model/src/main/java/com/yahoo/schema/processing/Processing.java
@@ -57,6 +57,7 @@ public class Processing {
AdjustSummaryTransforms::new,
SummaryNamesFieldCollisions::new,
SummaryFieldsMustHaveValidSource::new,
+ TokensTransformValidator::new,
MatchedElementsOnlyResolver::new,
MakeDefaultSummaryTheSuperSet::new,
Bolding::new,
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/TokensTransformValidator.java b/config-model/src/main/java/com/yahoo/schema/processing/TokensTransformValidator.java
new file mode 100644
index 00000000000..7988a0b9ceb
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/processing/TokensTransformValidator.java
@@ -0,0 +1,50 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.processing;
+
+import com.yahoo.config.application.api.DeployLogger;
+import com.yahoo.document.DataType;
+import com.yahoo.schema.RankProfileRegistry;
+import com.yahoo.schema.Schema;
+import com.yahoo.vespa.documentmodel.SummaryTransform;
+import com.yahoo.vespa.model.container.search.QueryProfiles;
+
+/*
+ * Check that summary fields with summary transform 'tokens' have a source field with a data type that is one of
+ * string, array<string> or weightedset<string>.
+ */
+public class TokensTransformValidator extends Processor {
+ public TokensTransformValidator(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) {
+ super(schema, deployLogger, rankProfileRegistry, queryProfiles);
+ }
+
+ @Override
+ public void process(boolean validate, boolean documentsOnly) {
+ if (!validate || documentsOnly) {
+ return;
+ }
+ for (var summary : schema.getSummaries().values()) {
+ for (var summaryField : summary.getSummaryFields().values()) {
+ if (summaryField.getTransform().isTokens()) {
+ var source = summaryField.getSingleSource();
+ if (source != null) {
+ var field = schema.getField(source);
+ if (field != null) {
+ var type = field.getDataType();
+ var innerType = type.getPrimitiveType();
+ if (innerType != DataType.STRING) {
+ throw new IllegalArgumentException("For schema '" + schema.getName() +
+ "', document-summary '" + summary.getName() +
+ "', summary field '" + summaryField.getName() +
+ "', source field '" + field.getName() +
+ "', source field type '" + type.getName() +
+ "': transform '" + SummaryTransform.TOKENS.getName() +
+ "' is only allowed for fields of type" +
+ " string, array<string> or weightedset<string>");
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryTransform.java b/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryTransform.java
index 50be01db04b..58f47680f9f 100644
--- a/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryTransform.java
+++ b/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryTransform.java
@@ -69,6 +69,8 @@ public enum SummaryTransform {
return this==DYNAMICBOLDED || this==DYNAMICTEASER;
}
+ public boolean isTokens() { return this == TOKENS; }
+
/** Returns whether this transform always gets its value by accessing memory only */
public boolean isInMemory() {
return switch (this) {
diff --git a/config-model/src/test/java/com/yahoo/schema/processing/TokensTransformValidatorTest.java b/config-model/src/test/java/com/yahoo/schema/processing/TokensTransformValidatorTest.java
new file mode 100644
index 00000000000..6ca62321617
--- /dev/null
+++ b/config-model/src/test/java/com/yahoo/schema/processing/TokensTransformValidatorTest.java
@@ -0,0 +1,59 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.processing;
+
+import com.yahoo.schema.ApplicationBuilder;
+import com.yahoo.schema.Schema;
+import com.yahoo.schema.parser.ParseException;
+import com.yahoo.vespa.documentmodel.SummaryTransform;
+import org.junit.jupiter.api.Test;
+
+import static com.yahoo.config.model.test.TestUtil.joinLines;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.fail;
+
+/**
+ * Tests that building a schema with a 'summary: tokens' field succeeds for string,
+ * array&lt;string&gt; and weightedset&lt;string&gt; fields, and fails with a descriptive
+ * message for weightedset&lt;int&gt;.
+ */
+public class TokensTransformValidatorTest {
+
+    /**
+     * Builds a schema named 'test' with a single field 'f' of the given type that uses
+     * 'summary: tokens'.
+     *
+     * @param fieldType the schema data type of field 'f', e.g. "array&lt;string&gt;"
+     * @throws ParseException if the generated schema definition cannot be parsed
+     */
+    private void buildSchema(String fieldType) throws ParseException {
+        String sd = joinLines(
+                "search test {",
+                " document test {",
+                " field f type " + fieldType + " {",
+                " indexing: summary",
+                " summary: tokens",
+                " }",
+                " }",
+                "}"
+        );
+        // Only the side effect of building (and thereby validating) the schema matters here;
+        // the resulting schema object is intentionally not kept.
+        ApplicationBuilder.createFromString(sd).getSchema();
+    }
+
+    /**
+     * Builds a schema with the given field type and asserts that building fails with an
+     * IllegalArgumentException carrying exactly the expected message.
+     *
+     * @param fieldType the schema data type of field 'f'
+     * @param expFail   the exact exception message expected
+     * @throws ParseException if the generated schema definition cannot be parsed
+     */
+    void buildSchemaShouldFail(String fieldType, String expFail) throws ParseException {
+        try {
+            buildSchema(fieldType);
+            fail("expected IllegalArgumentException with message '" + expFail + "'");
+        } catch (IllegalArgumentException e) {
+            assertEquals(expFail, e.getMessage());
+        }
+    }
+
+    @Test
+    void testTokensTransformWithPlainString() throws ParseException {
+        buildSchema("string");
+    }
+
+    @Test
+    void testTokensTransformWithArrayOfString() throws ParseException {
+        buildSchema("array<string>");
+    }
+
+    @Test
+    void testTokensTransformWithWeightedSetOfString() throws ParseException {
+        buildSchema("weightedset<string>");
+    }
+
+    @Test
+    void testTokensTransformWithWeightedSetOfInteger() throws ParseException {
+        buildSchemaShouldFail("weightedset<int>", "For schema 'test', document-summary 'default'" +
+                ", summary field 'f', source field 'f', source field type 'WeightedSet<int>'" +
+                ": transform 'tokens' is only allowed for fields of type string, array<string> or weightedset<string>");
+    }
+}