diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2024-04-22 20:15:10 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-22 20:15:10 +0200 |
commit | 173eaab851d081d5b557bc3743de0d5669936d0a (patch) | |
tree | 9536f5e392d96d9d890e9ed6b2f603d53e9eba17 | |
parent | 8838084a2221cd7ca08cdcd44de02210c8fbfdd2 (diff) | |
parent | 16d6636ffa803c9c7aa43a20d7f163fcb7b5925e (diff) |
Merge pull request #30978 from vespa-engine/toregge/stop-populating-extra-summary-fields
Stop populating extra summary fields.
12 files changed, 35 insertions, 153 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/AddExtraFieldsToDocument.java b/config-model/src/main/java/com/yahoo/schema/processing/AddExtraFieldsToDocument.java index e736f289003..587ffcb86c7 100644 --- a/config-model/src/main/java/com/yahoo/schema/processing/AddExtraFieldsToDocument.java +++ b/config-model/src/main/java/com/yahoo/schema/processing/AddExtraFieldsToDocument.java @@ -6,16 +6,12 @@ import com.yahoo.schema.RankProfileRegistry; import com.yahoo.document.Field; import com.yahoo.schema.Schema; import com.yahoo.schema.document.Attribute; -import com.yahoo.schema.document.ImmutableSDField; import com.yahoo.schema.document.SDDocumentType; import com.yahoo.schema.document.SDField; import com.yahoo.vespa.documentmodel.SummaryField; import com.yahoo.vespa.documentmodel.SummaryTransform; import com.yahoo.vespa.model.container.search.QueryProfiles; -import java.util.LinkedHashSet; -import java.util.Set; - /** * This processor creates a {@link com.yahoo.schema.document.SDDocumentType} for each {@link Schema} * object which holds all the data that search @@ -24,8 +20,6 @@ import java.util.Set; */ public class AddExtraFieldsToDocument extends Processor { - Set<String> extraSummaryFields = new LinkedHashSet<String>(); - AddExtraFieldsToDocument(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { super(schema, deployLogger, rankProfileRegistry, queryProfiles); } @@ -39,29 +33,7 @@ public class AddExtraFieldsToDocument extends Processor { } for (var docsum : schema.getSummaries().values()) { for (var summaryField : docsum.getSummaryFields().values()) { - var transform = summaryField.getTransform(); - if (transform.isDynamic() && DynamicSummaryTransformUtils.summaryFieldIsRequiredInDocumentType(summaryField) || - transform == SummaryTransform.NONE) - { - addSummaryField(schema, document, summaryField, validate); - } else { - // skip: generated from attribute or similar, - // so does not need to be included as an extra - // field in the document type - } - } - } - /* - * Don't use extra summary fields when generating summaries. They will not be created nor populated by - * future vespa versions. When vespa version 'X' stops using these fields and vespa version 'Y' stops - * populating the fields, rollback from vespa version >= 'Y' to vespa version < 'X' will break (missing - * summary fields). - */ - for (var docsum : schema.getSummaries().values()) { - for (var summaryField : docsum.getSummaryFields().values()) { - if (extraSummaryFields.contains(summaryField.getName())) { - considerCopyTransformForExtraSummaryField(schema, summaryField); - } + considerCopyTransformForExtraSummaryField(schema, summaryField); } } } @@ -79,24 +51,6 @@ public class AddExtraFieldsToDocument extends Processor { addField(schema, document, field, validate); } - private void addSummaryField(Schema schema, SDDocumentType document, SummaryField field, boolean validate) { - Field docField = document.getField(field.getName()); - if (docField == null) { - ImmutableSDField existingField = schema.getField(field.getName()); - if (existingField == null) { - SDField newField = new SDField(document, field.getName(), field.getDataType()); - newField.setIsExtraField(true); - extraSummaryFields.add(field.getName()); - document.addField(newField); - } else if (!existingField.isImportedField()) { - document.addField(existingField.asField()); - } - } else if (!docField.getDataType().equals(field.getDataType())) { - if (validate) - throw newProcessException(schema, field, "Summary field has conflicting type."); - } - } - private void addField(Schema schema, SDDocumentType document, Field field, boolean validate) { if (document.getField(field.getName()) != null && !(document.getField(field.getName()) == field)) { if (validate) @@ -114,6 +68,6 @@ public class AddExtraFieldsToDocument extends Processor { private boolean fieldIsExtraSummaryField(Schema schema, String name) { var field = schema.getConcreteField(name); - return field == null || extraSummaryFields.contains(name); + return field == null; } } diff --git a/config-model/src/main/java/com/yahoo/schema/processing/DynamicSummaryTransformUtils.java b/config-model/src/main/java/com/yahoo/schema/processing/DynamicSummaryTransformUtils.java index 2e9c23dbf06..487aff81c68 100644 --- a/config-model/src/main/java/com/yahoo/schema/processing/DynamicSummaryTransformUtils.java +++ b/config-model/src/main/java/com/yahoo/schema/processing/DynamicSummaryTransformUtils.java @@ -39,20 +39,6 @@ public class DynamicSummaryTransformUtils { return (type.equals(DataType.getArray(DataType.STRING))); } - /** - * Whether a summary field must be populated by the source field with the given type in an indexing script. - */ - public static boolean summaryFieldIsPopulatedBySourceField(DataType sourceFieldType) { - return false; - } - - /** - * Whether a summary field is required as an extra field in the document type. - */ - public static boolean summaryFieldIsRequiredInDocumentType(SummaryField summaryField) { - return summaryFieldIsPopulatedBySourceField(summaryField.getDataType()); - } - public static String getSource(SummaryField summaryField, Schema schema) { // Summary fields with the original supported type is always present in the document type. // However, if the source of that summary field is a single explicit source that exists in the schema we diff --git a/config-model/src/main/java/com/yahoo/schema/processing/IndexingOutputs.java b/config-model/src/main/java/com/yahoo/schema/processing/IndexingOutputs.java index b5d1cf71809..51fb6b2b065 100644 --- a/config-model/src/main/java/com/yahoo/schema/processing/IndexingOutputs.java +++ b/config-model/src/main/java/com/yahoo/schema/processing/IndexingOutputs.java @@ -34,33 +34,25 @@ public class IndexingOutputs extends Processor { ScriptExpression script = field.getIndexingScript(); if (script == null) continue; - Set<String> summaryFields = new TreeSet<>(); - findSummaryTo(schema, field, summaryFields, summaryFields); - MyConverter converter = new MyConverter(schema, field, summaryFields, validate); + findSummaryTo(schema, field); + MyConverter converter = new MyConverter(schema, field, validate); field.setIndexingScript(schema.getName(), (ScriptExpression)converter.convert(script)); } } - public void findSummaryTo(Schema schema, SDField field, Set<String> dynamicSummary, Set<String> staticSummary) { + public void findSummaryTo(Schema schema, SDField field) { var summaryFields = schema.getSummaryFields(field); - if (summaryFields.isEmpty()) { - fillSummaryToFromField(field, dynamicSummary, staticSummary); - } else { - fillSummaryToFromSearch(schema, field, summaryFields, dynamicSummary, staticSummary); - } + fillSummaryToFromSearch(schema, field, summaryFields); } - private void fillSummaryToFromSearch(Schema schema, SDField field, List<SummaryField> summaryFields, - Set<String> dynamicSummary, Set<String> staticSummary) { + private void fillSummaryToFromSearch(Schema schema, SDField field, List<SummaryField> summaryFields) { for (SummaryField summaryField : summaryFields) { - fillSummaryToFromSummaryField(schema, field, summaryField, dynamicSummary, staticSummary); + fillSummaryToFromSummaryField(schema, field, summaryField); } } - private void fillSummaryToFromSummaryField(Schema schema, SDField field, SummaryField summaryField, - Set<String> dynamicSummary, Set<String> staticSummary) { + private void fillSummaryToFromSummaryField(Schema schema, SDField field, SummaryField summaryField) { SummaryTransform summaryTransform = summaryField.getTransform(); - String summaryName = summaryField.getName(); if (summaryTransform.isDynamic() && summaryField.getSourceCount() > 2) { // Avoid writing to summary fields that have more than a single input field, as that is handled by the // summary rewriter in the search core. @@ -68,30 +60,11 @@ public class IndexingOutputs extends Processor { } if (summaryTransform.isDynamic()) { DataType fieldType = field.getDataType(); - if (!DynamicSummaryTransformUtils.summaryFieldIsPopulatedBySourceField(fieldType)) { - if (!DynamicSummaryTransformUtils.isSupportedType(fieldType)) { - warn(schema, field, "Dynamic summaries are only supported for fields of type " + - "string and array<string>, ignoring summary field '" + summaryField.getName() + - "' for sd field '" + field.getName() + "' of type " + - fieldType.getName() + "."); - } - return; - } - dynamicSummary.add(summaryName); - } else if (summaryTransform != SummaryTransform.ATTRIBUTE && - summaryTransform != SummaryTransform.TOKENS && - summaryTransform != SummaryTransform.ATTRIBUTE_TOKENS) { - staticSummary.add(summaryName); - } - } - - private static void fillSummaryToFromField(SDField field, Set<String> dynamicSummary, Set<String> staticSummary) { - for (SummaryField summaryField : field.getSummaryFields().values()) { - String summaryName = summaryField.getName(); - if (summaryField.getTransform().isDynamic()) { - dynamicSummary.add(summaryName); - } else { - staticSummary.add(summaryName); + if (!DynamicSummaryTransformUtils.isSupportedType(fieldType)) { + warn(schema, field, "Dynamic summaries are only supported for fields of type " + + "string and array<string>, ignoring summary field '" + summaryField.getName() + + "' for sd field '" + field.getName() + "' of type " + + fieldType.getName() + "."); } } } @@ -100,13 +73,11 @@ public class IndexingOutputs extends Processor { final Schema schema; final Field field; - final Set<String> summaryFields; final boolean validate; - MyConverter(Schema schema, Field field, Set<String> summaryFields, boolean validate) { + MyConverter(Schema schema, Field field, boolean validate) { this.schema = schema; this.field = field; - this.summaryFields = summaryFields.isEmpty() ? Set.of(field.getName()) : summaryFields; this.validate = validate; } @@ -134,18 +105,7 @@ public class IndexingOutputs extends Processor { } else if (exp instanceof IndexExpression) { ret.add(new IndexExpression(field.getName())); } else if (exp instanceof SummaryExpression) { - for (String fieldName : summaryFields) { - ret.add(new SummaryExpression(fieldName)); - } - /* - * Write to summary field source. AddExtraFieldsToDocument processor adds the "copy" - * summary transform to summary fields without a corresponding explicitly declared - * document field (2023-11-01). Future vespa versions will stop adding document - * fields for those summary fields. - */ - if (!summaryFields.contains(field.getName())) { - ret.add(new SummaryExpression(field.getName())); - } + ret.add(new SummaryExpression(field.getName())); } else { throw new UnsupportedOperationException(exp.getClass().getName()); } diff --git a/config-model/src/test/derived/multiplesummaries/ilscripts.cfg b/config-model/src/test/derived/multiplesummaries/ilscripts.cfg index 14a7a62f4bb..0cdf921de25 100644 --- a/config-model/src/test/derived/multiplesummaries/ilscripts.cfg +++ b/config-model/src/test/derived/multiplesummaries/ilscripts.cfg @@ -17,7 +17,7 @@ ilscript[].content[] "clear_state | guard { input loc | to_pos | zcurve | attrib ilscript[].content[] "clear_state | guard { input a | summary a | attribute a; }" ilscript[].content[] "clear_state | guard { input adynamic | summary adynamic | attribute adynamic; }" ilscript[].content[] "clear_state | guard { input abolded | summary abolded | attribute abolded; }" -ilscript[].content[] "clear_state | guard { input b | summary anotherb | summary b; }" +ilscript[].content[] "clear_state | guard { input b | summary b; }" ilscript[].content[] "clear_state | guard { input c | summary c | attribute c; }" ilscript[].content[] "clear_state | guard { input d | summary d; }" ilscript[].content[] "clear_state | guard { input e | summary e; }" diff --git a/config-model/src/test/derived/multiplesummaries/index-info.cfg b/config-model/src/test/derived/multiplesummaries/index-info.cfg index 65ffd71d1ca..58759be9398 100644 --- a/config-model/src/test/derived/multiplesummaries/index-info.cfg +++ b/config-model/src/test/derived/multiplesummaries/index-info.cfg @@ -83,16 +83,6 @@ indexinfo[].command[].indexname "mytags" indexinfo[].command[].command "string" indexinfo[].command[].indexname "mytags" indexinfo[].command[].command "type Array<string>" -indexinfo[].command[].indexname "alltags" -indexinfo[].command[].command "multivalue" -indexinfo[].command[].indexname "alltags" -indexinfo[].command[].command "string" -indexinfo[].command[].indexname "alltags" -indexinfo[].command[].command "type Array<string>" -indexinfo[].command[].indexname "anotherb" -indexinfo[].command[].command "string" -indexinfo[].command[].indexname "anotherb" -indexinfo[].command[].command "type string" indexinfo[].command[].indexname "loc_pos.x" indexinfo[].command[].command "numerical" indexinfo[].command[].indexname "loc_pos.x" diff --git a/config-model/src/test/derived/streamingstruct/documentmanager.cfg b/config-model/src/test/derived/streamingstruct/documentmanager.cfg index f916cc26c36..0b503b04926 100644 --- a/config-model/src/test/derived/streamingstruct/documentmanager.cfg +++ b/config-model/src/test/derived/streamingstruct/documentmanager.cfg @@ -143,10 +143,4 @@ doctype[].structtype[].field[].type 10017 doctype[].structtype[].field[].name "g" doctype[].structtype[].field[].internalid 1091070635 doctype[].structtype[].field[].type 10012 -doctype[].structtype[].field[].name "snippet2" -doctype[].structtype[].field[].internalid 1812076817 -doctype[].structtype[].field[].type 10012 -doctype[].structtype[].field[].name "anothersummaryfield" -doctype[].structtype[].field[].internalid 1811005492 -doctype[].structtype[].field[].type 10012 diff --git a/config-model/src/test/java/com/yahoo/schema/SchemaImporterTestCase.java b/config-model/src/test/java/com/yahoo/schema/SchemaImporterTestCase.java index a11e743b4d2..6252fe62a1d 100644 --- a/config-model/src/test/java/com/yahoo/schema/SchemaImporterTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/SchemaImporterTestCase.java @@ -40,7 +40,7 @@ public class SchemaImporterTestCase extends AbstractSchemaTestCase { SDDocumentType document = schema.getDocument(); assertEquals("simple", document.getName()); - assertEquals(20, document.getFieldCount()); + assertEquals(19, document.getFieldCount()); SDField field; Attribute attribute; diff --git a/config-model/src/test/java/com/yahoo/schema/processing/AddExtraFieldsToDocumentTest.java b/config-model/src/test/java/com/yahoo/schema/processing/AddExtraFieldsToDocumentTest.java index aad6df62993..27e9aae04b5 100644 --- a/config-model/src/test/java/com/yahoo/schema/processing/AddExtraFieldsToDocumentTest.java +++ b/config-model/src/test/java/com/yahoo/schema/processing/AddExtraFieldsToDocumentTest.java @@ -43,9 +43,9 @@ public class AddExtraFieldsToDocumentTest { assertSummary(schema, "foo", "my_a", SummaryTransform.COPY, "a"); assertSummary(schema, "foo", "my_b", SummaryTransform.COPY, "b"); assertSummary(schema, "foo", "my_c", SummaryTransform.ATTRIBUTE, "c"); - // Extra fields should still be created - assertField(schema, "my_a", DataType.STRING); - assertField(schema,"my_b", DataType.INT); + // Extra fields should not be created + assertNull(schema.getDocument().getField("my_a")); + assertNull(schema.getDocument().getField("my_b")); assertNull(schema.getDocument().getField("my_c")); } @@ -78,11 +78,4 @@ public class AddExtraFieldsToDocumentTest { assertEquals(transform, field.getTransform()); assertEquals(source, field.getSingleSource()); } - - private void assertField(Schema schema, String name, DataType type) { - var field = schema.getDocument().getField(name); - assertNotNull(field); - assertEquals(field.getDataType(), type); - } - } diff --git a/config-model/src/test/java/com/yahoo/schema/processing/ImplicitSchemaFieldsTestCase.java b/config-model/src/test/java/com/yahoo/schema/processing/ImplicitSchemaFieldsTestCase.java index ff7e43b2936..b9685d9a4ff 100644 --- a/config-model/src/test/java/com/yahoo/schema/processing/ImplicitSchemaFieldsTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/processing/ImplicitSchemaFieldsTestCase.java @@ -37,11 +37,11 @@ public class ImplicitSchemaFieldsTestCase extends AbstractSchemaTestCase { SDDocumentType docType = schema.getDocument(); assertNotNull(docType); assertNotNull(docType.getField("foo")); - assertNotNull(docType.getField("bar")); - assertNotNull(docType.getField("cox")); + assertNull(docType.getField("bar")); + assertNull(docType.getField("cox")); assertNotNull(docType.getField("mytags")); - assertNotNull(docType.getField("alltags")); - assertEquals(5, docType.getFieldCount()); + assertNull(docType.getField("alltags")); + assertEquals(2, docType.getFieldCount()); } @Test diff --git a/config-model/src/test/java/com/yahoo/schema/processing/ImplicitStructTypesTestCase.java b/config-model/src/test/java/com/yahoo/schema/processing/ImplicitStructTypesTestCase.java index 135a9fa295a..e4d1b5da29e 100644 --- a/config-model/src/test/java/com/yahoo/schema/processing/ImplicitStructTypesTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/processing/ImplicitStructTypesTestCase.java @@ -34,9 +34,9 @@ public class ImplicitStructTypesTestCase extends AbstractSchemaTestCase { assertNotNull(docType); assertField(docType, "doc_str", DataType.STRING); - assertField(docType, "doc_str_sum", DataType.STRING); + assertNoField(docType, "doc_str_sum"); assertField(docType, "doc_uri", DataType.URI); - assertField(docType, "docsum_str", DataType.STRING); + assertNoField(docType, "docsum_str"); } @SuppressWarnings({ "UnusedDeclaration" }) @@ -60,6 +60,11 @@ public class ImplicitStructTypesTestCase extends AbstractSchemaTestCase { assertTrue(field instanceof SDField); } + private static void assertNoField(SDDocumentType docType, String fieldName) { + var field = getSecretField(docType, fieldName); + assertNull(field); + } + private static Field getSecretField(SDDocumentType docType, String fieldName) { for (Field field : docType.fieldSet()) { if (field.getName().equals(fieldName)) { diff --git a/config-model/src/test/java/com/yahoo/schema/processing/IndexingOutputsTestCase.java b/config-model/src/test/java/com/yahoo/schema/processing/IndexingOutputsTestCase.java index f56d2b21a2d..d5af996bd59 100644 --- a/config-model/src/test/java/com/yahoo/schema/processing/IndexingOutputsTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/processing/IndexingOutputsTestCase.java @@ -78,7 +78,7 @@ public class IndexingOutputsTestCase { """; var builder = ApplicationBuilder.createFromString(sd); var schema = builder.getSchema(); - assertEquals("{ input foo | summary baz | summary bar; }", + assertEquals("{ input foo | summary bar; }", schema.getConcreteField("bar").getIndexingScript().toString()); } } diff --git a/config-model/src/test/java/com/yahoo/schema/processing/IndexingScriptRewriterTestCase.java b/config-model/src/test/java/com/yahoo/schema/processing/IndexingScriptRewriterTestCase.java index 310706cb0d1..de99d46b9ca 100644 --- a/config-model/src/test/java/com/yahoo/schema/processing/IndexingScriptRewriterTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/processing/IndexingScriptRewriterTestCase.java @@ -64,7 +64,7 @@ public class IndexingScriptRewriterTestCase extends AbstractSchemaTestCase { field.addSummaryField(createStaticSummaryField(field, "test")); field.addSummaryField(createStaticSummaryField(field, "other")); field.addSummaryField(createDynamicSummaryField(field, "dyn2")); - assertIndexingScript("{ input test | tokenize normalize stem:\"BEST\" | summary other | " + + assertIndexingScript("{ input test | tokenize normalize stem:\"BEST\" | " + "summary test | index test; }", field); } @@ -113,7 +113,7 @@ public class IndexingScriptRewriterTestCase extends AbstractSchemaTestCase { "clear_state | guard { input chatter | tokenize normalize stem:\"BEST\" | index chatter; }", "clear_state | guard { input description | tokenize normalize stem:\"BEST\" | summary description | index description; }", "clear_state | guard { input exactemento_src | lowercase | tokenize normalize stem:\"BEST\" | index exactemento | summary exactemento; }", - "clear_state | guard { input longdesc | summary longdesc | summary longstat; }", + "clear_state | guard { input longdesc | summary longdesc; }", "clear_state | guard { input measurement | attribute measurement | summary measurement; }", "clear_state | guard { input measurement | to_array | attribute measurement_arr; }", "clear_state | guard { input popularity | attribute popularity; }", |