diff options
Diffstat (limited to 'config-model/src/main/java/com/yahoo/schema/processing')
71 files changed, 5586 insertions, 0 deletions
// diff --git a/config-model/src/main/java/com/yahoo/schema/processing/AddAttributeTransformToSummaryOfImportedFields.java b/config-model/src/main/java/com/yahoo/schema/processing/AddAttributeTransformToSummaryOfImportedFields.java new file mode 100644 index 00000000000..d96cd88f6be --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/AddAttributeTransformToSummaryOfImportedFields.java @@ -0,0 +1,61 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.processing;

import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.schema.RankProfileRegistry;
import com.yahoo.schema.Schema;
import com.yahoo.schema.document.ImmutableImportedComplexSDField;
import com.yahoo.schema.document.ImmutableSDField;
import com.yahoo.vespa.documentmodel.SummaryField;
import com.yahoo.vespa.documentmodel.SummaryTransform;
import com.yahoo.vespa.model.container.search.QueryProfiles;

import java.util.stream.Stream;

/**
 * Sets an attribute based summary transform on every {@link SummaryField} that has an imported field as source:
 * {@link SummaryTransform#ATTRIBUTE} for plain imported fields, and an attribute combiner transform for
 * imported complex fields.
 *
 * @author bjorncs
 */
public class AddAttributeTransformToSummaryOfImportedFields extends Processor {

    public AddAttributeTransformToSummaryOfImportedFields(Schema schema,
                                                          DeployLogger deployLogger,
                                                          RankProfileRegistry rankProfileRegistry,
                                                          QueryProfiles queryProfiles) {
        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
    }

    /** Visits every imported field of the schema and updates the summary fields sourced from it. */
    @Override
    public void process(boolean validate, boolean documentsOnly) {
        schema.allImportedFields().forEach(this::setTransform);
    }

    /** Returns the summary fields the schema reports for the given imported field. */
    private Stream<SummaryField> getSummaryFieldsForImportedField(ImmutableSDField importedField) {
        return schema.getSummaryFields(importedField).stream();
    }

    /** Picks the combiner variant for imported complex fields and the plain attribute variant otherwise. */
    private void setTransform(ImmutableSDField field) {
        boolean complex = field instanceof ImmutableImportedComplexSDField;
        getSummaryFieldsForImportedField(field).forEach(summaryField -> {
            if (complex) {
                setAttributeCombinerTransform(summaryField);
            } else {
                setAttributeTransform(summaryField);
            }
        });
    }

    /** Switches to {@link SummaryTransform#ATTRIBUTE}, but only when no transform has been set yet. */
    private static void setAttributeTransform(SummaryField summaryField) {
        if (summaryField.getTransform() != SummaryTransform.NONE) {
            return; // an existing transform is left as it is
        }
        summaryField.setTransform(SummaryTransform.ATTRIBUTE);
    }

    /** Maps the matched-elements filter to its attribute counterpart; everything else becomes the attribute combiner. */
    private static void setAttributeCombinerTransform(SummaryField summaryField) {
        SummaryTransform replacement =
                summaryField.getTransform() == SummaryTransform.MATCHED_ELEMENTS_FILTER
                        ? SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER
                        : SummaryTransform.ATTRIBUTECOMBINER;
        summaryField.setTransform(replacement);
    }
}
// diff --git a/config-model/src/main/java/com/yahoo/schema/processing/AddExtraFieldsToDocument.java b/config-model/src/main/java/com/yahoo/schema/processing/AddExtraFieldsToDocument.java new file mode 100644 index 00000000000..ca81301da73 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/AddExtraFieldsToDocument.java @@ -0,0 +1,90 @@
// Copyright Yahoo.
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.Field; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.schema.document.SDDocumentType; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * This processor creates a {@link com.yahoo.schema.document.SDDocumentType} for each {@link Schema} + * object which holds all the data that search + * associates with a document described in a search definition file. This includes all extra fields, summary fields and + * implicit fields. All non-indexed and non-summary fields are discarded. + */ +public class AddExtraFieldsToDocument extends Processor { + + AddExtraFieldsToDocument(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + SDDocumentType document = schema.getDocument(); + if (document != null) { + for (SDField field : schema.extraFieldList()) { + addSdField(schema, document, field, validate); + } + for (var docsum : schema.getSummaries().values()) { + for (var summaryField : docsum.getSummaryFields().values()) { + switch (summaryField.getTransform()) { + case NONE: + case BOLDED: + case DYNAMICBOLDED: + case DYNAMICTEASER: + case TEXTEXTRACTOR: + addSummaryField(schema, document, summaryField, validate); + break; + default: + // skip: generated from attribute or similar, + // so does not need to be included as an extra + // field in the document type + } + } + } + } + } + + private void 
addSdField(Schema schema, SDDocumentType document, SDField field, boolean validate) { + if (! field.hasIndex() && field.getAttributes().isEmpty()) { + return; + } + for (Attribute atr : field.getAttributes().values()) { + if (!atr.getName().equals(field.getName())) { + addField(schema, document, new SDField(document, atr.getName(), atr.getDataType()), validate); + } + } + addField(schema, document, field, validate); + } + + private void addSummaryField(Schema schema, SDDocumentType document, SummaryField field, boolean validate) { + Field docField = document.getField(field.getName()); + if (docField == null) { + ImmutableSDField existingField = schema.getField(field.getName()); + if (existingField == null) { + SDField newField = new SDField(document, field.getName(), field.getDataType()); + newField.setIsExtraField(true); + document.addField(newField); + } else if (!existingField.isImportedField()) { + document.addField(existingField.asField()); + } + } else if (!docField.getDataType().equals(field.getDataType())) { + if (validate) + throw newProcessException(schema, field, "Summary field has conflicting type."); + } + } + + private void addField(Schema schema, SDDocumentType document, Field field, boolean validate) { + if (document.getField(field.getName()) != null && !(document.getField(field.getName()) == field)) { + if (validate) + throw newProcessException(schema, field, "Field shadows another."); + } + document.addField(field); + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/AdjustPositionSummaryFields.java b/config-model/src/main/java/com/yahoo/schema/processing/AdjustPositionSummaryFields.java new file mode 100644 index 00000000000..6c2d62f37cb --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/AdjustPositionSummaryFields.java @@ -0,0 +1,135 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.config.model.api.ModelContext; +import com.yahoo.document.ArrayDataType; +import com.yahoo.document.DataType; +import com.yahoo.document.PositionDataType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.GeoPos; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryField.Source; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/* + * Adjusts position summary fields by adding derived summary fields (.distance and .position) and setting summary + * transform and source. + */ +public class AdjustPositionSummaryFields extends Processor { + + public AdjustPositionSummaryFields(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + private boolean useV8GeoPositions = false; + + @Override + public void process(boolean validate, boolean documentsOnly, ModelContext.Properties properties) { + this.useV8GeoPositions = properties.featureFlags().useV8GeoPositions(); + process(validate, documentsOnly); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (DocumentSummary summary : schema.getSummaries().values()) { + scanSummary(summary); + } + } + + private void scanSummary(DocumentSummary summary) { + for (SummaryField summaryField : summary.getSummaryFields().values()) { + if ( ! 
GeoPos.isAnyPos(summaryField.getDataType())) continue; + + String originalSource = summaryField.getSingleSource(); + if (originalSource.indexOf('.') == -1) { // Eliminate summary fields with pos.x or pos.y as source + ImmutableSDField sourceField = schema.getField(originalSource); + if (sourceField != null) { + String zCurve = null; + if (sourceField.getDataType().equals(summaryField.getDataType())) { + zCurve = PositionDataType.getZCurveFieldName(originalSource); + } else if (sourceField.getDataType().equals(makeZCurveDataType(summaryField.getDataType())) && + hasZCurveSuffix(originalSource)) { + zCurve = originalSource; + } + if (zCurve != null) { + if (hasPositionAttribute(zCurve)) { + Source source = new Source(zCurve); + adjustPositionField(summary, summaryField, source); + } else if (sourceField.isImportedField() || !summaryField.getName().equals(originalSource)) { + fail(summaryField, "No position attribute '" + zCurve + "'"); + } + } + } + } + } + } + + private void adjustPositionField(DocumentSummary summary, SummaryField summaryField, Source source) { + summaryField.setTransform(SummaryTransform.GEOPOS); + summaryField.getSources().clear(); + summaryField.addSource(source); + ensureSummaryField(summary, + PositionDataType.getPositionSummaryFieldName(summaryField.getName()), + DataType.getArray(DataType.STRING), + source, + SummaryTransform.POSITIONS); + ensureSummaryField(summary, + PositionDataType.getDistanceSummaryFieldName(summaryField.getName()), + DataType.INT, + source, + SummaryTransform.DISTANCE); + } + + private void ensureSummaryField(DocumentSummary summary, String fieldName, DataType dataType, Source source, SummaryTransform transform) { + SummaryField oldField = schema.getSummaryField(fieldName); + if (oldField == null) { + if (useV8GeoPositions) return; + SummaryField newField = new SummaryField(fieldName, dataType, transform); + newField.addSource(source); + summary.add(newField); + return; + } + if 
(!oldField.getDataType().equals(dataType)) { + fail(oldField, "exists with type '" + oldField.getDataType().toString() + "', should be of type '" + dataType.toString() + "'"); + } + if (oldField.getTransform() != transform) { + fail(oldField, "has summary transform '" + oldField.getTransform().toString() + "', should have transform '" + transform.toString() + "'"); + } + if (oldField.getSourceCount() != 1 || !oldField.getSingleSource().equals(source.getName())) { + fail(oldField, "has source '" + oldField.getSources().toString() + "', should have source '" + source + "'"); + } + if (useV8GeoPositions) return; + summary.add(oldField); + } + + private boolean hasPositionAttribute(String name) { + Attribute attribute = schema.getAttribute(name); + if (attribute == null) { + ImmutableSDField field = schema.getField(name); + if (field != null && field.isImportedField()) { + attribute = field.getAttribute(); + } + } + return attribute != null && attribute.isPosition(); + } + + private static boolean hasZCurveSuffix(String name) { + String suffix = PositionDataType.getZCurveFieldName(""); + return name.length() > suffix.length() && name.substring(name.length() - suffix.length()).equals(suffix); + } + + private static DataType makeZCurveDataType(DataType dataType) { + return dataType instanceof ArrayDataType ? DataType.getArray(DataType.LONG) : DataType.LONG; + } + + private void fail(SummaryField summaryField, String msg) { + throw newProcessException(schema.getName(), summaryField.getName(), msg); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/AttributeProperties.java b/config-model/src/main/java/com/yahoo/schema/processing/AttributeProperties.java new file mode 100644 index 00000000000..6c7dbaecbfb --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/AttributeProperties.java @@ -0,0 +1,72 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Checks that attribute properties only are set for attributes that have data (are created by an indexing statement). + * + * @author hmusum + */ +public class AttributeProperties extends Processor { + + public AttributeProperties(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (ImmutableSDField field : schema.allConcreteFields()) { + String fieldName = field.getName(); + + // For each attribute, check if the attribute has been created + // by an indexing statement. + for (Attribute attribute : field.getAttributes().values()) { + if (attributeCreated(field, attribute.getName())) { + continue; + } + // Check other fields or statements that may have created this attribute. + boolean created = false; + for (SDField f : schema.allConcreteFields()) { + // Checking against the field we are looking at + if (!f.getName().equals(fieldName)) { + if (attributeCreated(f, attribute.getName())) { + created = true; + break; + } + } + } + if (validate && !created) { + throw new IllegalArgumentException("Attribute '" + attribute.getName() + "' in field '" + + field.getName() + "' is not created by the indexing statement"); + } + } + } + } + + /** + * Checks if the attribute has been created bye an indexing statement in this field. 
+ * + * @param field a searchdefinition field + * @param attributeName name of the attribute + * @return true if the attribute has been created by this field, else false + */ + static boolean attributeCreated(ImmutableSDField field, String attributeName) { + if ( ! field.doesAttributing()) { + return false; + } + for (Attribute attribute : field.getAttributes().values()) { + if (attribute.getName().equals(attributeName)) { + return true; + } + } + return false; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/AttributesImplicitWord.java b/config-model/src/main/java/com/yahoo/schema/processing/AttributesImplicitWord.java new file mode 100644 index 00000000000..415f23f2786 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/AttributesImplicitWord.java @@ -0,0 +1,57 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.DataType; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.schema.document.MatchType; +import com.yahoo.document.NumericDataType; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Fields that derive to attribute(s) and no indices should use the WORD indexing form, + * in a feeble attempt to match the most peoples expectations as closely as possible. 
+ * + * @author Vegard Havdal + */ +public class AttributesImplicitWord extends Processor { + + public AttributesImplicitWord(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (ImmutableSDField field : schema.allConcreteFields()) { + processFieldRecursive(field); + } + } + + private void processFieldRecursive(ImmutableSDField field) { + processField(field); + for (ImmutableSDField structField : field.getStructFields()) { + processFieldRecursive(structField); + } + } + + private void processField(ImmutableSDField field) { + if (fieldImplicitlyWordMatch(field)) { + field.getMatching().setType(MatchType.WORD); + } + } + + private boolean fieldImplicitlyWordMatch(ImmutableSDField field) { + // numeric types should not trigger exact-match query parsing + DataType dt = field.getDataType().getPrimitiveType(); + if (dt != null && dt instanceof NumericDataType) { + return false; + } + return (! field.hasIndex() + && !field.getAttributes().isEmpty() + && field.getIndices().isEmpty() + && !field.getMatching().isTypeUserSet()); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/Bolding.java b/config-model/src/main/java/com/yahoo/schema/processing/Bolding.java new file mode 100644 index 00000000000..53a3d462d54 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/Bolding.java @@ -0,0 +1,47 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.DataType; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Checks that bolding or dynamic summary is turned on only for text fields. Throws exception if it is turned on for any + * other fields (otherwise will cause indexing failure) + * + * @author hmusum + */ +public class Bolding extends Processor { + + public Bolding(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + for (ImmutableSDField field : schema.allConcreteFields()) { + for (SummaryField summary : field.getSummaryFields().values()) { + if (summary.getTransform().isBolded() && + !((summary.getDataType() == DataType.STRING) || (summary.getDataType() == DataType.URI))) + { + throw new IllegalArgumentException("'bolding: on' for non-text field " + + "'" + field.getName() + "'" + + " (" + summary.getDataType() + ")" + + " is not allowed"); + } else if (summary.getTransform().isDynamic() && + !((summary.getDataType() == DataType.STRING) || (summary.getDataType() == DataType.URI))) + { + throw new IllegalArgumentException("'summary: dynamic' for non-text field " + + "'" + field.getName() + "'" + + " (" + summary.getDataType() + ")" + + " is not allowed"); + } + } + } + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/BoolAttributeValidator.java b/config-model/src/main/java/com/yahoo/schema/processing/BoolAttributeValidator.java new file mode 100644 index 00000000000..bdb1eed4b10 --- /dev/null +++ 
b/config-model/src/main/java/com/yahoo/schema/processing/BoolAttributeValidator.java @@ -0,0 +1,36 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Validates attribute fields using bool type, ensuring the collection type is supported. + * + * Currently, only the single value bool type is supported. + * + * @author geirst + */ +public class BoolAttributeValidator extends Processor { + + public BoolAttributeValidator(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (var field : schema.allConcreteFields()) { + var attribute = field.getAttribute(); + if (attribute == null) { + continue; + } + if (attribute.getType().equals(Attribute.Type.BOOL) && + !attribute.getCollectionType().equals(Attribute.CollectionType.SINGLE)) { + fail(schema, field, "Only single value bool attribute fields are supported"); + } + } + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/BuiltInFieldSets.java b/config-model/src/main/java/com/yahoo/schema/processing/BuiltInFieldSets.java new file mode 100644 index 00000000000..514cbf225fd --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/BuiltInFieldSets.java @@ -0,0 +1,52 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.DocumentType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.Field; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Adds field sets for 1) fields defined inside document type 2) fields inside search but outside document + * + * @author Vegard Havdal + */ +public class BuiltInFieldSets extends Processor { + + public static final String SEARCH_FIELDSET_NAME = "[search]"; // Public due to oddities in position handling. + public static final String INTERNAL_FIELDSET_NAME = "[internal]"; // This one populated from misc places + + public BuiltInFieldSets(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + addDocumentFieldSet(); + addSearchFieldSet(); + // "Hook" the field sets on search onto the document types, since we will include them + // on the document configs + schema.getDocument().setFieldSets(schema.fieldSets()); + } + + private void addSearchFieldSet() { + for (SDField searchField : schema.extraFieldList()) { + schema.fieldSets().addBuiltInFieldSetItem(SEARCH_FIELDSET_NAME, searchField.getName()); + } + } + + private void addDocumentFieldSet() { + for (Field docField : schema.getDocument().fieldSet()) { + if (docField instanceof SDField && ((SDField) docField).isExtraField()) { + continue; // skip + } + schema.fieldSets().addBuiltInFieldSetItem(DocumentType.DOCUMENT, docField.getName()); + } + } + + + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/CreatePositionZCurve.java b/config-model/src/main/java/com/yahoo/schema/processing/CreatePositionZCurve.java new file mode 100644 index 
// 00000000000..5bb5079fab6 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/CreatePositionZCurve.java @@ -0,0 +1,216 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.processing;

import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.config.model.api.ModelContext;
import com.yahoo.schema.RankProfileRegistry;
import com.yahoo.document.ArrayDataType;
import com.yahoo.document.DataType;
import com.yahoo.document.PositionDataType;
import com.yahoo.schema.Schema;
import com.yahoo.schema.document.Attribute;
import com.yahoo.schema.document.GeoPos;
import com.yahoo.schema.document.SDDocumentType;
import com.yahoo.schema.document.SDField;
import com.yahoo.vespa.documentmodel.SummaryField;
import com.yahoo.vespa.documentmodel.SummaryTransform;
import com.yahoo.vespa.indexinglanguage.ExpressionConverter;
import com.yahoo.vespa.indexinglanguage.expressions.AttributeExpression;
import com.yahoo.vespa.indexinglanguage.expressions.Expression;
import com.yahoo.vespa.indexinglanguage.expressions.ForEachExpression;
import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression;
import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression;
import com.yahoo.vespa.indexinglanguage.expressions.SummaryExpression;
import com.yahoo.vespa.indexinglanguage.expressions.ZCurveExpression;
import com.yahoo.vespa.model.container.search.QueryProfiles;

import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.logging.Level;

/**
 * Adds a "fieldName_zcurve" long attribute to all position type fields that do attributing, and
 * (unless V8 geo positions are enabled) "fieldName.distance" and "fieldName.position" summary fields.
 *
 * @author bratseth
 */
public class CreatePositionZCurve extends Processor {

    private final SDDocumentType repo;

    public CreatePositionZCurve(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) {
        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
        this.repo = schema.getDocument();
    }

    /** Read from the feature flags in the three-argument {@code process}; stays false otherwise. */
    private boolean useV8GeoPositions = false;

    @Override
    public void process(boolean validate, boolean documentsOnly, ModelContext.Properties properties) {
        this.useV8GeoPositions = properties.featureFlags().useV8GeoPositions();
        process(validate, documentsOnly);
    }

    @Override
    public void process(boolean validate, boolean documentsOnly) {
        for (SDField field : schema.allConcreteFields()) {
            if (isSupportedPositionType(field.getDataType())) {
                processPositionField(field, validate);
            }
        }
    }

    /**
     * Replaces the attribute of one position field by a z-curve extra field, moves the summary
     * configuration over to that z-curve field and clears the original indexing scripts.
     */
    private void processPositionField(SDField field, boolean validate) {
        DataType fieldType = field.getDataType();
        if (validate && field.doesIndexing()) {
            fail(schema, field, "Indexing of data type '" + fieldType.getName() + "' is not supported, " +
                                "replace 'index' statement with 'attribute'.");
        }
        if ( ! field.doesAttributing()) return;

        // Must be read before the indexing script is cleared further down.
        boolean doesSummary = field.doesSummarying();

        String fieldName = field.getName();
        field.getAttributes().remove(fieldName);

        String zName = PositionDataType.getZCurveFieldName(fieldName);
        SDField zCurveField = createZCurveField(field, zName, validate);
        schema.addExtraField(zCurveField);
        schema.fieldSets().addBuiltInFieldSetItem(BuiltInFieldSets.INTERNAL_FIELDSET_NAME, zCurveField.getName());

        // configure summary
        Collection<String> summaryTo = removeSummaryTo(field);
        if ( ! useV8GeoPositions) {
            ensureCompatibleSummary(field, zName,
                                    PositionDataType.getPositionSummaryFieldName(fieldName),
                                    DataType.getArray(DataType.STRING), // will become "xmlstring"
                                    SummaryTransform.POSITIONS, summaryTo, validate);
            ensureCompatibleSummary(field, zName,
                                    PositionDataType.getDistanceSummaryFieldName(fieldName),
                                    DataType.INT,
                                    SummaryTransform.DISTANCE, summaryTo, validate);
        }

        // clear indexing script, on the field and on its x and y struct fields when present
        field.setIndexingScript(null);
        clearIndexingScript(field.getStructField(PositionDataType.FIELD_X));
        clearIndexingScript(field.getStructField(PositionDataType.FIELD_Y));

        if (doesSummary) {
            ensureCompatibleSummary(field, zName,
                                    field.getName(),
                                    field.getDataType(),
                                    SummaryTransform.GEOPOS, summaryTo, validate);
        }
    }

    private static void clearIndexingScript(SDField structField) {
        if (structField != null) {
            structField.setIndexingScript(null);
        }
    }

    /**
     * Builds the z-curve field: a fast-search position attribute of type long (array of long for array
     * inputs) whose indexing script is the input script with summary removed and attributing replaced
     * by z-curve encoding.
     */
    private SDField createZCurveField(SDField inputField, String fieldName, boolean validate) {
        // NOTE(review): '&&' binds tighter than '||', so an existing attribute is rejected even when
        // validate is false. The parentheses only spell out that existing behaviour; confirm whether
        // 'validate && (field exists || attribute exists)' was intended.
        boolean nameTaken = (validate && schema.getConcreteField(fieldName) != null)
                            || schema.getAttribute(fieldName) != null;
        if (nameTaken) {
            throw newProcessException(schema, null, "Incompatible position attribute '" + fieldName +
                                                    "' already created.");
        }

        boolean isArray = inputField.getDataType() instanceof ArrayDataType;
        DataType zCurveType = isArray ? DataType.getArray(DataType.LONG) : DataType.LONG;
        Attribute.CollectionType collectionType = isArray ? Attribute.CollectionType.ARRAY
                                                          : Attribute.CollectionType.SINGLE;

        SDField field = new SDField(repo, fieldName, zCurveType);
        Attribute attribute = new Attribute(fieldName, Attribute.Type.LONG, collectionType);
        attribute.setPosition(true);
        attribute.setFastSearch(true);
        field.addAttribute(attribute);

        ScriptExpression script = inputField.getIndexingScript();
        script = (ScriptExpression)new RemoveSummary(inputField.getName()).convert(script);
        script = (ScriptExpression)new PerformZCurve(field, fieldName).convert(script);
        field.setIndexingScript(script);
        return field;
    }

    /**
     * Creates the named summary field, or adapts an existing compatible one, and makes {@code sourceName}
     * its only source. An existing field with a different, already set transform is only logged and left alone.
     */
    private void ensureCompatibleSummary(SDField field, String sourceName, String summaryName, DataType summaryType,
                                         SummaryTransform summaryTransform, Collection<String> summaryTo, boolean validate) {
        SummaryField summary = schema.getSummaryField(summaryName);
        if (summary == null) {
            summary = new SummaryField(summaryName, summaryType, summaryTransform);
            summary.addDestination("default");
            summary.addDestinations(summaryTo);
            field.addSummaryField(summary);
        } else if (!summary.getDataType().equals(summaryType)) {
            if (validate) {
                fail(schema, field, "Incompatible summary field '" + summaryName + "' type " + summary.getDataType() + " already created.");
            }
        } else if (summary.getTransform() == SummaryTransform.NONE) {
            summary.setTransform(summaryTransform);
            summary.addDestination("default");
            summary.addDestinations(summaryTo);
        } else if (summary.getTransform() != summaryTransform) {
            deployLogger.logApplicationPackage(Level.WARNING, "Summary field " + summaryName + " has wrong transform: " + summary.getTransform());
            return;
        }
        summary.getSources().clear();
        summary.addSource(new SummaryField.Source(sourceName));
    }

    /** Removes all summary fields from the field and returns the union of the destinations they had. */
    private Set<String> removeSummaryTo(SDField field) {
        Set<String> destinations = new HashSet<>();
        for (SummaryField summary : field.getSummaryFields().values()) {
            destinations.addAll(summary.getDestinations());
        }
        field.removeSummaryFields();
        return destinations;
    }

    private static boolean isSupportedPositionType(DataType dataType) {
        return GeoPos.isAnyPos(dataType);
    }

    /** Converts summary expressions that name the given field, or name no field, to null. */
    private static class RemoveSummary extends ExpressionConverter {

        final String find;

        RemoveSummary(String find) {
            this.find = find;
        }

        @Override
        protected boolean shouldConvert(Expression exp) {
            if (exp instanceof SummaryExpression) {
                String fieldName = ((SummaryExpression)exp).getFieldName();
                return fieldName == null || fieldName.equals(find);
            }
            return false;
        }

        @Override
        protected Expression doConvert(Expression exp) {
            return null;
        }
    }

    /**
     * Replaces attribute expressions that name the given field, or name no field, with a z-curve
     * (per element for arrays) followed by an attribute expression for the replacement name.
     */
    private static class PerformZCurve extends ExpressionConverter {

        final String find;
        final String replace;
        final boolean isArray;

        PerformZCurve(SDField find, String replace) {
            this.find = find.getName();
            this.replace = replace;
            this.isArray = find.getDataType() instanceof ArrayDataType;
        }

        @Override
        protected boolean shouldConvert(Expression exp) {
            if (exp instanceof AttributeExpression) {
                String fieldName = ((AttributeExpression)exp).getFieldName();
                return fieldName == null || fieldName.equals(find);
            }
            return false;
        }

        @Override
        protected Expression doConvert(Expression exp) {
            return new StatementExpression(
                    isArray ? new ForEachExpression(new ZCurveExpression())
                            : new ZCurveExpression(),
                    new AttributeExpression(replace));
        }
    }

}
// diff --git a/config-model/src/main/java/com/yahoo/schema/processing/DictionaryProcessor.java b/config-model/src/main/java/com/yahoo/schema/processing/DictionaryProcessor.java new file mode 100644 index 00000000000..3209fd1703d --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/DictionaryProcessor.java @@ -0,0 +1,54 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.processing;

import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.document.NumericDataType;
import com.yahoo.document.PrimitiveDataType;
import com.yahoo.schema.RankProfileRegistry;
import com.yahoo.schema.Schema;
import com.yahoo.schema.document.Attribute;
import com.yahoo.schema.document.Case;
import com.yahoo.schema.document.Dictionary;
import com.yahoo.schema.document.SDField;
import com.yahoo.vespa.model.container.search.QueryProfiles;

/**
 * Propagates dictionary settings from field level to attribute level.
 *
 * <p>For every concrete field that has an attribute, the field's match casing is copied to the attribute.
 * If the field also declares a dictionary, it is handled by attribute type:</p>
 * <ul>
 *   <li>numeric: the dictionary is set only when the attribute is fast-search, otherwise {@code fail} is called;</li>
 *   <li>string: the dictionary is set, a hash dictionary must use cased matching, and the dictionary casing
 *       must equal the attribute casing;</li>
 *   <li>anything else: {@code fail} is called.</li>
 * </ul>
 *
 * @author baldersheim
 */
public class DictionaryProcessor extends Processor {

    public DictionaryProcessor(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) {
        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
    }

    /**
     * Copies casing and dictionary settings from each concrete field to its attribute.
     *
     * @param validate      not consulted by this processor
     * @param documentsOnly not consulted by this processor
     */
    @Override
    public void process(boolean validate, boolean documentsOnly) {
        for (SDField field : schema.allConcreteFields()) {
            Attribute attribute = field.getAttribute();
            if (attribute == null) continue;

            // Casing is propagated whether or not a dictionary is declared.
            attribute.setCase(field.getMatching().getCase());

            Dictionary dictionary = field.getDictionary();
            if (dictionary == null) continue;

            // Looked up once; both type checks below use the same value.
            var primitiveType = attribute.getDataType().getPrimitiveType();
            if (primitiveType instanceof NumericDataType) {
                if (attribute.isFastSearch()) {
                    attribute.setDictionary(dictionary);
                } else {
                    fail(schema, field, "You must specify 'attribute:fast-search' to allow dictionary control");
                }
            } else if (primitiveType == PrimitiveDataType.STRING) {
                attribute.setDictionary(dictionary);
                if (dictionary.getType() == Dictionary.Type.HASH && dictionary.getMatch() != Case.CASED) {
                    fail(schema, field, "hash dictionary require cased match");
                }
                if ( ! dictionary.getMatch().equals(attribute.getCase())) {
                    fail(schema, field, "Dictionary casing '" + dictionary.getMatch() + "' does not match field match casing '" + attribute.getCase() + "'");
                }
            } else {
                fail(schema, field, "You can only specify 'dictionary:' for numeric or string fields");
            }
        }
    }

}

// diff --git a/config-model/src/main/java/com/yahoo/schema/processing/DisallowComplexMapAndWsetKeyTypes.java b/config-model/src/main/java/com/yahoo/schema/processing/DisallowComplexMapAndWsetKeyTypes.java new file mode 100644 index 00000000000..a5b4ca9a71f --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/DisallowComplexMapAndWsetKeyTypes.java @@ -0,0 +1,57 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.processing;

import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.document.ArrayDataType;
import com.yahoo.document.Field;
import com.yahoo.schema.RankProfileRegistry;
import com.yahoo.document.DataType;
import com.yahoo.document.MapDataType;
import com.yahoo.document.PrimitiveDataType;
import com.yahoo.schema.Schema;
import com.yahoo.schema.document.SDField;
import com.yahoo.document.WeightedSetDataType;
import com.yahoo.vespa.model.container.search.QueryProfiles;

/**
 * Rejects map and weighted set fields whose key type is not primitive
 * (such types are forbidden here, though OK in the document model).
 *
 * @author Vegard Havdal
 */
public class DisallowComplexMapAndWsetKeyTypes extends Processor {

    public DisallowComplexMapAndWsetKeyTypes(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) {
        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
    }

    /**
     * Checks the data type of every concrete field; does nothing unless {@code validate} is true.
     *
     * @param validate      whether validation is enabled
     * @param documentsOnly not consulted by this processor
     */
    @Override
    public void process(boolean validate, boolean documentsOnly) {
        if ( ! validate) return;

        // TODO also traverse struct types to search for bad map or wset types.
        // Do this after document manager is fixed, do not start using the static stuff on SDDocumentTypes any more.
        for (SDField field : schema.allConcreteFields()) {
            checkFieldType(field, field.getDataType());
        }
    }

    /**
     * Recursively checks {@code dataType}: array element types and map value types are descended into,
     * weighted set element types and map key types must be primitive. Other types are accepted as-is.
     *
     * @param field    the field being checked, passed to {@code fail} for reporting
     * @param dataType the (possibly nested) type to inspect
     */
    private void checkFieldType(Field field, DataType dataType) {
        if (dataType instanceof ArrayDataType) {
            checkFieldType(field, ((ArrayDataType) dataType).getNestedType());
            return;
        }
        if (dataType instanceof WeightedSetDataType) {
            DataType elementType = ((WeightedSetDataType) dataType).getNestedType();
            if ( ! (elementType instanceof PrimitiveDataType)) {
                fail(schema, field, "Weighted set must have a primitive key type.");
            }
            return;
        }
        if (dataType instanceof MapDataType) {
            MapDataType mapType = (MapDataType) dataType;
            if ( ! (mapType.getKeyType() instanceof PrimitiveDataType)) {
                fail(schema, field, "Map key type must be a primitive type.");
            }
            // Map values may themselves be arrays, weighted sets or maps.
            checkFieldType(field, mapType.getValueType());
        }
    }

}

// diff --git a/config-model/src/main/java/com/yahoo/schema/processing/DiversitySettingsValidator.java b/config-model/src/main/java/com/yahoo/schema/processing/DiversitySettingsValidator.java new file mode 100644 index 00000000000..0400292c7e5 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/DiversitySettingsValidator.java @@ -0,0 +1,65 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.processing;

import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.schema.RankProfile;
import com.yahoo.schema.RankProfileRegistry;
import com.yahoo.schema.Schema;
import com.yahoo.schema.document.Attribute;
import com.yahoo.vespa.model.container.search.QueryProfiles;

/**
 * Validates the attribute named by the diversity settings of each rank profile's match phase.
 *
 * @author baldersheim
 */
public class DiversitySettingsValidator extends Processor {

    public DiversitySettingsValidator(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) {
        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
    }

    /**
     * Validates every rank profile of this schema that has match-phase diversity settings.
     * Does nothing when validation is off or only documents are processed.
     */
    @Override
    public void process(boolean validate, boolean documentsOnly) {
        if ( ! validate || documentsOnly) return;

        for (RankProfile profile : rankProfileRegistry.rankProfilesOf(schema)) {
            var matchPhase = profile.getMatchPhaseSettings();
            if (matchPhase == null) continue;
            var diversity = matchPhase.getDiversity();
            if (diversity == null) continue;
            validate(profile, diversity);
        }
    }

    /** Runs the diversity attribute validator for one rank profile. */
    private void validate(RankProfile rankProfile, RankProfile.DiversitySettings settings) {
        String attributeName = settings.getAttribute();
        AttributeValidator validator = new AttributeValidator(schema.getName(),
                                                              rankProfile.name(),
                                                              schema.getAttribute(attributeName),
                                                              attributeName);
        validator.validate();
    }

    /** Validator requiring the diversity attribute to exist, be single-valued and not be a predicate. */
    private static class AttributeValidator extends MatchPhaseSettingsValidator.AttributeValidator {

        public AttributeValidator(String searchName, String rankProfileName, Attribute attribute, String attributeName) {
            super(searchName, rankProfileName, attribute, attributeName);
        }

        /** Fails validation unless the attribute is single-valued and its type is not predicate. */
        protected void validateThatAttributeIsSingleAndNotPredicate() {
            boolean singleValued = attribute.getCollectionType().equals(Attribute.CollectionType.SINGLE);
            // The type is only inspected for single-valued attributes, as in the short-circuit it replaces.
            if (singleValued && ! attribute.getType().equals(Attribute.Type.PREDICATE)) return;

            failValidation("must be single value numeric, or enumerated attribute, but it is '"
                           + attribute.getDataType().getName() + "'");
        }

        @Override
        public void validate() {
            validateThatAttributeExists();
            validateThatAttributeIsSingleAndNotPredicate();
        }

        @Override
        public String getValidationType() {
            return "diversity";
        }

    }

}

// diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ExactMatch.java b/config-model/src/main/java/com/yahoo/schema/processing/ExactMatch.java new file mode 100644 index 00000000000..aa2d8293cac --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/ExactMatch.java @@ -0,0 +1,109 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.processing;

import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.document.CollectionDataType;
import com.yahoo.document.DataType;
import com.yahoo.schema.RankProfileRegistry;
import com.yahoo.schema.Schema;
import com.yahoo.schema.document.MatchType;
import com.yahoo.schema.document.SDField;
import com.yahoo.schema.document.Stemming;
import com.yahoo.vespa.indexinglanguage.ExpressionSearcher;
import com.yahoo.vespa.indexinglanguage.expressions.ExactExpression;
import com.yahoo.vespa.indexinglanguage.expressions.Expression;
import com.yahoo.vespa.indexinglanguage.expressions.ForEachExpression;
import com.yahoo.vespa.indexinglanguage.expressions.IndexExpression;
import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression;
import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression;
import com.yahoo.vespa.model.container.search.QueryProfiles;

/**
 * Implements 'exact' and 'word' matching on fields and, recursively, their struct fields.
 *
 * @author bratseth
 */
public class ExactMatch extends Processor {

    /** Terminator used for 'exact' matching when the field declares none (or an empty one). */
    public static final String DEFAULT_EXACT_TERMINATOR = "@@";

    ExactMatch(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) {
        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
    }

    @Override
    public void process(boolean validate, boolean documentsOnly) {
        for (SDField concreteField : schema.allConcreteFields()) {
            processField(concreteField, schema);
        }
    }

    /**
     * Applies exact/word matching to the field when requested, warns about an exact-terminator
     * without such matching, and then descends into the field's struct fields.
     */
    private void processField(SDField field, Schema schema) {
        MatchType matchType = field.getMatching().getType();
        boolean exactOrWord = matchType.equals(MatchType.EXACT) || matchType.equals(MatchType.WORD);
        if (exactOrWord) {
            implementExactMatch(field, schema);
        } else if (field.getMatching().getExactMatchTerminator() != null) {
            warn(schema, field, "exact-terminator requires 'exact' matching to have any effect.");
        }
        for (var structField : field.getStructFields()) {
            processField(structField, schema);
        }
    }

    /**
     * Turns off stemming, infers lowercasing, adds the matching query command and, if the indexing
     * script contains an index expression, rewrites the script through {@link MyProvider}.
     */
    private void implementExactMatch(SDField field, Schema schema) {
        field.setStemming(Stemming.NONE);
        field.getNormalizing().inferLowercase();

        if (field.getMatching().getType().equals(MatchType.WORD)) {
            field.addQueryCommand("word");
        } else { // exact
            field.addQueryCommand("exact " + resolveExactTerminator(field, schema));

            // The following part illustrates how nice it would have been with canonical representation of indices
            if (field.doesIndexing()) {
                field.getRanking().setFilter(true);
            }
        }

        ScriptExpression script = field.getIndexingScript();
        boolean hasIndexExpression = new ExpressionSearcher<>(IndexExpression.class).containedIn(script);
        if (hasIndexExpression) {
            field.setIndexingScript((ScriptExpression) new MyProvider(schema).convert(script));
        }
    }

    /**
     * Returns the field's exact-terminator, or {@link #DEFAULT_EXACT_TERMINATOR} (logging an info
     * message) when none is configured or the configured one is empty.
     */
    private String resolveExactTerminator(SDField field, Schema schema) {
        String configured = field.getMatching().getExactMatchTerminator();
        if (configured != null && ! configured.isEmpty()) {
            return configured;
        }
        info(schema, field,
             "With 'exact' matching, an exact-terminator is needed," +
             " using default value '" + DEFAULT_EXACT_TERMINATOR + "' as terminator");
        return DEFAULT_EXACT_TERMINATOR;
    }

    /** Transform provider that creates {@link ExactExpression}s and requires them for output expressions. */
    private static class MyProvider extends TypedTransformProvider {

        MyProvider(Schema schema) {
            super(ExactExpression.class, schema);
        }

        @Override
        protected boolean requiresTransform(Expression exp, DataType fieldType) {
            return exp instanceof OutputExpression;
        }

        @Override
        protected Expression newTransform(DataType fieldType) {
            Expression exact = new ExactExpression();
            // Collections get the transform applied per element.
            return (fieldType instanceof CollectionDataType) ? new ForEachExpression(exact) : exact;
        }

    }

}

// diff --git a/config-model/src/main/java/com/yahoo/schema/processing/FastAccessValidator.java b/config-model/src/main/java/com/yahoo/schema/processing/FastAccessValidator.java new file mode 100644 index 00000000000..224000e6b64 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/FastAccessValidator.java @@ -0,0 +1,54 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.processing;

import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.schema.RankProfileRegistry;
import com.yahoo.schema.Schema;
import com.yahoo.schema.document.Attribute;
import com.yahoo.vespa.model.container.search.QueryProfiles;

import java.util.stream.Collectors;

/**
 * Validates the use of the fast-access property.
 *
 * @author bjorncs
 */
public class FastAccessValidator extends Processor {

    public FastAccessValidator(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) {
        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
    }

    /**
     * Collects the names of all fast-access attributes whose type is incompatible with fast-access.
     * Does nothing unless {@code validate} is true.
     *
     * @throws IllegalArgumentException if the comma-joined list of offending attribute names is non-empty
     */
    @Override
    public void process(boolean validate, boolean documentsOnly) {
        if ( ! validate) return;

        String invalidAttributes = schema.allFields()
                .flatMap(field -> field.getAttributes().values().stream())
                .filter(FastAccessValidator::isIncompatibleAttribute)
                .map(Attribute::getName)
                .collect(Collectors.joining(", "));
        if (invalidAttributes.isEmpty()) return;

        throw new IllegalArgumentException(
                "For " + schema + ": The following attributes have a type that is incompatible with fast-access: " +
                invalidAttributes + ". Predicate, tensor and reference attributes are incompatible with fast-access.");
    }

    /** Returns whether the attribute is fast-access and has a type that cannot be fast-access. */
    private static boolean isIncompatibleAttribute(Attribute attribute) {
        return attribute.isFastAccess() && isTypeIncompatibleWithFastAccess(attribute.getType());
    }

    /** Returns true for predicate, tensor and reference attribute types, false for all others. */
    private static boolean isTypeIncompatibleWithFastAccess(Attribute.Type type) {
        switch (type) {
            case PREDICATE:
            case TENSOR:
            case REFERENCE:
                return true;
            default:
                return false;
        }
    }

}

// diff --git a/config-model/src/main/java/com/yahoo/schema/processing/FieldSetSettings.java b/config-model/src/main/java/com/yahoo/schema/processing/FieldSetSettings.java new file mode 100644 index 00000000000..f0c59ece1bf --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/FieldSetSettings.java @@ -0,0 +1,107 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.processing;

import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.schema.RankProfileRegistry;
import com.yahoo.schema.Schema;
import com.yahoo.schema.document.FieldSet;
import com.yahoo.schema.document.ImmutableSDField;
import com.yahoo.schema.document.Matching;
import com.yahoo.schema.document.NormalizeLevel;
import com.yahoo.schema.document.Stemming;
import com.yahoo.vespa.model.container.search.QueryProfiles;

/**
 * Computes the right "index commands" for each fieldset in a search definition.
 *
 * <p>For every user-defined field set this checks that the member fields agree on matching,
 * normalization and stemming, warning when they do not, and assigns the common matching
 * setting to the field set.</p>
 *
 * @author vegardh
 * @author bratseth
 */
// See also IndexInfo.addFieldSetCommands, which does more of this in a complicated way.
// That should be moved here, and done in the way the match setting is done below
// (this requires adding normalizing and stemming settings to FieldSet).
public class FieldSetSettings extends Processor {

    public FieldSetSettings(Schema schema,
                            DeployLogger deployLogger,
                            RankProfileRegistry rankProfileRegistry,
                            QueryProfiles queryProfiles) {
        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
    }

    /**
     * Checks every user field set. Field names are only verified when {@code validate} is true;
     * the consistency checks always run.
     */
    @Override
    public void process(boolean validate, boolean documentsOnly) {
        for (FieldSet fieldSet : schema.fieldSets().userFieldSets().values()) {
            if (validate) {
                checkFieldNames(schema, fieldSet);
            }
            checkMatching(schema, fieldSet);
            checkNormalization(schema, fieldSet);
            checkStemming(schema, fieldSet);
        }
    }

    /**
     * Verifies that every field named by the field set exists in the schema.
     *
     * @throws IllegalArgumentException on the first field name that does not resolve
     */
    private void checkFieldNames(Schema schema, FieldSet fieldSet) {
        for (String field : fieldSet.getFieldNames()) {
            if (schema.getField(field) == null)
                throw new IllegalArgumentException("For " + schema + ": Field '" + field + "' in " +
                                                   fieldSet + " does not exist.");
        }
    }

    /**
     * Warns if the fields disagree on matching (or with the field set's own matching);
     * otherwise stores the common matching on the field set.
     */
    private void checkMatching(Schema schema, FieldSet fieldSet) {
        Matching matching = fieldSet.getMatching();
        for (String fieldName : fieldSet.getFieldNames()) {
            ImmutableSDField field = schema.getField(fieldName);
            // Unknown names are only rejected when validating; without validation they are skipped
            // here instead of being dereferenced.
            if (field == null) continue;
            Matching fieldMatching = field.getMatching();
            if (matching == null) {
                matching = fieldMatching;
            } else if ( ! matching.equals(fieldMatching)) {
                warn(schema, field.asField(),
                     "The matching settings for the fields in " + fieldSet + " are inconsistent " +
                     "(explicitly or because of field type). This may lead to recall and ranking issues.");
                return;
            }
        }
        fieldSet.setMatching(matching); // Assign the uniquely determined matching to the field set
    }

    /** Warns (once) if the fields of the field set disagree on normalization level. */
    private void checkNormalization(Schema schema, FieldSet fieldSet) {
        NormalizeLevel.Level normalizing = null;
        for (String fieldName : fieldSet.getFieldNames()) {
            ImmutableSDField field = schema.getField(fieldName);
            if (field == null) continue; // see checkMatching: only reachable without validation
            NormalizeLevel.Level fieldNorm = field.getNormalizing().getLevel();
            if (normalizing == null) {
                normalizing = fieldNorm;
            } else if ( ! normalizing.equals(fieldNorm)) {
                warn(schema, field.asField(),
                     "The normalization settings for the fields in " + fieldSet + " are inconsistent " +
                     "(explicitly or because of field type). This may lead to recall and ranking issues.");
                return;
            }
        }
    }

    /** Warns (once) if the fields of the field set disagree on stemming. */
    private void checkStemming(Schema schema, FieldSet fieldSet) {
        Stemming stemming = null;
        for (String fieldName : fieldSet.getFieldNames()) {
            ImmutableSDField field = schema.getField(fieldName);
            if (field == null) continue; // see checkMatching: only reachable without validation
            Stemming fieldStemming = field.getStemming();
            if (stemming == null) {
                stemming = fieldStemming;
            } else if ( ! stemming.equals(fieldStemming)) {
                warn(schema, field.asField(),
                     "The stemming settings for the fields in the fieldset '"+fieldSet.getName()+
                     "' are inconsistent (explicitly or because of field type). " +
                     "This may lead to recall and ranking issues.");
                return;
            }
        }
    }

}

// diff --git a/config-model/src/main/java/com/yahoo/schema/processing/FilterFieldNames.java b/config-model/src/main/java/com/yahoo/schema/processing/FilterFieldNames.java new file mode 100644 index 00000000000..28973c82d42 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/FilterFieldNames.java @@ -0,0 +1,72 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.processing;

import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.schema.RankProfileRegistry;
import com.yahoo.schema.Schema;
import com.yahoo.schema.document.RankType;
import com.yahoo.schema.document.SDField;
import com.yahoo.schema.RankProfile;
import com.yahoo.vespa.model.container.search.QueryProfiles;

import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.logging.Level;

/**
 * Takes the fields and indexes that are of type rank filter, and stores those names on all rank profiles
 *
 * @author Vegard Havdal
 */
public class FilterFieldNames extends Processor {

    public FilterFieldNames(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) {
        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
    }

    /**
     * Registers filter fields on the rank profiles of this schema. Does nothing when
     * {@code documentsOnly} is true.
     */
    @Override
    public void process(boolean validate, boolean documentsOnly) {
        if (documentsOnly) return;

        // Fields marked as rank filters apply to every rank profile.
        for (SDField f : schema.allConcreteFields()) {
            if (f.getRanking().isFilter()) {
                filterField(f.getName());
            }
        }

        // Fields made filters by a profile's own settings apply to that profile only.
        for (RankProfile profile : rankProfileRegistry.rankProfilesOf(schema)) {
            Set<String> filterFields = new LinkedHashSet<>();
            findFilterFields(schema, profile, filterFields);
            for (String fieldName : filterFields) {
                profile.filterFields().add(fieldName);
                profile.addRankSetting(fieldName, RankProfile.RankSetting.Type.RANKTYPE, RankType.EMPTY);
            }
        }
    }

    /** Adds the field name to the filter fields of every rank profile of this schema. */
    private void filterField(String f) {
        for (RankProfile rp : rankProfileRegistry.rankProfilesOf(schema)) {
            rp.filterFields().add(f);
        }
    }

    /**
     * Adds to {@code filterFields} each existing concrete field that the profile's declared rank
     * settings mark with a true PREFERBITVECTOR value and that is not already a filter field of the
     * profile. Logs a warning for such settings naming a field that does not exist.
     */
    private void findFilterFields(Schema schema, RankProfile profile, Set<String> filterFields) {
        for (Iterator<RankProfile.RankSetting> itr = profile.declaredRankSettingIterator(); itr.hasNext(); ) {
            RankProfile.RankSetting setting = itr.next();
            boolean prefersBitVector = setting.getType().equals(RankProfile.RankSetting.Type.PREFERBITVECTOR)
                                       && ((Boolean) setting.getValue());
            if ( ! prefersBitVector) continue;

            String fieldName = setting.getFieldName();
            if (schema.getConcreteField(fieldName) == null) {
                deployLogger.logApplicationPackage(Level.WARNING, "For rank profile '" + profile.name() + "': Cannot apply rank filter setting to unexisting field '" + fieldName + "'");
            } else if ( ! profile.filterFields().contains(fieldName)) {
                filterFields.add(fieldName);
            }
        }
    }

}

// diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ImplicitSummaries.java b/config-model/src/main/java/com/yahoo/schema/processing/ImplicitSummaries.java new file mode 100644 index 00000000000..4080e37003f --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/ImplicitSummaries.java @@ -0,0 +1,232 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.processing;

import java.util.logging.Level;

import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.schema.RankProfileRegistry;
import com.yahoo.schema.Schema;
import com.yahoo.schema.document.Attribute;
import com.yahoo.document.PositionDataType;
import com.yahoo.schema.document.SDField;
import com.yahoo.vespa.documentmodel.DocumentSummary;
import com.yahoo.vespa.documentmodel.SummaryField;
import com.yahoo.vespa.documentmodel.SummaryTransform;
import com.yahoo.vespa.model.container.search.QueryProfiles;

import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isComplexFieldWithOnlyStructFieldAttributes;

/**
 * Makes implicitly defined summaries into explicit summaries
 *
 * @author bratseth
 */
public class ImplicitSummaries extends Processor {

    public ImplicitSummaries(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) {
        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
    }

    /**
     * Ensures a "default" document summary exists, collects summaries for every concrete field and
     * finally calls {@code purgeImplicits()} on every summary of the schema.
     *
     * @param validate      whether invalid summary fields should cause exceptions
     * @param documentsOnly not consulted by this processor
     */
    @Override
    public void process(boolean validate, boolean documentsOnly) {
        DocumentSummary defaultSummary = schema.getSummariesInThis().get("default");
        if (defaultSummary == null) {
            defaultSummary = new DocumentSummary("default", schema);
            defaultSummary.setFromDisk(true); // As we add documentid to this
            schema.addSummary(defaultSummary);
        }

        for (SDField field : schema.allConcreteFields()) {
            collectSummaries(field, schema, validate);
        }
        for (DocumentSummary documentSummary : schema.getSummaries().values()) {
            documentSummary.purgeImplicits();
        }
    }

    private void addSummaryFieldSources(SummaryField summaryField, SDField sdField) {
        sdField.addSummaryFieldSources(summaryField);
    }

    /**
     * Creates the implicit summary field of {@code field} if needed, wires attribute prefetch and
     * position summary fields, and adds all valid explicit summary fields to their destinations.
     */
    private void collectSummaries(SDField field, Schema schema, boolean validate) {
        SummaryField addedSummaryField = null;

        // Implicit
        String fieldName = field.getName();
        SummaryField fieldSummaryField = field.getSummaryField(fieldName);
        if (fieldSummaryField == null && field.doesSummarying()) {
            fieldSummaryField = new SummaryField(fieldName, field.getDataType());
            fieldSummaryField.setImplicit(true);
            addSummaryFieldSources(fieldSummaryField, field);
            fieldSummaryField.addDestination("default");
            field.addSummaryField(fieldSummaryField);
            addedSummaryField = fieldSummaryField;
        }
        if (fieldSummaryField != null) {
            for (String dest : fieldSummaryField.getDestinations()) {
                DocumentSummary summary = schema.getSummariesInThis().get(dest);
                if (summary != null) {
                    summary.add(fieldSummaryField);
                }
            }
        }

        // Attribute prefetch
        for (Attribute attribute : field.getAttributes().values()) {
            if (attribute.getName().equals(fieldName)) {
                if (addedSummaryField != null) {
                    addedSummaryField.setTransform(SummaryTransform.ATTRIBUTE);
                }
                if (attribute.isPrefetch()) {
                    addPrefetchAttribute(attribute, field, schema);
                }
            }
        }

        if (addedSummaryField != null && isComplexFieldWithOnlyStructFieldAttributes(field)) {
            addedSummaryField.setTransform(SummaryTransform.ATTRIBUTECOMBINER);
        }

        // Position attributes
        if (field.doesSummarying()) {
            for (Attribute attribute : field.getAttributes().values()) {
                if ( ! attribute.isPosition()) continue;
                var distField = field.getSummaryField(PositionDataType.getDistanceSummaryFieldName(fieldName));
                if (distField != null) {
                    DocumentSummary attributePrefetchSummary = getOrCreateAttributePrefetchSummary(schema);
                    attributePrefetchSummary.add(distField);
                }
                var posField = field.getSummaryField(PositionDataType.getPositionSummaryFieldName(fieldName));
                if (posField != null) {
                    DocumentSummary attributePrefetchSummary = getOrCreateAttributePrefetchSummary(schema);
                    attributePrefetchSummary.add(posField);
                }
            }
        }

        // Explicits
        for (SummaryField summaryField : field.getSummaryFields().values()) {
            // Make sure we fetch from attribute here too
            Attribute attribute = field.getAttributes().get(fieldName);
            if (attribute != null && summaryField.getTransform() == SummaryTransform.NONE) {
                summaryField.setTransform(SummaryTransform.ATTRIBUTE);
            }
            if (isValid(summaryField, schema, validate)) {
                addToDestinations(summaryField, schema);
            }
        }

    }

    /** Returns the "attributeprefetch" summary of the schema, creating and registering it if absent. */
    private DocumentSummary getOrCreateAttributePrefetchSummary(Schema schema) {
        DocumentSummary summary = schema.getSummariesInThis().get("attributeprefetch");
        if (summary == null) {
            summary = new DocumentSummary("attributeprefetch", schema);
            schema.addSummary(summary);
        }
        return summary;
    }

    /**
     * Adds an attribute-transform summary field for the attribute to the "attributeprefetch" summary,
     * unless prefetch was not explicitly set and a dynamic summary field of the same name exists.
     */
    private void addPrefetchAttribute(Attribute attribute, SDField field, Schema schema) {
        if (attribute.getPrefetchValue() == null) { // Prefetch by default - unless any summary makes this dynamic
            // Check if there is an implicit dynamic definition
            SummaryField fieldSummaryField = field.getSummaryField(attribute.getName());
            if (fieldSummaryField != null && fieldSummaryField.getTransform().isDynamic()) return;

            // Check if an explicit class makes it dynamic (first is enough, as all must be the same, checked later)
            SummaryField explicitSummaryField = schema.getExplicitSummaryField(attribute.getName());
            if (explicitSummaryField != null && explicitSummaryField.getTransform().isDynamic()) return;
        }

        DocumentSummary summary = getOrCreateAttributePrefetchSummary(schema);
        SummaryField attributeSummaryField = new SummaryField(attribute.getName(), attribute.getDataType());
        attributeSummaryField.addSource(attribute.getName());
        attributeSummaryField.addDestination("attributeprefetch");
        attributeSummaryField.setTransform(SummaryTransform.ATTRIBUTE);
        summary.add(attributeSummaryField);
    }

    // Returns whether this is valid. Warns if invalid and ignorable. Throws if not ignorable.
    // A summary field whose source field does not exist is reported by exception when validating and
    // is otherwise treated as invalid (returns false) without a warning.
    private boolean isValid(SummaryField summaryField, Schema schema, boolean validate) {
        if (summaryField.getTransform() == SummaryTransform.DISTANCE ||
            summaryField.getTransform() == SummaryTransform.POSITIONS) {
            int sourceCount = summaryField.getSourceCount();
            if (validate && sourceCount != 1) {
                throw newProcessException(schema.getName(), summaryField.getName(),
                                          "Expected 1 source field, got " + sourceCount + ".");
            }
            String sourceName = summaryField.getSingleSource();
            if (validate && schema.getAttribute(sourceName) == null) {
                throw newProcessException(schema.getName(), summaryField.getName(),
                                          "Summary source attribute '" + sourceName + "' not found.");
            }
            return true;
        }

        String fieldName = summaryField.getSourceField();
        SDField sourceField = schema.getConcreteField(fieldName);
        if (sourceField == null) {
            if (validate) {
                throw newProcessException(schema, summaryField, "Source field '" + fieldName + "' does not exist.");
            }
            // Without validation there is nothing to inspect; previously this fell through and
            // dereferenced the missing field.
            return false;
        }
        if (! sourceField.doesSummarying() &&
            summaryField.getTransform() != SummaryTransform.ATTRIBUTE &&
            summaryField.getTransform() != SummaryTransform.GEOPOS)
        {
            // Summary transform attribute may indicate that the ilscript was rewritten to remove summary
            // by another search that uses this same field in inheritance.
            deployLogger.logApplicationPackage(Level.WARNING, "Ignoring " + summaryField + ": " + sourceField +
                                                              " is not creating a summary value in its indexing statement");
            return false;
        }

        if (summaryField.getTransform().isDynamic()
            && summaryField.getName().equals(sourceField.getName())
            && sourceField.doesAttributing()) {
            Attribute attribute = sourceField.getAttributes().get(sourceField.getName());
            if (attribute != null) {
                String destinations = "document summary 'default'";
                if ( ! summaryField.getDestinations().isEmpty()) {
                    destinations = "document summaries " + summaryField.getDestinations();
                }
                deployLogger.logApplicationPackage(Level.WARNING,
                        "Will fetch the disk summary value of " + sourceField + " in " + destinations +
                        " since this summary field uses a dynamic summary value (snippet/bolding): Dynamic summaries and bolding " +
                        "is not supported with summary values fetched from in-memory attributes yet. If you want to see partial updates " +
                        "to this attribute, remove any bolding and dynamic snippeting from this field");
                // Note: The dynamic setting has already overridden the attribute map setting,
                // so we do not need to actually do attribute.setSummary(false) here
                // Also, we can not do this, since it makes it impossible to fetch this attribute
                // in another summary
            }
        }

        return true;
    }

    /** Adds the summary field to each of its destinations, or to "default" if it declares none. */
    private void addToDestinations(SummaryField summaryField, Schema schema) {
        if (summaryField.getDestinations().isEmpty()) {
            addToDestination("default", summaryField, schema);
        }
        else {
            for (String destinationName : summaryField.getDestinations()) {
                addToDestination(destinationName, summaryField, schema);
            }
        }
    }

    /**
     * Adds the summary field to the named document summary, creating the summary if it does not exist;
     * for an existing summary the field is first merged with the summary's field of the same name.
     */
    private void addToDestination(String destinationName, SummaryField summaryField, Schema schema) {
        DocumentSummary destination = schema.getSummariesInThis().get(destinationName);
        if (destination == null) {
            destination = new DocumentSummary(destinationName, schema);
            schema.addSummary(destination);
            destination.add(summaryField);
        }
        else {
            // NOTE(review): existingField may be null here; presumably mergeWith accepts that - confirm.
            SummaryField existingField = destination.getSummaryField(summaryField.getName());
            SummaryField merged = summaryField.mergeWith(existingField);
            destination.add(merged);
        }
    }

}

// diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ImplicitSummaryFields.java b/config-model/src/main/java/com/yahoo/schema/processing/ImplicitSummaryFields.java new file mode 100644 index 00000000000..b17efbfe8e8 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/ImplicitSummaryFields.java @@ -0,0 +1,40 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.DataType; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * This processor adds the implicit summary fields {@code rankfeatures} and {@code summaryfeatures} to every document + * summary of this schema that does not inherit another summary. When validation is enabled and a summary already has a + * field with one of the implicit names, this processor throws an {@link IllegalArgumentException}. + */ +public class ImplicitSummaryFields extends Processor { + + public ImplicitSummaryFields(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + /** + * Adds both implicit fields to each summary returned by {@code schema.getSummariesInThis()}. + * + * @param validate whether to reject summaries that already use a reserved field name + * @param documentsOnly not read by this processor + */ + @Override + public void process(boolean validate, boolean documentsOnly) { + for (DocumentSummary docsum : schema.getSummariesInThis().values()) { + if (docsum.inherited().isPresent()) continue; // Implicit fields are added to inheriting summaries through their parent + addField(docsum, new SummaryField("rankfeatures", DataType.STRING, SummaryTransform.RANKFEATURES), validate); + addField(docsum, new SummaryField("summaryfeatures", DataType.STRING, SummaryTransform.SUMMARYFEATURES), validate); + } + } + + /** + * Adds the given field to the given summary. + * + * @throws IllegalArgumentException if validate is true and the summary already has a field with the same name + */ + private void addField(DocumentSummary docsum, SummaryField field, boolean validate) { + if (validate && docsum.getSummaryField(field.getName()) != null) { + throw new IllegalArgumentException("Summary class '" + docsum.getName() + "' uses reserved field name '" + + field.getName() + "'."); + } + docsum.add(field); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ImportedFieldsResolver.java b/config-model/src/main/java/com/yahoo/schema/processing/ImportedFieldsResolver.java new file mode 100644 
index 00000000000..ee465be44f2 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/ImportedFieldsResolver.java @@ -0,0 +1,207 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.DataType; +import com.yahoo.document.PositionDataType; +import com.yahoo.schema.DocumentReference; +import com.yahoo.schema.DocumentReferences; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.GeoPos; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.schema.document.ImportedComplexField; +import com.yahoo.schema.document.ImportedField; +import com.yahoo.schema.document.ImportedFields; +import com.yahoo.schema.document.ImportedSimpleField; +import com.yahoo.schema.document.TemporaryImportedField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Optional; + +import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isArrayOfSimpleStruct; +import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isMapOfPrimitiveType; +import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isMapOfSimpleStruct; + +/** + * Iterates all imported fields from schema parsing and validates and resolves them into concrete fields from referenced document types. 
+ * + * @author geirst + */ +public class ImportedFieldsResolver extends Processor { + + private final Map<String, ImportedField> importedFields = new LinkedHashMap<>(); + private final Optional<DocumentReferences> references; + + public ImportedFieldsResolver(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + references = schema.getDocument().getDocumentReferences(); + } + + /** + * Resolves every temporary imported field of the schema and stores the result with {@code schema.setImportedFields}. + * + * @param validate whether target fields are validated while they are resolved + * @param documentsOnly not read by this processor + */ + @Override + public void process(boolean validate, boolean documentsOnly) { + schema.temporaryImportedFields().get().fields().forEach((name, field) -> resolveImportedField(field, validate)); + schema.setImportedFields(new ImportedFields(importedFields)); + } + + /** + * Looks up the reference and the target field of the import, then dispatches on the kind of target field: + * position, array of simple struct, map of simple struct, map of primitive type, or any other (normal) field. + */ + private void resolveImportedField(TemporaryImportedField importedField, boolean validate) { + DocumentReference reference = validateDocumentReference(importedField); + ImmutableSDField targetField = getTargetField(importedField, reference); + if (GeoPos.isAnyPos(targetField)) { + resolveImportedPositionField(importedField, reference, targetField, validate); + } else if (isArrayOfSimpleStruct(targetField)) { + resolveImportedArrayOfStructField(importedField, reference, targetField, validate); + } else if (isMapOfSimpleStruct(targetField)) { + resolveImportedMapOfStructField(importedField, reference, targetField, validate); + } else if (isMapOfPrimitiveType(targetField)) { + resolveImportedMapOfPrimitiveField(importedField, reference, targetField, validate); + } else { + resolveImportedNormalField(importedField, reference, targetField, validate); + } + } + + /** + * Imports a position field: the matching z-curve field is resolved as a normal imported field, and the position + * field itself is registered as a complex imported field. + */ + private void resolveImportedPositionField(TemporaryImportedField importedField, DocumentReference reference, + ImmutableSDField targetField, boolean validate) { + TemporaryImportedField importedZCurveField = new TemporaryImportedField(PositionDataType.getZCurveFieldName(importedField.fieldName()), + reference.referenceField().getName(), 
PositionDataType.getZCurveFieldName(targetField.getName())); + ImmutableSDField targetZCurveField = getTargetField(importedZCurveField, reference); + resolveImportedNormalField(importedZCurveField, reference, targetZCurveField, validate); + ImportedComplexField importedStructField = new ImportedComplexField(importedField.fieldName(), reference, targetField); + registerImportedField(importedField, null, importedStructField); + } + + /** Imports an array of struct field as a complex field whose nested fields are the struct fields that have an attribute. */ + private void resolveImportedArrayOfStructField(TemporaryImportedField importedField, DocumentReference reference, + ImmutableSDField targetField, boolean validate) { + ImportedComplexField importedStructField = new ImportedComplexField(importedField.fieldName(), reference, targetField); + resolveImportedNestedStructField(importedField, reference, importedStructField, targetField, validate); + registerImportedField(importedField, null, importedStructField); + } + + /** + * Imports a map of struct field as a complex map field holding the key field and a nested complex value field. + * The key must have an attribute when validate is true, since validate is passed on as the requireAttribute flag. + */ + private void resolveImportedMapOfStructField(TemporaryImportedField importedField, DocumentReference reference, + ImmutableSDField targetField, boolean validate) { + ImportedComplexField importedMapField = new ImportedComplexField(importedField.fieldName(), reference, targetField); + ImportedComplexField importedStructField = new ImportedComplexField(importedField.fieldName() + ".value", reference, targetField.getStructField("value")); + importedMapField.addNestedField(importedStructField); + resolveImportedNestedField(importedField, reference, importedMapField, targetField.getStructField("key"), validate); + resolveImportedNestedStructField(importedField, reference, importedStructField, importedStructField.targetField(), validate); + registerImportedField(importedField, null, importedMapField); + } + + /** Creates a simple imported field with the given name and registers it, under owner when one is given. */ + private void makeImportedNormalField(TemporaryImportedField importedField, ImportedComplexField owner, String name, DocumentReference reference, ImmutableSDField targetField) { + ImportedField importedSimpleField = new ImportedSimpleField(name, reference, targetField); + 
registerImportedField(importedField, owner, importedSimpleField); + } + + /** + * Adds the imported field to owner when an owner is given. Otherwise registers it as a top-level imported field, + * failing if a field with the same name has already been registered. + */ + private void registerImportedField(TemporaryImportedField temporaryImportedField, ImportedComplexField owner, ImportedField importedField) { + if (owner != null) { + owner.addNestedField(importedField); + } else { + if (importedFields.get(importedField.fieldName()) != null) { + fail(temporaryImportedField, importedField.fieldName(), targetFieldAsString(importedField.targetField().getName(), importedField.reference()) + ": Field already imported"); + } + importedFields.put(importedField.fieldName(), importedField); + } + } + + /** + * Returns the imported field name followed by the part of the nested target field name that comes after a prefix + * as long as the target field name of the import. + */ + private static String makeImportedNestedFieldName(TemporaryImportedField importedField, ImmutableSDField targetNestedField) { + return importedField.fieldName() + targetNestedField.getName().substring(importedField.targetFieldName().length()); + } + + /** + * Imports one nested field if the target nested field has an attribute. + * + * @param requireAttribute whether to fail when the target nested field has no attribute + * @return true if the target nested field has an attribute + */ + private boolean resolveImportedNestedField(TemporaryImportedField importedField, DocumentReference reference, + ImportedComplexField owner, ImmutableSDField targetNestedField, boolean requireAttribute) { + Attribute attribute = targetNestedField.getAttribute(); + String importedNestedFieldName = makeImportedNestedFieldName(importedField, targetNestedField); + if (attribute != null) { + makeImportedNormalField(importedField, owner, importedNestedFieldName, reference, targetNestedField); + } else if (requireAttribute) { + fail(importedField, importedNestedFieldName, targetFieldAsString(targetNestedField.getName(), reference) + + ": Is not an attribute field. 
Only attribute fields supported"); + } + return attribute != null; + } + + private void resolveImportedNestedStructField(TemporaryImportedField importedField, DocumentReference reference, + ImportedComplexField ownerField, ImmutableSDField targetNestedField, boolean validate) { + boolean foundAttribute = false; + for (ImmutableSDField targetStructField : targetNestedField.getStructFields()) { + if (resolveImportedNestedField(importedField, reference, ownerField, targetStructField, false)) { + foundAttribute = true; + }; + } + if (validate && !foundAttribute) { + String importedNestedFieldName = makeImportedNestedFieldName(importedField, targetNestedField); + fail(importedField, importedNestedFieldName, targetFieldAsString(targetNestedField.getName(), reference) + + ": Is not a struct containing an attribute field."); + } + } + + private void resolveImportedMapOfPrimitiveField(TemporaryImportedField importedField, DocumentReference reference, + ImmutableSDField targetField, boolean validate) { + ImportedComplexField importedMapField = new ImportedComplexField(importedField.fieldName(), reference, targetField); + resolveImportedNestedField(importedField, reference, importedMapField, targetField.getStructField("key"), validate); + resolveImportedNestedField(importedField, reference, importedMapField, targetField.getStructField("value"), validate); + registerImportedField(importedField, null, importedMapField); + } + + private void resolveImportedNormalField(TemporaryImportedField importedField, DocumentReference reference, + ImmutableSDField targetField, boolean validate) { + if (validate) { + validateTargetField(importedField, targetField, reference); + } + makeImportedNormalField(importedField, null, importedField.fieldName(), reference, targetField); + } + + private DocumentReference validateDocumentReference(TemporaryImportedField importedField) { + String referenceFieldName = importedField.referenceFieldName(); + DocumentReference reference = 
references.get().referenceMap().get(referenceFieldName); + if (reference == null) { + fail(importedField, "Reference field '" + referenceFieldName + "' not found"); + } + return reference; + } + + /** Returns the field the import points to in the schema targeted by the reference, failing if no such field is found. */ + private ImmutableSDField getTargetField(TemporaryImportedField importedField, + DocumentReference reference) { + String targetFieldName = importedField.targetFieldName(); + Schema targetSchema = reference.targetSearch(); + ImmutableSDField targetField = targetSchema.getField(targetFieldName); + if (targetField == null) { + fail(importedField, targetFieldAsString(targetFieldName, reference) + ": Not found"); + } + return targetField; + } + + /** Fails unless the target field is an attribute that is neither an index field nor of predicate type. */ + private void validateTargetField(TemporaryImportedField importedField, + ImmutableSDField targetField, DocumentReference reference) { + if (!targetField.doesAttributing()) { + fail(importedField, targetFieldAsString(targetField.getName(), reference) + + ": Is not an attribute field. Only attribute fields supported"); + } else if (targetField.doesIndexing()) { + fail(importedField, targetFieldAsString(targetField.getName(), reference) + + ": Is an index field. Not supported"); + } else if (targetField.getDataType().equals(DataType.PREDICATE)) { + fail(importedField, targetFieldAsString(targetField.getName(), reference) + + ": Is of type 'predicate'. 
Not supported"); + } + } + + private static String targetFieldAsString(String targetFieldName, DocumentReference reference) { + return "Field '" + targetFieldName + "' via reference field '" + reference.referenceField().getName() + "'"; + } + + private void fail(TemporaryImportedField importedField, String msg) { + throw new IllegalArgumentException("For " + schema + ", import field '" + + importedField.fieldName() + "': " + msg); + } + + private void fail(TemporaryImportedField importedField, String importedNestedFieldName, String msg) { + if (importedField.fieldName().equals(importedNestedFieldName)) { + fail(importedField, msg); + } + throw new IllegalArgumentException("For " + schema + ", import field '" + + importedField.fieldName() + "' (nested to '" + importedNestedFieldName + "'): " + msg); + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/IndexFieldNames.java b/config-model/src/main/java/com/yahoo/schema/processing/IndexFieldNames.java new file mode 100644 index 00000000000..27101c47c7a --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/IndexFieldNames.java @@ -0,0 +1,47 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Because of the way the parser works (allowing any token as identifier), + * it is not practical to limit the syntax of field names there, do it here. + * Important to disallow dash, has semantic in IL. 
+ * + * @author Vehard Havdal + */ +public class IndexFieldNames extends Processor { + + private static final String FIELD_NAME_REGEXP = "[a-zA-Z]\\w*"; + + public IndexFieldNames(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + /** + * Fails for every concrete field whose name neither matches FIELD_NAME_REGEXP nor is an allowed dotted position + * field name. Does nothing when validate is false. + */ + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + + for (SDField field : schema.allConcreteFields()) { + if ( ! field.getName().matches(FIELD_NAME_REGEXP) && ! legalDottedPositionField(field)) { + fail(schema, field, " Not a legal field name. Legal expression: " + FIELD_NAME_REGEXP); + } + } + } + + /** + * In {@link CreatePositionZCurve} we add some .position and .distance fields for pos fields. Make an exception for those for now. + * TODO Vespa 8: Rename to _position and _distance and delete this method. + * + * @param field an {@link com.yahoo.schema.document.SDField} + * @return true if allowed + */ + private boolean legalDottedPositionField(SDField field) { + return field.getName().endsWith(".position") || field.getName().endsWith(".distance"); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/IndexingInputs.java b/config-model/src/main/java/com/yahoo/schema/processing/IndexingInputs.java new file mode 100644 index 00000000000..88e84d5289f --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/IndexingInputs.java @@ -0,0 +1,106 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.indexinglanguage.ExpressionConverter; +import com.yahoo.vespa.indexinglanguage.ExpressionVisitor; +import com.yahoo.vespa.indexinglanguage.expressions.Expression; +import com.yahoo.vespa.indexinglanguage.expressions.InputExpression; +import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; +import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * This processor modifies all indexing scripts so that they input the value of the owning field by default. It also + * ensures that all fields used as input exist. + * + * @author Simon Thoresen Hult + */ +public class IndexingInputs extends Processor { + + public IndexingInputs(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + /** + * Rewrites the indexing script of every concrete field that has one: input expressions without a field name are + * pointed at the owning field, and statements that require an input get an explicit input of the owning field. + * + * @param validate whether to verify that every input expression refers to a usable field + * @param documentsOnly not read by this processor + */ + @Override + public void process(boolean validate, boolean documentsOnly) { + for (SDField field : schema.allConcreteFields()) { + ScriptExpression script = field.getIndexingScript(); + if (script == null) continue; + + String fieldName = field.getName(); + script = (ScriptExpression)new DefaultToCurrentField(fieldName).convert(script); + script = (ScriptExpression)new EnsureInputExpression(fieldName).convert(script); + if (validate) + new VerifyInputExpression(schema, field).visit(script); + + field.setIndexingScript(script); + } + } + + /** Replaces every input expression that has no field name with an input of the given field. */ + private static class DefaultToCurrentField extends ExpressionConverter { + + final String fieldName; + + DefaultToCurrentField(String fieldName) { + this.fieldName = fieldName; + } + + @Override + protected boolean shouldConvert(Expression exp) { + return exp instanceof InputExpression && 
((InputExpression)exp).getFieldName() == null; + } + + @Override + protected Expression doConvert(Expression exp) { + return new InputExpression(fieldName); + } + } + + /** Prepends an input of the given field to every statement that requires an input type. */ + private static class EnsureInputExpression extends ExpressionConverter { + + final String fieldName; + + EnsureInputExpression(String fieldName) { + this.fieldName = fieldName; + } + + @Override + protected boolean shouldConvert(Expression exp) { + return exp instanceof StatementExpression; + } + + @Override + protected Expression doConvert(Expression exp) { + if (exp.requiredInputType() != null) { + return new StatementExpression(new InputExpression(fieldName), exp); + } else { + return exp; + } + } + } + + /** Fails for every input expression whose field is missing from the schema or lacks full indexing docproc rights. */ + private class VerifyInputExpression extends ExpressionVisitor { + + private final Schema schema; + private final SDField field; + + public VerifyInputExpression(Schema schema, SDField field) { + this.schema = schema; + this.field = field; + } + + @Override + protected void doVisit(Expression exp) { + if ( ! (exp instanceof InputExpression)) return; + String inputField = ((InputExpression)exp).getFieldName(); + /* A field that is not in the schema must reach the descriptive failure below instead of a NullPointerException. */ + var referencedField = schema.getField(inputField); + if (referencedField != null && referencedField.hasFullIndexingDocprocRights()) return; + + fail(schema, field, "Indexing script refers to field '" + inputField + "' which does not exist " + + "in document type '" + schema.getDocument().getName() + "', and is not a mutable attribute."); + } + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/IndexingOutputs.java b/config-model/src/main/java/com/yahoo/schema/processing/IndexingOutputs.java new file mode 100644 index 00000000000..ea65a223686 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/IndexingOutputs.java @@ -0,0 +1,144 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.DataType; +import com.yahoo.document.Field; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.indexinglanguage.ExpressionConverter; +import com.yahoo.vespa.indexinglanguage.expressions.*; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.*; + +/** + * This processor modifies all indexing scripts so that they output to the owning field by default. It also prevents + * any output expression from writing to any field except for the owning field. Finally, for <code>SummaryExpression</code>, + * this processor expands to write all appropriate summary fields. + * + * @author Simon Thoresen Hult + */ +public class IndexingOutputs extends Processor { + + public IndexingOutputs(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + /** + * Rewrites the indexing script of every concrete field that has one, using the summary field names collected by + * {@code findSummaryTo} as the targets of summary expressions. + * + * @param validate whether to fail on output expressions that name a field other than the owning field + * @param documentsOnly not read by this processor + */ + @Override + public void process(boolean validate, boolean documentsOnly) { + for (SDField field : schema.allConcreteFields()) { + ScriptExpression script = field.getIndexingScript(); + if (script == null) continue; + + Set<String> summaryFields = new TreeSet<>(); + /* The same set is passed for both the dynamic and the static names, so all names end up in one sorted set. */ + findSummaryTo(schema, field, summaryFields, summaryFields); + MyConverter converter = new MyConverter(schema, field, summaryFields, validate); + field.setIndexingScript((ScriptExpression)converter.convert(script)); + } + } + + /** + * Collects the names of the summary fields the given field should write to. The summary fields the schema reports + * for the field are used when there are any; otherwise the summary fields declared on the field itself are used. + * + * @param dynamicSummary receives the names of summary fields with a dynamic transform + * @param staticSummary receives the names of the other summary fields that are written + */ + public void findSummaryTo(Schema schema, SDField field, Set<String> dynamicSummary, Set<String> staticSummary) { + var summaryFields = schema.getSummaryFields(field); + if (summaryFields.isEmpty()) { + fillSummaryToFromField(field, dynamicSummary, staticSummary); + } else { + 
fillSummaryToFromSearch(schema, field, summaryFields, dynamicSummary, staticSummary); + } + } + + private void fillSummaryToFromSearch(Schema schema, SDField field, List<SummaryField> summaryFields, + Set<String> dynamicSummary, Set<String> staticSummary) { + for (SummaryField summaryField : summaryFields) { + fillSummaryToFromSummaryField(schema, field, summaryField, dynamicSummary, staticSummary); + } + } + + /** + * Adds the name of one summary field to the dynamic or the static set. Dynamic summary fields are only added when + * the field type is string or uri, otherwise a warning is logged. Non-dynamic summary fields are added unless their + * transform is ATTRIBUTE. + */ + private void fillSummaryToFromSummaryField(Schema schema, SDField field, SummaryField summaryField, + Set<String> dynamicSummary, Set<String> staticSummary) { + SummaryTransform summaryTransform = summaryField.getTransform(); + String summaryName = summaryField.getName(); + if (summaryTransform.isDynamic() && summaryField.getSourceCount() > 2) { + // Avoid writing to summary fields that have more than a single input field, as that is handled by the + // summary rewriter in the search core. + // NOTE(review): the condition above only triggers for more than two sources, while this comment says + // more than a single input field - confirm which of the two is intended. + return; + } + if (summaryTransform.isDynamic()) { + DataType fieldType = field.getDataType(); + if (fieldType != DataType.URI && fieldType != DataType.STRING) { + warn(schema, field, "Dynamic summaries are only supported for fields of type " + + "string, ignoring summary field '" + summaryField.getName() + + "' for sd field '" + field.getName() + "' of type " + + fieldType.getName() + "."); + return; + } + dynamicSummary.add(summaryName); + } else if (summaryTransform != SummaryTransform.ATTRIBUTE) { + staticSummary.add(summaryName); + } + } + + private static void fillSummaryToFromField(SDField field, Set<String> dynamicSummary, Set<String> staticSummary) { + for (SummaryField summaryField : field.getSummaryFields().values()) { + String summaryName = summaryField.getName(); + if (summaryField.getTransform().isDynamic()) { + dynamicSummary.add(summaryName); + } else { + staticSummary.add(summaryName); + } + } + } + + private class MyConverter extends ExpressionConverter { + + final Schema schema; + final Field field; + final Set<String> summaryFields; + final boolean 
validate; + + MyConverter(Schema schema, Field field, Set<String> summaryFields, boolean validate) { + this.schema = schema; + this.field = field; + this.summaryFields = summaryFields.isEmpty() ? Collections.singleton(field.getName()) : summaryFields; + this.validate = validate; + } + + /** + * Selects output expressions without a field name for conversion. When validate is true, an output expression + * naming a field other than the owning field is reported with fail. + */ + @Override + protected boolean shouldConvert(Expression exp) { + if ( ! (exp instanceof OutputExpression)) { + return false; + } + String fieldName = ((OutputExpression)exp).getFieldName(); + if (fieldName == null) { + return true; // inject appropriate field name + } + if ( validate && ! fieldName.equals(field.getName())) { + fail(schema, field, "Indexing expression '" + exp + "' attempts to write to a field other than '" + + field.getName() + "'."); + } + return false; + } + + /** + * Replaces an attribute or index expression with one naming the owning field, and a summary expression with one + * summary expression per collected summary field name. + */ + @Override + protected Expression doConvert(Expression exp) { + List<Expression> ret = new LinkedList<>(); + if (exp instanceof AttributeExpression) { + ret.add(new AttributeExpression(field.getName())); + } else if (exp instanceof IndexExpression) { + ret.add(new IndexExpression(field.getName())); + } else if (exp instanceof SummaryExpression) { + for (String fieldName : summaryFields) { + ret.add(new SummaryExpression(fieldName)); + } + } else { + throw new UnsupportedOperationException(exp.getClass().getName()); + } + return new StatementExpression(ret); + } + + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/IndexingValidation.java b/config-model/src/main/java/com/yahoo/schema/processing/IndexingValidation.java new file mode 100644 index 00000000000..d8c1fb3125f --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/IndexingValidation.java @@ -0,0 +1,164 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.ArrayDataType; +import com.yahoo.document.DataType; +import com.yahoo.document.MapDataType; +import com.yahoo.document.WeightedSetDataType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.GeoPos; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.indexinglanguage.ExpressionConverter; +import com.yahoo.vespa.indexinglanguage.expressions.AttributeExpression; +import com.yahoo.vespa.indexinglanguage.expressions.Expression; +import com.yahoo.vespa.indexinglanguage.expressions.FieldTypeAdapter; +import com.yahoo.vespa.indexinglanguage.expressions.IndexExpression; +import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression; +import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; +import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression; +import com.yahoo.vespa.indexinglanguage.expressions.SummaryExpression; +import com.yahoo.vespa.indexinglanguage.expressions.VerificationContext; +import com.yahoo.vespa.indexinglanguage.expressions.VerificationException; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.HashSet; +import java.util.Set; + +/** + * Verifies the indexing script of every concrete field against the field types of the schema, and checks that a script + * does not assign conflicting values to the same field. Does nothing when validation is disabled. + * + * @author Simon Thoresen Hult + */ +public class IndexingValidation extends Processor { + + IndexingValidation(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + /** + * Verifies each script with a shared verification context and then runs a fresh conflict checker over its + * statements. A VerificationException from either step is reported with fail for the field being checked. + */ + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! 
validate) return; + + VerificationContext context = new VerificationContext(new MyAdapter(schema)); + for (SDField field : schema.allConcreteFields()) { + ScriptExpression script = field.getIndexingScript(); + try { + script.verify(context); + MyConverter converter = new MyConverter(); + for (StatementExpression exp : script) { + converter.convert(exp); // TODO: stop doing this explicitly when visiting a script does not branch + } + } catch (VerificationException e) { + fail(schema, field, "For expression '" + e.getExpression() + "': " + e.getMessage()); + } + } + } + + /** + * Conflict checker that never converts anything. The set outputs holds every field name written so far, and + * prevNames holds the names written since the last expression that created a new output value. Writing to a field + * that is in outputs but not in prevNames raises a VerificationException. + */ + private static class MyConverter extends ExpressionConverter { + + final Set<String> outputs = new HashSet<>(); + final Set<String> prevNames = new HashSet<>(); + + @Override + protected ExpressionConverter branch() { + MyConverter ret = new MyConverter(); + ret.outputs.addAll(outputs); + ret.prevNames.addAll(prevNames); + return ret; + } + + @Override + protected boolean shouldConvert(Expression exp) { + if (exp instanceof OutputExpression) { + String fieldName = ((OutputExpression)exp).getFieldName(); + if (outputs.contains(fieldName) && !prevNames.contains(fieldName)) { + throw new VerificationException(exp, "Attempting to assign conflicting values to field '" + + fieldName + "'."); + } + outputs.add(fieldName); + prevNames.add(fieldName); + } + if (exp.createdOutputType() != null) { + prevNames.clear(); + } + return false; + } + + @Override + protected Expression doConvert(Expression exp) { + throw new UnsupportedOperationException(); + } + } + + /** Supplies the input and output field types of the schema to the verification context. */ + private static class MyAdapter implements FieldTypeAdapter { + + final Schema schema; + + MyAdapter(Schema schema) { + this.schema = schema; + } + + @Override + public DataType getInputType(Expression exp, String fieldName) { + SDField field = schema.getDocumentField(fieldName); + if (field == null) { + throw new VerificationException(exp, "Input field '" + fieldName + "' not found."); + } + return field.getDataType(); + } + + @Override + public void 
tryOutputType(Expression exp, String fieldName, DataType valueType) { + String fieldDesc; + DataType fieldType; + if (exp instanceof AttributeExpression) { + Attribute attribute = schema.getAttribute(fieldName); + if (attribute == null) { + throw new VerificationException(exp, "Attribute '" + fieldName + "' not found."); + } + fieldDesc = "attribute"; + fieldType = attribute.getDataType(); + } else if (exp instanceof IndexExpression) { + SDField field = schema.getConcreteField(fieldName); + if (field == null) { + throw new VerificationException(exp, "Index field '" + fieldName + "' not found."); + } + fieldDesc = "index field"; + fieldType = field.getDataType(); + } else if (exp instanceof SummaryExpression) { + SummaryField field = schema.getSummaryField(fieldName); + if (field == null) { + throw new VerificationException(exp, "Summary field '" + fieldName + "' not found."); + } + fieldDesc = "summary field"; + fieldType = field.getDataType(); + } else { + throw new UnsupportedOperationException(); + } + if ( ! fieldType.isAssignableFrom(valueType) && + ! 
fieldType.isAssignableFrom(createCompatType(valueType))) { + throw new VerificationException(exp, "Can not assign " + valueType.getName() + " to " + fieldDesc + + " '" + fieldName + "' which is " + fieldType.getName() + "."); + } + } + + /** + * Returns the given type with every position type replaced by DataType.LONG, at any nesting level of arrays, + * maps and weighted sets. Any other type is returned unchanged. + */ + private static DataType createCompatType(DataType origType) { + if (origType instanceof ArrayDataType) { + return DataType.getArray(createCompatType(((ArrayDataType)origType).getNestedType())); + } else if (origType instanceof MapDataType) { + MapDataType mapType = (MapDataType)origType; + return DataType.getMap(createCompatType(mapType.getKeyType()), + createCompatType(mapType.getValueType())); + } else if (origType instanceof WeightedSetDataType) { + return DataType.getWeightedSet(createCompatType(((WeightedSetDataType)origType).getNestedType())); + } else if (GeoPos.isPos(origType)) { + return DataType.LONG; + } else { + return origType; + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/IndexingValues.java b/config-model/src/main/java/com/yahoo/schema/processing/IndexingValues.java new file mode 100644 index 00000000000..fa4b7d2bc40 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/IndexingValues.java @@ -0,0 +1,71 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.Field; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.indexinglanguage.ExpressionConverter; +import com.yahoo.vespa.indexinglanguage.expressions.Expression; +import com.yahoo.vespa.indexinglanguage.expressions.InputExpression; +import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Checks that the indexing script of a field declared in the document does not modify the value of that field before + * writing it. Fields for which {@code isExtraField()} is true are not checked. Does nothing when validation is disabled. + * + * @author Simon Thoresen Hult + */ +public class IndexingValues extends Processor { + + public IndexingValues(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + /** + * Runs the immutability check over the indexing script of every non-extra field of the document. + * + * @param validate whether to perform the check at all + * @param documentsOnly not read by this processor + */ + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + + for (Field field : schema.getDocument().fieldSet()) { + SDField sdField = (SDField)field; + if ( ! sdField.isExtraField()) { + new RequireThatDocumentFieldsAreImmutable(field).convert(sdField.getIndexingScript()); + } + } + } + + /** + * Checker that never converts anything. It remembers in mutatedBy the last expression that created a new output + * value, forgets it again when the owning field is read as input, and throws when an output expression is reached + * while mutatedBy is set. + */ + private class RequireThatDocumentFieldsAreImmutable extends ExpressionConverter { + + final Field field; + Expression mutatedBy; + + RequireThatDocumentFieldsAreImmutable(Field field) { + this.field = field; + } + + @Override + public ExpressionConverter branch() { + return clone(); + } + + @Override + protected boolean shouldConvert(Expression exp) { + if (exp instanceof OutputExpression && mutatedBy != null) { + throw newProcessException(schema, field, + "Indexing expression '" + mutatedBy + "' attempts to modify the value of the " + + "document field '" + field.getName() + "'. 
Use a field outside the document " + + "block instead."); + } + if (exp instanceof InputExpression && ((InputExpression)exp).getFieldName().equals(field.getName())) { + mutatedBy = null; + } else if (exp.createdOutputType() != null) { + mutatedBy = exp; + } + return false; + } + + @Override + protected Expression doConvert(Expression exp) { + throw new UnsupportedOperationException(); + } + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/IntegerIndex2Attribute.java b/config-model/src/main/java/com/yahoo/schema/processing/IntegerIndex2Attribute.java new file mode 100644 index 00000000000..1d8480a8e99 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/IntegerIndex2Attribute.java @@ -0,0 +1,88 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.NumericDataType; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.Index; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.indexinglanguage.ExpressionConverter; +import com.yahoo.vespa.indexinglanguage.ExpressionVisitor; +import com.yahoo.vespa.indexinglanguage.expressions.AttributeExpression; +import com.yahoo.vespa.indexinglanguage.expressions.Expression; +import com.yahoo.vespa.indexinglanguage.expressions.IndexExpression; +import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.HashSet; +import java.util.Set; + +/** + * Replaces the 'index' statement of all numerical fields to 'attribute' because we no longer support numerical indexes. 
+ * + * @author baldersheim + */ +public class IntegerIndex2Attribute extends Processor { + + public IntegerIndex2Attribute(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (SDField field : schema.allConcreteFields()) { + if (field.doesIndexing() && field.getDataType().getPrimitiveType() instanceof NumericDataType) { + if (field.getIndex(field.getName()) != null + && ! (field.getIndex(field.getName()).getType().equals(Index.Type.VESPA))) continue; + ScriptExpression script = field.getIndexingScript(); + Set<String> attributeNames = new HashSet<>(); + new MyVisitor(attributeNames).visit(script); + field.setIndexingScript((ScriptExpression)new MyConverter(attributeNames).convert(script)); + warn(schema, field, "Changed to attribute because numerical indexes (field has type " + + field.getDataType().getName() + ") is not currently supported." + + " Index-only settings may fail. 
Ignore this warning for streaming search."); + } + } + } + + private static class MyVisitor extends ExpressionVisitor { + + final Set<String> attributeNames; + + public MyVisitor(Set<String> attributeNames) { + this.attributeNames = attributeNames; + } + + @Override + protected void doVisit(Expression exp) { + if (exp instanceof AttributeExpression) { + attributeNames.add(((AttributeExpression)exp).getFieldName()); + } + } + } + + private static class MyConverter extends ExpressionConverter { + + final Set<String> attributeNames; + + public MyConverter(Set<String> attributeNames) { + this.attributeNames = attributeNames; + } + + @Override + protected boolean shouldConvert(Expression exp) { + return exp instanceof IndexExpression; + } + + @Override + protected Expression doConvert(Expression exp) { + String indexName = ((IndexExpression)exp).getFieldName(); + if (attributeNames.contains(indexName)) { + return null; + } + return new AttributeExpression(indexName); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/LiteralBoost.java b/config-model/src/main/java/com/yahoo/schema/processing/LiteralBoost.java new file mode 100644 index 00000000000..a84f895100a --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/LiteralBoost.java @@ -0,0 +1,79 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.RankProfile; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.Iterator; + +/** + * Expresses literal boosts in terms of extra indices with rank boost. 
+ * One extra index named <i>indexname</i>_exact is added for each index having + * a fields with literal-boosts of zero or more (zero to support other + * rank profiles setting a literal boost). Complete boost values in to fields + * are translated to rank boosts to the implementation indices. + * These indices has no positional + * or phrase support and contains concatenated versions of each field value + * of complete-boosted fields indexed to <i>indexname</i>. A search for indexname + * will be rewritten to also search <i>indexname</i>_exaxt + * + * @author bratseth + */ +public class LiteralBoost extends Processor { + + public LiteralBoost(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + /** Adds extra search fields and indices to express literal boosts */ + @Override + public void process(boolean validate, boolean documentsOnly) { + checkRankModifierRankType(schema); + addLiteralBoostsToFields(schema); + reduceFieldLiteralBoosts(schema); + } + + /** Checks if literal boost is given using rank: , and set the actual literal boost accordingly. */ + private void checkRankModifierRankType(Schema schema) { + for (SDField field : schema.allConcreteFields()) { + if (field.getLiteralBoost() > -1) continue; // Let explicit value take precedence + if (field.getRanking().isLiteral()) + field.setLiteralBoost(100); + } + } + + /** + * Ensures there are field boosts for all literal boosts mentioned in rank profiles. 
+ * This is required because boost indices will only be generated by looking + * at field boosts + */ + private void addLiteralBoostsToFields(Schema schema) { + Iterator i = matchingRankSettingsIterator(schema, RankProfile.RankSetting.Type.LITERALBOOST); + while (i.hasNext()) { + RankProfile.RankSetting setting = (RankProfile.RankSetting)i.next(); + SDField field = schema.getConcreteField(setting.getFieldName()); + if (field == null) continue; + if (field.getLiteralBoost() < 0) + field.setLiteralBoost(0); + } + } + + private void reduceFieldLiteralBoosts(Schema schema) { + for (SDField field : schema.allConcreteFields()) { + if (field.getLiteralBoost() < 0) continue; + reduceFieldLiteralBoost(field, schema); + } + } + + private void reduceFieldLiteralBoost(SDField field, Schema schema) { + SDField literalField = addField(schema, field, "literal", + "{ input " + field.getName() + " | tokenize | index " + field.getName() + "_literal; }", + "literal-boost"); + literalField.setWeight(field.getWeight() + field.getLiteralBoost()); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/MakeAliases.java b/config-model/src/main/java/com/yahoo/schema/processing/MakeAliases.java new file mode 100644 index 00000000000..7093242d0ac --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/MakeAliases.java @@ -0,0 +1,61 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.Index; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/** + * Takes the aliases set on field by parser and sets them on correct Index or Attribute + * + * @author vegardh + */ +public class MakeAliases extends Processor { + + public MakeAliases(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + List<String> usedAliases = new ArrayList<>(); + for (SDField field : schema.allConcreteFields()) { + for (Map.Entry<String, String> e : field.getAliasToName().entrySet()) { + String alias = e.getKey(); + String name = e.getValue(); + String errMsg = "For " + schema + ": alias '" + alias + "' "; + if (validate && schema.existsIndex(alias)) { + throw new IllegalArgumentException(errMsg + "is illegal since it is the name of an index."); + } + if (validate && schema.getAttribute(alias) != null) { + throw new IllegalArgumentException(errMsg + "is illegal since it is the name of an attribute."); + } + if (validate && usedAliases.contains(alias)) { + throw new IllegalArgumentException(errMsg + "specified more than once."); + } + usedAliases.add(alias); + + Index index = field.getIndex(name); + Attribute attribute = field.getAttributes().get(name); + if (index != null) { + index.addAlias(alias); // alias will be for index in this case, since it is the one used in a search + } else if (attribute != null && ! 
field.doesIndexing()) { + attribute.getAliases().add(alias); + } else { + index = new Index(name); + index.addAlias(alias); + field.addIndex(index); + } + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/MakeDefaultSummaryTheSuperSet.java b/config-model/src/main/java/com/yahoo/schema/processing/MakeDefaultSummaryTheSuperSet.java new file mode 100644 index 00000000000..ea24bf0569d --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/MakeDefaultSummaryTheSuperSet.java @@ -0,0 +1,49 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * All summary fields which are not attributes + * must currently be present in the default summary class, + * since the default summary class also defines the docsum.dat format. + * This processor adds any missing summaries to the default summary. + * When that is decoupled from the actual summaries returned, this + * processor can be removed. Note: the StreamingSummary also takes advantage of + * the fact that default is the superset. + * + * All other summary logic should work unchanged without this processing step + * except that IndexStructureValidator.validateSummaryFields must be changed to + * consider all summaries, not just the default, i.e change to + * if (search.getSummaryField(expr.getFieldName()) == null) + * + * This must be done after other summary processors. 
+ * + * @author bratseth + */ +public class MakeDefaultSummaryTheSuperSet extends Processor { + + public MakeDefaultSummaryTheSuperSet(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + DocumentSummary defaultSummary= schema.getSummariesInThis().get("default"); + for (SummaryField summaryField : schema.getUniqueNamedSummaryFields().values() ) { + if (defaultSummary.getSummaryField(summaryField.getName()) != null) continue; + if (summaryField.getTransform() == SummaryTransform.ATTRIBUTE) continue; + if (summaryField.getTransform() == SummaryTransform.ATTRIBUTECOMBINER) continue; + if (summaryField.getTransform() == SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER) continue; + + defaultSummary.add(summaryField.clone()); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/MatchConsistency.java b/config-model/src/main/java/com/yahoo/schema/processing/MatchConsistency.java new file mode 100644 index 00000000000..5fb59e53ba9 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/MatchConsistency.java @@ -0,0 +1,72 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.MatchType; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.indexinglanguage.ExpressionVisitor; +import com.yahoo.vespa.indexinglanguage.expressions.Expression; +import com.yahoo.vespa.indexinglanguage.expressions.IndexExpression; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.HashMap; +import java.util.Map; + +/** + * Warn on inconsistent match settings for any index + * + * @author vegardh + */ +public class MatchConsistency extends Processor { + + public MatchConsistency(Schema schema, + DeployLogger deployLogger, + RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + + Map<String, MatchType> types = new HashMap<>(); + for (SDField field : schema.allConcreteFields()) { + new MyVisitor(schema, field, types).visit(field.getIndexingScript()); + } + } + + private void checkMatching(Schema schema, SDField field, Map<String, MatchType> types, String indexTo) { + MatchType prevType = types.get(indexTo); + if (prevType == null) { + types.put(indexTo, field.getMatching().getType()); + } else if ( ! 
field.getMatching().getType().equals(prevType)) { + warn(schema, field, "The matching type for index '" + indexTo + "' (got " + field.getMatching().getType() + + ") is inconsistent with that given for the same index in a previous field (had " + + prevType + ")."); + } + } + + private class MyVisitor extends ExpressionVisitor { + + final Schema schema; + final SDField field; + final Map<String, MatchType> types; + + MyVisitor(Schema schema, SDField field, Map<String, MatchType> types) { + this.schema = schema; + this.field = field; + this.types = types; + } + + @Override + protected void doVisit(Expression exp) { + if (exp instanceof IndexExpression) { + checkMatching(schema, field, types, ((IndexExpression)exp).getFieldName()); + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/MatchPhaseSettingsValidator.java b/config-model/src/main/java/com/yahoo/schema/processing/MatchPhaseSettingsValidator.java new file mode 100644 index 00000000000..7c1c255097f --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/MatchPhaseSettingsValidator.java @@ -0,0 +1,98 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfile; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Validates the match phase settings for all registered rank profiles. 
+ * + * @author geirst + */ +public class MatchPhaseSettingsValidator extends Processor { + + public MatchPhaseSettingsValidator(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + if (documentsOnly) return; + + for (RankProfile rankProfile : rankProfileRegistry.rankProfilesOf(schema)) { + RankProfile.MatchPhaseSettings settings = rankProfile.getMatchPhaseSettings(); + if (settings != null) { + validateMatchPhaseSettings(rankProfile, settings); + } + } + } + + private void validateMatchPhaseSettings(RankProfile rankProfile, RankProfile.MatchPhaseSettings settings) { + String attributeName = settings.getAttribute(); + new AttributeValidator(schema.getName(), + rankProfile.name(), + schema.getAttribute(attributeName), attributeName).validate(); + } + + public static class AttributeValidator { + + private final String searchName; + private final String rankProfileName; + protected final Attribute attribute; + private final String attributeName; + + public AttributeValidator(String searchName, String rankProfileName, Attribute attribute, String attributeName) { + this.searchName = searchName; + this.rankProfileName = rankProfileName; + this.attribute = attribute; + this.attributeName = attributeName; + } + + public void validate() { + validateThatAttributeExists(); + validateThatAttributeIsSingleNumeric(); + validateThatAttributeIsFastSearch(); + } + + protected void validateThatAttributeExists() { + if (attribute == null) { + failValidation("does not exists"); + } + } + + protected void validateThatAttributeIsSingleNumeric() { + if (!attribute.getCollectionType().equals(Attribute.CollectionType.SINGLE) || + attribute.getType().equals(Attribute.Type.STRING) || + attribute.getType().equals(Attribute.Type.PREDICATE)) + { + 
failValidation("must be single value numeric, but it is '" + + attribute.getDataType().getName() + "'"); + } + } + + protected void validateThatAttributeIsFastSearch() { + if ( ! attribute.isFastSearch()) { + failValidation("must be fast-search, but it is not"); + } + } + + protected void failValidation(String what) { + throw new IllegalArgumentException(createMessagePrefix() + what); + } + + public String getValidationType() { return "match-phase"; } + + private String createMessagePrefix() { + return "In search definition '" + searchName + + "', rank-profile '" + rankProfileName + + "': " + getValidationType() + " attribute '" + attributeName + "' "; + } + + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/MatchedElementsOnlyResolver.java b/config-model/src/main/java/com/yahoo/schema/processing/MatchedElementsOnlyResolver.java new file mode 100644 index 00000000000..ed95f87d7d6 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/MatchedElementsOnlyResolver.java @@ -0,0 +1,95 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.ArrayDataType; +import com.yahoo.document.DataType; +import com.yahoo.document.WeightedSetDataType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.ComplexAttributeFieldUtils; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isComplexFieldWithOnlyStructFieldAttributes; +import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isSupportedComplexField; + +/** + * Iterates all summary fields with 'matched-elements-only' and adjusts transform (if all struct-fields are attributes) + * and validates that the field type is supported. 
+ * + * @author geirst + */ +public class MatchedElementsOnlyResolver extends Processor { + + public MatchedElementsOnlyResolver(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (var entry : schema.getSummaries().entrySet()) { + var summary = entry.getValue(); + for (var field : summary.getSummaryFields().values()) { + if (field.getTransform() == SummaryTransform.MATCHED_ELEMENTS_FILTER) { + processSummaryField(summary, field, validate); + } + } + } + } + + private void processSummaryField(DocumentSummary summary, SummaryField field, boolean validate) { + var sourceField = schema.getField(field.getSingleSource()); + if (sourceField != null) { + if (isSupportedComplexField(sourceField)) { + if (isComplexFieldWithOnlyStructFieldAttributes(sourceField)) { + field.setTransform(SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER); + } + } else if (isSupportedMultiValueField(sourceField)) { + if (sourceField.doesAttributing()) { + field.setTransform(SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER); + } + } else if (validate) { + fail(summary, field, "'matched-elements-only' is not supported for this field type. 
" + + "Supported field types are: array of primitive, weighted set of primitive, " + + "array of simple struct, map of primitive type to simple struct, " + + "and map of primitive type to primitive type"); + } + } + // else case is handled in SummaryFieldsMustHaveValidSource + } + + private boolean isSupportedMultiValueField(ImmutableSDField sourceField) { + var type = sourceField.getDataType(); + return (isArrayOfPrimitiveType(type) || isWeightedsetOfPrimitiveType(type)); + } + + private boolean isArrayOfPrimitiveType(DataType type) { + if (type instanceof ArrayDataType) { + var arrayType = (ArrayDataType) type; + return ComplexAttributeFieldUtils.isPrimitiveType(arrayType.getNestedType()); + } + return false; + } + + private boolean isWeightedsetOfPrimitiveType(DataType type) { + if (type instanceof WeightedSetDataType) { + var wsetType = (WeightedSetDataType) type; + return ComplexAttributeFieldUtils.isPrimitiveType(wsetType.getNestedType()); + } + return false; + } + + private void fail(DocumentSummary summary, SummaryField field, String msg) { + throw new IllegalArgumentException(formatError(schema, summary, field, msg)); + } + + private String formatError(Schema schema, DocumentSummary summary, SummaryField field, String msg) { + return "For " + schema + ", document summary '" + summary.getName() + + "', summary field '" + field.getName() + "': " + msg; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/MultifieldIndexHarmonizer.java b/config-model/src/main/java/com/yahoo/schema/processing/MultifieldIndexHarmonizer.java new file mode 100644 index 00000000000..3a889085871 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/MultifieldIndexHarmonizer.java @@ -0,0 +1,76 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.processing.multifieldresolver.IndexCommandResolver; +import com.yahoo.schema.processing.multifieldresolver.RankTypeResolver; +import com.yahoo.schema.processing.multifieldresolver.StemmingResolver; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.List; +import java.util.Map; + +/** + * Ensures that there are no conflicting types or field settings + * in multifield indices, either by changing settings or by splitting + * conflicting fields in multiple ones with different settings. + * + * @author bratseth + */ +public class MultifieldIndexHarmonizer extends Processor { + + /** A map from index names to a List of fields going to that index */ + private Map<String,List<SDField>> indexToFields=new java.util.HashMap<>(); + + public MultifieldIndexHarmonizer(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + populateIndexToFields(schema); + resolveAllConflicts(schema); + } + + private void populateIndexToFields(Schema schema) { + for (SDField field : schema.allConcreteFields() ) { + if ( ! 
field.doesIndexing()) continue; + addIndexField(field.getName(), field); + } + } + + private void addIndexField(String indexName,SDField field) { + List<SDField> fields = indexToFields.get(indexName); + if (fields == null) { + fields = new java.util.ArrayList<>(); + indexToFields.put(indexName, fields); + } + fields.add(field); + } + + private void resolveAllConflicts(Schema schema) { + for (Map.Entry<String, List<SDField>> entry : indexToFields.entrySet()) { + String indexName = entry.getKey(); + List<SDField> fields = entry.getValue(); + if (fields.size() == 1) continue; // It takes two to make a conflict + resolveConflicts(indexName, fields, schema); + } + } + + /** + * Resolves all conflicts for one index + * + * @param indexName the name of the index in question + * @param fields all the fields indexed to this index + * @param schema the search definition having this + */ + private void resolveConflicts(String indexName, List<SDField> fields, Schema schema) { + new StemmingResolver(indexName, fields, schema, deployLogger).resolve(); + new IndexCommandResolver(indexName, fields, schema, deployLogger).resolve(); + new RankTypeResolver(indexName, fields, schema, deployLogger).resolve(); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/MutableAttributes.java b/config-model/src/main/java/com/yahoo/schema/processing/MutableAttributes.java new file mode 100644 index 00000000000..854f6b2dddb --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/MutableAttributes.java @@ -0,0 +1,29 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +public class MutableAttributes extends Processor { + + public MutableAttributes(Schema schema, DeployLogger deployLogger, + RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) + { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (SDField field : schema.allConcreteFields()) { + if ( ! field.isExtraField() && field.getAttributes().containsKey(field.getName())) { + if (field.getAttributes().get(field.getName()).isMutable()) { + throw new IllegalArgumentException("Field '" + field.getName() + "' in '" + schema.getDocument().getName() + + "' can not be marked mutable as it is inside the document clause."); + } + } + } + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/NGramMatch.java b/config-model/src/main/java/com/yahoo/schema/processing/NGramMatch.java new file mode 100644 index 00000000000..f1ff910be43 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/NGramMatch.java @@ -0,0 +1,78 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.CollectionDataType; +import com.yahoo.document.DataType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.MatchType; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.document.Stemming; +import com.yahoo.vespa.indexinglanguage.expressions.*; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * The implementation of "gram" matching - splitting the incoming text and the queries into + * n-grams for matching. This will also validate the gram settings. + * + * @author bratseth + */ +public class NGramMatch extends Processor { + + public static final int DEFAULT_GRAM_SIZE = 2; + + public NGramMatch(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (SDField field : schema.allConcreteFields()) { + if (field.getMatching().getType().equals(MatchType.GRAM)) + implementGramMatch(schema, field, validate); + else if (validate && field.getMatching().getGramSize() >= 0) + throw new IllegalArgumentException("gram-size can only be set when the matching mode is 'gram'"); + } + } + + private void implementGramMatch(Schema schema, SDField field, boolean validate) { + if (validate && field.doesAttributing() && ! 
field.doesIndexing()) + throw new IllegalArgumentException("gram matching is not supported with attributes, use 'index' in indexing"); + + int n = field.getMatching().getGramSize(); + if (n < 0) + n = DEFAULT_GRAM_SIZE; // not set - use default gram size + if (validate && n == 0) + throw new IllegalArgumentException("Illegal gram size in " + field + ": Must be at least 1"); + field.getNormalizing().inferCodepoint(); + field.setStemming(Stemming.NONE); // not compatible with stemming and normalizing + field.addQueryCommand("ngram " + n); + field.setIndexingScript((ScriptExpression)new MyProvider(schema, n).convert(field.getIndexingScript())); + } + + private static class MyProvider extends TypedTransformProvider { + + final int ngram; + + MyProvider(Schema schema, int ngram) { + super(NGramExpression.class, schema); + this.ngram = ngram; + } + + @Override + protected boolean requiresTransform(Expression exp, DataType fieldType) { + return exp instanceof OutputExpression; + } + + @Override + protected Expression newTransform(DataType fieldType) { + Expression exp = new NGramExpression(null, ngram); + if (fieldType instanceof CollectionDataType) + exp = new ForEachExpression(exp); + return exp; + } + + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/OnnxModelConfigGenerator.java b/config-model/src/main/java/com/yahoo/schema/processing/OnnxModelConfigGenerator.java new file mode 100644 index 00000000000..ce56a4320d3 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/OnnxModelConfigGenerator.java @@ -0,0 +1,96 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.ApplicationPackage; +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.OnnxModel; +import com.yahoo.schema.RankProfile; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.expressiontransforms.OnnxModelTransformer; +import com.yahoo.searchlib.rankingexpression.rule.CompositeNode; +import com.yahoo.searchlib.rankingexpression.rule.ConstantNode; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; +import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode; +import com.yahoo.vespa.model.container.search.QueryProfiles; +import com.yahoo.vespa.model.ml.OnnxModelInfo; + +import java.util.Map; + +/** + * Processes ONNX ranking features of the form: + * + * onnx("files/model.onnx", "path/to/output:1") + * + * And generates an "onnx-model" configuration as if it was defined in the profile: + * + * onnx-model files_model_onnx { + * file: "files/model.onnx" + * } + * + * Inputs and outputs are resolved in OnnxModelTypeResolver, which must be + * processed after this. 
+ * + * @author lesters + */ +public class OnnxModelConfigGenerator extends Processor { + + public OnnxModelConfigGenerator(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if (documentsOnly) return; + for (RankProfile profile : rankProfileRegistry.rankProfilesOf(schema)) { + if (profile.getFirstPhaseRanking() != null) { + process(profile.getFirstPhaseRanking().getRoot(), profile); + } + if (profile.getSecondPhaseRanking() != null) { + process(profile.getSecondPhaseRanking().getRoot(), profile); + } + for (Map.Entry<String, RankProfile.RankingExpressionFunction> function : profile.getFunctions().entrySet()) { + process(function.getValue().function().getBody().getRoot(), profile); + } + for (ReferenceNode feature : profile.getSummaryFeatures()) { + process(feature, profile); + } + } + } + + private void process(ExpressionNode node, RankProfile profile) { + if (node instanceof ReferenceNode) { + process((ReferenceNode)node, profile); + } else if (node instanceof CompositeNode) { + for (ExpressionNode child : ((CompositeNode) node).children()) { + process(child, profile); + } + } + } + + private void process(ReferenceNode feature, RankProfile profile) { + if (feature.getName().equals("onnxModel") || feature.getName().equals("onnx")) { + if (feature.getArguments().size() > 0) { + if (feature.getArguments().expressions().get(0) instanceof ConstantNode) { + ConstantNode node = (ConstantNode) feature.getArguments().expressions().get(0); + String path = OnnxModelTransformer.stripQuotes(node.toString()); + String modelConfigName = OnnxModelTransformer.asValidIdentifier(path); + + // Only add the configuration if the model can actually be found. + if ( ! 
OnnxModelInfo.modelExists(path, schema.applicationPackage())) { + path = ApplicationPackage.MODELS_DIR.append(path).toString(); + if ( ! OnnxModelInfo.modelExists(path, schema.applicationPackage())) { + return; + } + } + + OnnxModel onnxModel = profile.onnxModels().get(modelConfigName); + if (onnxModel == null) + profile.add(new OnnxModel(modelConfigName, path)); + } + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/OnnxModelTypeResolver.java b/config-model/src/main/java/com/yahoo/schema/processing/OnnxModelTypeResolver.java new file mode 100644 index 00000000000..32229ea635b --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/OnnxModelTypeResolver.java @@ -0,0 +1,40 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.OnnxModel; +import com.yahoo.schema.RankProfile; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.model.container.search.QueryProfiles; +import com.yahoo.vespa.model.ml.OnnxModelInfo; + +/** + * Processes every "onnx-model" element in the schema. Associates model type + * information by retrieving from either the ONNX model file directly or from + * preprocessed information in ZK. Adds missing input and output mappings + * (assigning default names). + * + * Must be processed before RankingExpressingTypeResolver. 
+ * + * @author lesters + */ +public class OnnxModelTypeResolver extends Processor { + + public OnnxModelTypeResolver(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if (documentsOnly) return; + for (OnnxModel onnxModel : schema.declaredOnnxModels().values()) + onnxModel.setModelInfo(OnnxModelInfo.load(onnxModel.getFileName(), schema.applicationPackage())); + for (RankProfile profile : rankProfileRegistry.rankProfilesOf(schema)) { + for (OnnxModel onnxModel : profile.declaredOnnxModels().values()) + onnxModel.setModelInfo(OnnxModelInfo.load(onnxModel.getFileName(), schema.applicationPackage())); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/OptimizeIlscript.java b/config-model/src/main/java/com/yahoo/schema/processing/OptimizeIlscript.java new file mode 100644 index 00000000000..a3b026fb724 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/OptimizeIlscript.java @@ -0,0 +1,38 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.indexinglanguage.ExpressionOptimizer; +import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Run ExpressionOptimizer on all scripts, to get rid of expressions that have no effect. 
+ */ +public class OptimizeIlscript extends Processor { + + public OptimizeIlscript(Schema schema, + DeployLogger deployLogger, + RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (SDField field : schema.allConcreteFields()) { + ScriptExpression script = field.getIndexingScript(); + if (script == null) continue; + + field.setIndexingScript((ScriptExpression)new ExpressionOptimizer().convert(script)); + if ( ! field.getIndexingScript().toString().equals(script.toString())) { + info(schema, field, "Rewrote ilscript from:\n" + script.toString() + + "\nto\n" + field.getIndexingScript().toString()); + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/PagedAttributeValidator.java b/config-model/src/main/java/com/yahoo/schema/processing/PagedAttributeValidator.java new file mode 100644 index 00000000000..34bb6e1db2e --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/PagedAttributeValidator.java @@ -0,0 +1,66 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.Field; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.tensor.TensorType; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.Optional; + +/** + * Validates the 'paged' attribute setting and throws if specified on unsupported types. 
+ * + * @author geirst + */ +public class PagedAttributeValidator extends Processor { + + public PagedAttributeValidator(Schema schema, + DeployLogger deployLogger, + RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if (!validate) { + return; + } + for (var field : schema.allConcreteFields()) { + for (var attribute : field.getAttributes().values()) { + if (attribute.isPaged()) { + validatePagedSetting(field, attribute); + } + } + } + } + + private void validatePagedSetting(Field field, Attribute attribute) { + if (!isSupportedType(attribute)) { + fail(schema, field, "The 'paged' attribute setting is not supported for non-dense tensor, predicate and reference types"); + } + } + + private boolean isSupportedType(Attribute attribute) { + var type = attribute.getType(); + return (type != Attribute.Type.PREDICATE) && + (type != Attribute.Type.REFERENCE) && + (isSupportedTensorType(attribute.tensorType())); + } + + private boolean isSupportedTensorType(Optional<TensorType> tensorType) { + if (tensorType.isPresent()) { + return isDenseTensorType(tensorType.get()); + } + return true; + } + + private boolean isDenseTensorType(TensorType type) { + return type.dimensions().stream().allMatch(d -> d.isIndexed()); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/PredicateProcessor.java b/config-model/src/main/java/com/yahoo/schema/processing/PredicateProcessor.java new file mode 100644 index 00000000000..280eae3d88b --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/PredicateProcessor.java @@ -0,0 +1,144 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.DataType; +import com.yahoo.document.datatypes.IntegerFieldValue; +import com.yahoo.document.datatypes.LongFieldValue; +import com.yahoo.schema.Index; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.BooleanIndexDefinition; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.indexinglanguage.ExpressionConverter; +import com.yahoo.vespa.indexinglanguage.expressions.Expression; +import com.yahoo.vespa.indexinglanguage.expressions.OptimizePredicateExpression; +import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression; +import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; +import com.yahoo.vespa.indexinglanguage.expressions.SetValueExpression; +import com.yahoo.vespa.indexinglanguage.expressions.SetVarExpression; +import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.ArrayList; +import java.util.List; + +/** + * Validates the predicate fields. + * + * @author Lester Solbakken + */ +public class PredicateProcessor extends Processor { + + public PredicateProcessor(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (SDField field : schema.allConcreteFields()) { + if (field.getDataType() == DataType.PREDICATE) { + if (validate && field.doesIndexing()) { + fail(schema, field, "Use 'attribute' instead of 'index'. 
This will require a refeed if you have upgraded."); + } + if (field.doesAttributing()) { + Attribute attribute = field.getAttributes().get(field.getName()); + for (Index index : field.getIndices().values()) { + BooleanIndexDefinition booleanDefinition = index.getBooleanIndexDefiniton(); + if (validate && (booleanDefinition == null || ! booleanDefinition.hasArity())) { + fail(schema, field, "Missing arity value in predicate field."); + } + if (validate && (booleanDefinition.getArity() < 2)) { + fail(schema, field, "Invalid arity value in predicate field, must be greater than 1."); + } + double threshold = booleanDefinition.getDensePostingListThreshold(); + if (validate && (threshold <= 0 || threshold > 1)) { + fail(schema, field, "Invalid dense-posting-list-threshold value in predicate field. " + + "Value must be in range (0..1]."); + } + + attribute.setArity(booleanDefinition.getArity()); + attribute.setLowerBound(booleanDefinition.getLowerBound()); + attribute.setUpperBound(booleanDefinition.getUpperBound()); + + attribute.setDensePostingListThreshold(threshold); + addPredicateOptimizationIlScript(field, booleanDefinition); + } + DocumentSummary summary = schema.getSummariesInThis().get("attributeprefetch"); + if (summary != null) { + summary.remove(attribute.getName()); + } + for (SummaryField summaryField : schema.getSummaryFields(field)) { + summaryField.setTransform(SummaryTransform.NONE); + } + } + } else if (validate && field.getDataType().getPrimitiveType() == DataType.PREDICATE) { + fail(schema, field, "Collections of predicates are not allowed."); + } else if (validate && field.getDataType() == DataType.RAW && field.doesIndexing()) { + fail(schema, field, "Indexing of RAW fields is not supported."); + } else if (validate) { + // if field is not a predicate, disallow predicate-related index parameters + for (Index index : field.getIndices().values()) { + if (index.getBooleanIndexDefiniton() != null) { + BooleanIndexDefinition def = 
index.getBooleanIndexDefiniton(); + if (def.hasArity()) { + fail(schema, field, "Arity parameter is used only for predicate type fields."); + } else if (def.hasLowerBound() || def.hasUpperBound()) { + fail(schema, field, "Parameters lower-bound and upper-bound are used only for predicate type fields."); + } else if (def.hasDensePostingListThreshold()) { + fail(schema, field, "Parameter dense-posting-list-threshold is used only for predicate type fields."); + } + } + } + } + } + } + + private void addPredicateOptimizationIlScript(SDField field, BooleanIndexDefinition booleanIndexDefiniton) { + Expression script = field.getIndexingScript(); + if (script == null) return; + + script = new StatementExpression(makeSetPredicateVariablesScript(booleanIndexDefiniton), script); + + ExpressionConverter converter = new PredicateOutputTransformer(schema); + field.setIndexingScript(new ScriptExpression((StatementExpression)converter.convert(script))); + } + + private Expression makeSetPredicateVariablesScript(BooleanIndexDefinition options) { + List<Expression> expressions = new ArrayList<>(); + expressions.add(new SetValueExpression(new IntegerFieldValue(options.getArity()))); + expressions.add(new SetVarExpression("arity")); + if (options.hasLowerBound()) { + expressions.add(new SetValueExpression(new LongFieldValue(options.getLowerBound()))); + expressions.add(new SetVarExpression("lower_bound")); + } + if (options.hasUpperBound()) { + expressions.add(new SetValueExpression(new LongFieldValue(options.getUpperBound()))); + expressions.add(new SetVarExpression("upper_bound")); + } + return new StatementExpression(expressions); + } + + private static class PredicateOutputTransformer extends TypedTransformProvider { + + PredicateOutputTransformer(Schema schema) { + super(OptimizePredicateExpression.class, schema); + } + + @Override + protected boolean requiresTransform(Expression exp, DataType fieldType) { + return exp instanceof OutputExpression && fieldType == 
DataType.PREDICATE; + } + + @Override + protected Expression newTransform(DataType fieldType) { + return new OptimizePredicateExpression(); + } + + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/Processing.java b/config-model/src/main/java/com/yahoo/schema/processing/Processing.java new file mode 100644 index 00000000000..63eca2121c1 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/Processing.java @@ -0,0 +1,152 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.processing.multifieldresolver.RankProfileTypeSettingsProcessor; +import com.yahoo.vespa.model.container.search.QueryProfiles; +import com.yahoo.config.model.api.ModelContext; +import com.yahoo.config.model.deploy.TestProperties; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Set; + +/** + * Executor of processors. This defines the right order of processor execution. 
+ * + * @author bratseth + * @author bjorncs + */ +public class Processing { + + private final ModelContext.Properties properties; + + public Processing() { this.properties = new TestProperties(); } + + public Processing(ModelContext.Properties properties) { this.properties = properties; } + + private Collection<ProcessorFactory> processors() { + return Arrays.asList( + SearchMustHaveDocument::new, + UrlFieldValidator::new, + BuiltInFieldSets::new, + ReservedDocumentNames::new, + IndexFieldNames::new, + IntegerIndex2Attribute::new, + MakeAliases::new, + UriHack::new, + LiteralBoost::new, + TagType::new, + ValidateFieldTypesDocumentsOnly::new, + IndexingInputs::new, + OptimizeIlscript::new, + ValidateFieldWithIndexSettingsCreatesIndex::new, + AttributesImplicitWord::new, + MutableAttributes::new, + CreatePositionZCurve::new, + DictionaryProcessor::new, + WordMatch::new, + ImportedFieldsResolver::new, + ImplicitSummaries::new, + ImplicitSummaryFields::new, + AdjustPositionSummaryFields::new, + SummaryConsistency::new, + SummaryNamesFieldCollisions::new, + SummaryFieldsMustHaveValidSource::new, + MatchedElementsOnlyResolver::new, + AddAttributeTransformToSummaryOfImportedFields::new, + MakeDefaultSummaryTheSuperSet::new, + Bolding::new, + AttributeProperties::new, + SetRankTypeEmptyOnFilters::new, + SummaryDynamicStructsArrays::new, + StringSettingsOnNonStringFields::new, + IndexingOutputs::new, + ExactMatch::new, + NGramMatch::new, + TextMatch::new, + MultifieldIndexHarmonizer::new, + FilterFieldNames::new, + MatchConsistency::new, + ValidateStructTypeInheritance::new, + ValidateFieldTypes::new, + SummaryDiskAccessValidator::new, + DisallowComplexMapAndWsetKeyTypes::new, + SortingSettings::new, + FieldSetSettings::new, + AddExtraFieldsToDocument::new, + PredicateProcessor::new, + MatchPhaseSettingsValidator::new, + DiversitySettingsValidator::new, + TensorFieldProcessor::new, + RankProfileTypeSettingsProcessor::new, + ReferenceFieldsProcessor::new, + 
FastAccessValidator::new, + ReservedFunctionNames::new, + OnnxModelConfigGenerator::new, + OnnxModelTypeResolver::new, + RankingExpressionTypeResolver::new, + BoolAttributeValidator::new, + PagedAttributeValidator::new, + // These should be last: + IndexingValidation::new, + IndexingValues::new); + } + + /** Processors of rank profiles only (those who tolerate and do something useful when the search field is null) */ + private Collection<ProcessorFactory> rankProfileProcessors() { + return Arrays.asList( + RankProfileTypeSettingsProcessor::new, + ReservedFunctionNames::new, + RankingExpressionTypeResolver::new); + } + + private void runProcessor(Processor processor, boolean validate, boolean documentsOnly) { + processor.process(validate, documentsOnly, properties); + } + + /** + * Runs all search processors on the given {@link Schema} object. These will modify the search object, <b>possibly + * exchanging it with another</b>, as well as its document types. + * + * @param schema the search to process + * @param deployLogger the log to log messages and warnings for application deployment to + * @param rankProfileRegistry a {@link com.yahoo.schema.RankProfileRegistry} + * @param queryProfiles the query profiles contained in the application this search is part of + * @param processorsToSkip a set of processor classes we should not invoke in this. Useful for testing. + */ + public void process(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles, boolean validate, boolean documentsOnly, + Set<Class<? extends Processor>> processorsToSkip) + { + Collection<ProcessorFactory> factories = processors(); + factories.stream() + .map(factory -> factory.create(schema, deployLogger, rankProfileRegistry, queryProfiles)) + .filter(processor -> ! processorsToSkip.contains(processor.getClass())) + .forEach(processor -> runProcessor(processor, validate, documentsOnly)); + } + + /** + * Runs rank profiles processors only. 
+ * + * @param deployLogger the log to log messages and warnings for application deployment to + * @param rankProfileRegistry a {@link com.yahoo.schema.RankProfileRegistry} + * @param queryProfiles the query profiles contained in the application this search is part of + */ + public void processRankProfiles(DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles, boolean validate, boolean documentsOnly) { + Collection<ProcessorFactory> factories = rankProfileProcessors(); + factories.stream() + .map(factory -> factory.create(null, deployLogger, rankProfileRegistry, queryProfiles)) + .forEach(processor -> runProcessor(processor, validate, documentsOnly)); + } + + @FunctionalInterface + public interface ProcessorFactory { + Processor create(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/Processor.java b/config-model/src/main/java/com/yahoo/schema/processing/Processor.java new file mode 100644 index 00000000000..9768f33c27d --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/Processor.java @@ -0,0 +1,157 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.config.model.api.ModelContext; +import com.yahoo.document.DataType; +import com.yahoo.document.Field; +import com.yahoo.schema.Index; +import com.yahoo.schema.RankProfile; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.RankType; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.document.Stemming; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.Iterator; +import java.util.List; +import java.util.logging.Level; + +/** + * Abstract superclass of all search definition processors. + * + * @author bratseth + */ +public abstract class Processor { + + protected final Schema schema; + protected final DeployLogger deployLogger; + protected final RankProfileRegistry rankProfileRegistry; + protected final QueryProfiles queryProfiles; + + /** + * Base constructor + * + * @param schema the search to process + * @param deployLogger Logger du use when logging deploy output. + * @param rankProfileRegistry Registry with all rank profiles, used for lookup and insertion. + * @param queryProfiles The query profiles contained in the application this search is part of. + */ + public Processor(Schema schema, + DeployLogger deployLogger, + RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles) { + this.schema = schema; + this.deployLogger = deployLogger; + this.rankProfileRegistry = rankProfileRegistry; + this.queryProfiles = queryProfiles; + } + + /** + * Processes the input search definition by <b>modifying</b> the input search and its documents, and returns the + * input search definition. + * + * @param validate true to throw exceptions on validation errors, false to make the best possible effort + * at completing processing without throwing an exception. 
+ * If we are not validating, emitting warnings have no effect and can (but must not) be skipped. + * @param documentsOnly true to skip processing (including validation, regardless of the validate setting) + * of aspects not relating to document definitions (e.g rank profiles) + */ + public abstract void process(boolean validate, boolean documentsOnly); + + /** + * As above, possibly with properties from a context. Override if needed. + **/ + public void process(boolean validate, boolean documentsOnly, ModelContext.Properties properties) { + process(validate, documentsOnly); + } + + /** + * Convenience method for adding a no-strings-attached implementation field for a regular field + * + * @param schema the search definition in question + * @param field the field to add an implementation field for + * @param suffix the suffix of the added implementation field (without the underscore) + * @param indexing the indexing statement of the field + * @param queryCommand the query command of the original field, or null if none + * @return the implementation field which is added to the search + */ + protected SDField addField(Schema schema, SDField field, String suffix, String indexing, String queryCommand) { + SDField implementationField = schema.getConcreteField(field.getName() + "_" + suffix); + if (implementationField != null) { + deployLogger.logApplicationPackage(Level.WARNING, "Implementation field " + implementationField + " added twice"); + } else { + implementationField = new SDField(schema.getDocument(), field.getName() + "_" + suffix, DataType.STRING); + } + implementationField.setRankType(RankType.EMPTY); + implementationField.setStemming(Stemming.NONE); + implementationField.getNormalizing().inferCodepoint(); + implementationField.parseIndexingScript(indexing); + String indexName = field.getName(); + String implementationIndexName = indexName + "_" + suffix; + Index implementationIndex = new Index(implementationIndexName); + 
schema.addIndex(implementationIndex); + if (queryCommand != null) { + field.addQueryCommand(queryCommand); + } + schema.addExtraField(implementationField); + schema.fieldSets().addBuiltInFieldSetItem(BuiltInFieldSets.INTERNAL_FIELDSET_NAME, implementationField.getName()); + return implementationField; + } + + /** + * Returns an iterator of all the rank settings with given type in all the rank profiles in this search + * definition. + */ + protected Iterator<RankProfile.RankSetting> matchingRankSettingsIterator( + Schema schema, RankProfile.RankSetting.Type type) + { + List<RankProfile.RankSetting> someRankSettings = new java.util.ArrayList<>(); + + for (RankProfile profile : rankProfileRegistry.rankProfilesOf(schema)) { + for (Iterator j = profile.declaredRankSettingIterator(); j.hasNext(); ) { + RankProfile.RankSetting setting = (RankProfile.RankSetting)j.next(); + if (setting.getType().equals(type)) { + someRankSettings.add(setting); + } + } + } + return someRankSettings.iterator(); + } + + protected String formatError(String schemaName, String fieldName, String msg) { + return "For schema '" + schemaName + "', field '" + fieldName + "': " + msg; + } + + protected RuntimeException newProcessException(String schemaName, String fieldName, String msg) { + return new IllegalArgumentException(formatError(schemaName, fieldName, msg)); + } + + protected RuntimeException newProcessException(Schema schema, Field field, String msg) { + return newProcessException(schema.getName(), field.getName(), msg); + } + + public void fail(Schema schema, Field field, String msg) { + throw newProcessException(schema, field, msg); + } + + protected void warn(String schemaName, String fieldName, String message) { + String fullMsg = formatError(schemaName, fieldName, message); + deployLogger.logApplicationPackage(Level.WARNING, fullMsg); + } + + protected void warn(Schema schema, Field field, String message) { + warn(schema.getName(), field.getName(), message); + } + + protected void 
info(String schemaName, String fieldName, String message) { + String fullMsg = formatError(schemaName, fieldName, message); + deployLogger.logApplicationPackage(Level.INFO, fullMsg); + } + + protected void info(Schema schema, Field field, String message) { + info(schema.getName(), field.getName(), message); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/RankingExpressionTypeResolver.java b/config-model/src/main/java/com/yahoo/schema/processing/RankingExpressionTypeResolver.java new file mode 100644 index 00000000000..07f79f16334 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/RankingExpressionTypeResolver.java @@ -0,0 +1,135 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.search.query.profile.QueryProfileRegistry; +import com.yahoo.schema.MapEvaluationTypeContext; +import com.yahoo.schema.RankProfile; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.searchlib.rankingexpression.ExpressionFunction; +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.Reference; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; +import com.yahoo.tensor.TensorType; +import com.yahoo.tensor.evaluation.TypeContext; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.logging.Level; + +/** + * Resolves and assigns types to all functions in a ranking expression, and + * validates the types of all ranking expressions under a search instance: + * Some operators constrain the types of inputs, and first-and second-phase expressions + * must return scalar values. 
+ * + * In addition, the existence of all referred attribute, query and constant + * features is ensured. + * + * @author bratseth + */ +public class RankingExpressionTypeResolver extends Processor { + + private final QueryProfileRegistry queryProfiles; + + public RankingExpressionTypeResolver(Schema schema, + DeployLogger deployLogger, + RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + this.queryProfiles = queryProfiles.getRegistry(); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if (documentsOnly) return; + + Set<Reference> warnedAbout = new HashSet<>(); + for (RankProfile profile : rankProfileRegistry.rankProfilesOf(schema)) { + try { + resolveTypesIn(profile, validate, warnedAbout); + } + catch (IllegalArgumentException e) { + throw new IllegalArgumentException("In " + (schema != null ? schema + ", " : "") + profile, e); + } + } + } + + /** + * Resolves the types of all functions in the given profile + * + * @throws IllegalArgumentException if validate is true and the given rank profile does not produce valid types + */ + private void resolveTypesIn(RankProfile profile, boolean validate, Set<Reference> warnedAbout) { + MapEvaluationTypeContext context = profile.typeContext(queryProfiles); + for (Map.Entry<String, RankProfile.RankingExpressionFunction> function : profile.getFunctions().entrySet()) { + ExpressionFunction expressionFunction = function.getValue().function(); + if (hasUntypedArguments(expressionFunction)) continue; + + // Add any missing inputs for type resolution + for (String argument : expressionFunction.arguments()) { + Reference ref = Reference.fromIdentifier(argument); + if (context.getType(ref).equals(TensorType.empty)) { + context.setType(ref, expressionFunction.argumentTypes().get(argument)); + } + } + context.forgetResolvedTypes(); + + TensorType type = resolveType(expressionFunction.getBody(), "function '" + 
function.getKey() + "'", context); + function.getValue().setReturnType(type); + } + + if (validate) { + profile.getSummaryFeatures().forEach(f -> resolveType(f, "summary feature " + f, context)); + ensureValidDouble(profile.getFirstPhaseRanking(), "first-phase expression", context); + ensureValidDouble(profile.getSecondPhaseRanking(), "second-phase expression", context); + if ( ( context.tensorsAreUsed() || profile.isStrict()) + && ! context.queryFeaturesNotDeclared().isEmpty() + && ! warnedAbout.containsAll(context.queryFeaturesNotDeclared())) { + if (profile.isStrict()) + throw new IllegalArgumentException(profile + " is strict but is missing a query profile type " + + "declaration of features " + context.queryFeaturesNotDeclared()); + else + deployLogger.logApplicationPackage(Level.WARNING, "The following query features used in " + profile + + " are not declared in query profile " + + "types and will be interpreted as scalars, not tensors: " + + context.queryFeaturesNotDeclared()); + warnedAbout.addAll(context.queryFeaturesNotDeclared()); + } + } + } + + private boolean hasUntypedArguments(ExpressionFunction function) { + return function.arguments().size() > function.argumentTypes().size(); + } + + private TensorType resolveType(RankingExpression expression, String expressionDescription, TypeContext<Reference> context) { + if (expression == null) return null; + return resolveType(expression.getRoot(), expressionDescription, context); + } + + private TensorType resolveType(ExpressionNode expression, String expressionDescription, TypeContext<Reference> context) { + TensorType type; + try { + type = expression.type(context); + } + catch (IllegalArgumentException e) { + throw new IllegalArgumentException("The " + expressionDescription + " is invalid", e); + } + if (type == null) // Not expected to happen + throw new IllegalStateException("Could not determine the type produced by " + expressionDescription); + return type; + } + + private void 
ensureValidDouble(RankingExpression expression, String expressionDescription, TypeContext<Reference> context) { + if (expression == null) return; + TensorType type = resolveType(expression, expressionDescription, context); + if ( ! type.equals(TensorType.empty)) + throw new IllegalArgumentException("The " + expressionDescription + " must produce a double " + + "(a tensor with no dimensions), but produces " + type); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ReferenceFieldsProcessor.java b/config-model/src/main/java/com/yahoo/schema/processing/ReferenceFieldsProcessor.java new file mode 100644 index 00000000000..43e39b1e546 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/ReferenceFieldsProcessor.java @@ -0,0 +1,65 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.documentmodel.NewDocumentReferenceDataType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Class that processes reference fields and removes attribute aspect of such fields from summary. + * + * A document summary for a reference field should always be fetched from the document instance in back-end + * as the attribute vector does not store the original document id string. 
+ * + * @author geirst + */ +public class ReferenceFieldsProcessor extends Processor { + + public ReferenceFieldsProcessor(Schema schema, + DeployLogger deployLogger, + RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + clearSummaryAttributeAspectForConcreteFields(); + clearSummaryAttributeAspectForExplicitSummaryFields(); + } + + private void clearSummaryAttributeAspectForExplicitSummaryFields() { + for (DocumentSummary docSum : schema.getSummaries().values()) { + docSum.getSummaryFields().values().stream() + .filter(summaryField -> summaryField.getDataType() instanceof NewDocumentReferenceDataType) + .forEach(summaryField -> summaryField.setTransform(SummaryTransform.NONE)); + } + } + + private void clearSummaryAttributeAspectForConcreteFields() { + for (SDField field : schema.allConcreteFields()) { + if (field.getDataType() instanceof NewDocumentReferenceDataType) { + removeFromAttributePrefetchSummaryClass(field); + clearSummaryTransformOnSummaryFields(field); + } + } + } + + private void removeFromAttributePrefetchSummaryClass(SDField field) { + DocumentSummary summary = schema.getSummariesInThis().get("attributeprefetch"); + if (summary != null) { + summary.remove(field.getName()); + } + } + + private void clearSummaryTransformOnSummaryFields(SDField field) { + schema.getSummaryFields(field).forEach(summaryField -> summaryField.setTransform(SummaryTransform.NONE)); + } + +} + diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ReservedDocumentNames.java b/config-model/src/main/java/com/yahoo/schema/processing/ReservedDocumentNames.java new file mode 100644 index 00000000000..7eaf690d899 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/ReservedDocumentNames.java @@ -0,0 +1,39 @@ +// Copyright Yahoo. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDDocumentType; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.HashSet; +import java.util.Set; + +/** + * @author Simon Thoresen Hult + */ +public class ReservedDocumentNames extends Processor { + + private static final Set<String> RESERVED_NAMES = new HashSet<>(); + + static { + for (SDDocumentType dataType : SDDocumentType.VESPA_DOCUMENT.getTypes()) { + RESERVED_NAMES.add(dataType.getName()); + } + } + + public ReservedDocumentNames(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + + String docName = schema.getDocument().getName(); + if (RESERVED_NAMES.contains(docName)) + throw new IllegalArgumentException("For " + schema + ": Document name '" + docName + "' is reserved."); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ReservedFunctionNames.java b/config-model/src/main/java/com/yahoo/schema/processing/ReservedFunctionNames.java new file mode 100644 index 00000000000..1ec4d5b58f2 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/ReservedFunctionNames.java @@ -0,0 +1,56 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.google.common.collect.ImmutableSet; +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfile; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.searchlib.rankingexpression.parser.RankingExpressionParserConstants; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.Set; +import java.util.logging.Level; + +/** + * Issues a warning if some function has a reserved name. This is not necessarily + * an error, as a rank profile function can shadow a built-in function. + * + * @author lesters + */ +public class ReservedFunctionNames extends Processor { + + private static Set<String> reservedNames = getReservedNames(); + + public ReservedFunctionNames(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + if (documentsOnly) return; + + for (RankProfile rp : rankProfileRegistry.all()) { + for (String functionName : rp.getFunctions().keySet()) { + if (reservedNames.contains(functionName)) { + deployLogger.logApplicationPackage(Level.WARNING, "Function '" + functionName + "' " + + "in rank profile '" + rp.name() + "' " + + "has a reserved name. This might mean that the function shadows " + + "the built-in function with the same name." 
+ ); + } + } + } + } + + private static ImmutableSet<String> getReservedNames() { + ImmutableSet.Builder<String> names = ImmutableSet.builder(); + for (String token : RankingExpressionParserConstants.tokenImage) { + String tokenWithoutQuotes = token.substring(1, token.length()-1); + names.add(tokenWithoutQuotes); + } + return names.build(); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/SearchMustHaveDocument.java b/config-model/src/main/java/com/yahoo/schema/processing/SearchMustHaveDocument.java new file mode 100644 index 00000000000..b90a5fdec98 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/SearchMustHaveDocument.java @@ -0,0 +1,30 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * A search must have a document definition of the same name inside of it, otherwise crashes may occur as late as + * during feeding + * + * @author Vegard Havdal + */ +public class SearchMustHaveDocument extends Processor { + + public SearchMustHaveDocument(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! 
validate) return; + + if (schema.getDocument() == null) + throw new IllegalArgumentException("For " + schema + + ": A search specification must have an equally named document inside of it."); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/SetRankTypeEmptyOnFilters.java b/config-model/src/main/java/com/yahoo/schema/processing/SetRankTypeEmptyOnFilters.java new file mode 100644 index 00000000000..f84d6f19145 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/SetRankTypeEmptyOnFilters.java @@ -0,0 +1,30 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.RankType; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * All rank: filter fields should have rank type empty. + * + * @author bratseth + */ +public class SetRankTypeEmptyOnFilters extends Processor { + + public SetRankTypeEmptyOnFilters(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (SDField field : schema.allConcreteFields()) { + if (field.getRanking().isFilter()) { + field.setRankType(RankType.EMPTY); + } + } + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/SortingSettings.java b/config-model/src/main/java/com/yahoo/schema/processing/SortingSettings.java new file mode 100644 index 00000000000..e0dfbab9780 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/SortingSettings.java @@ -0,0 +1,42 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.document.Sorting; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Validate conflicting settings for sorting + * + * @author Vegard Havdal + */ +public class SortingSettings extends Processor { + + public SortingSettings(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + + for (SDField field : schema.allConcreteFields()) { + for (Attribute attribute : field.getAttributes().values()) { + Sorting sorting = attribute.getSorting(); + if (sorting.getFunction() != Sorting.Function.UCA) { + if (sorting.getStrength()!=null && sorting.getStrength() != Sorting.Strength.PRIMARY) { + warn(schema, field, "Sort strength only works for sort function 'uca'."); + } + if (sorting.getLocale() != null && ! "".equals(sorting.getLocale())) { + warn(schema, field, "Sort locale only works for sort function 'uca'."); + } + } + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/StringSettingsOnNonStringFields.java b/config-model/src/main/java/com/yahoo/schema/processing/StringSettingsOnNonStringFields.java new file mode 100644 index 00000000000..8ca0b595907 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/StringSettingsOnNonStringFields.java @@ -0,0 +1,43 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.CollectionDataType; +import com.yahoo.document.NumericDataType; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +public class StringSettingsOnNonStringFields extends Processor { + + public StringSettingsOnNonStringFields(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + + for (SDField field : schema.allConcreteFields()) { + if ( ! doCheck(field)) continue; + if (field.getMatching().isTypeUserSet()) { + warn(schema, field, "Matching type " + field.getMatching().getType() + + " is only allowed for string fields."); + } + if (field.getRanking().isLiteral()) { + warn(schema, field, "Rank type literal only applies to string fields"); + } + } + } + + private boolean doCheck(SDField field) { + if (field.getDataType() instanceof NumericDataType) return true; + if (field.getDataType() instanceof CollectionDataType) { + if (((CollectionDataType)field.getDataType()).getNestedType() instanceof NumericDataType) { + return true; + } + } + return false; + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/SummaryConsistency.java b/config-model/src/main/java/com/yahoo/schema/processing/SummaryConsistency.java new file mode 100644 index 00000000000..4fb45c3c68f --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/SummaryConsistency.java @@ -0,0 +1,131 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.DataType; +import com.yahoo.document.TensorDataType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.document.WeightedSetDataType; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isComplexFieldWithOnlyStructFieldAttributes; + +/** + * Ensure that summary field transforms for fields having the same name + * are consistent across summary classes + * + * @author bratseth + */ +public class SummaryConsistency extends Processor { + + public SummaryConsistency(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (DocumentSummary summary : schema.getSummaries().values()) { + if (summary.getName().equals("default")) continue; + + for (SummaryField summaryField : summary.getSummaryFields().values()) { + assertConsistency(summaryField, schema, validate); + makeAttributeTransformIfAppropriate(summaryField, schema); + makeAttributeCombinerTransformIfAppropriate(summaryField, schema); + } + } + } + + private void assertConsistency(SummaryField summaryField, Schema schema, boolean validate) { + // Compare to default: + SummaryField existingDefault = schema.getSummariesInThis().get("default").getSummaryField(summaryField.getName()); + if (existingDefault != null) { + if (validate) + assertConsistentTypes(existingDefault, summaryField); + 
makeConsistentWithDefaultOrThrow(existingDefault, summaryField); + } + else { + // If no default, compare to whichever definition of the field + SummaryField existing = schema.getExplicitSummaryField(summaryField.getName()); + if (existing == null) return; + if (validate) + assertConsistentTypes(existing, summaryField); + makeConsistentOrThrow(existing, summaryField, schema); + } + } + + /** If the source is an attribute, make this use the attribute transform */ + private void makeAttributeTransformIfAppropriate(SummaryField summaryField, Schema schema) { + if (summaryField.getTransform() != SummaryTransform.NONE) return; + Attribute attribute = schema.getAttribute(summaryField.getSingleSource()); + if (attribute == null) return; + summaryField.setTransform(SummaryTransform.ATTRIBUTE); + } + + /** If the source is a complex field with only struct field attributes then make this use the attribute combiner transform */ + private void makeAttributeCombinerTransformIfAppropriate(SummaryField summaryField, Schema schema) { + if (summaryField.getTransform() == SummaryTransform.NONE) { + String source_field_name = summaryField.getSingleSource(); + ImmutableSDField source = schema.getField(source_field_name); + if (source != null && isComplexFieldWithOnlyStructFieldAttributes(source)) { + summaryField.setTransform(SummaryTransform.ATTRIBUTECOMBINER); + } + } + } + + private void assertConsistentTypes(SummaryField existing, SummaryField seen) { + if (existing.getDataType() instanceof WeightedSetDataType && seen.getDataType() instanceof WeightedSetDataType && + ((WeightedSetDataType)existing.getDataType()).getNestedType().equals(((WeightedSetDataType)seen.getDataType()).getNestedType())) + return; // Disregard create-if-nonexistent and create-if-zero distinction + if ( ! 
compatibleTypes(seen.getDataType(), existing.getDataType())) + throw new IllegalArgumentException(existing.toLocateString() + " is inconsistent with " + + seen.toLocateString() + ": All declarations of the same summary field must have the same type"); + } + + private boolean compatibleTypes(DataType summaryType, DataType existingType) { + if (summaryType instanceof TensorDataType && existingType instanceof TensorDataType) { + return summaryType.isAssignableFrom(existingType); // TODO: Just do this for all types + } + return summaryType.equals(existingType); + } + + private void makeConsistentOrThrow(SummaryField field1, SummaryField field2, Schema schema) { + if (field2.getTransform() == SummaryTransform.ATTRIBUTE && field1.getTransform() == SummaryTransform.NONE) { + Attribute attribute = schema.getAttribute(field1.getName()); + if (attribute != null) { + field1.setTransform(SummaryTransform.ATTRIBUTE); + } + } + + if (field2.getTransform().equals(SummaryTransform.NONE)) { + field2.setTransform(field1.getTransform()); + } + else { // New field sets an explicit transform - must be the same + assertEqualTransform(field1,field2); + } + } + + private void makeConsistentWithDefaultOrThrow(SummaryField defaultField, SummaryField newField) { + if (newField.getTransform().equals(SummaryTransform.NONE)) { + newField.setTransform(defaultField.getTransform()); + } + else { // New field sets an explicit transform - must be the same + assertEqualTransform(defaultField,newField); + } + } + + private void assertEqualTransform(SummaryField field1, SummaryField field2) { + if ( ! field2.getTransform().equals(field1.getTransform())) { + throw new IllegalArgumentException("Conflicting summary transforms. " + field2 + " is already defined as " + + field1 + ". 
A field with the same name " + + "can not have different transforms in different summary classes"); + } + } + + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/SummaryDiskAccessValidator.java b/config-model/src/main/java/com/yahoo/schema/processing/SummaryDiskAccessValidator.java new file mode 100644 index 00000000000..40c38a350b0 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/SummaryDiskAccessValidator.java @@ -0,0 +1,73 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.derived.SummaryClass; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.Optional; +import java.util.logging.Level; + +import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isComplexFieldWithOnlyStructFieldAttributes; + +/** + * Emits a warning for summaries which accesses disk. + * + * @author bratseth + */ +public class SummaryDiskAccessValidator extends Processor { + + public SummaryDiskAccessValidator(Schema schema, + DeployLogger deployLogger, + RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! 
validate) return; + if (documentsOnly) return; + + for (DocumentSummary summary : schema.getSummaries().values()) { + for (SummaryField summaryField : summary.getSummaryFields().values()) { + for (SummaryField.Source source : summaryField.getSources()) { + ImmutableSDField field = schema.getField(source.getName()); + if (field == null) + field = findFieldProducingSummaryField(source.getName(), schema).orElse(null); + if (field == null && ! source.getName().equals(SummaryClass.DOCUMENT_ID_FIELD)) + throw new IllegalArgumentException(summaryField + " in " + summary + " references " + + source + ", but this field does not exist"); + if ( ! isInMemory(field, summaryField) && ! summary.isFromDisk()) { + deployLogger.logApplicationPackage(Level.WARNING, summaryField + " in " + summary + " references " + + source + ", which is not an attribute: Using this " + + "summary will cause disk accesses. " + + "Set 'from-disk' on this summary class to silence this warning."); + } + } + } + } + } + + private boolean isInMemory(ImmutableSDField field, SummaryField summaryField) { + if (field == null) return false; // For DOCUMENT_ID_FIELD, which may be implicit, but is then not in memory + if (isComplexFieldWithOnlyStructFieldAttributes(field) && + (summaryField.getTransform() == SummaryTransform.ATTRIBUTECOMBINER || + summaryField.getTransform() == SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER)) { + return true; + } + return field.doesAttributing(); + } + + private Optional<ImmutableSDField> findFieldProducingSummaryField(String name, Schema schema) { + return schema.allFields().filter(field -> field.getSummaryFields().get(name) != null).findAny(); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/SummaryDynamicStructsArrays.java b/config-model/src/main/java/com/yahoo/schema/processing/SummaryDynamicStructsArrays.java new file mode 100644 index 00000000000..ed1f47611eb --- /dev/null +++ 
b/config-model/src/main/java/com/yahoo/schema/processing/SummaryDynamicStructsArrays.java @@ -0,0 +1,47 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.*; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Fail if: + * An SD field explicitly says summary:dynamic , but the field is wset, array or struct. + * If there is an explicitly defined summary class, saying dynamic in one of its summary + * fields is always legal. + * + * @author Vegard Havdal + */ +public class SummaryDynamicStructsArrays extends Processor { + + public SummaryDynamicStructsArrays(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + + for (SDField field : schema.allConcreteFields()) { + DataType type = field.getDataType(); + if (type instanceof ArrayDataType || type instanceof WeightedSetDataType || type instanceof StructDataType) { + for (SummaryField sField : field.getSummaryFields().values()) { + if (sField.getTransform().equals(SummaryTransform.DYNAMICTEASER)) { + throw new IllegalArgumentException("For field '"+field.getName()+"': dynamic summary is illegal " + + "for fields of type struct, array or weighted set. 
Use an " + + "explicit summary class with explicit summary fields sourcing" + + " from the array/struct/weighted set."); + } + } + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/SummaryFieldsMustHaveValidSource.java b/config-model/src/main/java/com/yahoo/schema/processing/SummaryFieldsMustHaveValidSource.java new file mode 100644 index 00000000000..c8f201e2915 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/SummaryFieldsMustHaveValidSource.java @@ -0,0 +1,81 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.derived.SummaryClass; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Verifies that the source fields actually refers to a valid field. + * + * @author baldersheim + */ +public class SummaryFieldsMustHaveValidSource extends Processor { + + SummaryFieldsMustHaveValidSource(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! 
validate) return; + + for (DocumentSummary summary : schema.getSummaries().values()) { + for (SummaryField summaryField : summary.getSummaryFields().values()) { + if (summaryField.getSources().isEmpty()) { + if ((summaryField.getTransform() != SummaryTransform.RANKFEATURES) && + (summaryField.getTransform() != SummaryTransform.SUMMARYFEATURES)) + { + verifySource(summaryField.getName(), summaryField, summary); + } + } else if (summaryField.getSourceCount() == 1) { + verifySource(summaryField.getSingleSource(), summaryField, summary); + } else { + for (SummaryField.Source source : summaryField.getSources()) { + if ( ! source.getName().equals(summaryField.getName()) ) { + verifySource(source.getName(), summaryField, summary); + } + } + } + } + } + + } + + private boolean isValid(String source, SummaryField summaryField, DocumentSummary summary) { + return isDocumentField(source) || + (isNotInThisSummaryClass(summary, source) && isSummaryField(source)) || + (isInThisSummaryClass(summary, source) && !source.equals(summaryField.getName())) || + (SummaryClass.DOCUMENT_ID_FIELD.equals(source)); + } + + private void verifySource(String source, SummaryField summaryField, DocumentSummary summary) { + if ( ! 
isValid(source, summaryField, summary) ) { + throw new IllegalArgumentException("For " + schema + ", summary class '" + + summary.getName() + "'," + " summary field '" + summaryField.getName() + + "': there is no valid source '" + source + "'."); + } + } + + private static boolean isNotInThisSummaryClass(DocumentSummary summary, String name) { + return summary.getSummaryField(name) == null; + } + + private static boolean isInThisSummaryClass(DocumentSummary summary, String name) { + return summary.getSummaryField(name) != null; + } + + private boolean isDocumentField(String name) { + return schema.getField(name) != null; + } + + private boolean isSummaryField(String name) { + return schema.getSummaryField(name) != null; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/SummaryNamesFieldCollisions.java b/config-model/src/main/java/com/yahoo/schema/processing/SummaryNamesFieldCollisions.java new file mode 100644 index 00000000000..da5dfeb407b --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/SummaryNamesFieldCollisions.java @@ -0,0 +1,60 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import java.util.HashMap; +import java.util.Map; + +import com.yahoo.collections.Pair; +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryField.Source; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Verifies that equally named summary fields in different summary classes don't use different fields for source. + * The summarymap config doesn't model this. 
+ * + * @author Vegard Havdal + */ +public class SummaryNamesFieldCollisions extends Processor { + + public SummaryNamesFieldCollisions(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + + Map<String, Pair<String, String>> fieldToClassAndSource = new HashMap<>(); + for (DocumentSummary summary : schema.getSummaries().values()) { + if ("default".equals(summary.getName())) continue; + for (SummaryField summaryField : summary.getSummaryFields().values()) { + if (summaryField.isImplicit()) continue; + Pair<String, String> prevClassAndSource = fieldToClassAndSource.get(summaryField.getName()); + for (Source source : summaryField.getSources()) { + if (prevClassAndSource!=null) { + String prevClass = prevClassAndSource.getFirst(); + String prevSource = prevClassAndSource.getSecond(); + if ( ! prevClass.equals(summary.getName())) { + if ( ! prevSource.equals(source.getName())) { + throw new IllegalArgumentException("For " + schema + + ", summary class '" + summary.getName() + "'," + + " summary field '" + summaryField.getName() + "':" + + " Can not use source '" + source.getName() + + "' for this summary field, an equally named field in summary class '" + + prevClass + "' uses a different source: '" + prevSource + "'."); + } + } + } else { + fieldToClassAndSource.put(summaryField.getName(), new Pair<>(summary.getName(), source.getName())); + } + } + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/TagType.java b/config-model/src/main/java/com/yahoo/schema/processing/TagType.java new file mode 100644 index 00000000000..f511d572bc6 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/TagType.java @@ -0,0 +1,47 @@ +// Copyright Yahoo. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.*; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Matching; +import com.yahoo.schema.document.MatchType; +import com.yahoo.schema.document.RankType; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * The implementation of the tag datatype + * + * @author bratseth + */ +public class TagType extends Processor { + + public TagType(Schema schema, + DeployLogger deployLogger, + RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (SDField field : schema.allConcreteFields()) { + if (field.getDataType() instanceof WeightedSetDataType && ((WeightedSetDataType)field.getDataType()).isTag()) + implementTagType(field); + } + } + + private void implementTagType(SDField field) { + field.setDataType(DataType.getWeightedSet(DataType.STRING, true, true)); + // Don't set matching and ranking if this field is not attribute nor index + if (!field.doesIndexing() && !field.doesAttributing()) return; + Matching m = field.getMatching(); + if ( ! m.isTypeUserSet()) + m.setType(MatchType.WORD); + if (field.getRankType() == null || field.getRankType() == RankType.DEFAULT) + field.setRankType((RankType.TAGS)); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/TensorFieldProcessor.java b/config-model/src/main/java/com/yahoo/schema/processing/TensorFieldProcessor.java new file mode 100644 index 00000000000..e0ce9917179 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/TensorFieldProcessor.java @@ -0,0 +1,118 @@ +// Copyright Yahoo. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.CollectionDataType; +import com.yahoo.document.TensorDataType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.HnswIndexParams; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Class that processes and validates tensor fields. + * + * @author geirst + */ +public class TensorFieldProcessor extends Processor { + + public TensorFieldProcessor(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (var field : schema.allConcreteFields()) { + if ( field.getDataType() instanceof TensorDataType ) { + if (validate) { + validateIndexingScripsForTensorField(field); + validateAttributeSettingForTensorField(field); + validateHnswIndexParametersRequiresIndexing(field); + } + processIndexSettingsForTensorField(field, validate); + } + else if (field.getDataType() instanceof CollectionDataType){ + if (validate) { + validateDataTypeForCollectionField(field); + } + } + } + } + + private void validateIndexingScripsForTensorField(SDField field) { + if (field.doesIndexing() && !isTensorTypeThatSupportsHnswIndex(field)) { + fail(schema, field, "A tensor of type '" + tensorTypeToString(field) + "' does not support having an 'index'. 
" + + "Currently, only tensors with 1 indexed dimension supports that."); + } + } + + private boolean isTensorTypeThatSupportsHnswIndex(ImmutableSDField field) { + var type = ((TensorDataType)field.getDataType()).getTensorType(); + // Tensors with 1 indexed dimension supports a hnsw index (used for approximate nearest neighbor search). + if ((type.dimensions().size() == 1) && + type.dimensions().get(0).isIndexed()) { + return true; + } + return false; + } + + private boolean isTensorTypeThatSupportsDirectStore(ImmutableSDField field) { + var type = ((TensorDataType)field.getDataType()).getTensorType(); + // Tensors with at least one mapped/sparse dimensions can be "direct" + // (currenty triggered by fast-search flag) + for (var dim : type.dimensions()) { + if (dim.isMapped()) { + return true; + } + } + return false; + } + + private String tensorTypeToString(ImmutableSDField field) { + return ((TensorDataType)field.getDataType()).getTensorType().toString(); + } + + private void validateAttributeSettingForTensorField(SDField field) { + if (field.doesAttributing()) { + var attribute = field.getAttributes().get(field.getName()); + if (attribute != null && attribute.isFastSearch()) { + if (! 
isTensorTypeThatSupportsDirectStore(field)) { + fail(schema, field, "An attribute of type 'tensor' cannot be 'fast-search'."); + } + } + } + } + + private void validateHnswIndexParametersRequiresIndexing(SDField field) { + var index = field.getIndex(field.getName()); + if (index != null && index.getHnswIndexParams().isPresent() && !field.doesIndexing()) { + fail(schema, field, "A tensor that specifies hnsw index parameters must also specify 'index' in 'indexing'"); + } + } + + private void processIndexSettingsForTensorField(SDField field, boolean validate) { + if (!field.doesIndexing()) { + return; + } + if (isTensorTypeThatSupportsHnswIndex(field)) { + if (validate && !field.doesAttributing()) { + fail(schema, field, "A tensor that has an index must also be an attribute."); + } + var index = field.getIndex(field.getName()); + // TODO: Calculate default params based on tensor dimension size + var params = new HnswIndexParams(); + if (index != null) { + params = params.overrideFrom(index.getHnswIndexParams()); + } + field.getAttribute().setHnswIndexParams(params); + } + } + + private void validateDataTypeForCollectionField(SDField field) { + if (((CollectionDataType)field.getDataType()).getNestedType() instanceof TensorDataType) + fail(schema, field, "A field with collection type of tensor is not supported. Use simple type 'tensor' instead."); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java b/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java new file mode 100644 index 00000000000..1783a3c7c63 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java @@ -0,0 +1,127 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.CollectionDataType; +import com.yahoo.document.DataType; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.MatchType; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.document.Stemming; +import com.yahoo.vespa.indexinglanguage.ExpressionConverter; +import com.yahoo.vespa.indexinglanguage.ExpressionVisitor; +import com.yahoo.vespa.indexinglanguage.expressions.Expression; +import com.yahoo.vespa.indexinglanguage.expressions.ForEachExpression; +import com.yahoo.vespa.indexinglanguage.expressions.IndexExpression; +import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression; +import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; +import com.yahoo.vespa.indexinglanguage.expressions.SummaryExpression; +import com.yahoo.vespa.indexinglanguage.expressions.TokenizeExpression; +import com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.Set; +import java.util.TreeSet; + +/** + * @author Simon Thoresen Hult + */ +public class TextMatch extends Processor { + + public TextMatch(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (SDField field : schema.allConcreteFields()) { + if (field.getMatching().getType() != MatchType.TEXT) continue; + + ScriptExpression script = field.getIndexingScript(); + if (script == null) continue; + + DataType fieldType = field.getDataType(); + if (fieldType instanceof CollectionDataType) { + fieldType = ((CollectionDataType)fieldType).getNestedType(); + } + if (fieldType != DataType.STRING) continue; + + Set<String> 
dynamicSummary = new TreeSet<>(); + Set<String> staticSummary = new TreeSet<>(); + new IndexingOutputs(schema, deployLogger, rankProfileRegistry, queryProfiles).findSummaryTo(schema, + field, + dynamicSummary, + staticSummary); + MyVisitor visitor = new MyVisitor(dynamicSummary); + visitor.visit(script); + if ( ! visitor.requiresTokenize) continue; + + ExpressionConverter converter = new MyStringTokenizer(schema, findAnnotatorConfig(schema, field)); + field.setIndexingScript((ScriptExpression)converter.convert(script)); + } + } + + private AnnotatorConfig findAnnotatorConfig(Schema schema, SDField field) { + AnnotatorConfig ret = new AnnotatorConfig(); + Stemming activeStemming = field.getStemming(); + if (activeStemming == null) { + activeStemming = schema.getStemming(); + } + ret.setStemMode(activeStemming.toStemMode()); + ret.setRemoveAccents(field.getNormalizing().doRemoveAccents()); + if ((field.getMatching() != null) && (field.getMatching().maxLength() != null)) { + ret.setMaxTokenLength(field.getMatching().maxLength()); + } + return ret; + } + + private static class MyVisitor extends ExpressionVisitor { + + final Set<String> dynamicSummaryFields; + boolean requiresTokenize = false; + + MyVisitor(Set<String> dynamicSummaryFields) { + this.dynamicSummaryFields = dynamicSummaryFields; + } + + @Override + protected void doVisit(Expression exp) { + if (exp instanceof IndexExpression) { + requiresTokenize = true; + } + if (exp instanceof SummaryExpression && + dynamicSummaryFields.contains(((SummaryExpression)exp).getFieldName())) + { + requiresTokenize = true; + } + } + + } + + private static class MyStringTokenizer extends TypedTransformProvider { + + final AnnotatorConfig annotatorCfg; + + MyStringTokenizer(Schema schema, AnnotatorConfig annotatorCfg) { + super(TokenizeExpression.class, schema); + this.annotatorCfg = annotatorCfg; + } + + @Override + protected boolean requiresTransform(Expression exp, DataType fieldType) { + return exp instanceof 
OutputExpression; + } + + @Override + protected Expression newTransform(DataType fieldType) { + Expression exp = new TokenizeExpression(null, annotatorCfg); + if (fieldType instanceof CollectionDataType) { + exp = new ForEachExpression(exp); + } + return exp; + } + + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/TypedTransformProvider.java b/config-model/src/main/java/com/yahoo/schema/processing/TypedTransformProvider.java new file mode 100644 index 00000000000..1836cd631ad --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/TypedTransformProvider.java @@ -0,0 +1,62 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.document.DataType; +import com.yahoo.document.Field; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.vespa.indexinglanguage.ValueTransformProvider; +import com.yahoo.vespa.indexinglanguage.expressions.*; + +/** + * @author Simon Thoresen Hult + */ +public abstract class TypedTransformProvider extends ValueTransformProvider { + + private final Schema schema; + private DataType fieldType; + + TypedTransformProvider(Class<? 
extends Expression> transformClass, Schema schema) { + super(transformClass); + this.schema = schema; + } + + @Override + protected final boolean requiresTransform(Expression exp) { + if (exp instanceof OutputExpression) { + String fieldName = ((OutputExpression)exp).getFieldName(); + if (exp instanceof AttributeExpression) { + Attribute attribute = schema.getAttribute(fieldName); + if (attribute == null) + throw new IllegalArgumentException("Attribute '" + fieldName + "' not found."); + fieldType = attribute.getDataType(); + } + else if (exp instanceof IndexExpression) { + Field field = schema.getConcreteField(fieldName); + if (field == null) + throw new IllegalArgumentException("Index field '" + fieldName + "' not found."); + fieldType = field.getDataType(); + } + else if (exp instanceof SummaryExpression) { + Field field = schema.getSummaryField(fieldName); + if (field == null) + throw new IllegalArgumentException("Summary field '" + fieldName + "' not found."); + fieldType = field.getDataType(); + } + else { + throw new UnsupportedOperationException(); + } + } + return requiresTransform(exp, fieldType); + } + + @Override + protected final Expression newTransform() { + return newTransform(fieldType); + } + + protected abstract boolean requiresTransform(Expression exp, DataType fieldType); + + protected abstract Expression newTransform(DataType fieldType); + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/UriHack.java b/config-model/src/main/java/com/yahoo/schema/processing/UriHack.java new file mode 100644 index 00000000000..a4773a42ed6 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/UriHack.java @@ -0,0 +1,77 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.ArrayDataType; +import com.yahoo.document.CollectionDataType; +import com.yahoo.document.DataType; +import com.yahoo.document.WeightedSetDataType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.document.Stemming; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.Arrays; +import java.util.List; + +/** + * @author baldersheim + */ +public class UriHack extends Processor { + + private static final List<String> URL_SUFFIX = + Arrays.asList("scheme", "host", "port", "path", "query", "fragment", "hostname"); + + UriHack(Schema schema, + DeployLogger deployLogger, + RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (SDField field : schema.allConcreteFields()) { + if (field.doesIndexing()) { + DataType fieldType = field.getDataType(); + if (fieldType instanceof CollectionDataType) { + fieldType = ((CollectionDataType)fieldType).getNestedType(); + } + if (fieldType == DataType.URI) { + processField(schema, field); + } + } + } + } + + private void processField(Schema schema, SDField uriField) { + String uriName = uriField.getName(); + uriField.setStemming(Stemming.NONE); + DataType generatedType = DataType.STRING; + if (uriField.getDataType() instanceof ArrayDataType) { + generatedType = new ArrayDataType(DataType.STRING); + } + else if (uriField.getDataType() instanceof WeightedSetDataType) { + WeightedSetDataType wdt = (WeightedSetDataType) uriField.getDataType(); + generatedType = new WeightedSetDataType(DataType.STRING, wdt.createIfNonExistent(), wdt.removeIfZero()); + } + + for (String suffix : URL_SUFFIX) { + String partName = uriName + "." 
+ suffix; + // I wonder if this is explicit in qrs or implicit in backend? + // search.addFieldSetItem(uriName, partName); + SDField partField = new SDField(schema.getDocument(), partName, generatedType); + partField.setIndexStructureField(uriField.doesIndexing()); + partField.setRankType(uriField.getRankType()); + partField.setStemming(Stemming.NONE); + partField.getNormalizing().inferLowercase(); + if (uriField.getIndex(suffix) != null) { + partField.addIndex(uriField.getIndex(suffix)); + } + schema.addExtraField(partField); + schema.fieldSets().addBuiltInFieldSetItem(BuiltInFieldSets.INTERNAL_FIELDSET_NAME, partField.getName()); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/UrlFieldValidator.java b/config-model/src/main/java/com/yahoo/schema/processing/UrlFieldValidator.java new file mode 100644 index 00000000000..63d4a342c72 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/UrlFieldValidator.java @@ -0,0 +1,34 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.DataType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * @author bratseth + */ +public class UrlFieldValidator extends Processor { + + public UrlFieldValidator(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + + for (ImmutableSDField field : schema.allConcreteFields()) { + if ( ! 
field.getDataType().equals(DataType.URI)) continue; + + if (field.doesAttributing()) + throw new IllegalArgumentException("Error in " + field + " in " + schema + ": " + + "uri type fields cannot be attributes"); + } + + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ValidateFieldTypes.java b/config-model/src/main/java/com/yahoo/schema/processing/ValidateFieldTypes.java new file mode 100644 index 00000000000..2327cf4d9c9 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/ValidateFieldTypes.java @@ -0,0 +1,83 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.DataType; +import com.yahoo.document.TensorDataType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.HashMap; +import java.util.Map; + +/** + * This Processor makes sure all fields with the same name have the same {@link DataType}. This check + * explicitly disregards whether a field is an index field, an attribute or a summary field. This is a requirement if we + * hope to move to a model where index fields, attributes and summary fields share a common field class. 
+ * + * @author Simon Thoresen Hult + */ +public class ValidateFieldTypes extends Processor { + + public ValidateFieldTypes(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if (!validate) return; + + String searchName = schema.getName(); + Map<String, DataType> seenFields = new HashMap<>(); + verifySearchAndDocFields(searchName, seenFields); + verifySummaryFields(searchName, seenFields); + } + + final protected void verifySearchAndDocFields(String searchName, Map<String, DataType> seenFields) { + schema.allFields().forEach(field -> { + checkFieldType(searchName, "index field", field.getName(), field.getDataType(), seenFields); + for (Map.Entry<String, Attribute> entry : field.getAttributes().entrySet()) { + checkFieldType(searchName, "attribute", entry.getKey(), entry.getValue().getDataType(), seenFields); + } + }); + + } + final protected void verifySummaryFields(String searchName, Map<String, DataType> seenFields) { + for (DocumentSummary summary : schema.getSummaries().values()) { + for (SummaryField field : summary.getSummaryFields().values()) { + checkFieldType(searchName, "summary field", field.getName(), field.getDataType(), seenFields); + } + } + } + + private void checkFieldType(String searchName, String fieldDesc, String fieldName, DataType fieldType, + Map<String, DataType> seenFields) { + DataType seenType = seenFields.get(fieldName); + if (seenType == null) { + seenFields.put(fieldName, fieldType); + } else if ( ! compatibleTypes(seenType, fieldType)) { + throw newProcessException(searchName, fieldName, "Incompatible types. 
Expected " + + seenType.getName() + " for " + fieldDesc + + " '" + fieldName + "', got " + fieldType.getName() + "."); + } + } + + private static boolean compatibleTypes(DataType seenType, DataType fieldType) { + // legacy tag field type compatibility; probably not needed any more (Oct 2016) + if ("tag".equals(seenType.getName())) { + return "tag".equals(fieldType.getName()) || "WeightedSet<string>".equals(fieldType.getName()); + } + if ("tag".equals(fieldType.getName())) { + return "tag".equals(seenType.getName()) || "WeightedSet<string>".equals(seenType.getName()); + } + if (seenType instanceof TensorDataType && fieldType instanceof TensorDataType) { + return fieldType.isAssignableFrom(seenType); // TODO: Just do this for all types + } + return seenType.equals(fieldType); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ValidateFieldTypesDocumentsOnly.java b/config-model/src/main/java/com/yahoo/schema/processing/ValidateFieldTypesDocumentsOnly.java new file mode 100644 index 00000000000..08771b40fe9 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/ValidateFieldTypesDocumentsOnly.java @@ -0,0 +1,26 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.DataType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.HashMap; +import java.util.Map; + +public class ValidateFieldTypesDocumentsOnly extends ValidateFieldTypes { + public ValidateFieldTypesDocumentsOnly(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! 
validate) return; + + String searchName = schema.getName(); + Map<String, DataType> seenFields = new HashMap<>(); + verifySearchAndDocFields(searchName, seenFields); + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ValidateFieldWithIndexSettingsCreatesIndex.java b/config-model/src/main/java/com/yahoo/schema/processing/ValidateFieldWithIndexSettingsCreatesIndex.java new file mode 100644 index 00000000000..5423defa74a --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/ValidateFieldWithIndexSettingsCreatesIndex.java @@ -0,0 +1,42 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.document.Matching; +import com.yahoo.schema.document.Ranking; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Check that fields with index settings actually creates an index or attribute + * + * @author bratseth + */ +public class ValidateFieldWithIndexSettingsCreatesIndex extends Processor { + + public ValidateFieldWithIndexSettingsCreatesIndex(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + + Matching defaultMatching = new Matching(); + Ranking defaultRanking = new Ranking(); + for (SDField field : schema.allConcreteFields()) { + if (field.doesIndexing()) continue; + if (field.doesAttributing()) continue; + + if ( ! field.getRanking().equals(defaultRanking)) + fail(schema, field, + "Fields which are not creating an index or attribute can not contain rank settings."); + if ( ! 
field.getMatching().equals(defaultMatching)) + fail(schema, field, + "Fields which are not creating an index or attribute can not contain match settings."); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ValidateStructTypeInheritance.java b/config-model/src/main/java/com/yahoo/schema/processing/ValidateStructTypeInheritance.java new file mode 100644 index 00000000000..cad555a24b1 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/ValidateStructTypeInheritance.java @@ -0,0 +1,71 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.schema.Schema; +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import com.yahoo.document.Field; +import com.yahoo.schema.document.SDDocumentType; + +import java.util.ArrayList; +import java.util.HashSet; + +/** + * @author arnej + */ +public class ValidateStructTypeInheritance extends Processor { + + public ValidateStructTypeInheritance(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if (!validate) return; + verifyNoRedeclarations(schema.getDocument()); + } + + void fail(Field field, String message) { + throw newProcessException(schema, field, message); + } + + void verifyNoRedeclarations(SDDocumentType docType) { + for (SDDocumentType type : docType.allTypes().values()) { + if (type.isStruct()) { + var inheritedTypes = new ArrayList<SDDocumentType>(type.getInheritedTypes()); + for (int i = 0; i < inheritedTypes.size(); i++) { + SDDocumentType inherit = inheritedTypes.get(i); + for (var extra : inherit.getInheritedTypes()) { + if (! 
inheritedTypes.contains(extra)) { + inheritedTypes.add(extra); + } + } + } + if (inheritedTypes.isEmpty()) continue; + var seenFieldNames = new HashSet<>(); + for (var field : type.getDocumentType().contentStruct().getFieldsThisTypeOnly()) { + if (seenFieldNames.contains(field.getName())) { + // cannot happen? + fail(field, "struct "+type.getName()+" has multiple fields with same name: "+field.getName()); + } + seenFieldNames.add(field.getName()); + } + for (SDDocumentType inherit : inheritedTypes) { + if (inherit.isStruct()) { + for (var field : inherit.getDocumentType().contentStruct().getFieldsThisTypeOnly()) { + if (seenFieldNames.contains(field.getName())) { + fail(field, "struct "+type.getName()+" cannot inherit from "+inherit.getName()+" and redeclare field "+field.getName()); + } + seenFieldNames.add(field.getName()); + } + } else { + fail(new Field("no field"), "struct cannot inherit from non-struct "+inherit.getName()+" class "+inherit.getClass()); + } + } + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/WordMatch.java b/config-model/src/main/java/com/yahoo/schema/processing/WordMatch.java new file mode 100644 index 00000000000..1e312b71afd --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/WordMatch.java @@ -0,0 +1,51 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.document.MatchType; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.document.Stemming; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * The implementation of word matching - with word matching the field is assumed to contain a single "word" - some + * contiguous sequence of word and number characters - but without changing the data at the indexing side (as with text + * matching) to enforce this. Word matching is thus almost like exact matching on the indexing side (no action taken), + * and like text matching on the query side. This may be suitable for attributes, where people both expect the data to + * be left as in the input document, and trivially written queries to work by default. However, this may easily lead to + * data which cannot be matched at all as the indexing and query side does not agree. 
+ * + * @author bratseth + */ +public class WordMatch extends Processor { + + public WordMatch(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + public void process(boolean validate, boolean documentsOnly) { + for (SDField field : schema.allConcreteFields()) { + processFieldRecursive(field); + } + } + + private void processFieldRecursive(SDField field) { + processField(field); + for (SDField structField : field.getStructFields()) { + processField(structField); + } + } + + private void processField(SDField field) { + if (!field.getMatching().getType().equals(MatchType.WORD)) { + return; + } + field.setStemming(Stemming.NONE); + field.getNormalizing().inferLowercase(); + field.addQueryCommand("word"); + } + + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/IndexCommandResolver.java b/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/IndexCommandResolver.java new file mode 100644 index 00000000000..565a377f2a9 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/IndexCommandResolver.java @@ -0,0 +1,62 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing.multifieldresolver; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.Schema; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.logging.Level; + +/** + * Resolver-class for harmonizing index-commands in multifield indexes + */ +public class IndexCommandResolver extends MultiFieldResolver { + + /** Commands which don't have to be harmonized between fields */ + private static List<String> ignoredCommands = new ArrayList<>(); + + /** Commands which must be harmonized between fields */ + private static List<String> harmonizedCommands = new ArrayList<>(); + + static { + String[] ignore = { "complete-boost", "literal-boost", "highlight" }; + ignoredCommands.addAll(Arrays.asList(ignore)); + String[] harmonize = { "stemming", "normalizing" }; + harmonizedCommands.addAll(Arrays.asList(harmonize)); + } + + public IndexCommandResolver(String indexName, List<SDField> fields, Schema schema, DeployLogger logger) { + super(indexName, fields, schema, logger); + } + + /** + * Check index-commands for each field, report and attempt to fix any + * inconsistencies + */ + public void resolve() { + for (SDField field : fields) { + for (String command : field.getQueryCommands()) { + if (!ignoredCommands.contains(command)) + checkCommand(command); + } + } + } + + private void checkCommand(String command) { + for (SDField field : fields) { + if (!field.hasQueryCommand(command)) { + if (harmonizedCommands.contains(command)) { + deployLogger.logApplicationPackage(Level.WARNING, command + " must be added to all fields going to the same index (" + indexName + ")" + + ", adding to field " + field.getName()); + field.addQueryCommand(command); + } else { + deployLogger.logApplicationPackage(Level.WARNING, "All fields going to the same index should have the same query-commands. 
Field \'" + field.getName() + + "\' doesn't contain command \'" + command+"\'"); + } + } + } + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/MultiFieldResolver.java b/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/MultiFieldResolver.java new file mode 100644 index 00000000000..ed8ad61706b --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/MultiFieldResolver.java @@ -0,0 +1,33 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing.multifieldresolver; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.Schema; +import java.util.List; + +/** + * Abstract superclass of all multifield conflict resolvers + */ +public abstract class MultiFieldResolver { + + protected String indexName; + protected List<SDField> fields; + protected Schema schema; + + protected DeployLogger deployLogger; + + public MultiFieldResolver(String indexName, List<SDField> fields, Schema schema, DeployLogger logger) { + this.indexName = indexName; + this.fields = fields; + this.schema = schema; + this.deployLogger = logger; + } + + /** + * Checks the list of fields for specific conflicts, and reports and/or + * attempts to correct them + */ + public abstract void resolve(); + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/RankProfileTypeSettingsProcessor.java b/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/RankProfileTypeSettingsProcessor.java new file mode 100644 index 00000000000..3d79ac7d68a --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/RankProfileTypeSettingsProcessor.java @@ -0,0 +1,105 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.schema.processing.multifieldresolver;

import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.search.query.profile.types.FieldDescription;
import com.yahoo.search.query.profile.types.FieldType;
import com.yahoo.search.query.profile.types.QueryProfileType;
import com.yahoo.search.query.profile.types.TensorFieldType;
import com.yahoo.schema.FeatureNames;
import com.yahoo.schema.RankProfile;
import com.yahoo.schema.RankProfileRegistry;
import com.yahoo.schema.Schema;
import com.yahoo.schema.document.Attribute;
import com.yahoo.schema.document.ImmutableSDField;
import com.yahoo.schema.document.ImportedField;
import com.yahoo.schema.document.ImportedFields;
import com.yahoo.schema.processing.Processor;
import com.yahoo.searchlib.rankingexpression.Reference;
import com.yahoo.tensor.TensorType;
import com.yahoo.vespa.model.container.search.QueryProfiles;

import java.util.Map;
import java.util.Optional;

/**
 * Processor which registers input type settings on the rank profiles of a schema.
 *
 * The settings cover the tensor types of attribute fields (concrete and imported)
 * and of tensor-typed query features declared in query profile types.
 *
 * @author geirst
 */
public class RankProfileTypeSettingsProcessor extends Processor {

    public RankProfileTypeSettingsProcessor(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) {
        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
    }

    /** Does nothing when only documents are processed; otherwise handles attributes, imported fields and query profile types in turn. */
    @Override
    public void process(boolean validate, boolean documentsOnly) {
        if (documentsOnly) return;

        processAttributeFields();
        processImportedFields();
        processQueryProfileTypes();
    }

    /** Registers the tensor type of each concrete field having an attribute named like the field. */
    private void processAttributeFields() {
        if (schema == null) return; // we're processing global profiles

        for (ImmutableSDField field : schema.allConcreteFields()) {
            Attribute attribute = field.getAttributes().get(field.getName());
            if (attribute == null) continue;
            attribute.tensorType()
                     .ifPresent(type -> addAttributeTypeToRankProfiles(attribute.getName(), type.toString()));
        }
    }

    /** Handles every imported field of the schema, if it has any. */
    private void processImportedFields() {
        if (schema == null) return; // we're processing global profiles

        schema.importedFields()
              .ifPresent(imported -> imported.fields().forEach((name, importedField) -> processImportedField(importedField)));
    }

    /** Registers the tensor type of the target attribute under the name of the imported field. */
    private void processImportedField(ImportedField field) {
        ImmutableSDField target = field.targetField();
        Attribute attribute = target.getAttributes().get(target.getName());
        if (attribute == null) return;
        attribute.tensorType()
                 .ifPresent(type -> addAttributeTypeToRankProfiles(field.fieldName(), type.toString()));
    }

    /** Adds the attribute type to all rank profiles of this schema. */
    private void addAttributeTypeToRankProfiles(String attributeName, String attributeType) {
        for (RankProfile profile : rankProfileRegistry.rankProfilesOf(schema))
            profile.addAttributeType(attributeName, attributeType);
    }

    /** Visits every field description of every registered query profile type. */
    private void processQueryProfileTypes() {
        for (QueryProfileType type : queryProfiles.getRegistry().getTypeRegistry().allComponents())
            type.fields().values().forEach(this::processFieldDescription);
    }

    /** Registers a tensor-typed field as a rank profile input when its name is a simple reference to a query feature. */
    private void processFieldDescription(FieldDescription fieldDescription) {
        FieldType fieldType = fieldDescription.getType();
        if ( ! (fieldType instanceof TensorFieldType)) return;

        TensorFieldType tensorFieldType = (TensorFieldType)fieldType;
        Reference.simple(fieldDescription.getName())
                 .filter(reference -> FeatureNames.isQueryFeature(reference))
                 .ifPresent(reference -> addQueryFeatureTypeToRankProfiles(reference, tensorFieldType.asTensorType()));
    }

    /** Adds the query feature as an input to every registered rank profile not already having it. */
    private void addQueryFeatureTypeToRankProfiles(Reference queryFeature, TensorType queryFeatureType) {
        for (RankProfile profile : rankProfileRegistry.all()) {
            if (profile.inputs().containsKey(queryFeature)) continue; // declared inputs have precedence
            profile.addInput(queryFeature,
                             new RankProfile.Input(queryFeature, queryFeatureType, Optional.empty()));
        }
    }

}

// diff --git a/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/RankTypeResolver.java b/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/RankTypeResolver.java
// new file mode 100644
// index 00000000000..6424fd8ba06
// --- /dev/null
// +++ b/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/RankTypeResolver.java
// @@ -0,0 +1,46 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.processing.multifieldresolver;

import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.schema.document.RankType;
import com.yahoo.schema.document.SDField;
import com.yahoo.schema.Schema;

import java.util.List;
import java.util.logging.Level;

/**
 * Checks whether the fields of an index (typically combined through an index-to statement)
 * define different rank types. If they do, a warning is logged and the rank type
 * of the first field is applied to the conflicting field.
 *
 * @author hmusum
 */
public class RankTypeResolver extends MultiFieldResolver {

    public RankTypeResolver(String indexName, List<SDField> fields, Schema schema, DeployLogger logger) {
        super(indexName, fields, schema, logger);
    }

    /**
     * Uses the rank type of the first field as the rank type of the index: every later field
     * with a different rank type gets a warning and has its rank type overwritten.
     * Does nothing when there are no fields.
     */
    @Override
    public void resolve() {
        if (fields.isEmpty()) return;

        SDField firstField = fields.get(0);
        RankType rankType = firstField.getRankType();
        for (SDField field : fields.subList(1, fields.size())) {
            if (field.getRankType().equals(rankType)) continue;

            // The winning rank type comes from the first field, so that is the field to name
            // as the other side of the conflict (not the conflicting field once more).
            deployLogger.logApplicationPackage(Level.WARNING, "In field '" + field.getName() + "' " +
                                                              field.getRankType() + " for index '" + indexName +
                                                              "' conflicts with " + rankType +
                                                              " defined for the same index in field '" +
                                                              firstField.getName() + "'. Using " +
                                                              rankType + ".");
            field.setRankType(rankType);
        }
    }
}

// diff --git a/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/StemmingResolver.java b/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/StemmingResolver.java
// new file mode 100644
// index 00000000000..95d9a50a6ab
// --- /dev/null
// +++ b/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/StemmingResolver.java
// @@ -0,0 +1,43 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.processing.multifieldresolver;

import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.schema.Schema;
import com.yahoo.schema.document.SDField;
import com.yahoo.schema.document.Stemming;

import java.util.List;
import java.util.logging.Level;

/**
 * Resolver which reports fields having different stemming settings
 * while being combined into the same index.
 */
public class StemmingResolver extends MultiFieldResolver {

    public StemmingResolver(String indexName, List<SDField> fields, Schema schema, DeployLogger logger) {
        super(indexName, fields, schema, logger);
    }

    /**
     * Compares the stemming of every field to that of the first field and logs a warning
     * for each field which differs. No field is modified.
     */
    @Override
    public void resolve() {
        SDField referenceField = null;
        Stemming referenceStemming = null;
        for (SDField field : fields) {
            Stemming current = field.getStemming(schema);
            if (referenceField == null) { // the first field is what the rest are compared to
                referenceField = field;
                referenceStemming = current;
                continue;
            }
            if (current == referenceStemming) continue;

            deployLogger.logApplicationPackage(Level.WARNING, "Field '" + field.getName() + "' has " + current +
                                                              ", whereas field '" + referenceField.getName() + "' has " + referenceStemming +
                                                              ". All fields indexing to the index '" + indexName + "' must have the same stemming." +
                                                              " This should be corrected as it will make indexing fail in a few cases.");
        }
    }

}

// diff --git a/config-model/src/main/java/com/yahoo/schema/processing/package-info.java b/config-model/src/main/java/com/yahoo/schema/processing/package-info.java
// new file mode 100644
// index 00000000000..e81d50897ac
// --- /dev/null
// +++ b/config-model/src/main/java/com/yahoo/schema/processing/package-info.java
// @@ -0,0 +1,14 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/** + * Classes in this package (processors) implement some search + * definition features by reducing them to simpler features. + * The processors are run after parsing of the search definition, + * before creating the derived model. + * + * For simplicity, features should always be implemented here + * rather than in the derived model if possible. + * + * New processors must be added to the list in Processing. + */ +@com.yahoo.api.annotations.PackageMarker +package com.yahoo.schema.processing; |