// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.schema; import com.yahoo.config.application.api.ApplicationPackage; import com.yahoo.config.application.api.DeployLogger; import com.yahoo.config.application.api.FileRegistry; import com.yahoo.config.model.api.ModelContext; import com.yahoo.config.model.application.provider.BaseDeployLogger; import com.yahoo.config.model.deploy.TestProperties; import com.yahoo.document.DataTypeName; import com.yahoo.document.Field; import com.yahoo.schema.derived.SummaryClass; import com.yahoo.schema.document.Attribute; import com.yahoo.schema.document.ImmutableSDField; import com.yahoo.schema.document.ImportedField; import com.yahoo.schema.document.ImportedFields; import com.yahoo.schema.document.SDDocumentType; import com.yahoo.schema.document.SDField; import com.yahoo.schema.document.Stemming; import com.yahoo.schema.document.TemporaryImportedFields; import com.yahoo.schema.document.annotation.SDAnnotationType; import com.yahoo.searchlib.rankingexpression.Reference; import com.yahoo.vespa.documentmodel.DocumentSummary; import com.yahoo.vespa.documentmodel.SummaryField; import java.io.Reader; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.TreeMap; import java.util.logging.Level; import java.util.stream.Stream; /** * A schema contains a document type, additional fields, rank profiles and document summaries. * * @author bratseth */ // TODO: Make a class owned by this, for each of these responsibilities: // Managing indexes, managing attributes, managing summary classes. // Ensure that after the processing step, all implicit instances of the above types are explicitly represented public class Schema implements ImmutableSchema { private static final String SD_DOC_FIELD_NAME = "sddocname"; private static final List RESERVED_NAMES = List.of( "index", "index_url", "summary", "attribute", "select_input", "host", SummaryClass.DOCUMENT_ID_FIELD, "position", "split_foreach", "tokenize", "if", "else", "switch", "case", SD_DOC_FIELD_NAME, "relevancy"); /** The unique name of this schema */ private String name; /** The application package this is constructed from */ private final ApplicationPackage applicationPackage; /** The name of the schema this should inherit all the content of, if any */ private final Optional inherited; /** True if this doesn't define a search, just a document type */ private final boolean documentsOnly; private Boolean rawAsBase64 = null; /** The stemming setting of this schema. Default is BEST. */ private Stemming stemming = null; private final FieldSets fieldSets = new FieldSets(Optional.of(this)); /** The document contained in this schema */ private SDDocumentType documentType; /** The extra fields of this schema */ private final Map fields = new LinkedHashMap<>(); private final Map indices = new LinkedHashMap<>(); /** The explicitly defined summaries of this schema. _Must_ preserve order. */ private final Map summaries = new LinkedHashMap<>(); /** External ranking expression files of this */ private final LargeRankingExpressions largeRankingExpressions; /** Constants that will be available in all rank profiles. */ // TODO: Remove on Vespa 9: Should always be in a rank profile private final Map constants = new LinkedHashMap<>(); // TODO: Remove on Vespa 9: Should always be in a rank profile private final Map onnxModels = new LinkedHashMap<>(); /** All imported fields of this (and parent schemas) */ // TODO: Use empty, not optional // TODO: Merge this and importedFields private final Optional temporaryImportedFields = Optional.of(new TemporaryImportedFields(this)); /** The resulting processed field */ private Optional importedFields = Optional.empty(); private final DeployLogger deployLogger; private final ModelContext.Properties properties; private Application owner; /** Testing only */ public Schema(String name, ApplicationPackage applicationPackage) { this(name, applicationPackage, Optional.empty(), null, new BaseDeployLogger(), new TestProperties()); } public Schema(String name, ApplicationPackage applicationPackage, FileRegistry fileRegistry, DeployLogger deployLogger, ModelContext.Properties properties) { this(name, applicationPackage, Optional.empty(), fileRegistry, deployLogger, properties); } /** * Creates a schema * * @param name of the schema * @param inherited the schema this inherits, if any */ public Schema(String name, ApplicationPackage applicationPackage, Optional inherited, FileRegistry fileRegistry, DeployLogger deployLogger, ModelContext.Properties properties) { this(inherited, applicationPackage, fileRegistry, deployLogger, properties, false); this.name = Objects.requireNonNull(name, "A schema must have a name"); } protected Schema(ApplicationPackage applicationPackage, FileRegistry fileRegistry, DeployLogger deployLogger, ModelContext.Properties properties) { this(Optional.empty(), applicationPackage, fileRegistry, deployLogger, properties, true); } private Schema(Optional inherited, ApplicationPackage applicationPackage, FileRegistry fileRegistry, DeployLogger deployLogger, ModelContext.Properties properties, boolean documentsOnly) { this.inherited = inherited; this.applicationPackage = applicationPackage; this.deployLogger = deployLogger; this.properties = properties; this.documentsOnly = documentsOnly; largeRankingExpressions = new LargeRankingExpressions(fileRegistry); } /** * Assigns the owner of this * * @throws IllegalStateException if an owner is already assigned */ public void setOwner(Application owner) { if (this.owner != null) throw new IllegalStateException("Cannot reassign the owner of " + this); this.owner = owner; } protected void setName(String name) { this.name = name; } @Override public String getName() {return name; } /** Returns true if this only defines a document type, not a full schema */ public boolean isDocumentsOnly() { return documentsOnly; } @Override public Optional inherited() { return inherited.map(name -> owner.schemas().get(name)); } /** * Returns true if 'raw' fields shall be presented as base64 in summary * Note that this is temporary and will disappear on Vespa 8 as it will become default, and only option. * * @return true if raw shall be encoded as base64 in summary */ public boolean isRawAsBase64() { if (rawAsBase64 != null) return rawAsBase64; if (inherited.isEmpty()) return true; return requireInherited().isRawAsBase64(); } public void enableRawAsBase64(boolean value) { rawAsBase64 = value; } /** * Sets the stemming default of fields. Default is ALL * * @param stemming set default stemming for this searchdefinition * @throws NullPointerException if this is attempted set to null */ public void setStemming(Stemming stemming) { this.stemming = Objects.requireNonNull(stemming, "Stemming cannot be null"); } /** Returns whether fields should be stemmed by default or not. Default is BEST. This is never null. */ public Stemming getStemming() { if (stemming != null) return stemming; if (inherited.isEmpty()) return Stemming.BEST; return requireInherited().getStemming(); } /** * Adds a document type which is defined in this search definition * * @param document the document type to add */ public void addDocument(SDDocumentType document) { if (documentType != null) { throw new IllegalArgumentException("Schema cannot have more than one document"); } documentType = document; } @Override public LargeRankingExpressions rankExpressionFiles() { return largeRankingExpressions; } public void add(RankProfile.Constant constant) { constants.put(constant.name(), constant); } /** Returns an unmodifiable map of the constants declared in this. */ public Map declaredConstants() { return constants; } /** Returns an unmodifiable map of the constants available in this. */ @Override public Map constants() { if (inherited().isEmpty()) return Collections.unmodifiableMap(constants); if (constants.isEmpty()) return inherited().get().constants(); Map allConstants = new LinkedHashMap<>(inherited().get().constants()); allConstants.putAll(constants); return allConstants; } public void add(OnnxModel model) { onnxModels.put(model.getName(), model); } /** Returns an unmodifiable map of the onnx models declared in this. */ public Map declaredOnnxModels() { return onnxModels; } /** Returns an unmodifiable map of the onnx models available in this. */ @Override public Map onnxModels() { if (inherited().isEmpty()) return Collections.unmodifiableMap(onnxModels); if (onnxModels.isEmpty()) return inherited().get().onnxModels(); Map allModels = new LinkedHashMap<>(inherited().get().onnxModels()); allModels.putAll(onnxModels); return allModels; } public Optional temporaryImportedFields() { return temporaryImportedFields; } public Optional importedFields() { return importedFields; } public void setImportedFields(ImportedFields importedFields) { this.importedFields = Optional.of(importedFields); } @Override public Stream allImportedFields() { return importedFields .map(fields -> fields.fields().values().stream()) .orElse(Stream.empty()) .map(field -> field.asImmutableSDField()); } @Override public ImmutableSDField getField(String name) { ImmutableSDField field = getConcreteField(name); if (field != null) return field; return allImportedFields() .filter(f -> f.getName().equals(name)) .findFirst() .orElse(null); } @Override public List allFieldsList() { List all = new ArrayList<>(); all.addAll(extraFieldList()); for (Field field : documentType.fieldSet()) { all.add((ImmutableSDField) field); } if (importedFields.isPresent()) { for (ImportedField imported : importedFields.get().fields().values()) { all.add(imported.asImmutableSDField()); } } return all; } /** * Gets a document from this search definition * * @param name the name of the document to return * @return the contained or used document type, or null if there is no such document */ public SDDocumentType getDocument(String name) { if (documentType != null && name.equals(documentType.getName())) { return documentType; } return null; } /** Returns true if the document has been added. */ public boolean hasDocument() { return documentType != null; } /** Returns the document in this search. */ @Override public SDDocumentType getDocument() { return documentType; } /** * Returns a list of all the fields of this search definition, that is all fields in all documents, in the documents * they inherit, and all extra fields. The caller receives ownership to the list - subsequent changes to it will not * impact this */ @Override public List allConcreteFields() { List allFields = new ArrayList<>(); allFields.addAll(extraFieldList()); for (Field field : documentType.fieldSet()) { allFields.add((SDField)field); } return allFields; } /** * Returns the content of a ranking expression file */ @Override public Reader getRankingExpression(String fileName) { return applicationPackage.getRankingExpression(fileName); } public Application application() { return owner; } @Override public ApplicationPackage applicationPackage() { return applicationPackage; } @Override public DeployLogger getDeployLogger() { return deployLogger; } @Override public ModelContext.Properties getDeployProperties() { return properties; } /** * Returns a field defined in this search definition or one if its documents. Fields in this search definition takes * precedence over document fields having the same name * * @param name of the field * @return the SDField representing the field */ @Override public SDField getConcreteField(String name) { SDField field = getExtraField(name); if (field != null) return field; return (SDField) documentType.getField(name); } /** * Returns a field defined in one of the documents of this schema. * This does not include the extra fields defined outside the document * (those accessible through the getExtraField() method). * * @param name the name of the field to return * @return the named field, or null if not found */ public SDField getDocumentField(String name) { return (SDField) documentType.getField(name); } /** * Adds an extra field of this search definition not contained in a document * * @param field to add to the schemas list of external fields */ public void addExtraField(SDField field) { if (fields.containsKey(field.getName())) { deployLogger.logApplicationPackage(Level.WARNING, "Duplicate field " + field.getName() + " in search definition " + getName()); } else { field.setIsExtraField(true); fields.put(field.getName(), field); } } public Collection extraFieldList() { if (inherited.isEmpty()) return fields.values(); var fields = new HashSet<>(requireInherited().extraFieldList()); fields.addAll(this.fields.values()); return fields; } public Collection allExtraFields() { Map extraFields = new TreeMap<>(); if (inherited.isPresent()) requireInherited().allExtraFields().forEach(field -> extraFields.put(field.getName(), field)); for (Field field : documentType.fieldSet()) { SDField sdField = (SDField) field; if (sdField.isExtraField()) { extraFields.put(sdField.getName(), sdField); } } for (SDField field : extraFieldList()) { extraFields.put(field.getName(), field); } return extraFields.values(); } /** * Returns a field by name, or null if it is not present * * @param fieldName the name of the external field to get * @return the SDField of this name */ public SDField getExtraField(String fieldName) { SDField field = fields.get(fieldName); if (field != null) return field; if (inherited.isEmpty()) return null; return requireInherited().getExtraField(fieldName); } /** * Adds an explicitly defined index to this search definition * * @param index the index to add */ public void addIndex(Index index) { indices.put(index.getName(), index); } /** * Returns an index, or null if no index with this name has had some explicit settings applied. Even if * this returns null, the index may be implicitly defined by an indexing statement. This will return the * index whether it is defined on this schema or on one of its fields. * * @param name the name of the index to get * @return the index requested */ @Override public Index getIndex(String name) { List sameIndices = new ArrayList<>(1); getSchemaIndex(name).ifPresent(sameIndices::add); for (ImmutableSDField field : allConcreteFields()) { if (field.getIndex(name) != null) sameIndices.add(field.getIndex(name)); } if (sameIndices.size() == 0) return null; if (sameIndices.size() == 1) return sameIndices.get(0); return consolidateIndices(sameIndices); } /** Returns the schema level index of this name, in this or any inherited schema, if any */ Optional getSchemaIndex(String name) { if (indices.containsKey(name)) return Optional.of(indices.get(name)); if (inherited.isPresent()) return requireInherited().getSchemaIndex(name); return Optional.empty(); } public boolean existsIndex(String name) { if (indices.get(name) != null) return true; if (inherited.isPresent() && requireInherited().existsIndex(name)) return true; for (ImmutableSDField field : allConcreteFields()) { if (field.existsIndex(name)) return true; } return false; } /** * Consolidates a set of index settings for the same index into one * * @param indices the list of indexes to consolidate * @return the consolidated index */ private Index consolidateIndices(List indices) { Index first = indices.get(0); Index consolidated = new Index(first.getName()); consolidated.setRankType(first.getRankType()); consolidated.setType(first.getType()); for (Index current : indices) { if (current.isPrefix()) { consolidated.setPrefix(true); } if (current.useInterleavedFeatures()) { consolidated.setInterleavedFeatures(true); } if (consolidated.getRankType() == null) { consolidated.setRankType(current.getRankType()); } else { if (current.getRankType() != null && consolidated.getRankType() != current.getRankType()) deployLogger.logApplicationPackage(Level.WARNING, "Conflicting rank type settings for " + first.getName() + " in " + this + ", using " + consolidated.getRankType()); } for (Iterator j = current.aliasIterator(); j.hasNext();) { consolidated.addAlias(j.next()); } } return consolidated; } /** All explicitly defined indices, both on this schema itself (returned first) and all its fields */ @Override public List getExplicitIndices() { List allIndices = new ArrayList<>(indices.values()); if (inherited.isPresent()) { for (Index inheritedIndex : requireInherited().getExplicitIndices()) { if ( ! indices.containsKey(inheritedIndex.getName())) // child redefinitions shadows parents allIndices.add(inheritedIndex); } } for (ImmutableSDField field : allConcreteFields()) allIndices.addAll(field.getIndices().values()); return Collections.unmodifiableList(allIndices); } /** Adds an explicitly defined summary to this search definition */ public void addSummary(DocumentSummary summary) { summaries.put(summary.getName(), summary); } /** * Returns a summary class defined by this search definition, or null if no summary with this name is defined. * The default summary, named "default" is always present. */ public DocumentSummary getSummary(String name) { var summary = summaries.get(name); if (summary != null) return summary; if (inherited.isEmpty()) return null; return requireInherited().getSummary(name); } /** * Returns the first explicit instance found of a summary field with this name, or null if not present (implicitly * or explicitly) in any summary class. */ public SummaryField getSummaryField(String name) { for (DocumentSummary summary : summaries.values()) { SummaryField summaryField = summary.getSummaryField(name); if (summaryField != null) { return summaryField; } } if (inherited.isEmpty()) return null; return requireInherited().getSummaryField(name); } /** * Returns the first explicit instance found of a summary field with this name, or null if not present explicitly in * any summary class * * @param name the name of the explicit summary field to get. * @return the SummaryField found. */ public SummaryField getExplicitSummaryField(String name) { for (DocumentSummary summary : summaries.values()) { SummaryField summaryField = summary.getSummaryField(name); if (summaryField != null && !summaryField.isImplicit()) return summaryField; } if (inherited.isEmpty()) return null; return requireInherited().getExplicitSummaryField(name); } /** * Summaries defined by fields of this search definition. The default summary, named "default", is always the first * one in the returned iterator. */ public Map getSummaries() { // Shortcuts if (inherited.isEmpty()) return summaries; if (summaries.isEmpty()) return requireInherited().getSummaries(); var allSummaries = new LinkedHashMap<>(requireInherited().getSummaries()); allSummaries.putAll(summaries); return allSummaries; } /** Returns the summaries defines in this only, not any that are inherited. */ public Map getSummariesInThis() { return Collections.unmodifiableMap(summaries); } /** * Returns all summary fields, of all document summaries, which has the given field as source. * The list becomes owned by the receiver. * * @param field the source field * @return the list of summary fields found */ @Override public List getSummaryFields(ImmutableSDField field) { List summaryFields = inherited.isPresent() ? requireInherited().getSummaryFields(field) : new java.util.ArrayList<>(); for (DocumentSummary documentSummary : summaries.values()) { for (SummaryField summaryField : documentSummary.getSummaryFields().values()) { if (summaryField.hasSource(field.getName())) { boolean wanted = true; for (var already : summaryFields) { if (summaryField == already) wanted = false; } if (wanted) { summaryFields.add(summaryField); } } } } return summaryFields; } /** * Returns one summary field for each summary field name. If there are multiple summary fields with the same * name, the last one will be used. Multiple fields of the same name should all have the same content in a valid * search definition, except from the destination set. So this method can be used for all summary handling except * processing the destination set. The map becomes owned by the receiver. */ public Map getUniqueNamedSummaryFields() { Map summaryFields = inherited.isPresent() ? requireInherited().getUniqueNamedSummaryFields() : new java.util.LinkedHashMap<>(); for (DocumentSummary documentSummary : summaries.values()) { for (SummaryField summaryField : documentSummary.getSummaryFields().values()) { summaryFields.put(summaryField.getName(), summaryField); } } return summaryFields; } /** Returns the first occurrence of an attribute having this name, or null if none */ public Attribute getAttribute(String name) { for (ImmutableSDField field : allConcreteFields()) { Attribute attribute = field.getAttributes().get(name); if (attribute != null) { return attribute; } } return null; } @Override public boolean equals(Object o) { if (!(o instanceof Schema)) { return false; } Schema other = (Schema)o; return getName().equals(other.getName()); } @Override public int hashCode() { return name.hashCode(); } @Override public String toString() { return "schema '" + getName() + "'"; } public boolean isAccessingDiskSummary(SummaryField field) { if (!field.getTransform().isInMemory()) return true; if (field.getSources().size() == 0) return isAccessingDiskSummary(getName()); for (SummaryField.Source source : field.getSources()) { if (isAccessingDiskSummary(source.getName())) return true; } return false; } private boolean isAccessingDiskSummary(String source) { SDField field = getConcreteField(source); if (field == null) return false; if (field.doesSummarying() && !field.doesAttributing()) return true; return false; } public FieldSets fieldSets() { return fieldSets; } private Schema inheritedSchema = null; public void setInheritedSchema(Schema value) { inheritedSchema = value; } /** Returns the schema inherited by this, or throws if none */ private Schema requireInherited() { if (inheritedSchema != null) return inheritedSchema; return owner.schemas().get(inherited.get()); } /** * For adding structs defined in document scope * * @param dt the struct to add * @return self, for chaining */ public Schema addType(SDDocumentType dt) { documentType.addType(dt); // TODO This is a very very dirty thing. It must go return this; } public Schema addAnnotation(SDAnnotationType dt) { documentType.addAnnotation(dt); return this; } public void validate(DeployLogger logger) { if (inherited.isPresent()) { if (! owner.schemas().containsKey(inherited.get())) throw new IllegalArgumentException(this + " inherits '" + inherited.get() + "', but this schema does not exist"); // Require schema and document type inheritance to be consistent to keep things simple // And require it to be explicit so we have the option to support other possibilities later var parentDocument = owner.schemas().get(inherited.get()).getDocument(); if ( ! getDocument().inheritedTypes().containsKey(new DataTypeName(parentDocument.getName()))) throw new IllegalArgumentException(this + " inherits '" + inherited.get() + "', but its document type does not inherit the parent's document type"); } for (var summary : summaries.values()) summary.validate(logger); } /** Returns true if the given field name is a reserved name */ public static boolean isReservedName(String name) { return RESERVED_NAMES.contains(name); } }