diff options
author | Jon Bratseth <bratseth@gmail.com> | 2022-05-19 12:03:06 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2022-05-19 12:03:06 +0200 |
commit | 5c24dc5c9642a8d9ed70aee4c950fd0678a1ebec (patch) | |
tree | bd9b74bf00c832456f0b83c1b2cd7010be387d68 /config-model/src/main/java/com/yahoo/schema/derived | |
parent | f17c4fe7de4c55f5c4ee61897eab8c2f588d8405 (diff) |
Rename the 'searchdefinition' package to 'schema'
Diffstat (limited to 'config-model/src/main/java/com/yahoo/schema/derived')
32 files changed, 4440 insertions, 0 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/AttributeFields.java b/config-model/src/main/java/com/yahoo/schema/derived/AttributeFields.java new file mode 100644 index 00000000000..99f73a75669 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/AttributeFields.java @@ -0,0 +1,324 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.config.subscription.ConfigInstanceUtil; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.Case; +import com.yahoo.schema.document.Dictionary; +import com.yahoo.schema.document.GeoPos; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.schema.document.Ranking; +import com.yahoo.schema.document.Sorting; +import com.yahoo.vespa.config.search.AttributesConfig; +import com.yahoo.vespa.indexinglanguage.expressions.ToPositionExpression; + +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isArrayOfSimpleStruct; +import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isMapOfPrimitiveType; +import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isMapOfSimpleStruct; +import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isSupportedComplexField; + +/** + * The set of all attribute fields defined by a search definition + * + * @author bratseth + */ +public class AttributeFields extends Derived implements AttributesConfig.Producer { + + public enum FieldSet {ALL, FAST_ACCESS} + + private Map<String, Attribute> attributes = new java.util.LinkedHashMap<>(); + private Map<String, Attribute> importedAttributes = new java.util.LinkedHashMap<>(); + + /** Whether this has 
any position attribute */ + private boolean hasPosition = false; + + public static final AttributeFields empty = new AttributeFields(null); + + public AttributeFields(Schema schema) { + if (schema != null) + derive(schema); + } + + /** Derives everything from a field */ + @Override + protected void derive(ImmutableSDField field, Schema schema) { + if (unsupportedFieldType(field)) { + return; // Ignore complex struct and map fields for indexed search (only supported for streaming search) + } + if (isArrayOfSimpleStruct(field)) { + deriveArrayOfSimpleStruct(field); + } else if (isMapOfSimpleStruct(field)) { + deriveMapOfSimpleStruct(field); + } else if (isMapOfPrimitiveType(field)) { + deriveMapOfPrimitiveType(field); + } else { + deriveAttributes(field); + } + } + + private static boolean unsupportedFieldType(ImmutableSDField field) { + return (field.usesStructOrMap() && + !isSupportedComplexField(field) && + !GeoPos.isAnyPos(field)); + } + + /** Returns an attribute by name, or null if it doesn't exist */ + public Attribute getAttribute(String attributeName) { + return attributes.get(attributeName); + } + + public boolean containsAttribute(String attributeName) { + return getAttribute(attributeName) != null; + } + + /** Derives one attribute. 
TODO: Support non-default named attributes */ + private void deriveAttributes(ImmutableSDField field) { + if (field.isImportedField()) { + deriveImportedAttributes(field); + return; + } + for (Attribute fieldAttribute : field.getAttributes().values()) { + deriveAttribute(field, fieldAttribute); + } + + if (field.containsExpression(ToPositionExpression.class)) { + // TODO: Move this check to processing and remove this + if (hasPosition) { + throw new IllegalArgumentException("Can not specify more than one set of position attributes per field: " + field.getName()); + } + hasPosition = true; + } + } + + private void applyRanking(ImmutableSDField field, Attribute attribute) { + Ranking ranking = field.getRanking(); + if (ranking != null && ranking.isFilter()) { + attribute.setEnableBitVectors(true); + attribute.setEnableOnlyBitVector(true); + } + } + + private void deriveAttribute(ImmutableSDField field, Attribute fieldAttribute) { + Attribute attribute = getAttribute(fieldAttribute.getName()); + if (attribute == null) { + attributes.put(fieldAttribute.getName(), fieldAttribute); + attribute = getAttribute(fieldAttribute.getName()); + } + applyRanking(field, attribute); + } + + private void deriveImportedAttributes(ImmutableSDField field) { + for (Attribute attribute : field.getAttributes().values()) { + if (!importedAttributes.containsKey(field.getName())) { + importedAttributes.put(field.getName(), attribute); + } + } + } + + private void deriveArrayOfSimpleStruct(ImmutableSDField field) { + for (ImmutableSDField structField : field.getStructFields()) { + deriveAttributeAsArrayType(structField); + } + } + + private void deriveAttributeAsArrayType(ImmutableSDField field) { + if (field.isImportedField()) { + deriveImportedAttributes(field); + return; + } + Attribute attribute = field.getAttributes().get(field.getName()); + if (attribute != null) { + applyRanking(field, attribute); + attributes.put(attribute.getName(), attribute.convertToArray()); + } + } + + private 
void deriveMapOfSimpleStruct(ImmutableSDField field) { + deriveAttributeAsArrayType(field.getStructField("key")); + deriveMapValueField(field.getStructField("value")); + } + + private void deriveMapValueField(ImmutableSDField valueField) { + for (ImmutableSDField structField : valueField.getStructFields()) { + deriveAttributeAsArrayType(structField); + } + } + + private void deriveMapOfPrimitiveType(ImmutableSDField field) { + deriveAttributeAsArrayType(field.getStructField("key")); + deriveAttributeAsArrayType(field.getStructField("value")); + } + + /** Returns a read only attribute iterator */ + public Iterator attributeIterator() { + return attributes().iterator(); + } + + public Collection<Attribute> attributes() { + return Collections.unmodifiableCollection(attributes.values()); + } + + public Collection<Attribute> structFieldAttributes(String baseFieldName) { + String structPrefix = baseFieldName + "."; + return attributes().stream() + .filter(attribute -> attribute.getName().startsWith(structPrefix)) + .collect(Collectors.toList()); + } + + public String toString() { + return "attributes " + getName(); + } + + @Override + protected String getDerivedName() { + return "attributes"; + } + + @SuppressWarnings("removal") // TODO Vespa 8: remove + private Map<String, AttributesConfig.Attribute.Builder> toMap(List<AttributesConfig.Attribute.Builder> ls) { + Map<String, AttributesConfig.Attribute.Builder> ret = new LinkedHashMap<>(); + for (AttributesConfig.Attribute.Builder builder : ls) { + ret.put((String) ConfigInstanceUtil.getField(builder, "name"), builder); + } + return ret; + } + + @Override + public void getConfig(AttributesConfig.Builder builder) { + //TODO This is just to get some exporting tests to work, Should be undone and removed + getConfig(builder, FieldSet.ALL, 77777, false); + } + + private boolean isAttributeInFieldSet(Attribute attribute, FieldSet fs) { + return (fs == FieldSet.ALL) || ((fs == FieldSet.FAST_ACCESS) && attribute.isFastAccess()); 
+ } + + private AttributesConfig.Attribute.Builder getConfig(String attrName, Attribute attribute, boolean imported) { + AttributesConfig.Attribute.Builder aaB = new AttributesConfig.Attribute.Builder() + .name(attrName) + .datatype(AttributesConfig.Attribute.Datatype.Enum.valueOf(attribute.getType().getExportAttributeTypeName())) + .collectiontype(AttributesConfig.Attribute.Collectiontype.Enum.valueOf(attribute.getCollectionType().getName())); + if (attribute.isRemoveIfZero()) { + aaB.removeifzero(true); + } + if (attribute.isCreateIfNonExistent()) { + aaB.createifnonexistent(true); + } + aaB.enablebitvectors(attribute.isEnabledBitVectors()); + aaB.enableonlybitvector(attribute.isEnabledOnlyBitVector()); + if (attribute.isFastSearch() || attribute.isFastRank()) { + // TODO make a separate fastrank flag in config instead of overloading fastsearch + aaB.fastsearch(true); + } + if (attribute.isFastAccess()) { + aaB.fastaccess(true); + } + if (attribute.isMutable()) { + aaB.ismutable(true); + } + if (attribute.isHuge()) { + aaB.huge(true); + } + if (attribute.isPaged()) { + aaB.paged(true); + } + if (attribute.getSorting().isDescending()) { + aaB.sortascending(false); + } + if (attribute.getSorting().getFunction() != Sorting.Function.UCA) { + aaB.sortfunction(AttributesConfig.Attribute.Sortfunction.Enum.valueOf(attribute.getSorting().getFunction().toString())); + } + if (attribute.getSorting().getStrength() != Sorting.Strength.PRIMARY) { + aaB.sortstrength(AttributesConfig.Attribute.Sortstrength.Enum.valueOf(attribute.getSorting().getStrength().toString())); + } + if (!attribute.getSorting().getLocale().isEmpty()) { + aaB.sortlocale(attribute.getSorting().getLocale()); + } + aaB.arity(attribute.arity()); + aaB.lowerbound(attribute.lowerBound()); + aaB.upperbound(attribute.upperBound()); + aaB.densepostinglistthreshold(attribute.densePostingListThreshold()); + if (attribute.tensorType().isPresent()) { + aaB.tensortype(attribute.tensorType().get().toString()); + } + 
aaB.imported(imported); + var dma = attribute.distanceMetric(); + aaB.distancemetric(AttributesConfig.Attribute.Distancemetric.Enum.valueOf(dma.toString())); + if (attribute.hnswIndexParams().isPresent()) { + var ib = new AttributesConfig.Attribute.Index.Builder(); + var params = attribute.hnswIndexParams().get(); + ib.hnsw.enabled(true); + ib.hnsw.maxlinkspernode(params.maxLinksPerNode()); + ib.hnsw.neighborstoexploreatinsert(params.neighborsToExploreAtInsert()); + ib.hnsw.multithreadedindexing(params.multiThreadedIndexing()); + aaB.index(ib); + } + Dictionary dictionary = attribute.getDictionary(); + if (dictionary != null) { + aaB.dictionary.type(convert(dictionary.getType())); + aaB.dictionary.match(convert(dictionary.getMatch())); + } + aaB.match(convertMatch(attribute.getCase())); + return aaB; + } + + private static AttributesConfig.Attribute.Dictionary.Type.Enum convert(Dictionary.Type type) { + switch (type) { + case BTREE: + return AttributesConfig.Attribute.Dictionary.Type.BTREE; + case HASH: + return AttributesConfig.Attribute.Dictionary.Type.HASH; + case BTREE_AND_HASH: + return AttributesConfig.Attribute.Dictionary.Type.BTREE_AND_HASH; + } + return AttributesConfig.Attribute.Dictionary.Type.BTREE; + } + private static AttributesConfig.Attribute.Dictionary.Match.Enum convert(Case type) { + switch (type) { + case CASED: + return AttributesConfig.Attribute.Dictionary.Match.CASED; + case UNCASED: + return AttributesConfig.Attribute.Dictionary.Match.UNCASED; + } + return AttributesConfig.Attribute.Dictionary.Match.UNCASED; + } + private static AttributesConfig.Attribute.Match.Enum convertMatch(Case type) { + switch (type) { + case CASED: + return AttributesConfig.Attribute.Match.CASED; + case UNCASED: + return AttributesConfig.Attribute.Match.UNCASED; + } + return AttributesConfig.Attribute.Match.UNCASED; + } + + public void getConfig(AttributesConfig.Builder builder, FieldSet fs, long maxUnCommittedMemory, boolean enableBitVectors) { + for (Attribute 
attribute : attributes.values()) { + if (isAttributeInFieldSet(attribute, fs)) { + AttributesConfig.Attribute.Builder attrBuilder = getConfig(attribute.getName(), attribute, false); + attrBuilder.maxuncommittedmemory(maxUnCommittedMemory); + if (enableBitVectors && attribute.isFastSearch()) { + attrBuilder.enablebitvectors(true); + } + builder.attribute(attrBuilder); + } + } + if (fs == FieldSet.ALL) { + for (Map.Entry<String, Attribute> entry : importedAttributes.entrySet()) { + AttributesConfig.Attribute.Builder attrBuilder = getConfig(entry.getKey(), entry.getValue(), true); + attrBuilder.maxuncommittedmemory(maxUnCommittedMemory); + builder.attribute(attrBuilder); + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/Derived.java b/config-model/src/main/java/com/yahoo/schema/derived/Derived.java new file mode 100644 index 00000000000..9943a02a2f2 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/Derived.java @@ -0,0 +1,141 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.derived; + +import com.yahoo.config.ConfigInstance; +import com.yahoo.config.ConfigInstance.Builder; +import com.yahoo.document.Field; +import com.yahoo.io.IOUtils; +import com.yahoo.schema.Index; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.schema.document.SDDocumentType; +import com.yahoo.schema.document.SDField; +import com.yahoo.text.StringUtilities; + +import java.io.IOException; +import java.io.Writer; +import java.lang.reflect.Method; +import java.util.List; + +/** + * Superclass of all derived configurations + * + * @author bratseth + */ +public abstract class Derived implements Exportable { + + private String name; + + public Derived() { + this(""); + } + + public Derived(String name) { + this.name = name; + } + + public String getName() { return name; } + + protected final void setName(String name) { this.name = name; } + + /** + * Derives the content of this configuration. This + * default calls derive(Document) for each document + * and derive(SDField) for each search definition level field + * AND sets the name of this to the name of the input search definition + */ + protected void derive(Schema schema) { + setName(schema.getName()); + derive(schema.getDocument(), schema); + for (Index index : schema.getExplicitIndices()) + derive(index, schema); + for (SDField field : schema.allExtraFields()) + derive(field, schema); + schema.allImportedFields().forEach(importedField -> derive(importedField, schema)); + } + + + /** + * Derives the content of this configuration. This + * default calls derive(SDField) for each document field + */ + protected void derive(SDDocumentType document, Schema schema) { + for (Field field : document.fieldSet()) { + SDField sdField = (SDField) field; + if ( ! sdField.isExtraField()) { + derive(sdField, schema); + } + } + } + + /** + * Derives the content of this configuration. This + * default does nothing. 
+ */ + protected void derive(ImmutableSDField field, Schema schema) {} + + /** + * Derives the content of this configuration. This + * default does nothing. + */ + protected void derive(Index index, Schema schema) { + } + + protected abstract String getDerivedName(); + + /** Returns the value of getName if true, the given number as a string otherwise */ + protected String getIndex(int number, boolean labels) { + return labels ? getName() : String.valueOf(number); + } + + /** + * Exports this derived configuration to its .cfg file + * in toDirectory + * + * @param toDirectory the directory to export to, or null + * + */ + public final void export(String toDirectory) throws IOException { + Writer writer = null; + try { + String fileName = getDerivedName() + ".cfg"; + if (toDirectory != null) + writer = IOUtils.createWriter(toDirectory + "/" + fileName,false); + try { + exportBuilderConfig(writer); + } catch (ReflectiveOperationException | SecurityException | IllegalArgumentException e) { + throw new RuntimeException(e); + } + } + finally { + if (writer != null) IOUtils.closeWriter(writer); + } + } + + /** + * Checks what this is a producer of, instantiate that and export to writer + */ + // TODO move to ReflectionUtil, and move that to unexported pkg + private void exportBuilderConfig(Writer writer) throws ReflectiveOperationException, SecurityException, IllegalArgumentException, IOException { + for (Class<?> intf : getClass().getInterfaces()) { + if (ConfigInstance.Producer.class.isAssignableFrom(intf)) { + Class<?> configClass = intf.getEnclosingClass(); + String builderClassName = configClass.getCanonicalName()+"$Builder"; + Class<?> builderClass = Class.forName(builderClassName); + ConfigInstance.Builder builder = (Builder) builderClass.getDeclaredConstructor().newInstance(); + Method getConfig = getClass().getMethod("getConfig", builderClass); + getConfig.invoke(this, builder); + ConfigInstance inst = (ConfigInstance) 
configClass.getConstructor(builderClass).newInstance(builder); + List<String> payloadL = ConfigInstance.serialize(inst); + String payload = StringUtilities.implodeMultiline(payloadL); + writer.write(payload); + } + } + } + + @Override + public String getFileName() { + return getDerivedName() + ".cfg"; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java b/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java new file mode 100644 index 00000000000..0f5721bbab3 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java @@ -0,0 +1,212 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.config.ConfigInstance; +import com.yahoo.config.model.deploy.DeployState; +import com.yahoo.document.config.DocumenttypesConfig; +import com.yahoo.document.config.DocumentmanagerConfig; +import com.yahoo.io.IOUtils; +import com.yahoo.protect.Validator; +import com.yahoo.search.query.profile.QueryProfileRegistry; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.derived.validation.Validation; +import com.yahoo.vespa.config.search.AttributesConfig; +import com.yahoo.vespa.config.search.core.RankingConstantsConfig; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.io.IOException; +import java.io.Writer; + +/** + * A set of all derived configuration of a schema. Use this as a facade to individual configurations when + * necessary. 
+ * + * @author bratseth + */ +public class DerivedConfiguration implements AttributesConfig.Producer { + + private final Schema schema; + private Summaries summaries; + private SummaryMap summaryMap; + private Juniperrc juniperrc; + private AttributeFields attributeFields; + private RankProfileList rankProfileList; + private IndexingScript indexingScript; + private IndexInfo indexInfo; + private SchemaInfo schemaInfo; + private VsmFields streamingFields; + private VsmSummary streamingSummary; + private IndexSchema indexSchema; + private ImportedFields importedFields; + private final QueryProfileRegistry queryProfiles; + private final long maxUncommittedMemory; + private final boolean enableBitVectors; + + /** + * Creates a complete derived configuration from a search definition. + * Only used in tests. + * + * @param schema the search to derive a configuration from. Derived objects will be snapshots, but this argument is + * live. Which means that this object will be inconsistent when the given search definition is later + * modified. + * @param rankProfileRegistry a {@link com.yahoo.schema.RankProfileRegistry} + */ + public DerivedConfiguration(Schema schema, RankProfileRegistry rankProfileRegistry) { + this(schema, rankProfileRegistry, new QueryProfileRegistry()); + } + + DerivedConfiguration(Schema schema, RankProfileRegistry rankProfileRegistry, QueryProfileRegistry queryProfiles) { + this(schema, new DeployState.Builder().rankProfileRegistry(rankProfileRegistry).queryProfiles(queryProfiles).build()); + } + + /** + * Creates a complete derived configuration snapshot from a schema. + * + * @param schema the schema to derive a configuration from. Derived objects will be snapshots, but this + * argument is live. Which means that this object will be inconsistent if the given + * schema is later modified. 
+ */ + public DerivedConfiguration(Schema schema, DeployState deployState) { + Validator.ensureNotNull("Schema", schema); + this.schema = schema; + this.queryProfiles = deployState.getQueryProfiles().getRegistry(); + this.maxUncommittedMemory = deployState.getProperties().featureFlags().maxUnCommittedMemory(); + this.enableBitVectors = deployState.getProperties().featureFlags().enableBitVectors(); + if ( ! schema.isDocumentsOnly()) { + streamingFields = new VsmFields(schema); + streamingSummary = new VsmSummary(schema); + } + if ( ! schema.isDocumentsOnly()) { + attributeFields = new AttributeFields(schema); + summaries = new Summaries(schema, deployState.getDeployLogger(), deployState.getProperties().featureFlags()); + summaryMap = new SummaryMap(schema); + juniperrc = new Juniperrc(schema); + rankProfileList = new RankProfileList(schema, schema.rankExpressionFiles(), attributeFields, deployState); + indexingScript = new IndexingScript(schema); + indexInfo = new IndexInfo(schema); + schemaInfo = new SchemaInfo(schema, deployState.rankProfileRegistry(), summaries, summaryMap); + indexSchema = new IndexSchema(schema); + importedFields = new ImportedFields(schema); + } + Validation.validate(this, schema); + } + + /** + * Exports a complete set of configuration-server format config files. 
+ * + * @param toDirectory the directory to export to, current dir if null + * @throws IOException if exporting fails, some files may still be created + */ + public void export(String toDirectory) throws IOException { + if (!schema.isDocumentsOnly()) { + summaries.export(toDirectory); + summaryMap.export(toDirectory); + juniperrc.export(toDirectory); + attributeFields.export(toDirectory); + streamingFields.export(toDirectory); + streamingSummary.export(toDirectory); + indexSchema.export(toDirectory); + rankProfileList.export(toDirectory); + indexingScript.export(toDirectory); + indexInfo.export(toDirectory); + importedFields.export(toDirectory); + schemaInfo.export(toDirectory); + } + } + + public static void exportDocuments(DocumentmanagerConfig.Builder documentManagerCfg, String toDirectory) throws IOException { + exportCfg(new DocumentmanagerConfig(documentManagerCfg), toDirectory + "/" + "documentmanager.cfg"); + } + + public static void exportDocuments(DocumenttypesConfig.Builder documentTypesCfg, String toDirectory) throws IOException { + exportCfg(new DocumenttypesConfig(documentTypesCfg), toDirectory + "/" + "documenttypes.cfg"); + } + + public static void exportQueryProfiles(QueryProfileRegistry queryProfileRegistry, String toDirectory) throws IOException { + exportCfg(new QueryProfiles(queryProfileRegistry, (level, message) -> {}).getConfig(), toDirectory + "/" + "query-profiles.cfg"); + } + + public void exportConstants(String toDirectory) throws IOException { + RankingConstantsConfig.Builder b = new RankingConstantsConfig.Builder(); + rankProfileList.getConfig(b); + exportCfg(b.build(), toDirectory + "/" + "ranking-constants.cfg"); + } + + private static void exportCfg(ConfigInstance instance, String fileName) throws IOException { + Writer writer = null; + try { + writer = IOUtils.createWriter(fileName, false); + writer.write(instance.toString()); + writer.write("\n"); + } finally { + if (writer != null) { + IOUtils.closeWriter(writer); + } + } + } + + 
public Summaries getSummaries() { + return summaries; + } + + public AttributeFields getAttributeFields() { + return attributeFields; + } + + @Override + public void getConfig(AttributesConfig.Builder builder) { + getConfig(builder, AttributeFields.FieldSet.ALL); + } + + public void getConfig(AttributesConfig.Builder builder, AttributeFields.FieldSet fs) { + attributeFields.getConfig(builder, fs, maxUncommittedMemory, enableBitVectors); + } + + public IndexingScript getIndexingScript() { + return indexingScript; + } + + public IndexInfo getIndexInfo() { + return indexInfo; + } + + public SchemaInfo getSchemaInfo() { return schemaInfo; } + + public void setIndexingScript(IndexingScript script) { + this.indexingScript = script; + } + + public Schema getSchema() { return schema; } + + public RankProfileList getRankProfileList() { + return rankProfileList; + } + + public VsmSummary getVsmSummary() { + return streamingSummary; + } + + public VsmFields getVsmFields() { + return streamingFields; + } + + public IndexSchema getIndexSchema() { + return indexSchema; + } + + public Juniperrc getJuniperrc() { + return juniperrc; + } + + public SummaryMap getSummaryMap() { + return summaryMap; + } + + public ImportedFields getImportedFields() { + return importedFields; + } + + public QueryProfileRegistry getQueryProfiles() { return queryProfiles; } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/Deriver.java b/config-model/src/main/java/com/yahoo/schema/derived/Deriver.java new file mode 100644 index 00000000000..44bea43a8e3 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/Deriver.java @@ -0,0 +1,48 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.derived; +import com.yahoo.document.config.DocumenttypesConfig; +import com.yahoo.document.config.DocumentmanagerConfig; +import com.yahoo.schema.ApplicationBuilder; +import com.yahoo.schema.parser.ParseException; +import com.yahoo.vespa.configmodel.producers.DocumentManager; +import com.yahoo.vespa.configmodel.producers.DocumentTypes; +import java.io.IOException; +import java.util.Collections; +import java.util.List; + +/** + * Facade for deriving configs from schemas + * + * @author bratseth + */ +public class Deriver { + + public static ApplicationBuilder getSchemaBuilder(List<String> schemas) { + ApplicationBuilder builder = new ApplicationBuilder(); + try { + for (String schema : schemas) + builder.addSchemaFile(schema); + } catch (ParseException | IOException e) { + throw new IllegalArgumentException(e); + } + builder.build(true); + return builder; + } + + public static DocumentmanagerConfig.Builder getDocumentManagerConfig(String sd) { + return getDocumentManagerConfig(Collections.singletonList(sd)); + } + + public static DocumentmanagerConfig.Builder getDocumentManagerConfig(List<String> schemas) { + return new DocumentManager().produce(getSchemaBuilder(schemas).getModel(), new DocumentmanagerConfig.Builder()); + } + + public static DocumenttypesConfig.Builder getDocumentTypesConfig(String schema) { + return getDocumentTypesConfig(Collections.singletonList(schema)); + } + + public static DocumenttypesConfig.Builder getDocumentTypesConfig(List<String> schemas) { + return new DocumentTypes().produce(getSchemaBuilder(schemas).getModel(), new DocumenttypesConfig.Builder()); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/Exportable.java b/config-model/src/main/java/com/yahoo/schema/derived/Exportable.java new file mode 100644 index 00000000000..4fccfb5d9f8 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/Exportable.java @@ -0,0 +1,26 @@ +// Copyright Yahoo. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +/** + * Classes exportable to configurations + * + * @author bratseth + */ +public interface Exportable { + + /** + * Exports the configuration of this object + * + * + * @param toDirectory the directory to export to, does not write to disk if null + * @throws java.io.IOException if exporting fails, some files may still be created + */ + public void export(String toDirectory) throws java.io.IOException; + + /** + * The (short) name of the exported file + * @return a String with the (short) name of the exported file + */ + public String getFileName(); + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/FieldRankSettings.java b/config-model/src/main/java/com/yahoo/schema/derived/FieldRankSettings.java new file mode 100644 index 00000000000..ccb25df031c --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/FieldRankSettings.java @@ -0,0 +1,75 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.collections.Pair; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * The rank settings of a field used for native rank features. + * + * @author geirst + */ +public class FieldRankSettings { + + private final String fieldName; + + private final Map<String, NativeTable> tables = new LinkedHashMap<>(); + + public FieldRankSettings(String fieldName) { + this.fieldName = fieldName; + } + + public void addTable(NativeTable table) { + NativeTable existing = tables.get(table.getType().getName()); + if (existing != null) { + // TODO: Throw? 
+ return; + } + tables.put(table.getType().getName(), table); + } + + public static boolean isIndexFieldTable(NativeTable table) { + return isFieldMatchTable(table) || isProximityTable(table); + } + + public static boolean isAttributeFieldTable(NativeTable table) { + return isAttributeMatchTable(table); + } + + private static boolean isFieldMatchTable(NativeTable table) { + return (table.getType().equals(NativeTable.Type.FIRST_OCCURRENCE) || + table.getType().equals(NativeTable.Type.OCCURRENCE_COUNT)); + } + + private static boolean isAttributeMatchTable(NativeTable table) { + return (table.getType().equals(NativeTable.Type.WEIGHT)); + } + + private static boolean isProximityTable(NativeTable table) { + return (table.getType().equals(NativeTable.Type.PROXIMITY) || + table.getType().equals(NativeTable.Type.REVERSE_PROXIMITY)); + } + + public List<Pair<String, String>> deriveRankProperties() { + List<Pair<String, String>> properties = new ArrayList<>(); + for (NativeTable table : tables.values()) { + if (isFieldMatchTable(table)) + properties.add(new Pair<>("nativeFieldMatch." + table.getType().getName() + "." + fieldName, table.getName())); + if (isAttributeMatchTable(table)) + properties.add(new Pair<>("nativeAttributeMatch." + table.getType().getName() + "." + fieldName, table.getName())); + if (isProximityTable(table)) + properties.add(new Pair<>("nativeProximity." + table.getType().getName() + "." + fieldName, table.getName())); + } + return properties; + } + + @Override + public String toString() { + return "rank settings of field " + fieldName; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/FieldResultTransform.java b/config-model/src/main/java/com/yahoo/schema/derived/FieldResultTransform.java new file mode 100644 index 00000000000..99b2925d714 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/FieldResultTransform.java @@ -0,0 +1,57 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.vespa.documentmodel.SummaryTransform; + +/** + * The result transformation of a named field + * + * @author bratseth + */ +public class FieldResultTransform { + + private final String fieldName; + + private SummaryTransform transform; + + private final String argument; + + public FieldResultTransform(String fieldName, SummaryTransform transform, String argument) { + this.fieldName = fieldName; + this.transform = transform; + this.argument = argument; + } + + public String getFieldName() { return fieldName; } + + public SummaryTransform getTransform() { return transform; } + + public void setTransform(SummaryTransform transform) { this.transform = transform; } + + /** Returns the argument of this (used as input to the backend docsum rewriter) */ + public String getArgument() { return argument; } + + public int hashCode() { + return fieldName.hashCode() + 11 * transform.hashCode() + 17 * argument.hashCode(); + } + + @Override + public boolean equals(Object o) { + if (! (o instanceof FieldResultTransform)) return false; + FieldResultTransform other = (FieldResultTransform)o; + + return + this.fieldName.equals(other.fieldName) && + this.transform.equals(other.transform) && + this.argument.equals(other.argument); + } + + @Override + public String toString() { + String sourceString = ""; + if ( ! argument.equals(fieldName)) + sourceString = " (argument: " + argument + ")"; + return "field " + fieldName + ": " + transform + sourceString; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/FileDistributedConstants.java b/config-model/src/main/java/com/yahoo/schema/derived/FileDistributedConstants.java new file mode 100644 index 00000000000..05f6be2f6f1 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/FileDistributedConstants.java @@ -0,0 +1,87 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.config.application.api.FileRegistry; +import com.yahoo.schema.DistributableResource; +import com.yahoo.schema.RankProfile; +import com.yahoo.tensor.TensorType; +import com.yahoo.vespa.config.search.core.RankingConstantsConfig; + +import java.util.Collection; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * Constant values for ranking/model execution tied to a rank profile, + * to be distributed as files. + * + * @author bratseth + */ +public class FileDistributedConstants { + + private final Map<String, DistributableConstant> constants; + + public FileDistributedConstants(FileRegistry fileRegistry, Collection<RankProfile.Constant> constants) { + Map<String, DistributableConstant> distributableConstants = new LinkedHashMap<>(); + for (var constant : constants) { + if ( ! constant.valuePath().isPresent()) continue; + + var distributableConstant = new DistributableConstant(constant.name().simpleArgument().get(), + constant.type(), + constant.valuePath().get(), + constant.pathType().get()); + distributableConstant.validate(); + distributableConstant.register(fileRegistry); + distributableConstants.put(distributableConstant.getName(), distributableConstant); + } + this.constants = Collections.unmodifiableMap(distributableConstants); + } + + /** Returns a read-only map of the constants in this indexed by name. 
*/ + public Map<String, DistributableConstant> asMap() { return constants; } + + public void getConfig(RankingConstantsConfig.Builder builder) { + for (var constant : constants.values()) { + builder.constant(new RankingConstantsConfig.Constant.Builder() + .name(constant.getName()) + .fileref(constant.getFileReference()) + .type(constant.getType())); + } + } + + public static class DistributableConstant extends DistributableResource { + + private final TensorType tensorType; + + public DistributableConstant(String name, TensorType type, String fileName) { + this(name, type, fileName, PathType.FILE); + } + + public DistributableConstant(String name, TensorType type, String fileName, PathType pathType) { + super(name, fileName, pathType); + this.tensorType = type; + validate(); + } + + public TensorType getTensorType() { return tensorType; } + public String getType() { return tensorType.toString(); } + + public void validate() { + super.validate(); + if (tensorType == null) + throw new IllegalArgumentException("Ranking constant '" + getName() + "' must have a type."); + if (tensorType.dimensions().stream().anyMatch(d -> d.isIndexed() && d.size().isEmpty())) + throw new IllegalArgumentException("Illegal type in field " + getName() + " type " + tensorType + + ": Dense tensor dimensions must have a size"); + } + + @Override + public String toString() { + return super.toString() + "' of type '" + tensorType + "'"; + } + + } + +} + diff --git a/config-model/src/main/java/com/yahoo/schema/derived/FileDistributedOnnxModels.java b/config-model/src/main/java/com/yahoo/schema/derived/FileDistributedOnnxModels.java new file mode 100644 index 00000000000..b5c3909c78c --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/FileDistributedOnnxModels.java @@ -0,0 +1,60 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.derived; + +import com.yahoo.config.application.api.FileRegistry; +import com.yahoo.schema.OnnxModel; +import com.yahoo.vespa.config.search.core.OnnxModelsConfig; + +import java.util.Collection; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.logging.Logger; + +/** + * ONNX models distributed as files. + * + * @author bratseth + */ +public class FileDistributedOnnxModels { + + private static final Logger log = Logger.getLogger(FileDistributedOnnxModels.class.getName()); + + private final Map<String, OnnxModel> models; + + public FileDistributedOnnxModels(FileRegistry fileRegistry, Collection<OnnxModel> models) { + Map<String, OnnxModel> distributableModels = new LinkedHashMap<>(); + for (var model : models) { + model.validate(); + model.register(fileRegistry); + distributableModels.put(model.getName(), model); + } + this.models = Collections.unmodifiableMap(distributableModels); + } + + public Map<String, OnnxModel> asMap() { return models; } + + public void getConfig(OnnxModelsConfig.Builder builder) { + for (OnnxModel model : models.values()) { + if ("".equals(model.getFileReference())) + log.warning("Illegal file reference " + model); // Let tests pass ... 
we should find a better way + else { + OnnxModelsConfig.Model.Builder modelBuilder = new OnnxModelsConfig.Model.Builder(); + modelBuilder.dry_run_on_setup(true); + modelBuilder.name(model.getName()); + modelBuilder.fileref(model.getFileReference()); + model.getInputMap().forEach((name, source) -> modelBuilder.input(new OnnxModelsConfig.Model.Input.Builder().name(name).source(source))); + model.getOutputMap().forEach((name, as) -> modelBuilder.output(new OnnxModelsConfig.Model.Output.Builder().name(name).as(as))); + if (model.getStatelessExecutionMode().isPresent()) + modelBuilder.stateless_execution_mode(model.getStatelessExecutionMode().get()); + if (model.getStatelessInterOpThreads().isPresent()) + modelBuilder.stateless_interop_threads(model.getStatelessInterOpThreads().get()); + if (model.getStatelessIntraOpThreads().isPresent()) + modelBuilder.stateless_intraop_threads(model.getStatelessIntraOpThreads().get()); + + builder.model(modelBuilder); + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/ImportedFields.java b/config-model/src/main/java/com/yahoo/schema/derived/ImportedFields.java new file mode 100644 index 00000000000..fa3f49f06d5 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/ImportedFields.java @@ -0,0 +1,105 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.derived; + +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.GeoPos; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.schema.document.ImportedComplexField; +import com.yahoo.schema.document.ImportedField; +import com.yahoo.vespa.config.search.ImportedFieldsConfig; + +import java.util.Optional; + +import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isArrayOfSimpleStruct; +import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isMapOfPrimitiveType; +import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isMapOfSimpleStruct; + +/** + * This class derives imported fields from search definition and produces imported-fields.cfg as needed by the search backend. + * + * @author geirst + */ +public class ImportedFields extends Derived implements ImportedFieldsConfig.Producer { + + private Optional<com.yahoo.schema.document.ImportedFields> importedFields = Optional.empty(); + + public ImportedFields(Schema schema) { + derive(schema); + } + + @Override + protected void derive(Schema schema) { + importedFields = schema.importedFields(); + } + + @Override + protected String getDerivedName() { + return "imported-fields"; + } + + @Override + public void getConfig(ImportedFieldsConfig.Builder builder) { + if (importedFields.isPresent()) { + importedFields.get().fields().forEach( (name, field) -> considerField(builder, field)); + } + } + + private static boolean isNestedFieldName(String fieldName) { + return fieldName.indexOf('.') != -1; + } + + private static void considerField(ImportedFieldsConfig.Builder builder, ImportedField field) { + if (field instanceof ImportedComplexField) { + considerComplexField(builder, (ImportedComplexField) field); + } else { + considerSimpleField(builder, field); + } + } + + private static void considerComplexField(ImportedFieldsConfig.Builder builder, ImportedComplexField field) { + ImmutableSDField 
targetField = field.targetField(); + if (GeoPos.isAnyPos(targetField)) { + // no action needed + } else if (isArrayOfSimpleStruct(targetField)) { + considerNestedFields(builder, field); + } else if (isMapOfSimpleStruct(targetField)) { + considerSimpleField(builder, field.getNestedField("key")); + considerNestedFields(builder, field.getNestedField("value")); + } else if (isMapOfPrimitiveType(targetField)) { + considerSimpleField(builder, field.getNestedField("key")); + considerSimpleField(builder, field.getNestedField("value")); + } + } + + private static void considerNestedFields(ImportedFieldsConfig.Builder builder, ImportedField field) { + if (field instanceof ImportedComplexField) { + ImportedComplexField complexField = (ImportedComplexField) field; + complexField.getNestedFields().forEach(nestedField -> considerSimpleField(builder, nestedField)); + } + } + + private static void considerSimpleField(ImportedFieldsConfig.Builder builder, ImportedField field) { + ImmutableSDField targetField = field.targetField(); + String targetFieldName = targetField.getName(); + if (!isNestedFieldName(targetFieldName)) { + if (targetField.doesAttributing()) { + builder.attribute.add(createAttributeBuilder(field)); + } + } else { + Attribute attribute = targetField.getAttribute(); + if (attribute != null) { + builder.attribute.add(createAttributeBuilder(field)); + } + } + } + + private static ImportedFieldsConfig.Attribute.Builder createAttributeBuilder(ImportedField field) { + ImportedFieldsConfig.Attribute.Builder result = new ImportedFieldsConfig.Attribute.Builder(); + result.name(field.fieldName()); + result.referencefield(field.reference().referenceField().getName()); + result.targetfield(field.targetField().getName()); + return result; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/Index.java b/config-model/src/main/java/com/yahoo/schema/derived/Index.java new file mode 100644 index 00000000000..3b5e617d3dc --- /dev/null +++ 
b/config-model/src/main/java/com/yahoo/schema/derived/Index.java @@ -0,0 +1,64 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.document.CollectionDataType; +import com.yahoo.document.DataType; +import com.yahoo.document.NumericDataType; +import com.yahoo.document.datatypes.*; + +/** + * A type of an index structure + * + * @author bratseth + */ +public class Index { + + /** The index type enumeration */ + public static class Type { + + public static final Type TEXT=new Type("text"); + public static final Type INT64=new Type("long"); + public static final Type BOOLEANTREE=new Type("booleantree"); + + private String name; + + private Type(String name) { + this.name=name; + } + + public int hashCode() { + return name.hashCode(); + } + + public String getName() { return name; } + + public boolean equals(Object other) { + if ( ! (other instanceof Type)) return false; + return this.name.equals(((Type)other).name); + } + + public String toString() { + return "type: " + name; + } + + } + + /** Sets the right index type from a field type */ + public static Type convertType(DataType fieldType) { + FieldValue fval = fieldType.createFieldValue(); + if (fieldType instanceof NumericDataType) { + return Type.INT64; + } else if (fval instanceof StringFieldValue) { + return Type.TEXT; + } else if (fval instanceof Raw) { + return Type.BOOLEANTREE; + } else if (fval instanceof PredicateFieldValue) { + return Type.BOOLEANTREE; + } else if (fieldType instanceof CollectionDataType) { + return convertType(((CollectionDataType) fieldType).getNestedType()); + } else { + throw new IllegalArgumentException("Don't know which index type to " + + "convert " + fieldType + " to"); + } + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java b/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java new file mode 100644 index 
/**
 * Per-index commands which should be applied to queries prior to searching.
 *
 * Commands are collected in insertion order while deriving from a schema, and are written
 * to index-info config together with field aliases and commands for user defined field sets.
 *
 * @author bratseth
 */
public class IndexInfo extends Derived implements IndexInfoConfig.Producer {

    // The command strings emitted to config
    private static final String CMD_ATTRIBUTE = "attribute";
    private static final String CMD_DEFAULT_POSITION = "default-position";
    private static final String CMD_DYNTEASER = "dynteaser";
    private static final String CMD_FULLURL = "fullurl";
    private static final String CMD_HIGHLIGHT = "highlight";
    private static final String CMD_INDEX = "index";
    private static final String CMD_LOWERCASE = "lowercase";
    private static final String CMD_NORMALIZE = "normalize";
    private static final String CMD_STEM = "stem";
    private static final String CMD_URLHOST = "urlhost";
    private static final String CMD_WORD = "word";
    private static final String CMD_PLAIN_TOKENS = "plain-tokens";
    private static final String CMD_MULTIVALUE = "multivalue";
    private static final String CMD_FAST_SEARCH = "fast-search";
    private static final String CMD_PREDICATE = "predicate";
    private static final String CMD_PREDICATE_BOUNDS = "predicate-bounds";
    private static final String CMD_NUMERICAL = "numerical";
    private static final String CMD_PHRASE_SEGMENTING = "phrase-segmenting";
    // Insertion ordered and duplicate free: the order commands are added in is the order they are written to config
    private final Set<IndexCommand> commands = new java.util.LinkedHashSet<>();
    // Alias name -> index name, in insertion order
    private final Map<String, String> aliases = new java.util.LinkedHashMap<>();
    // The user defined field sets of the schema, by name
    private final Map<String, FieldSet> fieldSets;
    // Assigned in derive(Schema), after the per-field derivation has run
    private Schema schema;

    /**
     * Creates the index info of a schema. The "sddocname" index always gets the
     * index and word commands, and these are added before anything derived from the schema.
     */
    public IndexInfo(Schema schema) {
        this.fieldSets = schema.fieldSets().userFieldSets();
        addIndexCommand("sddocname", CMD_INDEX);
        addIndexCommand("sddocname", CMD_WORD);
        derive(schema);
    }

    /**
     * Derives commands in this order: per field (by the superclass), per explicit index,
     * and finally teaser/highlight commands for summary fields.
     */
    @Override
    protected void derive(Schema schema) {
        super.derive(schema); // Derive per field
        this.schema = schema;
        // Populate fieldsets with actual field objects, bit late to do that here but
        // NOTE(review): the result of schema.getField is added without a null check - confirm
        // that field set members are validated to exist before this point
        for (FieldSet fs : fieldSets.values()) {
            for (String fieldName : fs.getFieldNames()) {
                fs.fields().add(schema.getField(fieldName));
            }
        }
        // Must follow, because index settings overrides field settings
        for (Index index : schema.getExplicitIndices()) {
            derive(index, schema);
        }

        // Commands for summary fields
        // TODO: Move to fieldinfo and implement differently. This is not right
        for (SummaryField summaryField : schema.getUniqueNamedSummaryFields().values()) {
            if (summaryField.getTransform().isTeaser()) {
                addIndexCommand(summaryField.getName(), CMD_DYNTEASER);
            }
            if (summaryField.getTransform().isBolded()) {
                addIndexCommand(summaryField.getName(), CMD_HIGHLIGHT);
            }
        }
    }

    private static boolean isPositionField(ImmutableSDField field) {
        return GeoPos.isAnyPos(field);
    }

    /** Derives the commands of a top level field, i.e. one which is not inside a position field. */
    @Override
    protected void derive(ImmutableSDField field, Schema schema) {
        derive(field, schema, false);
    }

    /**
     * Derives the commands and aliases of a field, recursing into its struct fields first.
     *
     * @param field the field to derive commands for
     * @param schema the schema owning the field
     * @param inPosition whether the field is a struct field of a position field; such a field is
     *                   only marked as attribute if it does attributing itself
     */
    protected void derive(ImmutableSDField field, Schema schema, boolean inPosition) {
        if (field.getDataType().equals(DataType.PREDICATE)) {
            addIndexCommand(field, CMD_PREDICATE);
            Index index = field.getIndex(field.getName());
            if (index != null) {
                BooleanIndexDefinition options = index.getBooleanIndexDefiniton();
                if (options.hasLowerBound() || options.hasUpperBound()) {
                    // Produces "predicate-bounds [lower..upper]" where a missing bound is left empty
                    addIndexCommand(field.getName(), CMD_PREDICATE_BOUNDS + " [" +
                            (options.hasLowerBound() ? Long.toString(options.getLowerBound()) : "") + ".." +
                            (options.hasUpperBound() ? Long.toString(options.getUpperBound()) : "") + "]");
                }
            }
        }

        // Field level aliases
        for (Map.Entry<String, String> e : field.getAliasToName().entrySet()) {
            String alias = e.getKey();
            String name = e.getValue();
            addIndexAlias(alias, name);
        }
        boolean isPosition = isPositionField(field);
        if (field.usesStructOrMap()) {
            for (ImmutableSDField structField : field.getStructFields()) {
                derive(structField, schema, isPosition); // Recursion
            }
        }

        if (isPosition) {
            addIndexCommand(field.getName(), CMD_DEFAULT_POSITION);
        }

        addIndexCommand(field, CMD_INDEX); // List the indices

        if (needLowerCase(field)) {
            addIndexCommand(field, CMD_LOWERCASE);
        }

        if (field.getDataType().isMultivalue()) {
            addIndexCommand(field, CMD_MULTIVALUE);
        }

        Attribute attribute = field.getAttribute();
        // Attribute commands are only added to fields which are not also indexed;
        // indexed fields get the linguistics commands instead
        if ((field.doesAttributing() || (attribute != null && !inPosition)) && !field.doesIndexing()) {
            addIndexCommand(field.getName(), CMD_ATTRIBUTE);
            if (attribute != null && attribute.isFastSearch())
                addIndexCommand(field.getName(), CMD_FAST_SEARCH);
        } else if (field.doesIndexing()) {
            if (stemSomehow(field, schema)) {
                // The overrider substitutes the stemming of an index with the same name, when one is set
                addIndexCommand(field, stemCmd(field, schema), new StemmingOverrider(this, schema));
            }
            if (normalizeAccents(field)) {
                addIndexCommand(field, CMD_NORMALIZE);
            }
            if (field.getMatching() == null || field.getMatching().getType().equals(MatchType.TEXT)) {
                addIndexCommand(field, CMD_PLAIN_TOKENS);
            }
        }

        if (isUriField(field)) {
            addUriIndexCommands(field);
        }

        if (field.getDataType().getPrimitiveType() instanceof NumericDataType) {
            addIndexCommand(field, CMD_NUMERICAL);
        }

        // Explicit commands
        for (String command : field.getQueryCommands()) {
            addIndexCommand(field, command);
        }

    }

    /**
     * Returns whether the given type is a string, or is a struct or map type which
     * (recursively) contains a string. Other types without a primitive type return false.
     */
    private static boolean isAnyChildString(DataType dataType) {
        PrimitiveDataType primitive = dataType.getPrimitiveType();
        if (primitive == PrimitiveDataType.STRING) return true;
        if (primitive != null) return false;
        if (dataType instanceof StructuredDataType) {
            StructuredDataType structured = (StructuredDataType) dataType;
            for (Field field : structured.getFields()) {
                if (isAnyChildString(field.getDataType())) return true;
            }
        } else if (dataType instanceof MapDataType) {
            MapDataType mapType = (MapDataType) dataType;
            return isAnyChildString(mapType.getKeyType()) || isAnyChildString(mapType.getValueType());
        }
        return false;
    }

    /**
     * Returns whether queries to this field should be lowercased: true for fields which are indexed
     * or lowercased, and for uncased attributes containing strings.
     */
    // NOTE(review): getMatching() is dereferenced here without a null check, while
    // derive(field, schema, inPosition) treats a null matching as possible - confirm which holds
    private static boolean needLowerCase(ImmutableSDField field) {
        return field.doesIndexing()
                || field.doesLowerCasing()
                || ((field.doesAttributing() || (field.getAttribute() != null))
                    && isAnyChildString(field.getDataType())
                    && field.getMatching().getCase().equals(Case.UNCASED));
    }

    /** Returns the stem command of a field, on the form "stem:[stem mode]" */
    static String stemCmd(ImmutableSDField field, Schema schema) {
        return CMD_STEM + ":" + field.getStemming(schema).toStemMode();
    }

    /** Returns whether this field has stemming other than NONE and is a string, or an array or weighted set of strings */
    private boolean stemSomehow(ImmutableSDField field, Schema schema) {
        if (field.getStemming(schema).equals(Stemming.NONE)) return false;
        return isTypeOrNested(field, DataType.STRING);
    }

    /** Returns whether this field removes accents and is a string, or an array or weighted set of strings */
    private boolean normalizeAccents(ImmutableSDField field) {
        return field.getNormalizing().doRemoveAccents() && isTypeOrNested(field, DataType.STRING);
    }

    /** Returns whether the type of the field is the given type, or an array or weighted set of it */
    private boolean isTypeOrNested(ImmutableSDField field, DataType type) {
        return field.getDataType().equals(type) || field.getDataType().equals(DataType.getArray(type)) ||
               field.getDataType().equals(DataType.getWeightedSet(type));
    }

    /** Returns whether the type of the field is uri, or a collection of uri */
    private boolean isUriField(ImmutableSDField field) {
        DataType fieldType = field.getDataType();
        if (DataType.URI.equals(fieldType)) {
            return true;
        }
        if (fieldType instanceof CollectionDataType &&
            DataType.URI.equals(((CollectionDataType)fieldType).getNestedType()))
        {
            return true;
        }
        return false;
    }

    /**
     * Adds the commands of a uri field: fullurl and lowercase on the field itself and on the
     * sub-indexes named after the field, "path" and "query", and urlhost and lowercase on "hostname".
     */
    private void addUriIndexCommands(ImmutableSDField field) {
        String fieldName = field.getName();
        addIndexCommand(fieldName, CMD_FULLURL);
        addIndexCommand(fieldName, CMD_LOWERCASE);
        addIndexCommand(fieldName + "." + fieldName, CMD_FULLURL);
        addIndexCommand(fieldName + "." + fieldName, CMD_LOWERCASE);
        addIndexCommand(fieldName + ".path", CMD_FULLURL);
        addIndexCommand(fieldName + ".path", CMD_LOWERCASE);
        addIndexCommand(fieldName + ".query", CMD_FULLURL);
        addIndexCommand(fieldName + ".query", CMD_LOWERCASE);
        addIndexCommand(fieldName + ".hostname", CMD_URLHOST);
        addIndexCommand(fieldName + ".hostname", CMD_LOWERCASE);

        // XXX hack
        Index index = field.getIndex("hostname");
        if (index != null) {
            addIndexCommand(index, CMD_URLHOST);
        }
    }

    /**
     * Adds a command to the index having the name of the given index
     */
    private void addIndexCommand(Index index, String command) {
        addIndexCommand(index.getName(), command);
    }

    /**
     * Adds a command to the index having the name of the given field
     */
    private void addIndexCommand(ImmutableSDField field, String command) {
        addIndexCommand(field, command, null);
    }

    /**
     * Adds a command to the index having the name of the given field, unless the overrider
     * (if any) reports that it has handled the command itself
     */
    private void addIndexCommand(ImmutableSDField field, String command, IndexOverrider overrider) {
        if (overrider == null || !overrider.override(field.getName(), command, field)) {
            addIndexCommand(field.getName(), command);
        }
    }

    /** Adds a command to the named index. Adding the same index and command pair again has no effect. */
    private void addIndexCommand(String indexName, String command) {
        commands.add(new IndexCommand(indexName, command));
    }

    /** Registers an alias for an index name, replacing any earlier mapping of the same alias */
    private void addIndexAlias(String alias, String indexName) {
        aliases.put(alias, indexName);
    }

    /**
     * Returns whether a particular command is present in this index info
     */
    public boolean hasCommand(String indexName, String command) {
        return commands.contains(new IndexCommand(indexName, command));
    }

    /** Returns true if no command at all has been added for the given index name */
    private boolean notInCommands(String index) {
        for (IndexCommand command : commands) {
            if (command.getIndex().equals(index)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Writes one indexinfo entry containing all derived commands, then the commands of the user defined
     * field sets whose name has no derived commands, then all aliases.
     */
    @Override
    public void getConfig(IndexInfoConfig.Builder builder) {
        IndexInfoConfig.Indexinfo.Builder iiB = new IndexInfoConfig.Indexinfo.Builder();
        iiB.name(getName());
        for (IndexCommand command : commands) {
            iiB.command(
                    new IndexInfoConfig.Indexinfo.Command.Builder()
                            .indexname(command.getIndex())
                            .command(command.getCommand()));
        }
        // Make user defined field sets searchable
        for (FieldSet fieldSet : fieldSets.values()) {
            if (notInCommands(fieldSet.getName())) {
                addFieldSetCommands(iiB, fieldSet);
            }
        }

        for (Map.Entry<String, String> e : aliases.entrySet()) {
            iiB.alias(
                    new IndexInfoConfig.Indexinfo.Alias.Builder()
                            .alias(e.getKey())
                            .indexname(e.getValue()));
        }
        builder.indexinfo(iiB);
    }

    /**
     * Writes the commands of a field set directly to the given builder, using the name of the field set
     * as index name. The explicit query commands of the field set come first, followed by commands
     * decided from the settings of the fields in the set.
     */
    // TODO: Move this to the FieldSetSettings processor (and rename it) as that already has to look at this.
    private void addFieldSetCommands(IndexInfoConfig.Indexinfo.Builder iiB, FieldSet fieldSet) {
        for (String qc : fieldSet.queryCommands())
            iiB.command(new IndexInfoConfig.Indexinfo.Command.Builder().indexname(fieldSet.getName()).command(qc));
        boolean anyIndexing = false;
        boolean anyAttributing = false;
        boolean anyLowerCasing = false;
        boolean anyStemming = false;
        boolean anyNormalizing = false;
        String phraseSegmentingCommand = null;
        String stemmingCommand = null;
        Matching fieldSetMatching = fieldSet.getMatching(); // null if no explicit matching
        // First a pass over the fields to read some params to decide field settings implicitly:
        for (ImmutableSDField field : fieldSet.fields()) {
            if (field.doesIndexing()) {
                anyIndexing = true;
            }
            if (field.doesAttributing()) {
                anyAttributing = true;
            }
            if (needLowerCase(field)) {
                anyLowerCasing = true;
            }
            if (stemming(field)) {
                anyStemming = true;
                // If several fields stem, the last one decides the stem mode
                stemmingCommand = CMD_STEM + ":" + getEffectiveStemming(field).toStemMode();
            }
            if (field.getNormalizing().doRemoveAccents()) {
                anyNormalizing = true;
            }
            // Without explicit matching, the first field with non-default match type decides
            if (fieldSetMatching == null && field.getMatching().getType() != Matching.defaultType) {
                fieldSetMatching = field.getMatching();
            }
            // If several fields have a phrase segmenting command, the last one wins
            Optional<String> explicitPhraseSegmentingCommand = field.getQueryCommands().stream().filter(c -> c.startsWith(CMD_PHRASE_SEGMENTING)).findFirst();
            if (explicitPhraseSegmentingCommand.isPresent()) {
                phraseSegmentingCommand = explicitPhraseSegmentingCommand.get();
            }
        }
        if (anyIndexing && anyAttributing && fieldSet.getMatching() == null) {
            // We have both attributes and indexes and no explicit match setting ->
            // use default matching as that at least works if the data in the attribute consists
            // of single tokens only.
            fieldSetMatching = new Matching();
        }
        if (anyLowerCasing) {
            iiB.command(
                    new IndexInfoConfig.Indexinfo.Command.Builder()
                            .indexname(fieldSet.getName())
                            .command(CMD_LOWERCASE));
        }
        if (hasMultiValueField(fieldSet)) {
            iiB.command(
                    new IndexInfoConfig.Indexinfo.Command.Builder()
                            .indexname(fieldSet.getName())
                            .command(CMD_MULTIVALUE));
        }
        if (anyIndexing) {
            iiB.command(
                    new IndexInfoConfig.Indexinfo.Command.Builder()
                            .indexname(fieldSet.getName())
                            .command(CMD_INDEX));
            // Linguistics commands are only added when matching is not exact or word
            if ( ! isExactMatch(fieldSetMatching)) {
                if (fieldSetMatching == null || fieldSetMatching.getType().equals(MatchType.TEXT)) {
                    iiB.command(
                            new IndexInfoConfig.Indexinfo.Command.Builder()
                                    .indexname(fieldSet.getName())
                                    .command(CMD_PLAIN_TOKENS));
                }
                if (anyStemming) {
                    iiB.command(
                            new IndexInfoConfig.Indexinfo.Command.Builder()
                                    .indexname(fieldSet.getName())
                                    .command(stemmingCommand));
                }
                if (anyNormalizing)
                    iiB.command(
                            new IndexInfoConfig.Indexinfo.Command.Builder()
                                    .indexname(fieldSet.getName())
                                    .command(CMD_NORMALIZE));
                if (phraseSegmentingCommand != null)
                    iiB.command(
                            new IndexInfoConfig.Indexinfo.Command.Builder()
                                    .indexname(fieldSet.getName())
                                    .command(phraseSegmentingCommand));
            }
        } else {
            // Assume only attribute fields
            iiB
                .command(
                        new IndexInfoConfig.Indexinfo.Command.Builder()
                                .indexname(fieldSet.getName())
                                .command(CMD_ATTRIBUTE))
                .command(
                        new IndexInfoConfig.Indexinfo.Command.Builder()
                                .indexname(fieldSet.getName())
                                .command(CMD_INDEX));
        }
        if (fieldSetMatching != null) {
            // Matching is set on the fieldset explicitly, taken from a field, or defaulted above
            if (fieldSetMatching.getType().equals(MatchType.EXACT)) {
                String term = fieldSetMatching.getExactMatchTerminator();
                if (term==null) term=ExactMatch.DEFAULT_EXACT_TERMINATOR;
                iiB.command(
                        new IndexInfoConfig.Indexinfo.Command.Builder()
                                .indexname(fieldSet.getName())
                                .command("exact "+term));
            } else if (fieldSetMatching.getType().equals(MatchType.WORD)) {
                iiB.command(
                        new IndexInfoConfig.Indexinfo.Command.Builder()
                                .indexname(fieldSet.getName())
                                .command(CMD_WORD));
            } else if (fieldSetMatching.getType().equals(MatchType.GRAM)) {
                iiB.command(
                        new IndexInfoConfig.Indexinfo.Command.Builder()
                                .indexname(fieldSet.getName())
                                .command("ngram "+(fieldSetMatching.getGramSize()>0 ? fieldSetMatching.getGramSize() : NGramMatch.DEFAULT_GRAM_SIZE)));
            } else if (fieldSetMatching.getType().equals(MatchType.TEXT)) {
                // Text matching needs no command of its own
            }
        }
    }

    /** Returns whether any field of the given field set has a multivalue data type */
    private boolean hasMultiValueField(FieldSet fieldSet) {
        for (ImmutableSDField field : fieldSet.fields()) {
            if (field.getDataType().isMultivalue())
                return true;
        }
        return false;
    }

    /**
     * Returns the stemming to use for a field: that of the index named as the field if it has one set,
     * otherwise that of the field, falling back to BEST.
     */
    private Stemming getEffectiveStemming(ImmutableSDField field) {
        Stemming active = field.getStemming(schema);
        if (field.getIndex(field.getName()) != null) {
            if (field.getIndex(field.getName()).getStemming()!=null) {
                active = field.getIndex(field.getName()).getStemming();
            }
        }
        if (active != null) {
            return active;
        }
        return Stemming.BEST; // assume default
    }

    /**
     * Returns whether a field in a field set is stemmed. Stemming set on the field itself decides,
     * otherwise it is false if the schema has stemming NONE or the field is imported, and else decided
     * by the index named as the field, where no index or no stemming setting means true.
     */
    private boolean stemming(ImmutableSDField field) {
        if (field.getStemming() != null) {
            return !field.getStemming().equals(Stemming.NONE);
        }
        if (schema.getStemming() == Stemming.NONE) return false;
        if (field.isImportedField()) return false;
        if (field.getIndex(field.getName())==null) return true;
        if (field.getIndex(field.getName()).getStemming()==null) return true;
        return !(field.getIndex(field.getName()).getStemming().equals(Stemming.NONE));
    }

    /** Returns whether the given matching is of type exact or word; null matching is neither */
    private boolean isExactMatch(Matching m) {
        if (m == null) return false;
        if (m.getType().equals(MatchType.EXACT)) return true;
        if (m.getType().equals(MatchType.WORD)) return true;
        return false;
    }

    @Override
    protected String getDerivedName() {
        return "index-info";
    }

    /**
     * An index command. Null commands are also represented, to detect consistency issues. This is a value
     * object: equality and hash code are decided by the index name and command string. It has no
     * mutating methods.
     */
    public static class IndexCommand {

        private String index;

        private String command;

        public IndexCommand(String index, String command) {
            this.index = index;
            this.command = command;
        }

        public String getIndex() {
            return index;
        }

        public String getCommand() {
            return command;
        }

        /**
         * Returns true if this is the null command (do nothing), which is represented by the empty string
         */
        public boolean isNull() {
            return command.equals("");
        }

        public int hashCode() {
            return index.hashCode() + 17 * command.hashCode();
        }

        public boolean equals(Object object) {
            if (!(object instanceof IndexCommand)) {
                return false;
            }

            IndexCommand other = (IndexCommand)object;
            return
                    other.index.equals(this.index) &&
                    other.command.equals(this.command);
        }

        public String toString() {
            return "index command " + command + " on index " + index;
        }

    }

    /**
     * A command which may override the command setting of a field for a particular index
     */
    private static abstract class IndexOverrider {

        /** The index info which overriding commands are added to */
        protected IndexInfo owner;

        public IndexOverrider(IndexInfo owner) {
            this.owner = owner;
        }

        /**
         * Override the setting of this index for this field, returns true if overridden, false if this index should be
         * set according to the field
         */
        public abstract boolean override(String indexName, String command, ImmutableSDField field);

    }

    /** Replaces the stem command of a field by the stemming set on the index having the same name, if any */
    private static class StemmingOverrider extends IndexOverrider {

        private Schema schema;

        public StemmingOverrider(IndexInfo owner, Schema schema) {
            super(owner);
            this.schema = schema;
        }

        /**
         * Returns false, leaving the command of the field in effect, if there is no schema, no index with
         * the given name, or no stemming set on that index. Otherwise returns true, after adding the stem
         * command of the index to the owner, or adding nothing if the index stemming is NONE.
         */
        public boolean override(String indexName, String command, ImmutableSDField field) {
            if (schema == null) {
                return false;
            }

            Index index = schema.getIndex(indexName);
            if (index == null) {
                return false;
            }

            Stemming indexStemming = index.getStemming();
            if (indexStemming == null) {
                return false;
            }

            if (Stemming.NONE.equals(indexStemming)) {
                // Add nothing
            } else {
                owner.addIndexCommand(indexName, CMD_STEM + ":" + indexStemming.toStemMode());
            }
            return true;
        }

    }

}
+ * + * @author geirst + */ +public class IndexSchema extends Derived implements IndexschemaConfig.Producer { + + private final List<IndexField> fields = new ArrayList<>(); + private final Map<String, FieldCollection> collections = new LinkedHashMap<>(); + private final Map<String, FieldSet> fieldSets = new LinkedHashMap<>(); + + public IndexSchema(Schema schema) { + fieldSets.putAll(schema.fieldSets().userFieldSets()); + derive(schema); + } + + public boolean containsField(String fieldName) { + return fields.stream().anyMatch(field -> field.getName().equals(fieldName)); + } + + @Override + protected void derive(Schema schema) { + super.derive(schema); + } + + private boolean isTensorField(ImmutableSDField field) { + return field.getDataType() instanceof TensorDataType; + } + + private void deriveIndexFields(ImmutableSDField field, Schema schema) { + // Note: Indexes for tensor fields are NOT part of the index schema for text fields. + if ((!field.doesIndexing() && !field.isIndexStructureField()) || + isTensorField(field)) + { + return; + } + List<Field> lst = flattenField(field.asField()); + if (lst.isEmpty()) { + return; + } + String fieldName = field.getName(); + for (Field flatField : lst) { + deriveIndexFields(flatField, schema); + } + if (lst.size() > 1) { + FieldSet fieldSet = new FieldSet(fieldName); + for (Field flatField : lst) { + fieldSet.addFieldName(flatField.getName()); + } + fieldSets.put(fieldName, fieldSet); + } + } + + private void deriveIndexFields(Field field, Schema schema) { + IndexField toAdd = new IndexField(field.getName(), Index.convertType(field.getDataType()), field.getDataType()); + com.yahoo.schema.Index definedIndex = schema.getIndex(field.getName()); + if (definedIndex != null) { + toAdd.setIndexSettings(definedIndex); + } + fields.add(toAdd); + addFieldToCollection(field.getName(), field.getName()); // implicit + } + + private FieldCollection getCollection(String collectionName) { + FieldCollection retval = 
collections.get(collectionName); + if (retval == null) { + collections.put(collectionName, new FieldCollection(collectionName)); + return collections.get(collectionName); + } + return retval; + } + + private void addFieldToCollection(String fieldName, String collectionName) { + FieldCollection collection = getCollection(collectionName); + collection.fields.add(fieldName); + } + + @Override + protected void derive(ImmutableSDField field, Schema schema) { + if (field.usesStructOrMap()) { + return; // unsupported + } + deriveIndexFields(field, schema); + } + + @Override + protected String getDerivedName() { + return "indexschema"; + } + + @Override + public void getConfig(IndexschemaConfig.Builder icB) { + for (int i = 0; i < fields.size(); ++i) { + IndexField f = fields.get(i); + IndexschemaConfig.Indexfield.Builder ifB = new IndexschemaConfig.Indexfield.Builder() + .name(f.getName()) + .datatype(IndexschemaConfig.Indexfield.Datatype.Enum.valueOf(f.getType())) + .prefix(f.hasPrefix()) + .phrases(f.hasPhrases()) + .positions(f.hasPositions()) + .interleavedfeatures(f.useInterleavedFeatures()); + if (!f.getCollectionType().equals("SINGLE")) { + ifB.collectiontype(IndexschemaConfig.Indexfield.Collectiontype.Enum.valueOf(f.getCollectionType())); + } + icB.indexfield(ifB); + } + for (FieldSet fieldSet : fieldSets.values()) { + IndexschemaConfig.Fieldset.Builder fsB = new IndexschemaConfig.Fieldset.Builder() + .name(fieldSet.getName()); + for (String f : fieldSet.getFieldNames()) { + fsB.field(new IndexschemaConfig.Fieldset.Field.Builder() + .name(f)); + } + icB.fieldset(fsB); + } + } + + @SuppressWarnings("deprecation") + static List<Field> flattenField(Field field) { + DataType fieldType = field.getDataType(); + if (fieldType.getPrimitiveType() != null){ + return Collections.singletonList(field); + } + if (fieldType instanceof ArrayDataType) { + List<Field> ret = new LinkedList<>(); + Field innerField = new Field(field.getName(), 
((ArrayDataType)fieldType).getNestedType()); + for (Field flatField : flattenField(innerField)) { + ret.add(new Field(flatField.getName(), DataType.getArray(flatField.getDataType()))); + } + return ret; + } + if (fieldType instanceof StructuredDataType) { + List<Field> ret = new LinkedList<>(); + String fieldName = field.getName(); + for (Field childField : ((StructuredDataType)fieldType).getFields()) { + for (Field flatField : flattenField(childField)) { + ret.add(new Field(fieldName + "." + flatField.getName(), flatField)); + } + } + return ret; + } + throw new UnsupportedOperationException(fieldType.getName()); + } + + public List<IndexField> getFields() { + return fields; + } + + /** + * Representation of an index field with name and data type. + */ + public static class IndexField { + private String name; + private Index.Type type; + private com.yahoo.schema.Index.Type sdType; // The index type in "user intent land" + private DataType sdFieldType; + private boolean prefix = false; + private boolean phrases = false; // TODO dead, but keep a while to ensure config compatibility? + private boolean positions = true;// TODO dead, but keep a while to ensure config compatibility? + private BooleanIndexDefinition boolIndex = null; + // Whether the posting lists of this index field should have interleaved features (num occs, field length) in document id stream. 
+ private boolean interleavedFeatures = false; + + public IndexField(String name, Index.Type type, DataType sdFieldType) { + this.name = name; + this.type = type; + this.sdFieldType = sdFieldType; + } + public void setIndexSettings(com.yahoo.schema.Index index) { + if (type.equals(Index.Type.TEXT)) { + prefix = index.isPrefix(); + interleavedFeatures = index.useInterleavedFeatures(); + } + sdType = index.getType(); + boolIndex = index.getBooleanIndexDefiniton(); + } + public String getName() { return name; } + public Index.Type getRawType() { return type; } + public String getType() { + return type.equals(Index.Type.INT64) + ? "INT64" : "STRING"; + } + public String getCollectionType() { + return (sdFieldType == null) + ? "SINGLE" + : (sdFieldType instanceof WeightedSetDataType) + ? "WEIGHTEDSET" + : (sdFieldType instanceof ArrayDataType) + ? "ARRAY" + : "SINGLE"; + } + public boolean hasPrefix() { return prefix; } + public boolean hasPhrases() { return phrases; } + public boolean hasPositions() { return positions; } + public boolean useInterleavedFeatures() { return interleavedFeatures; } + + public BooleanIndexDefinition getBooleanIndexDefinition() { + return boolIndex; + } + + /** + * The user set index type + * @return the type + */ + public com.yahoo.schema.Index.Type getSdType() { + return sdType; + } + } + + /** + * Representation of a collection of fields (aka index, physical view). 
+ */ + @SuppressWarnings({ "UnusedDeclaration" }) + private static class FieldCollection { + + private final String name; + private final List<String> fields = new ArrayList<>(); + + FieldCollection(String name) { + this.name = name; + } + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java b/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java new file mode 100644 index 00000000000..6dae89bf692 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java @@ -0,0 +1,197 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.GeoPos; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.vespa.configdefinition.IlscriptsConfig; +import com.yahoo.vespa.configdefinition.IlscriptsConfig.Ilscript.Builder; +import com.yahoo.vespa.indexinglanguage.ExpressionVisitor; +import com.yahoo.vespa.indexinglanguage.expressions.ClearStateExpression; +import com.yahoo.vespa.indexinglanguage.expressions.Expression; +import com.yahoo.vespa.indexinglanguage.expressions.GuardExpression; +import com.yahoo.vespa.indexinglanguage.expressions.InputExpression; +import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression; +import com.yahoo.vespa.indexinglanguage.expressions.PassthroughExpression; +import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; +import com.yahoo.vespa.indexinglanguage.expressions.SetLanguageExpression; +import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression; +import com.yahoo.vespa.indexinglanguage.expressions.ZCurveExpression; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * An indexing language script derived 
from a search definition. An indexing script contains a set of indexing + * statements, organized in a composite structure of indexing code snippets. + * + * @author bratseth + */ +public final class IndexingScript extends Derived implements IlscriptsConfig.Producer { + + private final List<String> docFields = new ArrayList<>(); + private final List<Expression> expressions = new ArrayList<>(); + private List<ImmutableSDField> fieldsSettingLanguage; + + public IndexingScript(Schema schema) { + derive(schema); + } + + @Override + protected void derive(Schema schema) { + fieldsSettingLanguage = fieldsSettingLanguage(schema); + if (fieldsSettingLanguage.size() == 1) // Assume this language should be used for all fields + addExpression(fieldsSettingLanguage.get(0).getIndexingScript()); + super.derive(schema); + } + + @Override + protected void derive(ImmutableSDField field, Schema schema) { + if (field.isImportedField()) return; + + if (field.hasFullIndexingDocprocRights()) + docFields.add(field.getName()); + + if (field.usesStructOrMap() && ! GeoPos.isAnyPos(field)) { + return; // unsupported + } + + if (fieldsSettingLanguage.size() == 1 && fieldsSettingLanguage.get(0).equals(field)) + return; // Already added + + addExpression(field.getIndexingScript()); + } + + private void addExpression(ScriptExpression expression) { + if ( expression.isEmpty()) return; + expressions.add(new StatementExpression(new ClearStateExpression(), new GuardExpression(expression))); + } + + private List<ImmutableSDField> fieldsSettingLanguage(Schema schema) { + return schema.allFieldsList().stream() + .filter(field -> ! 
field.isImportedField()) + .filter(field -> field.containsExpression(SetLanguageExpression.class)) + .collect(Collectors.toList()); + } + + public Iterable<Expression> expressions() { + return Collections.unmodifiableCollection(expressions); + } + + @Override + public String getDerivedName() { + return "ilscripts"; + } + + @Override + public void getConfig(IlscriptsConfig.Builder configBuilder) { + IlscriptsConfig.Ilscript.Builder ilscriptBuilder = new IlscriptsConfig.Ilscript.Builder(); + ilscriptBuilder.doctype(getName()); + ilscriptBuilder.docfield(docFields); + addContentInOrder(ilscriptBuilder); + configBuilder.ilscript(ilscriptBuilder); + } + + private void addContentInOrder(IlscriptsConfig.Ilscript.Builder ilscriptBuilder) { + ArrayList<Expression> later = new ArrayList<>(); + Set<String> touchedFields = new HashSet<>(); + for (Expression expression : expressions) { + if (modifiesSelf(expression) && ! setsLanguage(expression)) + later.add(expression); + else + ilscriptBuilder.content(expression.toString()); + + FieldScanVisitor fieldFetcher = new FieldScanVisitor(); + fieldFetcher.visit(expression); + touchedFields.addAll(fieldFetcher.touchedFields()); + } + for (Expression exp : later) + ilscriptBuilder.content(exp.toString()); + generateSyntheticStatementsForUntouchedFields(ilscriptBuilder, touchedFields); + } + + private void generateSyntheticStatementsForUntouchedFields(Builder ilscriptBuilder, Set<String> touchedFields) { + Set<String> fieldsWithSyntheticStatements = new HashSet<>(docFields); + fieldsWithSyntheticStatements.removeAll(touchedFields); + List<String> orderedFields = new ArrayList<>(fieldsWithSyntheticStatements); + Collections.sort(orderedFields); + for (String fieldName : orderedFields) { + StatementExpression copyField = new StatementExpression(new InputExpression(fieldName), + new PassthroughExpression(fieldName)); + ilscriptBuilder.content(copyField.toString()); + } + } + + private boolean setsLanguage(Expression expression) { + 
SetsLanguageVisitor visitor = new SetsLanguageVisitor(); + visitor.visit(expression); + return visitor.setsLanguage; + } + + private boolean modifiesSelf(Expression expression) { + ModifiesSelfVisitor visitor = new ModifiesSelfVisitor(); + visitor.visit(expression); + return visitor.modifiesSelf(); + } + + private static class ModifiesSelfVisitor extends ExpressionVisitor { + + private String inputField = null; + private String outputField = null; + + public boolean modifiesSelf() { return outputField != null && outputField.equals(inputField); } + + @Override + protected void doVisit(Expression expression) { + if (modifiesSelf()) return; + + if (expression instanceof InputExpression) { + inputField = ((InputExpression) expression).getFieldName(); + } + if (expression instanceof OutputExpression) { + outputField = ((OutputExpression) expression).getFieldName(); + } + } + } + + private static class SetsLanguageVisitor extends ExpressionVisitor { + + boolean setsLanguage = false; + + @Override + protected void doVisit(Expression expression) { + if (expression instanceof SetLanguageExpression) + setsLanguage = true; + } + + } + + private static class FieldScanVisitor extends ExpressionVisitor { + List<String> touchedFields = new ArrayList<String>(); + List<String> candidates = new ArrayList<String>(); + + @Override + protected void doVisit(Expression exp) { + if (exp instanceof OutputExpression) { + touchedFields.add(((OutputExpression) exp).getFieldName()); + } + if (exp instanceof InputExpression) { + candidates.add(((InputExpression) exp).getFieldName()); + } + if (exp instanceof ZCurveExpression) { + touchedFields.addAll(candidates); + } + } + + Collection<String> touchedFields() { + Collection<String> output = touchedFields; + touchedFields = null; // deny re-use to try and avoid obvious bugs + return output; + } + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/Juniperrc.java 
b/config-model/src/main/java/com/yahoo/schema/derived/Juniperrc.java new file mode 100644 index 00000000000..162efbb25b4 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/Juniperrc.java @@ -0,0 +1,62 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.schema.Schema; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.config.search.summary.JuniperrcConfig; + +import java.util.Set; + +/** + * Generated juniperrc-config for controlling juniper. + * + * @author Simon Thoresen Hult + */ +public class Juniperrc extends Derived implements JuniperrcConfig.Producer { + + // List of all fields that should be bolded. + private Set<String> boldingFields = new java.util.LinkedHashSet<>(); + + /** + * Constructs a new juniper rc instance for a given search object. This will derive the configuration automatically, + * so there is no need to call {@link #derive(Schema)}. + * + * @param schema The search model to use for deriving. + */ + public Juniperrc(Schema schema) { + derive(schema); + } + + // Inherit doc from Derived. + @Override + protected void derive(Schema schema) { + super.derive(schema); + for (SummaryField summaryField : schema.getUniqueNamedSummaryFields().values()) { + if (summaryField.getTransform() == SummaryTransform.BOLDED) { + boldingFields.add(summaryField.getName()); + } + } + } + + // Inherit doc from Derived. 
+ @Override + protected String getDerivedName() { + return "juniperrc"; + } + + @Override + public void getConfig(JuniperrcConfig.Builder builder) { + if (boldingFields.size() != 0) { + builder.prefix(true); + for (String name : boldingFields) { + builder.override(new JuniperrcConfig.Override.Builder() + .fieldname(name) + .length(65536) + .max_matches(1) + .min_length(8192) + .surround_max(65536)); + } + } + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/NativeRankTypeDefinition.java b/config-model/src/main/java/com/yahoo/schema/derived/NativeRankTypeDefinition.java new file mode 100644 index 00000000000..7d558ea51cc --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/NativeRankTypeDefinition.java @@ -0,0 +1,44 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.schema.document.RankType; + +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +/** + * The definition of a rank type used for native rank features. 
+ * + * @author geirst + */ +public class NativeRankTypeDefinition { + + /** The type this defines */ + private RankType type; + + /** The rank tables of this rank type */ + private List<NativeTable> rankTables = new java.util.ArrayList<>(); + + public NativeRankTypeDefinition(RankType type) { + this.type = type; + } + + public RankType getType() { + return type; + } + + public void addTable(NativeTable table) { + rankTables.add(table); + } + + /** Returns an unmodifiable list of the tables in this type definition */ + public Iterator<NativeTable> rankSettingIterator() { + return Collections.unmodifiableList(rankTables).iterator(); + } + + public String toString() { + return "native definition of rank type '" + type + "'"; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/NativeRankTypeDefinitionSet.java b/config-model/src/main/java/com/yahoo/schema/derived/NativeRankTypeDefinitionSet.java new file mode 100644 index 00000000000..65e68181b5b --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/NativeRankTypeDefinitionSet.java @@ -0,0 +1,93 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.schema.document.RankType; + +import java.util.Collections; +import java.util.Map; + +/** + * A set of rank type definitions used for native rank features. 
+ * + * @author geirst + */ +public class NativeRankTypeDefinitionSet { + + /** The name of this rank definition set */ + private String name; + + /** The unmodifiable rank type implementations in this set */ + private final Map<RankType, NativeRankTypeDefinition> typeDefinitions; + + /** Returns the default rank type (about) */ + public static RankType getDefaultRankType() { return RankType.ABOUT; } + + public NativeRankTypeDefinitionSet(String name) { + this.name = name; + + Map<RankType, NativeRankTypeDefinition> typeDefinitions = new java.util.LinkedHashMap<>(); + typeDefinitions.put(RankType.IDENTITY, createIdentityRankType(RankType.IDENTITY)); + typeDefinitions.put(RankType.ABOUT, createAboutRankType(RankType.ABOUT)); + typeDefinitions.put(RankType.TAGS, createTagsRankType(RankType.TAGS)); + typeDefinitions.put(RankType.EMPTY, createEmptyRankType(RankType.EMPTY)); + this.typeDefinitions = Collections.unmodifiableMap(typeDefinitions); + } + + private NativeRankTypeDefinition createEmptyRankType(RankType type) { + NativeRankTypeDefinition rank = new NativeRankTypeDefinition(type); + rank.addTable(new NativeTable(NativeTable.Type.FIRST_OCCURRENCE, "linear(0,0)")); + rank.addTable(new NativeTable(NativeTable.Type.OCCURRENCE_COUNT, "linear(0,0)")); + rank.addTable(new NativeTable(NativeTable.Type.PROXIMITY, "linear(0,0)")); + rank.addTable(new NativeTable(NativeTable.Type.REVERSE_PROXIMITY, "linear(0,0)")); + rank.addTable(new NativeTable(NativeTable.Type.WEIGHT, "linear(0,0)")); + return rank; + } + + private NativeRankTypeDefinition createAboutRankType(RankType type) { + NativeRankTypeDefinition rank = new NativeRankTypeDefinition(type); + rank.addTable(new NativeTable(NativeTable.Type.FIRST_OCCURRENCE, "expdecay(8000,12.50)")); + rank.addTable(new NativeTable(NativeTable.Type.OCCURRENCE_COUNT, "loggrowth(1500,4000,19)")); + rank.addTable(new NativeTable(NativeTable.Type.PROXIMITY, "expdecay(500,3)")); + rank.addTable(new 
NativeTable(NativeTable.Type.REVERSE_PROXIMITY, "expdecay(400,3)")); + rank.addTable(new NativeTable(NativeTable.Type.WEIGHT, "linear(1,0)")); + return rank; + } + + private NativeRankTypeDefinition createIdentityRankType(RankType type) { + NativeRankTypeDefinition rank = new NativeRankTypeDefinition(type); + rank.addTable(new NativeTable(NativeTable.Type.FIRST_OCCURRENCE, "expdecay(100,12.50)")); + rank.addTable(new NativeTable(NativeTable.Type.OCCURRENCE_COUNT, "loggrowth(1500,4000,19)")); + rank.addTable(new NativeTable(NativeTable.Type.PROXIMITY, "expdecay(5000,3)")); + rank.addTable(new NativeTable(NativeTable.Type.REVERSE_PROXIMITY, "expdecay(3000,3)")); + rank.addTable(new NativeTable(NativeTable.Type.WEIGHT, "linear(1,0)")); + return rank; + } + + private NativeRankTypeDefinition createTagsRankType(RankType type) { + NativeRankTypeDefinition rank = new NativeRankTypeDefinition(type); + rank.addTable(new NativeTable(NativeTable.Type.FIRST_OCCURRENCE, "expdecay(8000,12.50)")); + rank.addTable(new NativeTable(NativeTable.Type.OCCURRENCE_COUNT, "loggrowth(1500,4000,19)")); + rank.addTable(new NativeTable(NativeTable.Type.PROXIMITY, "expdecay(500,3)")); + rank.addTable(new NativeTable(NativeTable.Type.REVERSE_PROXIMITY, "expdecay(400,3)")); + rank.addTable(new NativeTable(NativeTable.Type.WEIGHT, "loggrowth(38,50,1)")); + return rank; + } + + /** + * Returns a rank type definition if given an existing rank type name, + * or null if given a rank type which has no native implementation (meaning somebody forgot to add it), + */ + public NativeRankTypeDefinition getRankTypeDefinition(RankType type) { + if (type == RankType.DEFAULT) + type = getDefaultRankType(); + return typeDefinitions.get(type); + } + + /** Returns an unmodifiable map of the type definitions in this */ + public Map<RankType, NativeRankTypeDefinition> types() { return typeDefinitions; } + + public String toString() { + return "native rank type definitions " + name; + } + +} diff --git 
a/config-model/src/main/java/com/yahoo/schema/derived/NativeTable.java b/config-model/src/main/java/com/yahoo/schema/derived/NativeTable.java new file mode 100644 index 00000000000..6eff2487bca --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/NativeTable.java @@ -0,0 +1,72 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +/** + * A named rank table of a certain type. + * + * @author geirst + */ +public class NativeTable { + + private String name; + + private Type type; + + /** A table type enumeration */ + public static class Type { + + public static Type FIRST_OCCURRENCE = new Type("firstOccurrenceTable"); + public static Type OCCURRENCE_COUNT = new Type("occurrenceCountTable"); + public static Type WEIGHT = new Type("weightTable"); + public static Type PROXIMITY = new Type("proximityTable"); + public static Type REVERSE_PROXIMITY = new Type("reverseProximityTable"); + + private String name; + + private Type(String name) { + this.name = name; + } + + public String getName() { return name; } + + public boolean equals(Object object) { + if (!(object instanceof Type)) { + return false; + } + Type other = (Type)object; + return this.name.equals(other.name); + } + + public int hashCode() { + return name.hashCode(); + } + + public String toString() { + return getName(); + } + } + + public NativeTable(Type type, String name) { + this.type = type; + this.name = name; + } + + public Type getType() { return type; } + + public String getName() { return name; } + + public int hashCode() { + return type.hashCode() + 17*name.hashCode(); + } + + public boolean equals(Object object) { + if (! 
(object instanceof NativeTable)) return false; + NativeTable other = (NativeTable)object; + return other.getName().equals(this.getName()) && other.getType().equals(this.getType()); + } + + public String toString() { + return getType() + ": " + getName(); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/RankProfileList.java b/config-model/src/main/java/com/yahoo/schema/derived/RankProfileList.java new file mode 100644 index 00000000000..98815410876 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/RankProfileList.java @@ -0,0 +1,210 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import ai.vespa.rankingexpression.importer.configmodelview.ImportedMlModels; +import com.yahoo.config.model.api.ModelContext; +import com.yahoo.config.model.deploy.DeployState; +import com.yahoo.search.query.profile.QueryProfileRegistry; +import com.yahoo.schema.LargeRankExpressions; +import com.yahoo.schema.OnnxModel; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.searchlib.rankingexpression.Reference; +import com.yahoo.vespa.config.search.RankProfilesConfig; +import com.yahoo.schema.RankProfile; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.config.search.core.OnnxModelsConfig; +import com.yahoo.vespa.config.search.core.RankingConstantsConfig; +import com.yahoo.vespa.config.search.core.RankingExpressionsConfig; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; + +/** + * The derived rank profiles of a schema + * + * @author bratseth + */ +public class RankProfileList extends Derived implements RankProfilesConfig.Producer { + + private final 
Map<String, RawRankProfile> rankProfiles; // the derived profiles, keyed by profile name
    private final FileDistributedConstants constants;
    private final LargeRankExpressions largeRankExpressions;
    private final FileDistributedOnnxModels onnxModels;

    /** A list with no rank profiles, constants or onnx models */
    public static final RankProfileList empty = new RankProfileList();

    private RankProfileList() {
        constants = new FileDistributedConstants(null, List.of());
        largeRankExpressions = new LargeRankExpressions(null);
        onnxModels = new FileDistributedOnnxModels(null, List.of());
        rankProfiles = Map.of();
    }

    /**
     * Creates a rank profile list
     *
     * @param schema the schema whose rank profiles are derived, or null to derive the profiles the registry
     *               returns for no schema (this list is then named "default")
     * @param largeRankExpressions passed on to each derived raw rank profile, and written by
     *                             {@link #getConfig(RankingExpressionsConfig.Builder)}
     * @param attributeFields the attribute fields to create a ranking for
     * @param deployState supplies the rank profile registry, query profiles, imported models, properties,
     *                    executor and file registry used during derivation
     * @throws IllegalArgumentException if some rank profiles can never be derived because what they inherit
     *                                  cannot be resolved, or if two different file constants or onnx models
     *                                  share a name
     */
    public RankProfileList(Schema schema,
                           LargeRankExpressions largeRankExpressions,
                           AttributeFields attributeFields,
                           DeployState deployState) {
        setName(schema == null ? "default" : schema.getName());
        this.largeRankExpressions = largeRankExpressions;
        this.rankProfiles = deriveRankProfiles(schema, attributeFields, deployState);
        this.constants = deriveFileDistributedConstants(schema, rankProfiles.values(), deployState);
        this.onnxModels = deriveFileDistributedOnnxModels(schema, rankProfiles.values(), deployState);
    }

    /**
     * Returns whether the given profile can be derived now: it inherits nothing, or everything it inherits
     * is already processed, or it belongs to a schema and each inherited name resolves in the registry.
     */
    private boolean areDependenciesReady(RankProfile rank, RankProfileRegistry registry, Set<String> processedProfiles) {
        return rank.inheritedNames().isEmpty() ||
               processedProfiles.containsAll(rank.inheritedNames()) ||
               (rank.schema() != null && rank.inheritedNames().stream().allMatch(name -> registry.resolve(rank.schema().getDocument(), name) != null));
    }

    /**
     * Derives the raw rank profiles: first "default" (when there is a schema), then the rest in rounds,
     * where each round derives every remaining profile whose dependencies are ready.
     *
     * @throws IllegalArgumentException if a round makes no progress
     */
    private Map<String, RawRankProfile> deriveRankProfiles(Schema schema,
                                                           AttributeFields attributeFields,
                                                           DeployState deployState) {
        Map<String, RawRankProfile> rawRankProfiles = new LinkedHashMap<>();
        if (schema != null) { // profiles belonging to a schema have a default profile
            RawRankProfile rawRank = new RawRankProfile(deployState.rankProfileRegistry().get(schema, "default"),
                                                        largeRankExpressions,
                                                        deployState.getQueryProfiles().getRegistry(),
                                                        deployState.getImportedModels(),
                                                        attributeFields,
                                                        deployState.getProperties());
            rawRankProfiles.put(rawRank.getName(), rawRank);
        }

        Map<String, RankProfile> remaining = new LinkedHashMap<>();
        deployState.rankProfileRegistry().rankProfilesOf(schema).forEach(rank -> remaining.put(rank.name(), rank));
        remaining.remove("default");
        while (!remaining.isEmpty()) {
            List<RankProfile> ready = new ArrayList<>();
            remaining.forEach((name, profile) -> {
                if (areDependenciesReady(profile, deployState.rankProfileRegistry(), rawRankProfiles.keySet()))
                    ready.add(profile);
            });
            // Nothing ready means nothing will ever become ready (the inputs to the readiness check
            // no longer change), so fail instead of looping forever
            if (ready.isEmpty())
                throw new IllegalArgumentException("Could not derive rank profiles " + remaining.keySet() +
                                                   " in " + (schema != null ? schema.toString() : "[global]") +
                                                   ": The rank profiles they inherit cannot be resolved");
            rawRankProfiles.putAll(processRankProfiles(ready,
                                                       deployState.getQueryProfiles().getRegistry(),
                                                       deployState.getImportedModels(),
                                                       attributeFields,
                                                       deployState.getProperties(),
                                                       deployState.getExecutor()));
            ready.forEach(rank -> remaining.remove(rank.name()));
        }
        return rawRankProfiles;
    }

    /**
     * Derives the given profiles concurrently on the executor and returns them by name, in the given order.
     *
     * @throws IllegalStateException if derivation of a profile fails, or this thread is interrupted while
     *                               waiting (the interrupt status is then restored)
     */
    private Map<String, RawRankProfile> processRankProfiles(List<RankProfile> profiles,
                                                            QueryProfileRegistry queryProfiles,
                                                            ImportedMlModels importedModels,
                                                            AttributeFields attributeFields,
                                                            ModelContext.Properties deployProperties,
                                                            ExecutorService executor) {
        Map<String, Future<RawRankProfile>> futureRawRankProfiles = new LinkedHashMap<>();
        for (RankProfile profile : profiles) {
            futureRawRankProfiles.put(profile.name(), executor.submit(() -> new RawRankProfile(profile, largeRankExpressions, queryProfiles, importedModels,
                                                                                               attributeFields, deployProperties)));
        }
        try {
            Map<String, RawRankProfile> rawRankProfiles = new LinkedHashMap<>();
            for (Future<RawRankProfile> rawFuture : futureRawRankProfiles.values()) {
                RawRankProfile rawRank = rawFuture.get();
                rawRankProfiles.put(rawRank.getName(), rawRank);
            }
            return rawRankProfiles;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // keep the interrupt visible to the code that owns this thread
            throw new IllegalStateException(e);
        } catch (ExecutionException e) {
            throw new IllegalStateException(e);
        }
    }

    /** Collects the constants having a value path from the schema (if any) and from all the compiled profiles */
    private static FileDistributedConstants deriveFileDistributedConstants(Schema schema,
                                                                           Collection<RawRankProfile> rankProfiles,
                                                                           DeployState deployState) {
        Map<Reference, RankProfile.Constant> allFileConstants = new HashMap<>();
        addFileConstants(schema != null ? schema.constants().values() : List.of(),
                         allFileConstants,
                         schema != null ? schema.toString() : "[global]");
        for (var profile : rankProfiles)
            addFileConstants(profile.compiled().constants().values(), allFileConstants, profile.toString());
        return new FileDistributedConstants(deployState.getFileRegistry(), allFileConstants.values());
    }

    /**
     * Adds the constants in source which have a value path to destination, keyed by name.
     *
     * @throws IllegalArgumentException if destination already has a different constant by the same name
     */
    private static void addFileConstants(Collection<RankProfile.Constant> source,
                                         Map<Reference, RankProfile.Constant> destination,
                                         String sourceName) {
        for (var constant : source) {
            if (constant.valuePath().isEmpty()) continue; // only constants with a value path are collected
            var existing = destination.get(constant.name());
            if ( existing != null && ! constant.equals(existing)) {
                throw new IllegalArgumentException("Duplicate constants: " + sourceName + " have " + constant +
                                                   ", but we already have " + existing +
                                                   ": Value reference constants must be unique across all rank profiles/models");
            }
            destination.put(constant.name(), constant);
        }
    }

    /** Collects the onnx models from the schema (if any) and from all the compiled profiles */
    private static FileDistributedOnnxModels deriveFileDistributedOnnxModels(Schema schema,
                                                                             Collection<RawRankProfile> rankProfiles,
                                                                             DeployState deployState) {
        Map<String, OnnxModel> allModels = new LinkedHashMap<>();
        addOnnxModels(schema != null ? schema.onnxModels().values() : List.of(),
                      allModels,
                      schema != null ? schema.toString() : "[global]");
        for (var profile : rankProfiles)
            addOnnxModels(profile.compiled().onnxModels().values(), allModels, profile.toString());
        return new FileDistributedOnnxModels(deployState.getFileRegistry(), allModels.values());
    }

    /**
     * Adds the models in source to destination, keyed by name.
     *
     * @throws IllegalArgumentException if destination already has a different model by the same name
     */
    private static void addOnnxModels(Collection<OnnxModel> source,
                                      Map<String, OnnxModel> destination,
                                      String sourceName) {
        for (var model : source) {
            var existing = destination.get(model.getName());
            if ( existing != null && ! model.equals(existing)) {
                throw new IllegalArgumentException("Duplicate onnx model: " + sourceName + " have " + model +
                                                   ", but we already have " + existing +
                                                   ": Onnx models must be unique across all rank profiles/models");
            }
            destination.put(model.getName(), model);
        }
    }

    public Map<String, RawRankProfile> getRankProfiles() { return rankProfiles; }
    public FileDistributedConstants constants() { return constants; }
    public FileDistributedOnnxModels getOnnxModels() { return onnxModels; }

    @Override
    public String getDerivedName() { return "rank-profiles"; }

    @Override
    public void getConfig(RankProfilesConfig.Builder builder) {
        for (RawRankProfile rank : rankProfiles.values() ) {
            rank.getConfig(builder);
        }
    }

    /** Adds the name and file reference of each large rank expression to the builder */
    public void getConfig(RankingExpressionsConfig.Builder builder) {
        largeRankExpressions.expressions().forEach((expr) -> builder.expression.add(new RankingExpressionsConfig.Expression.Builder().name(expr.getName()).fileref(expr.getFileReference())));
    }

    public void getConfig(RankingConstantsConfig.Builder builder) {
        constants.getConfig(builder);
    }

    public void getConfig(OnnxModelsConfig.Builder builder) {
        onnxModels.getConfig(builder);
    }

}
b/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java @@ -0,0 +1,524 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.derived;

import ai.vespa.rankingexpression.importer.configmodelview.ImportedMlModels;
import com.google.common.collect.ImmutableList;
import com.yahoo.collections.Pair;
import com.yahoo.compress.Compressor;
import com.yahoo.config.model.api.ModelContext;
import com.yahoo.search.query.profile.QueryProfileRegistry;
import com.yahoo.schema.FeatureNames;
import com.yahoo.schema.OnnxModel;
import com.yahoo.schema.LargeRankExpressions;
import com.yahoo.schema.RankExpressionBody;
import com.yahoo.schema.document.RankType;
import com.yahoo.schema.RankProfile;
import com.yahoo.schema.expressiontransforms.OnnxModelTransformer;
import com.yahoo.searchlib.rankingexpression.ExpressionFunction;
import com.yahoo.searchlib.rankingexpression.RankingExpression;
import com.yahoo.searchlib.rankingexpression.Reference;
import com.yahoo.searchlib.rankingexpression.parser.ParseException;
import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode;
import com.yahoo.searchlib.rankingexpression.rule.SerializationContext;
import com.yahoo.tensor.TensorType;
import com.yahoo.vespa.config.search.RankProfilesConfig;

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.OptionalDouble;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * A rank profile derived from a search definition, containing exactly the features available natively in the server
 *
 * The derived properties are kept only in compressed form; they are decompressed
 * each time config is produced or configProperties() is called.
 *
 * @author bratseth
 */
public class RawRankProfile implements RankProfilesConfig.Producer {

    /** A reusable compressor with default settings */
    private static final Compressor compressor = new Compressor();

    // Separators used when flattening the property list to a single string before compression.
    // NOTE(review): decompress assumes keys and values never contain these sequences - confirm.
    private static final String keyEndMarker = "\r=";
    private static final String valueEndMarker = "\r\n";

    private final String name;
    private final Compressor.Compression compressedProperties;

    /** The compiled profile this is created from. */
    private final RankProfile compiled;

    /** Creates a raw rank profile from the given rank profile. */
    public RawRankProfile(RankProfile rankProfile, LargeRankExpressions largeExpressions,
                          QueryProfileRegistry queryProfiles, ImportedMlModels importedModels,
                          AttributeFields attributeFields, ModelContext.Properties deployProperties) {
        this.name = rankProfile.name();
        compiled = rankProfile.compile(queryProfiles, importedModels);
        compressedProperties = compress(new Deriver(compiled, attributeFields, deployProperties, queryProfiles)
                                                .derive(largeExpressions));
    }

    /** Returns the compiled rank profile this was derived from. */
    public RankProfile compiled() { return compiled; }

    /** Serializes the properties as key, keyEndMarker, value, valueEndMarker sequences in UTF-8 and compresses the result. */
    private Compressor.Compression compress(List<Pair<String, String>> properties) {
        StringBuilder b = new StringBuilder();
        for (Pair<String, String> property : properties)
            b.append(property.getFirst()).append(keyEndMarker).append(property.getSecond()).append(valueEndMarker);
        return compressor.compress(b.toString().getBytes(StandardCharsets.UTF_8));
    }

    /** Reverses compress: returns the properties as an immutable list, in their original order. */
    private List<Pair<String, String>> decompress(Compressor.Compression compression) {
        String propertiesString = new String(compressor.decompress(compression), StandardCharsets.UTF_8);
        if (propertiesString.isEmpty()) return ImmutableList.of();

        ImmutableList.Builder<Pair<String, String>> properties = new ImmutableList.Builder<>();
        // Each iteration consumes one "key<keyEndMarker>value<valueEndMarker>" record
        for (int pos = 0; pos < propertiesString.length();) {
            int keyEndPos = propertiesString.indexOf(keyEndMarker, pos);
            String key = propertiesString.substring(pos, keyEndPos);
            pos = keyEndPos + keyEndMarker.length();
            int valueEndPos = propertiesString.indexOf(valueEndMarker, pos);
            String value = propertiesString.substring(pos, valueEndPos);
            pos = valueEndPos + valueEndMarker.length();
            properties.add(new Pair<>(key, value));
        }
        return properties.build();
    }

    public String getName() { return name; }

    /** Adds all the (decompressed) properties of this as fef properties to the given rank profile builder. */
    private void getRankProperties(RankProfilesConfig.Rankprofile.Builder b) {
        RankProfilesConfig.Rankprofile.Fef.Builder fefB = new RankProfilesConfig.Rankprofile.Fef.Builder();
        for (Pair<String, String> p : decompress(compressedProperties))
            fefB.property(new RankProfilesConfig.Rankprofile.Fef.Property.Builder().name(p.getFirst()).value(p.getSecond()));
        b.fef(fefB);
    }

    /**
     * Returns the properties of this as an unmodifiable list.
     * Note: This method is expensive.
     */
    public List<Pair<String, String>> configProperties() { return decompress(compressedProperties); }

    @Override
    public void getConfig(RankProfilesConfig.Builder builder) {
        RankProfilesConfig.Rankprofile.Builder b = new RankProfilesConfig.Rankprofile.Builder().name(getName());
        getRankProperties(b);
        builder.rankprofile(b);
    }

    @Override
    public String toString() {
        return " rank profile " + name;
    }

    /**
     * Derives the flat list of rank properties from a compiled rank profile.
     * The constructor collects the settings, derive produces the property list.
     */
    private static class Deriver {

        private final Map<String, FieldRankSettings> fieldRankSettings = new java.util.LinkedHashMap<>();
        private final Set<ReferenceNode> summaryFeatures;
        private final Set<ReferenceNode> matchFeatures;
        private final Set<ReferenceNode> rankFeatures;
        /** Backend feature name to the user-facing name, filled only when mapBackRankingExpressionFeatures is set */
        private final Map<String, String> featureRenames = new java.util.LinkedHashMap<>();
        private final List<RankProfile.RankProperty> rankProperties;

        /**
         * Rank properties for weight settings to make these available to feature executors
         */
        private final List<RankProfile.RankProperty> boostAndWeightRankProperties = new ArrayList<>();

        private final boolean ignoreDefaultRankFeatures;
        private final RankProfile.MatchPhaseSettings matchPhaseSettings;
        private final int rerankCount;
        private final int keepRankCount;
        private final int numThreadsPerSearch;
        private final int minHitsPerThread;
        private final int numSearchPartitions;
        private final double termwiseLimit;
        private final OptionalDouble postFilterThreshold;
        private final OptionalDouble approximateThreshold;
        private final double rankScoreDropLimit;
        private final boolean mapBackRankingExpressionFeatures;

        /**
         * The rank type definitions used to derive settings for the native rank features
         */
        private final NativeRankTypeDefinitionSet nativeRankTypeDefinitions = new NativeRankTypeDefinitionSet("default");
        private final Map<String, String> attributeTypes;
        private final Map<Reference, RankProfile.Input> inputs;
        private final Set<String> filterFields = new java.util.LinkedHashSet<>();
        private final String rankprofileName;

        // Not final: derive may replace these with expressions parsed from rank properties
        private RankingExpression firstPhaseRanking;
        private RankingExpression secondPhaseRanking;

        /**
         * Creates a raw rank profile from the given rank profile
         */
        Deriver(RankProfile compiled,
                AttributeFields attributeFields,
                ModelContext.Properties deployProperties,
                QueryProfileRegistry queryProfiles) {
            rankprofileName = compiled.name();
            attributeTypes = compiled.getAttributeTypes();
            inputs = compiled.inputs();
            firstPhaseRanking = compiled.getFirstPhaseRanking();
            secondPhaseRanking = compiled.getSecondPhaseRanking();
            // Copied since function/onnx replacement below removes from and adds to these sets
            summaryFeatures = new LinkedHashSet<>(compiled.getSummaryFeatures());
            matchFeatures = new LinkedHashSet<>(compiled.getMatchFeatures());
            rankFeatures = compiled.getRankFeatures();
            rerankCount = compiled.getRerankCount();
            matchPhaseSettings = compiled.getMatchPhaseSettings();
            numThreadsPerSearch = compiled.getNumThreadsPerSearch();
            minHitsPerThread = compiled.getMinHitsPerThread();
            numSearchPartitions = compiled.getNumSearchPartitions();
            termwiseLimit = compiled.getTermwiseLimit().orElse(deployProperties.featureFlags().defaultTermwiseLimit());
            postFilterThreshold = compiled.getPostFilterThreshold();
            approximateThreshold = compiled.getApproximateThreshold();
            keepRankCount = compiled.getKeepRankCount();
            rankScoreDropLimit = compiled.getRankScoreDropLimit();
            mapBackRankingExpressionFeatures = deployProperties.featureFlags().avoidRenamingSummaryFeatures();
            ignoreDefaultRankFeatures = compiled.getIgnoreDefaultRankFeatures();
            rankProperties = new ArrayList<>(compiled.getRankProperties());

            Map<String, RankProfile.RankingExpressionFunction> functions = compiled.getFunctions();
            List<ExpressionFunction> functionExpressions = functions.values().stream().map(f -> f.function()).collect(Collectors.toList());
            Map<String, String> functionProperties = new LinkedHashMap<>();
            SerializationContext functionSerializationContext = new SerializationContext(functionExpressions,
                                                                                         Map.of(),
                                                                                         compiled.typeContext(queryProfiles));

            if (firstPhaseRanking != null) {
                functionProperties.putAll(firstPhaseRanking.getRankProperties(functionSerializationContext));
            }
            if (secondPhaseRanking != null) {
                functionProperties.putAll(secondPhaseRanking.getRankProperties(functionSerializationContext));
            }

            derivePropertiesAndFeaturesFromFunctions(functions, functionProperties, functionSerializationContext);
            deriveOnnxModelFunctionsAndFeatures(compiled);

            deriveRankTypeSetting(compiled, attributeFields);
            deriveFilterFields(compiled);
            deriveWeightProperties(compiled);
        }

        private void deriveFilterFields(RankProfile rp) {
            filterFields.addAll(rp.allFilterFields());
        }

        /**
         * Replaces summary and match features which are functions by their backend form, serializes all functions,
         * and appends the resulting function properties to rankProperties. Does nothing if there are no functions.
         */
        private void derivePropertiesAndFeaturesFromFunctions(Map<String, RankProfile.RankingExpressionFunction> functions,
                                                              Map<String, String> functionProperties,
                                                              SerializationContext functionContext) {
            if (functions.isEmpty()) return;

            replaceFunctionFeatures(summaryFeatures, functionContext);
            replaceFunctionFeatures(matchFeatures, functionContext);

            // First phase, second phase and summary features should add all required functions to the context.
            // However, we need to add any functions not referenced in those anyway for model-evaluation.
            deriveFunctionProperties(functions, functionProperties, functionContext);

            for (Map.Entry<String, String> e : functionProperties.entrySet()) {
                rankProperties.add(new RankProfile.RankProperty(e.getKey(), e.getValue()));
            }
        }

        /**
         * Serializes each function not already serialized in the context, together with its argument types
         * and (when present) return type, then copies all serialized functions of the context into functionProperties.
         */
        private void deriveFunctionProperties(Map<String, RankProfile.RankingExpressionFunction> functions,
                                              Map<String, String> functionProperties,
                                              SerializationContext context) {
            for (Map.Entry<String, RankProfile.RankingExpressionFunction> e : functions.entrySet()) {
                String propertyName = RankingExpression.propertyName(e.getKey());
                if (context.serializedFunctions().containsKey(propertyName)) continue;

                String expressionString = e.getValue().function().getBody().getRoot().toString(context).toString();

                context.addFunctionSerialization(propertyName, expressionString);
                for (Map.Entry<String, TensorType> argumentType : e.getValue().function().argumentTypes().entrySet())
                    context.addArgumentTypeSerialization(e.getKey(), argumentType.getKey(), argumentType.getValue());
                if (e.getValue().function().returnType().isPresent())
                    context.addFunctionTypeSerialization(e.getKey(), e.getValue().function().returnType().get());
                // else if (e.getValue().function().arguments().isEmpty()) TODO: Enable this check when we resolve all types
                //     throw new IllegalStateException("Type of function '" + e.getKey() + "' is not resolved");
            }
            functionProperties.putAll(context.serializedFunctions());
        }

        /**
         * Replaces, in place, each feature naming a function known to the context by a
         * "rankingExpression(name)" reference with the same arguments and output,
         * serializing the function into the context as a side effect.
         */
        private void replaceFunctionFeatures(Set<ReferenceNode> features, SerializationContext context) {
            if (features == null) return;
            Map<String, ReferenceNode> functionFeatures = new LinkedHashMap<>();
            for (Iterator<ReferenceNode> i = features.iterator(); i.hasNext(); ) {
                ReferenceNode referenceNode = i.next();
                // Is the feature a function?
                ExpressionFunction function = context.getFunction(referenceNode.getName());
                if (function != null) {
                    String propertyName = RankingExpression.propertyName(referenceNode.getName());
                    String expressionString = function.getBody().getRoot().toString(context).toString();
                    context.addFunctionSerialization(propertyName, expressionString);
                    ReferenceNode backendReferenceNode = new ReferenceNode("rankingExpression(" + referenceNode.getName() + ")",
                                                                           referenceNode.getArguments().expressions(),
                                                                           referenceNode.getOutput());
                    if (mapBackRankingExpressionFeatures) {
                        // tell backend to map back to the name the user expects:
                        featureRenames.put(backendReferenceNode.toString(), referenceNode.toString());
                    }
                    functionFeatures.put(referenceNode.getName(), backendReferenceNode);
                    i.remove(); // Will add the expanded one in next block
                }
            }
            // Then, replace the features that were functions
            for (Map.Entry<String, ReferenceNode> e : functionFeatures.entrySet()) {
                features.add(e.getValue());
            }
        }

        /** Adds a "vespa.fieldweight.[field]" property for each WEIGHT rank setting of the profile. */
        private void deriveWeightProperties(RankProfile rankProfile) {

            for (RankProfile.RankSetting setting : rankProfile.rankSettings()) {
                if (setting.getType() != RankProfile.RankSetting.Type.WEIGHT) continue;
                boostAndWeightRankProperties.add(new RankProfile.RankProperty("vespa.fieldweight." + setting.getFieldName(),
                                                                              String.valueOf(setting.getIntValue())));
            }
        }

        /**
         * Adds the type boosts from a rank profile
         */
        private void deriveRankTypeSetting(RankProfile rankProfile, AttributeFields attributeFields) {
            for (Iterator<RankProfile.RankSetting> i = rankProfile.rankSettingIterator(); i.hasNext(); ) {
                RankProfile.RankSetting setting = i.next();
                if (setting.getType() != RankProfile.RankSetting.Type.RANKTYPE) continue;

                deriveNativeRankTypeSetting(setting.getFieldName(), (RankType) setting.getValue(), attributeFields,
                                            hasDefaultRankTypeSetting(rankProfile, setting.getFieldName()));
            }
        }

        /**
         * Adds the native rank tables of the given rank type to the settings of the given field,
         * unless the field uses the default rank type.
         *
         * @throws IllegalArgumentException if there is no definition for the rank type
         */
        private void deriveNativeRankTypeSetting(String fieldName, RankType rankType, AttributeFields attributeFields,
                                                 boolean isDefaultSetting) {
            if (isDefaultSetting) return;

            NativeRankTypeDefinition definition = nativeRankTypeDefinitions.getRankTypeDefinition(rankType);
            if (definition == null) throw new IllegalArgumentException("In field '" + fieldName + "': " +
                                                                       rankType + " is known but has no implementation. " +
                                                                       "Supported rank types: " +
                                                                       nativeRankTypeDefinitions.types().keySet());

            FieldRankSettings settings = deriveFieldRankSettings(fieldName);
            for (Iterator<NativeTable> i = definition.rankSettingIterator(); i.hasNext(); ) {
                NativeTable table = i.next();
                // only add index field tables if we are processing an index field and
                // only add attribute field tables if we are processing an attribute field
                if ((FieldRankSettings.isIndexFieldTable(table) && attributeFields.getAttribute(fieldName) == null) ||
                    (FieldRankSettings.isAttributeFieldTable(table) && attributeFields.getAttribute(fieldName) != null)) {
                    settings.addTable(table);
                }
            }
        }

        /** Returns whether the profile has a RANKTYPE setting for the field whose value is RankType.DEFAULT. */
        private boolean hasDefaultRankTypeSetting(RankProfile rankProfile, String fieldName) {
            RankProfile.RankSetting setting =
                    rankProfile.getRankSetting(fieldName, RankProfile.RankSetting.Type.RANKTYPE);
            return setting != null && setting.getValue().equals(RankType.DEFAULT);
        }

        /** Returns the rank settings of the given field, creating and registering them if not already present. */
        private FieldRankSettings deriveFieldRankSettings(String fieldName) {
            FieldRankSettings settings = fieldRankSettings.get(fieldName);
            if (settings == null) {
                settings = new FieldRankSettings(fieldName);
                fieldRankSettings.put(fieldName, settings);
            }
            return settings;
        }

        /**
         * Derives the properties this produces
         *
         * Expressions of named functions longer than the limit of largeRankExpressions are moved into it
         * and replaced by a reference in the returned list.
         *
         * @throws IllegalArgumentException if a first or second phase expression property cannot be parsed,
         *                                  or if there are 1000000 or more properties
         */
        public List<Pair<String, String>> derive(LargeRankExpressions largeRankExpressions) {
            List<Pair<String, String>> properties = new ArrayList<>();
            for (RankProfile.RankProperty property : rankProperties) {
                if (RankingExpression.propertyName(RankProfile.FIRST_PHASE).equals(property.getName())) {
                    // Could have been set by function expansion. Set expressions, then skip this property.
                    try {
                        firstPhaseRanking = new RankingExpression(property.getValue());
                    } catch (ParseException e) {
                        throw new IllegalArgumentException("Could not parse first phase expression", e);
                    }
                }
                else if (RankingExpression.propertyName(RankProfile.SECOND_PHASE).equals(property.getName())) {
                    try {
                        secondPhaseRanking = new RankingExpression(property.getValue());
                    } catch (ParseException e) {
                        throw new IllegalArgumentException("Could not parse second phase expression", e);
                    }
                }
                else {
                    properties.add(new Pair<>(property.getName(), property.getValue()));
                }
            }
            properties.addAll(deriveRankingPhaseRankProperties(firstPhaseRanking, RankProfile.FIRST_PHASE));
            properties.addAll(deriveRankingPhaseRankProperties(secondPhaseRanking, RankProfile.SECOND_PHASE));
            for (FieldRankSettings settings : fieldRankSettings.values()) {
                properties.addAll(settings.deriveRankProperties());
            }
            for (RankProfile.RankProperty property : boostAndWeightRankProperties) {
                properties.add(new Pair<>(property.getName(), property.getValue()));
            }
            for (ReferenceNode feature : summaryFeatures) {
                properties.add(new Pair<>("vespa.summary.feature", feature.toString()));
            }
            for (ReferenceNode feature : matchFeatures) {
                properties.add(new Pair<>("vespa.match.feature", feature.toString()));
            }
            for (ReferenceNode feature : rankFeatures) {
                properties.add(new Pair<>("vespa.dump.feature", feature.toString()));
            }
            // A rename is emitted as two consecutive properties with the same name: first the backend name, then the user-facing name
            for (var entry : featureRenames.entrySet()) {
                properties.add(new Pair<>("vespa.feature.rename", entry.getKey()));
                properties.add(new Pair<>("vespa.feature.rename", entry.getValue()));
            }
            if (numThreadsPerSearch > 0) {
                properties.add(new Pair<>("vespa.matching.numthreadspersearch", numThreadsPerSearch + ""));
            }
            if (minHitsPerThread > 0) {
                properties.add(new Pair<>("vespa.matching.minhitsperthread", minHitsPerThread + ""));
            }
            if (numSearchPartitions >= 0) {
                properties.add(new Pair<>("vespa.matching.numsearchpartitions", numSearchPartitions + ""));
            }
            if (termwiseLimit < 1.0) {
                properties.add(new Pair<>("vespa.matching.termwise_limit", termwiseLimit + ""));
            }
            if (postFilterThreshold.isPresent()) {
                properties.add(new Pair<>("vespa.matching.global_filter.upper_limit", String.valueOf(postFilterThreshold.getAsDouble())));
            }
            if (approximateThreshold.isPresent()) {
                properties.add(new Pair<>("vespa.matching.global_filter.lower_limit", String.valueOf(approximateThreshold.getAsDouble())));
            }
            if (matchPhaseSettings != null) {
                properties.add(new Pair<>("vespa.matchphase.degradation.attribute", matchPhaseSettings.getAttribute()));
                properties.add(new Pair<>("vespa.matchphase.degradation.ascendingorder", matchPhaseSettings.getAscending() + ""));
                properties.add(new Pair<>("vespa.matchphase.degradation.maxhits", matchPhaseSettings.getMaxHits() + ""));
                properties.add(new Pair<>("vespa.matchphase.degradation.maxfiltercoverage", matchPhaseSettings.getMaxFilterCoverage() + ""));
                properties.add(new Pair<>("vespa.matchphase.degradation.samplepercentage", matchPhaseSettings.getEvaluationPoint() + ""));
                properties.add(new Pair<>("vespa.matchphase.degradation.postfiltermultiplier", matchPhaseSettings.getPrePostFilterTippingPoint() + ""));
                RankProfile.DiversitySettings diversitySettings = matchPhaseSettings.getDiversity();
                if (diversitySettings != null) {
                    properties.add(new Pair<>("vespa.matchphase.diversity.attribute", diversitySettings.getAttribute()));
                    properties.add(new Pair<>("vespa.matchphase.diversity.mingroups", String.valueOf(diversitySettings.getMinGroups())));
                    properties.add(new Pair<>("vespa.matchphase.diversity.cutoff.factor", String.valueOf(diversitySettings.getCutoffFactor())));
                    properties.add(new Pair<>("vespa.matchphase.diversity.cutoff.strategy", String.valueOf(diversitySettings.getCutoffStrategy())));
                }
            }
            if (rerankCount > -1) {
                properties.add(new Pair<>("vespa.hitcollector.heapsize", rerankCount + ""));
            }
            if (keepRankCount > -1) {
                properties.add(new Pair<>("vespa.hitcollector.arraysize", keepRankCount + ""));
            }
            if (rankScoreDropLimit > -Double.MAX_VALUE) {
                properties.add(new Pair<>("vespa.hitcollector.rankscoredroplimit", rankScoreDropLimit + ""));
            }
            if (ignoreDefaultRankFeatures) {
                properties.add(new Pair<>("vespa.dump.ignoredefaultfeatures", String.valueOf(true)));
            }
            for (String fieldName : filterFields) {
                properties.add(new Pair<>("vespa.isfilterfield." + fieldName, String.valueOf(true)));
            }
            for (Map.Entry<String, String> attributeType : attributeTypes.entrySet()) {
                properties.add(new Pair<>("vespa.type.attribute." + attributeType.getKey(), attributeType.getValue()));
            }

            // Only inputs which are query features produce properties: a type (tensors only) and a default value (if set)
            for (var input : inputs.values()) {
                if (FeatureNames.isQueryFeature(input.name())) {
                    if (input.type().rank() > 0) // Proton does not like representing the double type as a rank 0 tensor
                        properties.add(new Pair<>("vespa.type.query." + input.name().arguments().expressions().get(0),
                                                  input.type().toString()));
                    if (input.defaultValue().isPresent()) {
                        properties.add(new Pair<>(input.name().toString(),
                                                  input.type().rank() == 0 ?
                                                  String.valueOf(input.defaultValue().get().asDouble()) :
                                                  input.defaultValue().get().toString(true, false)));
                    }
                }
            }
            if (properties.size() >= 1000000) throw new IllegalArgumentException("Too many rank properties");
            distributeLargeExpressionsAsFiles(properties, largeRankExpressions);
            return properties;
        }

        /**
         * Replaces, in place, each property whose value is longer than the limit of largeRankExpressions and whose
         * name yields a script name, by a property referring to an expression body added to largeRankExpressions
         * under the name "[rank profile name].[function name]".
         */
        private void distributeLargeExpressionsAsFiles(List<Pair<String, String>> properties, LargeRankExpressions largeRankExpressions) {
            for (ListIterator<Pair<String, String>> iter = properties.listIterator(); iter.hasNext();) {
                Pair<String, String> property = iter.next();
                String expression = property.getSecond();
                if (expression.length() > largeRankExpressions.limit()) {
                    String propertyName = property.getFirst();
                    String functionName = RankingExpression.extractScriptName(propertyName);
                    // Large values of properties without a script name are left inline
                    if (functionName != null) {
                        String mangledName = rankprofileName + "." + functionName;
                        largeRankExpressions.add(new RankExpressionBody(mangledName, ByteBuffer.wrap(expression.getBytes(StandardCharsets.UTF_8))));
                        iter.set(new Pair<>(RankingExpression.propertyExpressionName(functionName), mangledName));
                    }
                }
            }
        }

        /**
         * Returns the properties defining the given ranking phase: empty if the expression is null,
         * a single "vespa.rank.[phase]" property if the expression root is a plain reference,
         * and otherwise that property referring to a named ranking expression plus the expression itself.
         */
        private List<Pair<String, String>> deriveRankingPhaseRankProperties(RankingExpression expression, String phase) {
            List<Pair<String, String>> properties = new ArrayList<>();
            if (expression == null) return properties;

            String name = expression.getName();
            if ("".equals(name))
                name = phase;

            if (expression.getRoot() instanceof ReferenceNode) {
                properties.add(new Pair<>("vespa.rank." + phase, expression.getRoot().toString()));
            } else {
                properties.add(new Pair<>("vespa.rank." + phase, "rankingExpression(" + name + ")"));
                properties.add(new Pair<>(RankingExpression.propertyName(name), expression.getRoot().toString()));
            }
            return properties;
        }

        /** Applies the onnx related rewrites below; skipped for profiles without a schema or without onnx models. */
        private void deriveOnnxModelFunctionsAndFeatures(RankProfile rankProfile) {
            if (rankProfile.schema() == null) return;
            if (rankProfile.onnxModels().isEmpty()) return;
            replaceOnnxFunctionInputs(rankProfile);
            replaceImplicitOnnxConfigFeatures(summaryFeatures, rankProfile);
            replaceImplicitOnnxConfigFeatures(matchFeatures, rankProfile);
        }

        /** Rewrites onnx model inputs whose source is the name of a function of the profile to "rankingExpression(name)". */
        private void replaceOnnxFunctionInputs(RankProfile rankProfile) {
            Set<String> functionNames = rankProfile.getFunctions().keySet();
            if (functionNames.isEmpty()) return;
            for (OnnxModel onnxModel: rankProfile.onnxModels().values()) {
                for (Map.Entry<String, String> mapping : onnxModel.getInputMap().entrySet()) {
                    String source = mapping.getValue();
                    if (functionNames.contains(source)) {
                        onnxModel.addInputNameMapping(mapping.getKey(), "rankingExpression(" + source + ")");
                    }
                }
            }
        }

        /** Replaces, in place, each feature for which OnnxModelTransformer.transformFeature returns a different node. */
        private void replaceImplicitOnnxConfigFeatures(Set<ReferenceNode> features, RankProfile rankProfile) {
            if (features == null || features.isEmpty()) return;
            Set<ReferenceNode> replacedFeatures = new HashSet<>();
            for (Iterator<ReferenceNode> i = features.iterator(); i.hasNext(); ) {
                ReferenceNode referenceNode = i.next();
                ReferenceNode replacedNode = (ReferenceNode) OnnxModelTransformer.transformFeature(referenceNode, rankProfile);
                // Identity comparison: the transformer returning the same instance means "not replaced"
                if (referenceNode != replacedNode) {
                    replacedFeatures.add(replacedNode);
                    i.remove();
                }
            }
            features.addAll(replacedFeatures);
        }

    }

}
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/SchemaInfo.java b/config-model/src/main/java/com/yahoo/schema/derived/SchemaInfo.java new file mode 100644 index 00000000000..18c6f335787 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/SchemaInfo.java @@ -0,0 +1,129 @@
// Copyright Yahoo.
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.search.config.SchemaInfoConfig; +import com.yahoo.schema.RankProfile; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.searchlib.rankingexpression.Reference; +import com.yahoo.vespa.documentmodel.SummaryTransform; + +import java.util.Collection; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * Information about a schema. + * + * @author bratseth + */ +public final class SchemaInfo extends Derived implements SchemaInfoConfig.Producer { + + private final Schema schema; + + // Info about profiles needed in memory after build. + // The rank profile registry itself is not kept around due to its size. + private final Map<String, RankProfileInfo> rankProfiles; + + private final Summaries summaries; + private final SummaryMap summaryMap; + + public SchemaInfo(Schema schema, RankProfileRegistry rankProfileRegistry, + Summaries summaries, SummaryMap summaryMap) { + this.schema = schema; + this.rankProfiles = Collections.unmodifiableMap(toRankProfiles(rankProfileRegistry.rankProfilesOf(schema))); + this.summaries = summaries; + this.summaryMap = summaryMap; + } + + public String name() { return schema.getName(); } + + @Override + public String getDerivedName() { return "schema-info"; } + + public Schema fullSchema() { return schema; } + + public Map<String, RankProfileInfo> rankProfiles() { return rankProfiles; } + + private Map<String, RankProfileInfo> toRankProfiles(Collection<RankProfile> rankProfiles) { + Map<String, RankProfileInfo> rankProfileInfos = new LinkedHashMap<>(); + rankProfiles.forEach(profile -> rankProfileInfos.put(profile.name(), new RankProfileInfo(profile))); + return rankProfileInfos; + } + + @Override + public void getConfig(SchemaInfoConfig.Builder builder) { + var schemaBuilder = new SchemaInfoConfig.Schema.Builder(); 
+ schemaBuilder.name(schema.getName()); + addSummaryConfig(schemaBuilder); + addRankProfilesConfig(schemaBuilder); + builder.schema(schemaBuilder); + } + + private void addSummaryConfig(SchemaInfoConfig.Schema.Builder schemaBuilder) { + for (var summary : summaries.asList()) { + var summaryBuilder = new SchemaInfoConfig.Schema.Summaryclass.Builder(); + summaryBuilder.name(summary.getName()); + for (var field : summary.fields().values()) { + var fieldsBuilder = new SchemaInfoConfig.Schema.Summaryclass.Fields.Builder(); + fieldsBuilder.name(field.getName()) + .type(field.getType().getName()) + .dynamic(isDynamic(field.getName())); + summaryBuilder.fields(fieldsBuilder); + } + schemaBuilder.summaryclass(summaryBuilder); + } + } + + /** Returns whether the given field is a dynamic summary field. */ + private boolean isDynamic(String fieldName) { + if (summaryMap == null) return false; // not know for streaming, but also not used + + var fieldTransform = summaryMap.resultTransforms().get(fieldName); + if (fieldTransform == null) return false; + // TODO: Move this into SummaryTransform and call it something else than "dynamic" + return fieldTransform.getTransform().isDynamic() || + fieldTransform.getTransform() == SummaryTransform.MATCHED_ELEMENTS_FILTER || + fieldTransform.getTransform() == SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER; + } + + private void addRankProfilesConfig(SchemaInfoConfig.Schema.Builder schemaBuilder) { + for (RankProfileInfo rankProfile : rankProfiles().values()) { + var rankProfileConfig = new SchemaInfoConfig.Schema.Rankprofile.Builder(); + rankProfileConfig.name(rankProfile.name()); + rankProfileConfig.hasSummaryFeatures(rankProfile.hasSummaryFeatures()); + rankProfileConfig.hasRankFeatures(rankProfile.hasRankFeatures()); + for (var input : rankProfile.inputs().entrySet()) { + var inputConfig = new SchemaInfoConfig.Schema.Rankprofile.Input.Builder(); + inputConfig.name(input.getKey().toString()); + 
inputConfig.type(input.getValue().type().toString()); + rankProfileConfig.input(inputConfig); + } + schemaBuilder.rankprofile(rankProfileConfig); + } + } + + /** A store of a *small* (in memory) amount of rank profile info. */ + public static final class RankProfileInfo { + + private final String name; + private final boolean hasSummaryFeatures; + private final boolean hasRankFeatures; + private final Map<Reference, RankProfile.Input> inputs; + + public RankProfileInfo(RankProfile profile) { + this.name = profile.name(); + this.hasSummaryFeatures = ! profile.getSummaryFeatures().isEmpty(); + this.hasRankFeatures = ! profile.getRankFeatures().isEmpty(); + this.inputs = profile.inputs(); + } + + public String name() { return name; } + public boolean hasSummaryFeatures() { return hasSummaryFeatures; } + public boolean hasRankFeatures() { return hasRankFeatures; } + public Map<Reference, RankProfile.Input> inputs() { return inputs; } + + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/SearchOrderer.java b/config-model/src/main/java/com/yahoo/schema/derived/SearchOrderer.java new file mode 100644 index 00000000000..3bab808beff --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/SearchOrderer.java @@ -0,0 +1,123 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.document.DataTypeName; +import com.yahoo.schema.DocumentReference; +import com.yahoo.schema.DocumentReferences; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDDocumentType; + +import java.util.*; + +/** + * <p>A class which can reorder a list of search definitions such that any supertype + * always preceed any subtype. Subject to this condition the given order + * is preserved (the minimal reordering is done).</p> + * + * <p>This class is <b>not</b> multithread safe. 
Only one ordering must be done + * at the time in any instance.</p> + * + * @author bratseth + * @author bjorncs + */ +public class SearchOrderer { + + /** A map from DataTypeName to the Search defining them */ + private final Map<DataTypeName, Schema> documentNameToSearch = new HashMap<>(); + + /** + * Reorders the given list of search definitions such that any supertype + * always preceed any subtype. Subject to this condition the given order + * is preserved (the minimal reordering is done). + * + * @return a new list containing the same search instances in the right order + */ + public List<Schema> order(List<Schema> unordered) { + // Description above state that the original order should be preserved, except for the dependency constraint. + // Yet we botch that guarantee by sorting the list... + unordered.sort(Comparator.comparing(Schema::getName)); + + // No, this is not a fast algorithm... + indexOnDocumentName(unordered); + List<Schema> ordered = new ArrayList<>(unordered.size()); + List<Schema> moveOutwards = new ArrayList<>(); + for (Schema schema : unordered) { + if (allDependenciesAlreadyEmitted(ordered, schema)) { + addOrdered(ordered, schema, moveOutwards); + } + else { + moveOutwards.add(schema); + } + } + + // Any leftovers means we have search definitions with undefined inheritants. + // This is warned about elsewhere. 
+ ordered.addAll(moveOutwards); + + documentNameToSearch.clear(); + return ordered; + } + + private void addOrdered(List<Schema> ordered, Schema schema, List<Schema> moveOutwards) { + ordered.add(schema); + Schema eligibleMove; + do { + eligibleMove = removeFirstEntryWithFullyEmittedDependencies(moveOutwards, ordered); + if (eligibleMove != null) { + ordered.add(eligibleMove); + } + } while (eligibleMove != null); + } + + /** Removes and returns the first search from the move list which can now be added, or null if none */ + private Schema removeFirstEntryWithFullyEmittedDependencies(List<Schema> moveOutwards, List<Schema> ordered) { + for (Schema move : moveOutwards) { + if (allDependenciesAlreadyEmitted(ordered, move)) { + moveOutwards.remove(move); + return move; + } + } + return null; + } + + private boolean allDependenciesAlreadyEmitted(List<Schema> alreadyOrdered, Schema schema) { + if (schema.getDocument() == null) { + return true; + } + SDDocumentType document = schema.getDocument(); + return allInheritedDependenciesEmitted(alreadyOrdered, document) && allReferenceDependenciesEmitted(alreadyOrdered, document); + } + + private boolean allInheritedDependenciesEmitted(List<Schema> alreadyOrdered, SDDocumentType document) { + for (SDDocumentType sdoc : document.getInheritedTypes() ) { + DataTypeName inheritedName = sdoc.getDocumentName(); + if ("document".equals(inheritedName.getName())) { + continue; + } + Schema inheritedSchema = documentNameToSearch.get(inheritedName); + if (!alreadyOrdered.contains(inheritedSchema)) { + return false; + } + } + return true; + } + + private static boolean allReferenceDependenciesEmitted(List<Schema> alreadyOrdered, SDDocumentType document) { + DocumentReferences documentReferences = document.getDocumentReferences() + .orElseThrow(() -> new IllegalStateException("Missing document references. 
Should have been processed by now.")); + return documentReferences.stream() + .map(Map.Entry::getValue) + .map(DocumentReference::targetSearch) + .allMatch(alreadyOrdered::contains); + } + + private void indexOnDocumentName(List<Schema> schemas) { + documentNameToSearch.clear(); + for (Schema schema : schemas) { + if (schema.getDocument() != null) { + documentNameToSearch.put(schema.getDocument().getDocumentName(), schema); + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/Summaries.java b/config-model/src/main/java/com/yahoo/schema/derived/Summaries.java new file mode 100644 index 00000000000..2b41fbb3b1a --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/Summaries.java @@ -0,0 +1,51 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.config.model.api.ModelContext; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.config.search.SummaryConfig; + +import java.util.ArrayList; +import java.util.List; + +/** + * A list of derived summaries + * + * @author bratseth + */ +public class Summaries extends Derived implements SummaryConfig.Producer { + + private final boolean useV8GeoPositions; + private final List<SummaryClass> summaries; + + public Summaries(Schema schema, DeployLogger deployLogger, ModelContext.FeatureFlags featureFlags) { + super(); + this.useV8GeoPositions = featureFlags.useV8GeoPositions(); + + // Make sure the default is first + List<SummaryClass> summaries = new ArrayList<>(); + summaries.add(new SummaryClass(schema, schema.getSummary("default"), deployLogger)); + for (DocumentSummary summary : schema.getSummaries().values()) { + if (!summary.getName().equals("default")) + summaries.add(new SummaryClass(schema, summary, deployLogger)); + } + this.summaries = 
List.copyOf(summaries); + } + + public List<SummaryClass> asList() { return summaries; } + + @Override + protected String getDerivedName() { return "summary"; } + + @Override + public void getConfig(SummaryConfig.Builder builder) { + builder.defaultsummaryid(summaries.isEmpty() ? -1 : summaries.get(0).hashCode()); + builder.usev8geopositions(useV8GeoPositions); + for (SummaryClass summaryClass : summaries) { + builder.classes(summaryClass.getSummaryClassConfig()); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/SummaryClass.java b/config-model/src/main/java/com/yahoo/schema/derived/SummaryClass.java new file mode 100644 index 00000000000..193c6893203 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/SummaryClass.java @@ -0,0 +1,133 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.DataType; +import com.yahoo.prelude.fastsearch.DocsumDefinitionSet; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.config.search.SummaryConfig; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; + +import java.util.Collections; +import java.util.Map; +import java.util.logging.Level; + +/** + * A summary derived from a search definition. + * Each summary definition have at least one summary, the default + * which has the same name as the search definition. 
+ * + * @author bratseth + */ +public class SummaryClass extends Derived { + + public static final String DOCUMENT_ID_FIELD = "documentid"; + + private final int id; + + /** True if this summary class needs to access summary information on disk */ + private boolean accessingDiskSummary = false; + private final boolean rawAsBase64; + private final boolean omitSummaryFeatures; + + /** The summary fields of this indexed by name */ + private final Map<String, SummaryClassField> fields; + + private final DeployLogger deployLogger; + + /** + * Creates a summary class from a search definition summary + * + * @param deployLogger a {@link DeployLogger} + */ + public SummaryClass(Schema schema, DocumentSummary summary, DeployLogger deployLogger) { + super(summary.getName()); + this.deployLogger = deployLogger; + this.rawAsBase64 = schema.isRawAsBase64(); + this.omitSummaryFeatures = summary.omitSummaryFeatures(); + Map<String, SummaryClassField> fields = new java.util.LinkedHashMap<>(); + deriveFields(schema, summary, fields); + deriveImplicitFields(summary, fields); + this.fields = Collections.unmodifiableMap(fields); + this.id = deriveId(summary.getName(), fields); + } + + public int id() { return id; } + + /** MUST be called after all other fields are added */ + private void deriveImplicitFields(DocumentSummary summary, Map<String, SummaryClassField> fields) { + if (summary.getName().equals("default")) { + addField(SummaryClass.DOCUMENT_ID_FIELD, DataType.STRING, fields); + } + } + + private void deriveFields(Schema schema, DocumentSummary summary, Map<String, SummaryClassField> fields) { + for (SummaryField summaryField : summary.getSummaryFields().values()) { + if (!accessingDiskSummary && schema.isAccessingDiskSummary(summaryField)) { + accessingDiskSummary = true; + } + addField(summaryField.getName(), summaryField.getDataType(), summaryField.getTransform(), fields); + } + } + + private void addField(String name, DataType type, Map<String, SummaryClassField> fields) { 
+ addField(name, type, null, fields); + } + + private void addField(String name, DataType type, + SummaryTransform transform, + Map<String, SummaryClassField> fields) { + if (fields.containsKey(name)) { + SummaryClassField sf = fields.get(name); + if ( SummaryClassField.convertDataType(type, transform, rawAsBase64) != sf.getType()) { + deployLogger.logApplicationPackage(Level.WARNING, "Conflicting definition of field " + name + + ". " + "Declared as type " + sf.getType() + " and " + type); + } + } else { + fields.put(name, new SummaryClassField(name, type, transform, rawAsBase64)); + } + } + + public Map<String, SummaryClassField> fields() { return fields; } + + private static int deriveId(String name, Map<String, SummaryClassField> fields) { + int hash = name.hashCode(); + int number = 1; + for (var field : fields.values()) { + hash += number++ * (field.getName().hashCode() + + 17 * field.getType().getName().hashCode()); + } + hash = Math.abs(hash); + if (hash == DocsumDefinitionSet.SLIME_MAGIC_ID) + hash++; + return hash; + } + + public SummaryConfig.Classes.Builder getSummaryClassConfig() { + SummaryConfig.Classes.Builder classBuilder = new SummaryConfig.Classes.Builder(); + classBuilder. + id(id). + name(getName()). + omitsummaryfeatures(omitSummaryFeatures); + for (SummaryClassField field : fields.values() ) { + classBuilder.fields(new SummaryConfig.Classes.Fields.Builder(). + name(field.getName()). 
+ type(field.getType().getName())); + } + return classBuilder; + } + + @Override + public int hashCode() { return id; } + + @Override + protected String getDerivedName() { return "summary"; } + + @Override + public String toString() { + return "summary class '" + getName() + "'"; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/SummaryClassField.java b/config-model/src/main/java/com/yahoo/schema/derived/SummaryClassField.java new file mode 100644 index 00000000000..f042054a0b5 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/SummaryClassField.java @@ -0,0 +1,132 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.document.CollectionDataType; +import com.yahoo.document.DataType; +import com.yahoo.document.MapDataType; +import com.yahoo.documentmodel.NewDocumentReferenceDataType; +import com.yahoo.document.datatypes.BoolFieldValue; +import com.yahoo.document.datatypes.ByteFieldValue; +import com.yahoo.document.datatypes.DoubleFieldValue; +import com.yahoo.document.datatypes.FieldValue; +import com.yahoo.document.datatypes.Float16FieldValue; +import com.yahoo.document.datatypes.FloatFieldValue; +import com.yahoo.document.datatypes.IntegerFieldValue; +import com.yahoo.document.datatypes.LongFieldValue; +import com.yahoo.document.datatypes.PredicateFieldValue; +import com.yahoo.document.datatypes.Raw; +import com.yahoo.document.datatypes.StringFieldValue; +import com.yahoo.document.datatypes.Struct; +import com.yahoo.document.datatypes.TensorFieldValue; +import com.yahoo.vespa.documentmodel.SummaryTransform; + +/** + * A summary field derived from a search definition + * + * @author bratseth + */ +public class SummaryClassField { + + private final String name; + + private final Type type; + + /** The summary field type enumeration */ + public enum Type { + + BOOL("bool"), + BYTE("byte"), + SHORT("short"), + 
INTEGER("integer"), + INT64("int64"), + FLOAT16("float16"), + FLOAT("float"), + DOUBLE("double"), + STRING("string"), + DATA("data"), + RAW("raw"), + LONGSTRING("longstring"), + LONGDATA("longdata"), + XMLSTRING("xmlstring"), + FEATUREDATA("featuredata"), + JSONSTRING("jsonstring"), + TENSOR("tensor"); + + private final String name; + + Type(String name) { + this.name = name; + } + + /** Returns the name of this type */ + public String getName() { + return name; + } + + public String toString() { + return "type: " + name; + } + } + + public SummaryClassField(String name, DataType type, SummaryTransform transform, boolean rawAsBase64) { + this.name = name; + this.type = convertDataType(type, transform, rawAsBase64); + } + + public String getName() { return name; } + + public Type getType() { return type; } + + /** Converts to the right summary field type from a field datatype and a transform*/ + public static Type convertDataType(DataType fieldType, SummaryTransform transform, boolean rawAsBase64) { + FieldValue fval = fieldType.createFieldValue(); + if (fval instanceof StringFieldValue) { + if (transform != null && transform.equals(SummaryTransform.RANKFEATURES)) { + return Type.FEATUREDATA; + } else if (transform != null && transform.equals(SummaryTransform.SUMMARYFEATURES)) { + return Type.FEATUREDATA; + } else { + return Type.LONGSTRING; + } + } else if (fval instanceof IntegerFieldValue) { + return Type.INTEGER; + } else if (fval instanceof LongFieldValue) { + return Type.INT64; + } else if (fval instanceof Float16FieldValue) { + return Type.FLOAT16; + } else if (fval instanceof FloatFieldValue) { + return Type.FLOAT; + } else if (fval instanceof DoubleFieldValue) { + return Type.DOUBLE; + } else if (fval instanceof BoolFieldValue) { + return Type.BOOL; + } else if (fval instanceof ByteFieldValue) { + return Type.BYTE; + } else if (fval instanceof Raw) { + return rawAsBase64 ? 
Type.RAW : Type.DATA; + } else if (fval instanceof Struct) { + return Type.JSONSTRING; + } else if (fval instanceof PredicateFieldValue) { + return Type.STRING; + } else if (fval instanceof TensorFieldValue) { + return Type.TENSOR; + } else if (fieldType instanceof CollectionDataType) { + if (transform != null && transform.equals(SummaryTransform.POSITIONS)) { + return Type.XMLSTRING; + } else { + return Type.JSONSTRING; + } + } else if (fieldType instanceof MapDataType) { + return Type.JSONSTRING; + } else if (fieldType instanceof NewDocumentReferenceDataType) { + return Type.LONGSTRING; + } else { + throw new IllegalArgumentException("Don't know which summary type to convert " + fieldType + " to"); + } + } + + public String toString() { + return "summary class field " + name; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/SummaryMap.java b/config-model/src/main/java/com/yahoo/schema/derived/SummaryMap.java new file mode 100644 index 00000000000..df9174a12ed --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/SummaryMap.java @@ -0,0 +1,120 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.vespa.config.search.SummarymapConfig; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; + +import java.util.Collections; +import java.util.Map; + +/** + * A summary map (describing search-time summary field transformations) + * derived from a Schema. 
+ * + * @author bratseth + */ +public class SummaryMap extends Derived implements SummarymapConfig.Producer { + + private final Map<String, FieldResultTransform> resultTransforms = new java.util.LinkedHashMap<>(); + + /** Creates a summary map from a search definition */ + SummaryMap(Schema schema) { + derive(schema); + } + + protected void derive(Schema schema) { + for (DocumentSummary documentSummary : schema.getSummaries().values()) { + derive(documentSummary); + } + super.derive(schema); + } + + @Override + protected void derive(ImmutableSDField field, Schema schema) { + } + + private void derive(DocumentSummary documentSummary) { + for (SummaryField summaryField : documentSummary.getSummaryFields().values()) { + if (summaryField.getTransform()== SummaryTransform.NONE) continue; + + if (summaryField.getTransform()==SummaryTransform.ATTRIBUTE || + summaryField.getTransform()==SummaryTransform.DISTANCE || + summaryField.getTransform()==SummaryTransform.GEOPOS || + summaryField.getTransform()==SummaryTransform.POSITIONS || + summaryField.getTransform()==SummaryTransform.MATCHED_ELEMENTS_FILTER || + summaryField.getTransform()==SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER) + { + resultTransforms.put(summaryField.getName(), new FieldResultTransform(summaryField.getName(), + summaryField.getTransform(), + summaryField.getSingleSource())); + } else { + // Note: Currently source mapping is handled in the indexing statement, + // by creating a summary field for each of the values + // This works, but is suboptimal. 
We could consolidate to a minimal set and + // use the right value from the minimal set as the third parameter here, + // and add "override" commands to multiple static values + resultTransforms.put(summaryField.getName(), new FieldResultTransform(summaryField.getName(), + summaryField.getTransform(), + summaryField.getName())); + } + } + } + + /** Returns a read-only iterator of the FieldResultTransforms of this summary map */ + public Map<String, FieldResultTransform> resultTransforms() { + return Collections.unmodifiableMap(resultTransforms); + } + + protected String getDerivedName() { return "summarymap"; } + + /** Returns the command name of a transform */ + private String getCommand(SummaryTransform transform) { + if (transform == SummaryTransform.DISTANCE) + return "absdist"; + else if (transform.isDynamic()) + return "dynamicteaser"; + else + return transform.getName(); + } + + /** + * Does this summary command name stand for a dynamic transform? + * We need this because some model information is shared through configs instead of model - see usage + * A dynamic transform needs the query to perform its computations. 
+ */ + // TODO/Note: "dynamic" here means something else than in SummaryTransform + public static boolean isDynamicCommand(String commandName) { + return (commandName.equals("dynamicteaser") || + commandName.equals(SummaryTransform.MATCHED_ELEMENTS_FILTER.getName()) || + commandName.equals(SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER.getName())); + } + + @Override + public void getConfig(SummarymapConfig.Builder builder) { + builder.defaultoutputclass(-1); + for (FieldResultTransform frt : resultTransforms.values()) { + SummarymapConfig.Override.Builder oB = new SummarymapConfig.Override.Builder() + .field(frt.getFieldName()) + .command(getCommand(frt.getTransform())); + if (frt.getTransform().isDynamic() || + frt.getTransform().equals(SummaryTransform.ATTRIBUTE) || + frt.getTransform().equals(SummaryTransform.DISTANCE) || + frt.getTransform().equals(SummaryTransform.GEOPOS) || + frt.getTransform().equals(SummaryTransform.POSITIONS) || + frt.getTransform().equals(SummaryTransform.TEXTEXTRACTOR) || + frt.getTransform().equals(SummaryTransform.MATCHED_ELEMENTS_FILTER) || + frt.getTransform().equals(SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER)) + { + oB.arguments(frt.getArgument()); + } else { + oB.arguments(""); + } + builder.override(oB); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/VsmFields.java b/config-model/src/main/java/com/yahoo/schema/derived/VsmFields.java new file mode 100644 index 00000000000..c8679b6166c --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/VsmFields.java @@ -0,0 +1,313 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.derived; + +import com.yahoo.document.CollectionDataType; +import com.yahoo.document.DataType; +import com.yahoo.document.NumericDataType; +import com.yahoo.documentmodel.NewDocumentReferenceDataType; +import com.yahoo.document.datatypes.BoolFieldValue; +import com.yahoo.document.datatypes.FieldValue; +import com.yahoo.document.datatypes.PredicateFieldValue; +import com.yahoo.document.datatypes.Raw; +import com.yahoo.document.datatypes.StringFieldValue; +import com.yahoo.document.datatypes.TensorFieldValue; +import com.yahoo.schema.FieldSets; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.FieldSet; +import com.yahoo.schema.document.GeoPos; +import com.yahoo.schema.document.Matching; +import com.yahoo.schema.document.MatchType; +import com.yahoo.schema.document.SDDocumentType; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.config.search.vsm.VsmfieldsConfig; + +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * Vertical streaming matcher field specification + */ +public class VsmFields extends Derived implements VsmfieldsConfig.Producer { + + private final Map<String, StreamingField> fields=new LinkedHashMap<>(); + private final Map<String, StreamingDocumentType> doctypes=new LinkedHashMap<>(); + + public VsmFields(Schema schema) { + addSearchdefinition(schema); + } + + private void addSearchdefinition(Schema schema) { + derive(schema); + } + + @Override + protected void derive(SDDocumentType document, Schema schema) { + super.derive(document, schema); + StreamingDocumentType docType=getDocumentType(document.getName()); + if (docType == null) { + docType = new StreamingDocumentType(document.getName(), schema.fieldSets()); + doctypes.put(document.getName(), docType); + } + for (Object o : document.fieldSet()) { + derive(docType, (SDField) o); + } + } + + protected void derive(StreamingDocumentType document, SDField field) { + if (field.usesStructOrMap()) { + if (GeoPos.isAnyPos(field)) { 
+ StreamingField streamingField = new StreamingField(field); + addField(streamingField.getName(), streamingField); + addFieldToIndices(document, field.getName(), streamingField); + } + for (SDField structField : field.getStructFields()) { + derive(document, structField); // Recursion + } + } else { + if (! (field.doesIndexing() || field.doesSummarying() || field.doesAttributing()) ) + return; + + StreamingField streamingField = new StreamingField(field); + addField(streamingField.getName(),streamingField); + deriveIndices(document, field, streamingField); + } + } + + private void deriveIndices(StreamingDocumentType document, SDField field, StreamingField streamingField) { + if (field.doesIndexing()) { + addFieldToIndices(document, field.getName(), streamingField); + } else if (field.doesAttributing()) { + for (String indexName : field.getAttributes().keySet()) { + addFieldToIndices(document, indexName, streamingField); + } + } + } + + private void addFieldToIndices(StreamingDocumentType document, String indexName, StreamingField streamingField) { + if (indexName.contains(".")) { + addFieldToIndices(document, indexName.substring(0,indexName.lastIndexOf(".")), streamingField); // Recursion + } + document.addIndexField(indexName, streamingField.getName()); + } + + private void addField(String name, StreamingField field) { + fields.put(name, field); + } + + /** Returns a streaming index, or null if there is none with this name */ + public StreamingDocumentType getDocumentType(String name) { + return doctypes.get(name); + } + + public String getDerivedName() { + return "vsmfields"; + } + + @Override + public void getConfig(VsmfieldsConfig.Builder vsB) { + for (StreamingField streamingField : fields.values()) { + vsB.fieldspec(streamingField.getFieldSpecConfig()); + } + for (StreamingDocumentType streamingDocType : doctypes.values()) { + vsB.documenttype(streamingDocType.getDocTypeConfig()); + } + } + + private static class StreamingField { + + private final String name; 
+ + /** Whether this field does prefix matching by default */ + private final Matching matching; + + /** The type of this field */ + private final Type type; + + private final boolean isAttribute; + + /** The streaming field type enumeration */ + public static class Type { + + public static Type INT8 = new Type("int8","INT8"); + public static Type INT16 = new Type("int16","INT16"); + public static Type INT32 = new Type("int32","INT32"); + public static Type INT64 = new Type("int64","INT64"); + public static Type FLOAT16 = new Type("float16", "FLOAT16"); + public static Type FLOAT = new Type("float","FLOAT"); + public static Type DOUBLE = new Type("double","DOUBLE"); + public static Type STRING = new Type("string","AUTOUTF8"); + public static Type BOOL = new Type("bool","BOOL"); + public static Type UNSEARCHABLESTRING = new Type("string","NONE"); + public static Type GEO_POSITION = new Type("position", "GEOPOS"); + + private String name; + + private String searchMethod; + + private Type(String name, String searchMethod) { + this.name = name; + this.searchMethod = searchMethod; + } + + @Override + public int hashCode() { + return name.hashCode(); + } + + /** Returns the name of this type */ + public String getName() { return name; } + + public String getSearchMethod() { return searchMethod; } + + @Override + public boolean equals(Object other) { + if ( ! 
(other instanceof Type)) return false; + return this.name.equals(((Type)other).name); + } + + @Override + public String toString() { + return "type: " + name; + } + + } + + public StreamingField(SDField field) { + this(field.getName(), field.getDataType(), field.getMatching(), field.doesAttributing()); + } + + private StreamingField(String name, DataType sourceType, Matching matching, boolean isAttribute) { + this.name = name; + this.type = convertType(sourceType); + this.matching = matching; + this.isAttribute = isAttribute; + } + + /** Converts to the right index type from a field datatype */ + private static Type convertType(DataType fieldType) { + FieldValue fval = fieldType.createFieldValue(); + if (fieldType.equals(DataType.FLOAT16)) { + return Type.FLOAT16; + } else if (fieldType.equals(DataType.FLOAT)) { + return Type.FLOAT; + } else if (fieldType.equals(DataType.LONG)) { + return Type.INT64; + } else if (fieldType.equals(DataType.DOUBLE)) { + return Type.DOUBLE; + } else if (fieldType.equals(DataType.BOOL)) { + return Type.BOOL; + } else if (fieldType.equals(DataType.BYTE)) { + return Type.INT8; + } else if (GeoPos.isAnyPos(fieldType)) { + return Type.GEO_POSITION; + } else if (fieldType instanceof NumericDataType) { + return Type.INT32; + } else if (fval instanceof StringFieldValue) { + return Type.STRING; + } else if (fval instanceof BoolFieldValue) { + return Type.BOOL; + } else if (fval instanceof Raw) { + return Type.STRING; + } else if (fval instanceof PredicateFieldValue) { + return Type.UNSEARCHABLESTRING; + } else if (fval instanceof TensorFieldValue) { + return Type.UNSEARCHABLESTRING; + } else if (fieldType instanceof CollectionDataType) { + return convertType(((CollectionDataType) fieldType).getNestedType()); + } else if (fieldType instanceof NewDocumentReferenceDataType) { + return Type.UNSEARCHABLESTRING; + } else { + throw new IllegalArgumentException("Don't know which streaming field type to convert " + + fieldType + " to"); + } + } + + 
public String getName() { return name; } + + public VsmfieldsConfig.Fieldspec.Builder getFieldSpecConfig() { + VsmfieldsConfig.Fieldspec.Builder fB = new VsmfieldsConfig.Fieldspec.Builder(); + String matchingName = matching.getType().getName(); + if (matching.getType().equals(MatchType.TEXT)) + matchingName = ""; + if (matching.getType() != MatchType.EXACT) { + if (matching.isPrefix()) { + matchingName = "prefix"; + } else if (matching.isSubstring()) { + matchingName = "substring"; + } else if (matching.isSuffix()) { + matchingName = "suffix"; + } + } + if (type != Type.STRING) { + matchingName = ""; + } + fB.name(getName()) + .searchmethod(VsmfieldsConfig.Fieldspec.Searchmethod.Enum.valueOf(type.getSearchMethod())) + .arg1(matchingName) + .fieldtype(isAttribute + ? VsmfieldsConfig.Fieldspec.Fieldtype.ATTRIBUTE + : VsmfieldsConfig.Fieldspec.Fieldtype.INDEX); + if (matching.maxLength() != null) { + fB.maxlength(matching.maxLength()); + } + return fB; + } + + @Override + public boolean equals(Object o) { + if (o.getClass().equals(getClass())) { + StreamingField sf = (StreamingField)o; + return name.equals(sf.name) && + matching.equals(sf.matching) && + type.equals(sf.type); + } + return false; + } + + @Override public int hashCode() { + return java.util.Objects.hash(name, matching, type); + } + + } + + private static class StreamingDocumentType { + + private final String name; + private final Map<String, FieldSet> fieldSets = new LinkedHashMap<>(); + private final Map<String, FieldSet> userFieldSets; + + public StreamingDocumentType(String name, FieldSets fieldSets) { + this.name=name; + userFieldSets = fieldSets.userFieldSets(); + } + + public VsmfieldsConfig.Documenttype.Builder getDocTypeConfig() { + VsmfieldsConfig.Documenttype.Builder dtB = new VsmfieldsConfig.Documenttype.Builder(); + dtB.name(name); + Map<String, FieldSet> all = new LinkedHashMap<>(); + all.putAll(fieldSets); + all.putAll(userFieldSets); + for (Map.Entry<String, FieldSet> e : all.entrySet()) { 
+ VsmfieldsConfig.Documenttype.Index.Builder indB = new VsmfieldsConfig.Documenttype.Index.Builder(); + indB.name(e.getValue().getName()); + for (String field : e.getValue().getFieldNames()) { + indB.field(new VsmfieldsConfig.Documenttype.Index.Field.Builder().name(field)); + } + dtB.index(indB); + } + return dtB; + } + + public String getName() { return name; } + + public void addIndexField(String indexName, String fieldName) { + FieldSet fs = fieldSets.get(indexName); + if (fs == null) { + fs = new FieldSet(indexName); + fieldSets.put(indexName, fs); + } + fs.addFieldName(fieldName); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/VsmSummary.java b/config-model/src/main/java/com/yahoo/schema/derived/VsmSummary.java new file mode 100644 index 00000000000..30ae9c97268 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/VsmSummary.java @@ -0,0 +1,109 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.derived; + +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.GeoPos; +import com.yahoo.schema.document.SDDocumentType; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.config.search.vsm.VsmsummaryConfig; + +import java.util.*; + +/** + * Vertical streaming matcher summary specification + * + * @author bratseth + */ +public class VsmSummary extends Derived implements VsmsummaryConfig.Producer { + + private final Map<SummaryField, List<String>> summaryMap = new java.util.LinkedHashMap<>(1); + + public VsmSummary(Schema schema) { + derive(schema); + } + + @Override + protected void derive(Schema schema) { + // Use the default class, as it is the superset + derive(schema, schema.getSummary("default")); + } + + private void derive(Schema schema, DocumentSummary documentSummary) { + if (documentSummary == null) return; + for (SummaryField summaryField : documentSummary.getSummaryFields().values()) { + List<String> from = toStringList(summaryField.sourceIterator()); + + if (doMapField(schema, summaryField)) { + SDField sdField = schema.getConcreteField(summaryField.getName()); + if (sdField != null && GeoPos.isAnyPos(sdField)) { + summaryMap.put(summaryField, Collections.singletonList(summaryField.getName())); + } else { + summaryMap.put(summaryField, from); + } + } + } + } + + /** + * Don't include field in map if sources are the same as the struct sub fields for the SDField. + * But do map if not all do summarying. + * Don't map if not struct either. 
+ * @param summaryField a {@link SummaryField} + */ + private boolean doMapField(Schema schema, SummaryField summaryField) { + SDField sdField = schema.getConcreteField(summaryField.getName()); + SDDocumentType document = schema.getDocument(); + if (sdField==null || ((document != null) && (document.getField(summaryField.getName()) == sdField))) { + return true; + } + if (summaryField.getVsmCommand().equals(SummaryField.VsmCommand.FLATTENJUNIPER)) { + return true; + } + if (!sdField.usesStructOrMap()) { + return !(sdField.getName().equals(summaryField.getName())); + } + if (summaryField.getSourceCount()==sdField.getStructFields().size()) { + for (SummaryField.Source source : summaryField.getSources()) { + if (!sdField.getStructFields().contains(new SDField(schema.getDocument(), source.getName(), sdField.getDataType()))) { // equals() uses just name + return true; + } + if (sdField.getStructField(source.getName())!=null && !sdField.getStructField(source.getName()).doesSummarying()) { + return true; + } + } + // The sources in the summary field are the same as the sub-fields in the SD field. + // All sub fields do summarying. + // Don't map. 
+ return false; + } + return true; + } + + private List<String> toStringList(Iterator<SummaryField.Source> i) { + List<String> ret = new ArrayList<>(); + while (i.hasNext()) { + ret.add(i.next().getName()); + } + return ret; + } + + @Override + public String getDerivedName() { + return "vsmsummary"; + } + + @Override + public void getConfig(VsmsummaryConfig.Builder vB) { + for (Map.Entry<SummaryField, List<String>> entry : summaryMap.entrySet()) { + VsmsummaryConfig.Fieldmap.Builder fmB = new VsmsummaryConfig.Fieldmap.Builder().summary(entry.getKey().getName()); + for (String field : entry.getValue()) { + fmB.document(new VsmsummaryConfig.Fieldmap.Document.Builder().field(field)); + } + fmB.command(VsmsummaryConfig.Fieldmap.Command.Enum.valueOf(entry.getKey().getVsmCommand().toString())); + vB.fieldmap(fmB); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/package-info.java b/config-model/src/main/java/com/yahoo/schema/derived/package-info.java new file mode 100644 index 00000000000..370617ac6cc --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/package-info.java @@ -0,0 +1,5 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +@ExportPackage +package com.yahoo.schema.derived; + +import com.yahoo.osgi.annotation.ExportPackage; diff --git a/config-model/src/main/java/com/yahoo/schema/derived/validation/IndexStructureValidator.java b/config-model/src/main/java/com/yahoo/schema/derived/validation/IndexStructureValidator.java new file mode 100644 index 00000000000..512d9f742bf --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/validation/IndexStructureValidator.java @@ -0,0 +1,50 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.derived.validation; + +import com.yahoo.schema.document.SDDocumentType; +import com.yahoo.schema.Schema; +import com.yahoo.schema.derived.DerivedConfiguration; +import com.yahoo.schema.derived.IndexingScript; +import com.yahoo.vespa.indexinglanguage.ExpressionVisitor; +import com.yahoo.vespa.indexinglanguage.expressions.Expression; +import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression; + +/** + * @author Mathias M Lidal + */ +public class IndexStructureValidator extends Validator { + + public IndexStructureValidator(DerivedConfiguration config, Schema schema) { + super(config, schema); + } + + public void validate() { + IndexingScript script = config.getIndexingScript(); + for (Expression exp : script.expressions()) { + new OutputVisitor(schema.getDocument(), exp).visit(exp); + } + } + + private static class OutputVisitor extends ExpressionVisitor { + + final SDDocumentType docType; + final Expression exp; + + public OutputVisitor(SDDocumentType docType, Expression exp) { + this.docType = docType; + this.exp = exp; + } + + @Override + protected void doVisit(Expression exp) { + if (!(exp instanceof OutputExpression)) return; + + String fieldName = ((OutputExpression)exp).getFieldName(); + if (docType.getField(fieldName) != null) return; + + throw new IllegalArgumentException("Indexing expression '" + this.exp + "' refers to field '" + + fieldName + "' which does not exist in the index structure."); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/validation/Validation.java b/config-model/src/main/java/com/yahoo/schema/derived/validation/Validation.java new file mode 100644 index 00000000000..dba4dce49f0 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/validation/Validation.java @@ -0,0 +1,12 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.derived.validation; + +import com.yahoo.schema.Schema; +import com.yahoo.schema.derived.DerivedConfiguration; + +public class Validation { + + public static void validate(DerivedConfiguration config, Schema schema) { + new IndexStructureValidator(config, schema).validate(); + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/validation/Validator.java b/config-model/src/main/java/com/yahoo/schema/derived/validation/Validator.java new file mode 100644 index 00000000000..bf0f007841c --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/validation/Validator.java @@ -0,0 +1,22 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived.validation; + +import com.yahoo.schema.Schema; +import com.yahoo.schema.derived.DerivedConfiguration; + +/** + * @author mathiasm + */ +public abstract class Validator { + + protected DerivedConfiguration config; + protected Schema schema; + + protected Validator(DerivedConfiguration config, Schema schema) { + this.config = config; + this.schema = schema; + } + + public abstract void validate(); + +} |