diff options
author | Jon Bratseth <bratseth@gmail.com> | 2022-05-19 12:03:06 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2022-05-19 12:03:06 +0200 |
commit | 5c24dc5c9642a8d9ed70aee4c950fd0678a1ebec (patch) | |
tree | bd9b74bf00c832456f0b83c1b2cd7010be387d68 /config-model/src/main/java/com/yahoo/schema | |
parent | f17c4fe7de4c55f5c4ee61897eab8c2f588d8405 (diff) |
Rename the 'searchdefinition' package to 'schema'
Diffstat (limited to 'config-model/src/main/java/com/yahoo/schema')
225 files changed, 23969 insertions, 0 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/Application.java b/config-model/src/main/java/com/yahoo/schema/Application.java new file mode 100644 index 00000000000..aa47818ff99 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/Application.java @@ -0,0 +1,108 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema;

import com.yahoo.config.application.api.ApplicationPackage;
import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.config.model.api.ModelContext;
import com.yahoo.schema.derived.SearchOrderer;
import com.yahoo.schema.document.SDDocumentType;
import com.yahoo.schema.processing.Processing;
import com.yahoo.schema.processing.Processor;
import com.yahoo.vespa.documentmodel.DocumentModel;
import com.yahoo.vespa.model.container.search.QueryProfiles;

import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * The schemas and the derived document model making up the content of one application package.
 * All processing happens in the constructor: schemas are indexed by name, optionally validated,
 * their document types are ordered and resolved, and finally the document model is built.
 *
 * @author bratseth
 */
public class Application {

    private final ApplicationPackage applicationPackage;
    private final Map<String, Schema> schemas;
    private final DocumentModel documentModel;

    /**
     * Creates an application from the given schemas and processes them.
     *
     * @param applicationPackage the package these schemas belong to
     * @param schemas the schemas of the application, each with a distinct name
     * @param rankProfileRegistry passed on to schema processing
     * @param queryProfiles passed on to schema processing
     * @param properties passed on to schema processing
     * @param documentsOnly passed on to schema processing
     * @param validate whether to validate the schemas and the document graph
     * @param processorsToSkip passed on to schema processing
     * @param logger receives messages produced while validating and processing
     * @throws IllegalArgumentException if two schemas have the same name
     */
    public Application(ApplicationPackage applicationPackage,
                       List<Schema> schemas,
                       RankProfileRegistry rankProfileRegistry,
                       QueryProfiles queryProfiles,
                       ModelContext.Properties properties,
                       boolean documentsOnly,
                       boolean validate,
                       Set<Class<? extends Processor>> processorsToSkip,
                       DeployLogger logger) {
        this.applicationPackage = applicationPackage;
        this.schemas = Collections.unmodifiableMap(indexByName(schemas));

        for (Schema schema : schemas)
            schema.setOwner(this);
        if (validate) {
            for (Schema schema : schemas)
                schema.validate(logger);
        }

        new TemporarySDTypeResolver(schemas, logger).process();

        // The built-in base document type always comes first
        List<SDDocumentType> documentTypes = new ArrayList<>();
        documentTypes.add(SDDocumentType.VESPA_DOCUMENT);
        schemas.stream()
               .filter(Schema::hasDocument)
               .map(Schema::getDocument)
               .forEach(documentTypes::add);

        var typeOrderer = new SDDocumentTypeOrderer(documentTypes, logger);
        typeOrderer.process();
        for (SDDocumentType orderedType : typeOrderer.getOrdered()) {
            new FieldOperationApplierForStructs().process(orderedType);
            new FieldOperationApplier().process(orderedType);
        }

        // Each resolution pass is completed for all types before the next pass starts
        var referenceResolver = new DocumentReferenceResolver(schemas);
        for (SDDocumentType type : documentTypes)
            referenceResolver.resolveReferences(type);
        for (SDDocumentType type : documentTypes)
            referenceResolver.resolveInheritedReferences(type);
        var importedFieldsEnumerator = new ImportedFieldsEnumerator(schemas);
        for (SDDocumentType type : documentTypes)
            importedFieldsEnumerator.enumerateImportedFields(type);

        if (validate)
            new DocumentGraphValidator().validateDocumentGraph(documentTypes);

        List<Schema> schemasSomewhatOrdered = new ArrayList<>(schemas);
        for (Schema orderedSchema : new SearchOrderer().order(schemasSomewhatOrdered)) {
            new FieldOperationApplierForSearch().process(orderedSchema); // TODO: Why is this not in the regular list?
            new Processing(properties).process(orderedSchema,
                                               logger,
                                               rankProfileRegistry,
                                               queryProfiles,
                                               validate,
                                               documentsOnly,
                                               processorsToSkip);
        }

        this.documentModel = new DocumentModelBuilder().build(schemasSomewhatOrdered);
    }

    /** Returns the given schemas keyed by name in their given order, rejecting duplicate names. */
    private Map<String, Schema> indexByName(List<Schema> schemaList) {
        Map<String, Schema> byName = new LinkedHashMap<>();
        for (Schema schema : schemaList) {
            if (byName.putIfAbsent(schema.getName(), schema) != null)
                throw new IllegalArgumentException("Duplicate schema '" + schema.getName() + "' in " + this);
        }
        return byName;
    }

    public ApplicationPackage applicationPackage() { return applicationPackage; }

    /** Returns an unmodifiable map of the schemas of this application, keyed by schema name */
    public Map<String, Schema> schemas() { return schemas; }

    public DocumentModel documentModel() { return documentModel; }

    @Override
    public String toString() { return "application " + applicationPackage.getApplicationId(); }

}
diff --git a/config-model/src/main/java/com/yahoo/schema/ApplicationBuilder.java b/config-model/src/main/java/com/yahoo/schema/ApplicationBuilder.java new file mode 100644 index 00000000000..d2c7035bac6 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/ApplicationBuilder.java @@ -0,0 +1,513 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema; + +import com.yahoo.config.application.api.ApplicationPackage; +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.config.application.api.FileRegistry; +import com.yahoo.config.model.api.ModelContext; +import com.yahoo.config.model.application.provider.BaseDeployLogger; +import com.yahoo.config.model.application.provider.MockFileRegistry; +import com.yahoo.config.model.deploy.TestProperties; +import com.yahoo.config.model.test.MockApplicationPackage; +import com.yahoo.document.DocumentTypeManager; +import com.yahoo.io.reader.NamedReader; +import com.yahoo.path.Path; +import com.yahoo.search.query.profile.QueryProfileRegistry; +import com.yahoo.search.query.profile.config.QueryProfileXMLReader; +import com.yahoo.schema.parser.ConvertSchemaCollection; +import com.yahoo.schema.parser.IntermediateCollection; +import com.yahoo.schema.parser.ParseException; +import com.yahoo.schema.processing.Processor; +import com.yahoo.vespa.documentmodel.DocumentModel; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Application builder. 
Usage: + * 1) Add all schemas, using the addXXX() methods, + * 2) provide the available rank types and rank expressions, using the setRankXXX() methods, + * 3) invoke the {@link #build} method + * + * @author bratseth + */ +public class ApplicationBuilder { + + private final IntermediateCollection mediator; + private final ApplicationPackage applicationPackage; + private final List<Schema> schemas = new ArrayList<>(); + private final DocumentTypeManager documentTypeManager = new DocumentTypeManager(); + private final RankProfileRegistry rankProfileRegistry; + private final QueryProfileRegistry queryProfileRegistry; + private final FileRegistry fileRegistry; + private final DeployLogger deployLogger; + private final ModelContext.Properties properties; + /** True to build the document aspect only, skipping instantiation of rank profiles */ + private final boolean documentsOnly; + + private Application application; + + private final Set<Class<? extends Processor>> processorsToSkip = new HashSet<>(); + + /** For testing only */ + public ApplicationBuilder() { + this(new RankProfileRegistry(), new QueryProfileRegistry()); + } + + /** For testing only */ + public ApplicationBuilder(DeployLogger deployLogger) { + this(MockApplicationPackage.createEmpty(), deployLogger); + } + + /** For testing only */ + public ApplicationBuilder(DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry) { + this(MockApplicationPackage.createEmpty(), deployLogger, rankProfileRegistry); + } + + /** Used for generating documents for typed access to document fields in Java */ + public ApplicationBuilder(boolean documentsOnly) { + this(MockApplicationPackage.createEmpty(), new MockFileRegistry(), new BaseDeployLogger(), new TestProperties(), new RankProfileRegistry(), new QueryProfileRegistry(), documentsOnly); + } + + /** For testing only */ + public ApplicationBuilder(ApplicationPackage app, DeployLogger deployLogger) { + this(app, new MockFileRegistry(), deployLogger, new 
TestProperties(), new RankProfileRegistry(), new QueryProfileRegistry()); + } + + /** For testing only */ + public ApplicationBuilder(ApplicationPackage app, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry) { + this(app, new MockFileRegistry(), deployLogger, new TestProperties(), rankProfileRegistry, new QueryProfileRegistry()); + } + + /** For testing only */ + public ApplicationBuilder(RankProfileRegistry rankProfileRegistry) { + this(rankProfileRegistry, new QueryProfileRegistry()); + } + + /** For testing only */ + public ApplicationBuilder(RankProfileRegistry rankProfileRegistry, QueryProfileRegistry queryProfileRegistry) { + this(rankProfileRegistry, queryProfileRegistry, new TestProperties()); + } + + /** For testing only */ + public ApplicationBuilder(ModelContext.Properties properties) { + this(new RankProfileRegistry(), new QueryProfileRegistry(), properties); + } + + /** For testing only */ + public ApplicationBuilder(RankProfileRegistry rankProfileRegistry, QueryProfileRegistry queryProfileRegistry, ModelContext.Properties properties) { + this(MockApplicationPackage.createEmpty(), new MockFileRegistry(), new BaseDeployLogger(), properties, rankProfileRegistry, queryProfileRegistry); + } + + /** normal constructor */ + public ApplicationBuilder(ApplicationPackage app, + FileRegistry fileRegistry, + DeployLogger deployLogger, + ModelContext.Properties properties, + RankProfileRegistry rankProfileRegistry, + QueryProfileRegistry queryProfileRegistry) { + this(app, fileRegistry, deployLogger, properties, rankProfileRegistry, queryProfileRegistry, false); + } + + private ApplicationBuilder(ApplicationPackage applicationPackage, + FileRegistry fileRegistry, + DeployLogger deployLogger, + ModelContext.Properties properties, + RankProfileRegistry rankProfileRegistry, + QueryProfileRegistry queryProfileRegistry, + boolean documentsOnly) { + this.mediator = new IntermediateCollection(deployLogger, properties); + this.applicationPackage = 
applicationPackage; + this.rankProfileRegistry = rankProfileRegistry; + this.queryProfileRegistry = queryProfileRegistry; + this.fileRegistry = fileRegistry; + this.deployLogger = deployLogger; + this.properties = properties; + this.documentsOnly = documentsOnly; + for (NamedReader reader : applicationPackage.getSchemas()) + addSchema(reader); + } + + /** + * Adds a schema to this application. + * + * @param fileName the name of the file to import + * @throws IOException thrown if the file can not be read for some reason + * @throws ParseException thrown if the file does not contain a valid search definition + */ + public void addSchemaFile(String fileName) throws IOException, ParseException { + var parsedName = mediator.addSchemaFromFile(fileName); + addRankProfileFiles(parsedName); + } + + /** + * Reads and parses the schema string provided by the given reader. Once all schemas have been + * imported, call {@link #build}. + * + * @param reader the reader whose content to import + */ + public void addSchema(NamedReader reader) { + try { + var parsedName = mediator.addSchemaFromReader(reader); + addRankProfileFiles(parsedName); + } catch (ParseException e) { + throw new IllegalArgumentException("Could not parse schema file '" + reader.getName() + "'", e); + } + } + + /** + * Adds a schema to this + * + * @param schemaString the content of the schema + */ + public void addSchema(String schemaString) throws ParseException { + var parsed = mediator.addSchemaFromString(schemaString); + addRankProfileFiles(parsed.name()); + } + + /** + * Registers the given schema to the application to be built during {@link #build}. A + * {@link Schema} object is considered to be "raw" if it has not already been processed. This is the case for most + * programmatically constructed schemas used in unit tests. 
+ * + * @param schema the object to import + * @throws IllegalArgumentException if the given search object has already been processed + */ + public Schema add(Schema schema) { + if (schema.getName() == null) + throw new IllegalArgumentException("Schema has no name"); + schemas.add(schema); + return schema; + } + + private void addRankProfileFiles(String schemaName) throws ParseException { + if (applicationPackage == null) return; + + Path legacyRankProfilePath = ApplicationPackage.SEARCH_DEFINITIONS_DIR.append(schemaName); + for (NamedReader reader : applicationPackage.getFiles(legacyRankProfilePath, ".profile")) { + mediator.addRankProfileFile(schemaName, reader); + } + + Path rankProfilePath = ApplicationPackage.SCHEMAS_DIR.append(schemaName); + for (NamedReader reader : applicationPackage.getFiles(rankProfilePath, ".profile", true)) { + System.out.println("Got " + reader); + mediator.addRankProfileFile(schemaName, reader); + } + } + + /** + * Processes and finalizes the schemas of this. + * + * @throws IllegalStateException thrown if this method has already been called + */ + public Application build(boolean validate) { + if (application != null) throw new IllegalStateException("Application already built"); + var converter = new ConvertSchemaCollection(mediator, + documentTypeManager, + applicationPackage, + fileRegistry, + deployLogger, + properties, + rankProfileRegistry, + documentsOnly); + for (var schema : converter.convertToSchemas()) + add(schema); + application = new Application(applicationPackage, + schemas, + rankProfileRegistry, + new QueryProfiles(queryProfileRegistry, deployLogger), + properties, + documentsOnly, + validate, + processorsToSkip, + deployLogger); + return application; + } + + /** Returns a modifiable set of processors we should skip for these schemas. Useful for testing. */ + public Set<Class<? 
extends Processor>> processorsToSkip() { return processorsToSkip; } + + /** + * Convenience method to call {@link #getSchema(String)} when there is only a single {@link Schema} object + * built. This method will never return null. + * + * @return the built object + * @throws IllegalStateException if there is not exactly one search. + */ + public Schema getSchema() { + if (application == null) throw new IllegalStateException("Application not built"); + if (application.schemas().size() != 1) + throw new IllegalStateException("This call only works if we have 1 schema. Schemas: " + + application.schemas().values()); + + return application.schemas().values().stream().findAny().get(); + } + + public DocumentModel getModel() { return application.documentModel(); } + + /** + * Returns the built {@link Schema} object that has the given name. If the name is unknown, this method will simply + * return null. + * + * @param name the name of the schema to return, + * or null to return the only one or throw an exception if there are multiple to choose from + * @return the built object, or null if none with this name + * @throws IllegalStateException if {@link #build} has not been called. + */ + public Schema getSchema(String name) { + if (application == null) throw new IllegalStateException("Application not built"); + if (name == null) return getSchema(); + return application.schemas().get(name); + } + + public Application application() { return application; } + + /** + * Convenience method to return a list of all built {@link Schema} objects. + * + * @return the list of built searches + */ + public List<Schema> getSchemaList() { + return new ArrayList<>(application.schemas().values()); + } + + /** + * Convenience factory method to import and build a {@link Schema} object from a string. 
+ * + * @param sd the string to build from + * @return the built {@link ApplicationBuilder} object + * @throws ParseException thrown if there is a problem parsing the string + */ + public static ApplicationBuilder createFromString(String sd) throws ParseException { + return createFromString(sd, new BaseDeployLogger()); + } + + public static ApplicationBuilder createFromString(String sd, DeployLogger logger) throws ParseException { + ApplicationBuilder builder = new ApplicationBuilder(logger); + builder.addSchema(sd); + builder.build(true); + return builder; + } + + public static ApplicationBuilder createFromStrings(DeployLogger logger, String ... schemas) throws ParseException { + ApplicationBuilder builder = new ApplicationBuilder(logger); + for (var schema : schemas) + builder.addSchema(schema); + builder.build(true); + return builder; + } + + /** + * Convenience factory method to import and build a {@link Schema} object from a file. Only for testing. + * + * @param fileName the file to build from + * @return the built {@link ApplicationBuilder} object + * @throws IOException if there was a problem reading the file. + * @throws ParseException if there was a problem parsing the file content. + */ + public static ApplicationBuilder createFromFile(String fileName) throws IOException, ParseException { + return createFromFile(fileName, new BaseDeployLogger()); + } + + /** + * Convenience factory methods to create a SearchBuilder from multiple SD files. Only for testing. 
+ */ + public static ApplicationBuilder createFromFiles(Collection<String> fileNames) throws IOException, ParseException { + return createFromFiles(fileNames, new BaseDeployLogger()); + } + + public static ApplicationBuilder createFromFile(String fileName, DeployLogger logger) throws IOException, ParseException { + return createFromFile(fileName, logger, new RankProfileRegistry(), new QueryProfileRegistry()); + } + + private static ApplicationBuilder createFromFiles(Collection<String> fileNames, DeployLogger logger) throws IOException, ParseException { + return createFromFiles(fileNames, new MockFileRegistry(), logger, new TestProperties(), new RankProfileRegistry(), new QueryProfileRegistry()); + } + + /** + * Convenience factory method to import and build a {@link Schema} object from a file. + * + * @param fileName the file to build from. + * @param deployLogger logger for deploy messages. + * @param rankProfileRegistry registry for rank profiles. + * @return the built {@link ApplicationBuilder} object. + * @throws IOException if there was a problem reading the file. + * @throws ParseException if there was a problem parsing the file content. + */ + private static ApplicationBuilder createFromFile(String fileName, + DeployLogger deployLogger, + RankProfileRegistry rankProfileRegistry, + QueryProfileRegistry queryprofileRegistry) + throws IOException, ParseException { + return createFromFiles(Collections.singletonList(fileName), new MockFileRegistry(), deployLogger, new TestProperties(), + rankProfileRegistry, queryprofileRegistry); + } + + /** + * Convenience factory methdd to create a SearchBuilder from multiple SD files.. 
+ */ + private static ApplicationBuilder createFromFiles(Collection<String> fileNames, + FileRegistry fileRegistry, + DeployLogger deployLogger, + ModelContext.Properties properties, + RankProfileRegistry rankProfileRegistry, + QueryProfileRegistry queryprofileRegistry) + throws IOException, ParseException { + ApplicationBuilder builder = new ApplicationBuilder(MockApplicationPackage.createEmpty(), + fileRegistry, + deployLogger, + properties, + rankProfileRegistry, + queryprofileRegistry); + for (String fileName : fileNames) { + builder.addSchemaFile(fileName); + } + builder.build(true); + return builder; + } + + + public static ApplicationBuilder createFromDirectory(String dir, FileRegistry fileRegistry, DeployLogger logger, ModelContext.Properties properties) throws IOException, ParseException { + return createFromDirectory(dir, fileRegistry, logger, properties, new RankProfileRegistry()); + } + public static ApplicationBuilder createFromDirectory(String dir, + FileRegistry fileRegistry, + DeployLogger logger, + ModelContext.Properties properties, + RankProfileRegistry rankProfileRegistry) throws IOException, ParseException { + return createFromDirectory(dir, fileRegistry, logger, properties, rankProfileRegistry, createQueryProfileRegistryFromDirectory(dir)); + } + private static ApplicationBuilder createFromDirectory(String dir, + FileRegistry fileRegistry, + DeployLogger logger, + ModelContext.Properties properties, + RankProfileRegistry rankProfileRegistry, + QueryProfileRegistry queryProfileRegistry) throws IOException, ParseException { + return createFromDirectory(dir, MockApplicationPackage.fromSearchDefinitionAndRootDirectory(dir), fileRegistry, logger, properties, + rankProfileRegistry, queryProfileRegistry); + } + + private static ApplicationBuilder createFromDirectory(String dir, + ApplicationPackage applicationPackage, + FileRegistry fileRegistry, + DeployLogger deployLogger, + ModelContext.Properties properties, + RankProfileRegistry 
rankProfileRegistry, + QueryProfileRegistry queryProfileRegistry) throws IOException, ParseException { + ApplicationBuilder builder = new ApplicationBuilder(applicationPackage, + fileRegistry, + deployLogger, + properties, + rankProfileRegistry, + queryProfileRegistry); + for (var i = Files.list(new File(dir).toPath()).filter(p -> p.getFileName().toString().endsWith(".sd")).iterator(); i.hasNext(); ) { + builder.addSchemaFile(i.next().toString()); + } + builder.build(true); + return builder; + } + + private static QueryProfileRegistry createQueryProfileRegistryFromDirectory(String dir) { + File queryProfilesDir = new File(dir, "query-profiles"); + if ( ! queryProfilesDir.exists()) return new QueryProfileRegistry(); + return new QueryProfileXMLReader().read(queryProfilesDir.toString()); + } + + // TODO: The build methods below just call the create methods above - remove + + /** + * Convenience factory method to import and build a {@link Schema} object from a file. Only for testing. + * + * @param fileName the file to build from + * @return the built {@link Schema} object + * @throws IOException thrown if there was a problem reading the file + * @throws ParseException thrown if there was a problem parsing the file content + */ + public static Schema buildFromFile(String fileName) throws IOException, ParseException { + return buildFromFile(fileName, new BaseDeployLogger(), new RankProfileRegistry(), new QueryProfileRegistry()); + } + + /** + * Convenience factory method to import and build a {@link Schema} object from a file. 
+ * + * @param fileName the file to build from + * @param rankProfileRegistry registry for rank profiles + * @return the built {@link Schema} object + * @throws IOException thrown if there was a problem reading the file + * @throws ParseException thrown if there was a problem parsing the file content + */ + public static Schema buildFromFile(String fileName, + RankProfileRegistry rankProfileRegistry, + QueryProfileRegistry queryProfileRegistry) + throws IOException, ParseException { + return buildFromFile(fileName, new BaseDeployLogger(), rankProfileRegistry, queryProfileRegistry); + } + + /** + * Convenience factory method to import and build a {@link Schema} from a file. + * + * @param fileName the file to build from + * @param deployLogger logger for deploy messages + * @param rankProfileRegistry registry for rank profiles + * @return the built {@link Schema} object + * @throws IOException thrown if there was a problem reading the file + * @throws ParseException thrown if there was a problem parsing the file content + */ + public static Schema buildFromFile(String fileName, + DeployLogger deployLogger, + RankProfileRegistry rankProfileRegistry, + QueryProfileRegistry queryProfileRegistry) + throws IOException, ParseException { + return createFromFile(fileName, deployLogger, rankProfileRegistry, queryProfileRegistry).getSchema(); + } + + /** + * Convenience factory method to import and build a {@link Schema} object from a raw object. 
+ * + * @param rawSchema the raw object to build from + * @return the built {@link ApplicationBuilder} object + * @see #add(Schema) + */ + public static ApplicationBuilder createFromRawSchema(Schema rawSchema, + RankProfileRegistry rankProfileRegistry, + QueryProfileRegistry queryProfileRegistry) { + ApplicationBuilder builder = new ApplicationBuilder(rankProfileRegistry, queryProfileRegistry); + builder.add(rawSchema); + builder.build(true); + return builder; + } + + /** + * Convenience factory method to import and build a {@link Schema} object from a raw object. + * + * @param rawSchema the raw object to build from + * @return the built {@link Schema} object + * @see #add(Schema) + */ + public static Schema buildFromRawSchema(Schema rawSchema, + RankProfileRegistry rankProfileRegistry, + QueryProfileRegistry queryProfileRegistry) { + return createFromRawSchema(rawSchema, rankProfileRegistry, queryProfileRegistry).getSchema(); + } + + public RankProfileRegistry getRankProfileRegistry() { + return rankProfileRegistry; + } + + public QueryProfileRegistry getQueryProfileRegistry() { + return queryProfileRegistry; + } + + public ModelContext.Properties getProperties() { return properties; } + + public DeployLogger getDeployLogger() { return deployLogger; } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/DefaultRankProfile.java b/config-model/src/main/java/com/yahoo/schema/DefaultRankProfile.java new file mode 100644 index 00000000000..9ab03b8c4a1 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/DefaultRankProfile.java @@ -0,0 +1,128 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema; + +import com.yahoo.schema.document.ImmutableSDField; + +import java.util.LinkedHashSet; +import java.util.Set; + +/** + * The rank profile containing default settings. This is derived from the fields + * whenever this is accessed. 
+ * + * @author bratseth + */ +public class DefaultRankProfile extends RankProfile { + + /** + * Creates a new rank profile + * + * @param rankProfileRegistry the {@link com.yahoo.schema.RankProfileRegistry} + * to use for storing and looking up rank profiles + */ + public DefaultRankProfile(Schema schema, RankProfileRegistry rankProfileRegistry) { + super("default", schema, rankProfileRegistry); + } + + /** Ignore self inheriting of default as some applications may use that for historical reasons. */ + public void inherit(String inheritedName) { + if (inheritedName.equals("default")) return; + super.inherit(inheritedName); + } + + @Override + public RankSetting getRankSetting(String fieldOrIndex, RankSetting.Type type) { + RankSetting setting = super.getRankSetting(fieldOrIndex, type); + if (setting != null) return setting; + + ImmutableSDField field = schema().getConcreteField(fieldOrIndex); + if (field != null) { + setting = toRankSetting(field, type); + if (setting != null) + return setting; + } + + Index index = schema().getIndex(fieldOrIndex); + if (index != null) { + setting = toRankSetting(index, type); + if (setting != null) + return setting; + } + + return null; + } + + private RankSetting toRankSetting(ImmutableSDField field, RankSetting.Type type) { + if (type.equals(RankSetting.Type.WEIGHT) && field.getWeight() > 0 && field.getWeight() != 100) + return new RankSetting(field.getName(), type, field.getWeight()); + if (type.equals(RankSetting.Type.RANKTYPE)) + return new RankSetting(field.getName(), type, field.getRankType()); + if (type.equals(RankSetting.Type.LITERALBOOST) && field.getLiteralBoost() > 0) + return new RankSetting(field.getName(), type, field.getLiteralBoost()); + + // Index level setting really + if (type.equals(RankSetting.Type.PREFERBITVECTOR) && field.getRanking().isFilter()) { + return new RankSetting(field.getName(), type, true); + } + + return null; + } + + private RankSetting toRankSetting(Index index, RankSetting.Type type) { + 
/* TODO: Add support for indexes by adding a ranking object to the index + if (type.equals(RankSetting.Type.PREFERBITVECTOR) && index.isPreferBitVector()) { + return new RankSetting(index.getName(), type, new Boolean(true)); + } + */ + return null; + } + + /** + * Returns the names of the fields which have a rank boost setting + * explicitly in this profile or in fields + */ + @Override + public Set<RankSetting> rankSettings() { + Set<RankSetting> settings = new LinkedHashSet<>(20); + settings.addAll(this.rankSettings); + for (ImmutableSDField field : schema().allConcreteFields() ) { + addSetting(field, RankSetting.Type.WEIGHT, settings); + addSetting(field, RankSetting.Type.RANKTYPE, settings); + addSetting(field, RankSetting.Type.LITERALBOOST, settings); + addSetting(field, RankSetting.Type.PREFERBITVECTOR, settings); + } + + // For settings that really pertains to indexes do the explicit indexes too + for (Index index : schema().getExplicitIndices()) { + addSetting(index, RankSetting.Type.PREFERBITVECTOR, settings); + } + return settings; + } + + private void addSetting(ImmutableSDField field, RankSetting.Type type, Set<RankSetting> settings) { + if (type.isIndexLevel()) { + addIndexSettings(field, type, settings); + } + else { + RankSetting setting = toRankSetting(field, type); + if (setting == null) return; + settings.add(setting); + } + } + + private void addIndexSettings(ImmutableSDField field, RankSetting.Type type, Set<RankSetting> settings) { + String indexName = field.getName(); + + // TODO: Make a ranking object in the index override the field level ranking object + if (type.equals(RankSetting.Type.PREFERBITVECTOR) && field.getRanking().isFilter()) { + settings.add(new RankSetting(indexName, type, true)); + } + } + + private void addSetting(Index index, RankSetting.Type type, Set<RankSetting> settings) { + RankSetting setting = toRankSetting(index, type); + if (setting == null) return; + settings.add(setting); + } + +} diff --git 
a/config-model/src/main/java/com/yahoo/schema/DistributableResource.java b/config-model/src/main/java/com/yahoo/schema/DistributableResource.java new file mode 100644 index 00000000000..7a8a3963ba4 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/DistributableResource.java @@ -0,0 +1,95 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema;

import com.yahoo.config.FileReference;
import com.yahoo.config.application.api.FileRegistry;
import com.yahoo.path.Path;

import java.nio.ByteBuffer;
import java.util.Objects;

/**
 * A named resource identified by a file path, a uri or a blob, which can be registered
 * in a {@link FileRegistry} to obtain a file reference. Resources are ordered by name.
 */
public class DistributableResource implements Comparable<DistributableResource> {

    public enum PathType { FILE, URI, BLOB }

    /** The name of this resource */
    private final String name;
    // TODO: Make path/pathType final
    private PathType pathType;
    private String path;
    /** The reference obtained by registering this, or an empty reference until registered */
    private FileReference fileReference = new FileReference("");

    public PathType getPathType() {
        return pathType;
    }

    /** Creates a resource of type FILE with no path set. */
    public DistributableResource(String name) {
        this(name, null, PathType.FILE);
    }

    /** Creates a resource of type FILE with the given path. */
    public DistributableResource(String name, String path) {
        this(name, path, PathType.FILE);
    }

    public DistributableResource(String name, String path, PathType type) {
        this.name = name;
        this.path = path;
        this.pathType = type;
    }

    // TODO: Remove and make path/pathType final
    public void setFileName(String fileName) {
        Objects.requireNonNull(fileName, "Filename cannot be null");
        this.path = fileName;
        this.pathType = PathType.FILE;
    }

    // TODO: Remove and make path/pathType final
    public void setUri(String uri) {
        Objects.requireNonNull(uri, "uri cannot be null");
        this.path = uri;
        this.pathType = PathType.URI;
    }

    public String getName() { return name; }
    public String getFileName() { return path; }
    public Path getFilePath() { return Path.fromString(path); }
    public String getUri() { return path; }
    public String getFileReference() { return fileReference.value(); }

    /**
     * Verifies that a FILE or URI resource has a non-empty path. BLOB resources are not checked.
     *
     * @throws IllegalArgumentException if a FILE or URI resource has a null or empty path
     */
    public void validate() {
        switch (pathType) {
            case FILE:
            case URI:
                if (path == null || path.isEmpty())
                    throw new IllegalArgumentException("Distributable URI/FILE resource must have a file or uri.");
                break;
        }
    }

    /**
     * Registers the file or uri of this in the given registry and keeps the resulting reference.
     *
     * @throws IllegalArgumentException if this is neither a FILE nor a URI resource
     */
    public void register(FileRegistry fileRegistry) {
        switch (pathType) {
            case FILE:
                fileReference = fileRegistry.addFile(path);
                break;
            case URI:
                fileReference = fileRegistry.addUri(path);
                break;
            default:
                throw new IllegalArgumentException("Unknown path type " + pathType);
        }
    }

    /** Registers the given blob under the path of this and keeps the resulting reference. */
    protected void register(FileRegistry fileRegistry, ByteBuffer blob) {
        fileReference = fileRegistry.addBlob(path, blob);
    }

    @Override
    public String toString() {
        // Every quoted value is closed, including the name
        return "resource '" + name + "' of type '" + pathType + "' with ref '" + fileReference + "'";
    }

    @Override
    public int compareTo(DistributableResource o) {
        return name.compareTo(o.getName());
    }

}
diff --git a/config-model/src/main/java/com/yahoo/schema/DocumentGraphValidator.java b/config-model/src/main/java/com/yahoo/schema/DocumentGraphValidator.java new file mode 100644 index 00000000000..648cdf18c5b --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/DocumentGraphValidator.java @@ -0,0 +1,79 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema; + +import com.yahoo.schema.document.SDDocumentType; + +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.List; + +import static java.util.stream.Collectors.joining; + +/** + * Validates that there are no cycles between document types (exception: self-reference is allowed). + * Example: if document B inherits A, then A cannot have a document reference to B. 
+ * + * @author bjorncs + */ +public class DocumentGraphValidator { + + public void validateDocumentGraph(List<SDDocumentType> documents) { + for (SDDocumentType document : documents) { + validateRoot(document); + } + } + + private static void validateRoot(SDDocumentType root) { + validateChildren(root, root); + } + + private static void validateChildren(SDDocumentType root, SDDocumentType currentDocument) { + try { + currentDocument.getDocumentReferences().get() + .forEach(entry -> { + SDDocumentType referencedDocument = entry.getValue().targetSearch().getDocument(); + validateDocument(root, referencedDocument); + }); + currentDocument.getInheritedTypes() + .forEach(inheritedDocument -> { + if (!isRootDocument(inheritedDocument)) { + validateDocument(root, inheritedDocument); + } + }); + } catch (DocumentGraphException e) { + e.addParentDocument(currentDocument); + throw e; + } + } + + private static void validateDocument(SDDocumentType root, SDDocumentType currentDocument) { + if (root.equals(currentDocument)) { + throw new DocumentGraphException(currentDocument); + } + validateChildren(root, currentDocument); + } + + private static boolean isRootDocument(SDDocumentType doc) { + return doc.getName().equals("document"); + } + + public static class DocumentGraphException extends IllegalArgumentException { + private final Deque<SDDocumentType> deque = new ArrayDeque<>(); + + public DocumentGraphException(SDDocumentType document) { + deque.addLast(document); + } + + public void addParentDocument(SDDocumentType document) { + deque.addFirst(document); + } + + @Override + public String getMessage() { + return deque.stream() + .map(SDDocumentType::getName) + .collect(joining("->", "Document dependency cycle detected: ", ".")); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/DocumentModelBuilder.java b/config-model/src/main/java/com/yahoo/schema/DocumentModelBuilder.java new file mode 100644 index 00000000000..1a6134b410f --- /dev/null +++ 
b/config-model/src/main/java/com/yahoo/schema/DocumentModelBuilder.java @@ -0,0 +1,639 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema; + +import com.yahoo.document.ArrayDataType; +import com.yahoo.document.CollectionDataType; +import com.yahoo.document.DataType; +import com.yahoo.document.DocumentType; +import com.yahoo.document.Field; +import com.yahoo.document.MapDataType; +import com.yahoo.document.StructDataType; +import com.yahoo.document.TemporaryStructuredDataType; +import com.yahoo.document.WeightedSetDataType; +import com.yahoo.document.annotation.AnnotationReferenceDataType; +import com.yahoo.document.annotation.AnnotationType; +import com.yahoo.documentmodel.DataTypeCollection; +import com.yahoo.documentmodel.NewDocumentReferenceDataType; +import com.yahoo.documentmodel.NewDocumentType; +import com.yahoo.documentmodel.OwnedStructDataType; +import com.yahoo.documentmodel.OwnedTemporaryType; +import com.yahoo.documentmodel.TemporaryUnknownType; +import com.yahoo.documentmodel.VespaDocumentType; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.SDDocumentType; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.document.TemporaryImportedFields; +import com.yahoo.schema.document.annotation.SDAnnotationType; +import com.yahoo.schema.document.annotation.TemporaryAnnotationReferenceDataType; +import com.yahoo.vespa.documentmodel.DocumentModel; +import com.yahoo.vespa.documentmodel.FieldView; +import com.yahoo.vespa.documentmodel.SearchDef; +import com.yahoo.vespa.documentmodel.SearchField; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.IdentityHashMap; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; 
+import java.util.stream.Collectors; + +/** + * @author baldersheim + */ +public class DocumentModelBuilder { + + private final DocumentModel model; + + public DocumentModelBuilder() { + this.model = new DocumentModel(); + this.model.getDocumentManager().add(VespaDocumentType.INSTANCE); + } + + public DocumentModel build(Collection<Schema> schemaList) { + List<SDDocumentType> docList = new LinkedList<>(); + for (Schema schema : schemaList) { + docList.add(schema.getDocument()); + } + docList = sortDocumentTypes(docList); + addDocumentTypes(docList); + for (Collection<Schema> toAdd = tryAdd(schemaList); + ! toAdd.isEmpty() && (toAdd.size() < schemaList.size()); + toAdd = tryAdd(schemaList)) { + schemaList = toAdd; + } + return model; + } + + private List<SDDocumentType> sortDocumentTypes(List<SDDocumentType> docList) { + Set<String> doneNames = new HashSet<>(); + doneNames.add(SDDocumentType.VESPA_DOCUMENT.getName()); + List<SDDocumentType> doneList = new LinkedList<>(); + List<SDDocumentType> prevList = null; + List<SDDocumentType> nextList = docList; + while (prevList == null || nextList.size() < prevList.size()) { + prevList = nextList; + nextList = new LinkedList<>(); + for (SDDocumentType doc : prevList) { + boolean isDone = true; + for (SDDocumentType inherited : doc.getInheritedTypes()) { + if (!doneNames.contains(inherited.getName())) { + isDone = false; + break; + } + } + if (isDone) { + doneNames.add(doc.getName()); + doneList.add(doc); + } else { + nextList.add(doc); + } + } + } + if (!nextList.isEmpty()) { + throw new IllegalArgumentException("Could not resolve inheritance of document types " + + toString(prevList) + "."); + } + return doneList; + } + + private static String toString(List<SDDocumentType> lst) { + StringBuilder out = new StringBuilder(); + for (int i = 0, len = lst.size(); i < len; ++i) { + out.append("'").append(lst.get(i).getName()).append("'"); + if (i < len - 2) { + out.append(", "); + } else if (i < len - 1) { + out.append(" and "); 
+ } + } + return out.toString(); + } + + private Collection<Schema> tryAdd(Collection<Schema> schemaList) { + Collection<Schema> left = new ArrayList<>(); + for (Schema schema : schemaList) { + try { + addToModel(schema); + } catch (RetryLaterException e) { + left.add(schema); + } + } + return left; + } + + private void addToModel(Schema schema) { + // Then we add the search specific stuff + SearchDef searchDef = new SearchDef(schema.getName()); + addSearchFields(schema.extraFieldList(), searchDef); + for (Field f : schema.getDocument().fieldSet()) { + addSearchField((SDField) f, searchDef); + } + for (SDField field : schema.allConcreteFields()) { + for (Attribute attribute : field.getAttributes().values()) { + if ( ! searchDef.getFields().containsKey(attribute.getName())) { + searchDef.add(new SearchField(new Field(attribute.getName(), field), !field.getIndices().isEmpty(), true)); + } + } + } + + for (Field f : schema.getDocument().fieldSet()) { + addAlias((SDField) f, searchDef); + } + model.getSearchManager().add(searchDef); + } + + private static void addSearchFields(Collection<SDField> fields, SearchDef searchDef) { + for (SDField field : fields) { + addSearchField(field, searchDef); + } + } + + private static void addSearchField(SDField field, SearchDef searchDef) { + SearchField searchField = + new SearchField(field, + field.getIndices().containsKey(field.getName()) && field.getIndices().get(field.getName()).getType().equals(Index.Type.VESPA), + field.getAttributes().containsKey(field.getName())); + searchDef.add(searchField); + + // Add field to views + addToView(field.getIndices().keySet(), searchField, searchDef); + } + + private static void addAlias(SDField field, SearchDef searchDef) { + for (Map.Entry<String, String> entry : field.getAliasToName().entrySet()) { + searchDef.addAlias(entry.getKey(), entry.getValue()); + } + } + + private static void addToView(Collection<String> views, Field field, SearchDef searchDef) { + for (String viewName : views) { 
+ addToView(viewName, field, searchDef); + } + } + + private static void addToView(String viewName, Field field, SearchDef searchDef) { + if (searchDef.getViews().containsKey(viewName)) { + searchDef.getViews().get(viewName).add(field); + } else { + if (!searchDef.getFields().containsKey(viewName)) { + FieldView view = new FieldView(viewName); + view.add(field); + searchDef.add(view); + } + } + } + + private static String descT(DataType type) { + if (type == null) { return "<null>"; } + return "'" + type.getName() + "' [" + type.getId() + "] {"+type.getClass() + "}"; + } + + private void addDocumentTypes(List<SDDocumentType> docList) { + LinkedList<NewDocumentType> lst = new LinkedList<>(); + for (SDDocumentType doc : docList) { + lst.add(convert(doc)); + model.getDocumentManager().add(lst.getLast()); + } + Map<DataType, DataType> replacements = new IdentityHashMap<>(); + for(NewDocumentType doc : lst) { + resolveTemporaries(doc.getAllTypes(), lst, replacements); + resolveTemporariesRecurse(doc.getContentStruct(), doc.getAllTypes(), lst, replacements); + } + for(NewDocumentType doc : lst) { + for (var entry : replacements.entrySet()) { + var old = entry.getKey(); + if (doc.getDataType(old.getId()) == old) { + doc.replace(entry.getValue()); + } + } + } + } + + private static void resolveTemporaries(DataTypeCollection dtc, + Collection<NewDocumentType> docs, + Map<DataType, DataType> replacements) { + for (DataType type : dtc.getTypes()) { + resolveTemporariesRecurse(type, dtc, docs, replacements); + } + } + + @SuppressWarnings("deprecation") + private static DataType resolveTemporariesRecurse(DataType type, DataTypeCollection repo, + Collection<NewDocumentType> docs, + Map<DataType, DataType> replacements) { + if (replacements.containsKey(type)) { + return replacements.get(type); + } + DataType original = type; + if (type instanceof TemporaryStructuredDataType) { + throw new IllegalArgumentException("Cannot handle temporary: " + type); + } + if (type instanceof 
TemporaryUnknownType) { + // must be a known struct or document type + DataType other = repo.getDataType(type.getId()); + if (other == null || other == type) { + // maybe it is the name of a document type: + other = getDocumentType(docs, type.getName()); + } + if (other == null) { + throw new IllegalArgumentException("No replacement found for temporary type: " + type); + } + type = other; + } else if (type instanceof OwnedTemporaryType) { + // must be replaced with the real struct type + DataType other = repo.getDataType(type.getId()); + if (other == null || other == type) { + throw new IllegalArgumentException("No replacement found for temporary type: " + type); + } + if (other instanceof OwnedStructDataType) { + var owned = (OwnedTemporaryType) type; + String ownedBy = owned.getOwnerName(); + var otherOwned = (OwnedStructDataType) other; + String otherOwnedBy = otherOwned.getOwnerName(); + if (! ownedBy.equals(otherOwnedBy)) { + throw new IllegalArgumentException("Wrong document for type: " + otherOwnedBy + " but expected " + ownedBy); + } + } else { + throw new IllegalArgumentException("Found wrong sort of type: " + other + " [" + other.getClass() + "]"); + } + type = other; + } else if (type instanceof DocumentType) { + DataType other = getDocumentType(docs, type.getName()); + if (other != null) { + type = other; + } else if (type != DataType.DOCUMENT) { + throw new IllegalArgumentException + ("Can not handle nested document definitions. 
Undefined document type: " + type.toString()); + } + } else if (type instanceof NewDocumentType) { + DataType other = getDocumentType(docs, type.getName()); + if (other != null) { + type = other; + } + } else if (type instanceof StructDataType) { + // trick avoids infinite recursion: + var old = replacements.put(original, type); + assert(old == null); + StructDataType dt = (StructDataType) type; + for (com.yahoo.document.Field field : dt.getFields()) { + var ft = field.getDataType(); + var newft = resolveTemporariesRecurse(ft, repo, docs, replacements); + if (ft != newft) { + // XXX deprecated: + field.setDataType(newft); + } + } + old = replacements.remove(original); + assert(old == type); + } + else if (type instanceof MapDataType) { + MapDataType t = (MapDataType) type; + var old_kt = t.getKeyType(); + var old_vt = t.getValueType(); + var kt = resolveTemporariesRecurse(old_kt, repo, docs, replacements); + var vt = resolveTemporariesRecurse(old_vt, repo, docs, replacements); + if (kt != old_kt || vt != old_vt) { + type = new MapDataType(kt, vt, t.getId()); + } + } + else if (type instanceof ArrayDataType) { + ArrayDataType t = (ArrayDataType) type; + var old_nt = t.getNestedType(); + var nt = resolveTemporariesRecurse(old_nt, repo, docs, replacements); + if (nt != old_nt) { + type = new ArrayDataType(nt, t.getId()); + } + } + else if (type instanceof WeightedSetDataType) { + WeightedSetDataType t = (WeightedSetDataType) type; + var old_nt = t.getNestedType(); + var nt = resolveTemporariesRecurse(old_nt, repo, docs, replacements); + if (nt != old_nt) { + boolean c = t.createIfNonExistent(); + boolean r = t.removeIfZero(); + type = new WeightedSetDataType(nt, c, r, t.getId()); + } + } + else if (type instanceof NewDocumentReferenceDataType) { + var t = (NewDocumentReferenceDataType) type; + var doc = getDocumentType(docs, t.getTargetTypeName()); + type = doc.getReferenceDataType(); + } + if (type != original) { + replacements.put(original, type); + } + return type; 
+ } + + private static NewDocumentType getDocumentType(Collection<NewDocumentType> docs, String name) { + for (NewDocumentType doc : docs) { + if (doc.getName().equals(name)) { + return doc; + } + } + return null; + } + + private static boolean anyParentsHavePayLoad(SDAnnotationType sa, SDDocumentType sdoc) { + if (sa.getInherits() != null) { + AnnotationType tmp = sdoc.findAnnotation(sa.getInherits()); + SDAnnotationType inherited = (SDAnnotationType) tmp; + return ((inherited.getSdDocType() != null) || anyParentsHavePayLoad(inherited, sdoc)); + } + return false; + } + + private NewDocumentType convert(SDDocumentType sdoc) { + NewDocumentType dt = new NewDocumentType(new NewDocumentType.Name(sdoc.getName()), + sdoc.getDocumentType().contentStruct(), + sdoc.getFieldSets(), + convertDocumentReferencesToNames(sdoc.getDocumentReferences()), + convertTemporaryImportedFieldsToNames(sdoc.getTemporaryImportedFields())); + for (SDDocumentType n : sdoc.getInheritedTypes()) { + NewDocumentType.Name name = new NewDocumentType.Name(n.getName()); + NewDocumentType inherited = model.getDocumentManager().getDocumentType(name); + if (inherited != null) { + dt.inherit(inherited); + } + } + var extractor = new TypeExtractor(dt); + extractor.extract(sdoc); + return dt; + } + + static class TypeExtractor { + private final NewDocumentType targetDt; + Map<AnnotationType, String> annotationInheritance = new LinkedHashMap<>(); + Map<StructDataType, String> structInheritance = new LinkedHashMap<>(); + private final Map<Object, Object> inProgress = new IdentityHashMap<>(); + TypeExtractor(NewDocumentType target) { + this.targetDt = target; + } + + void extract(SDDocumentType sdoc) { + for (SDDocumentType type : sdoc.getTypes()) { + if (type.isStruct()) { + handleStruct(type); + } else { + throw new IllegalArgumentException("Data type '" + type.getName() + "' is not a struct => tostring='" + type.toString() + "'."); + } + } + for (SDDocumentType type : sdoc.getTypes()) { + for 
(SDDocumentType proxy : type.getInheritedTypes()) { + var inherited = (StructDataType) targetDt.getDataTypeRecursive(proxy.getName()); + var converted = (StructDataType) targetDt.getDataType(type.getName()); + assert(converted instanceof OwnedStructDataType); + assert(inherited instanceof OwnedStructDataType); + if (! converted.inherits(inherited)) { + converted.inherit(inherited); + } + } + } + for (AnnotationType annotation : sdoc.getAnnotations().values()) { + targetDt.add(annotation); + } + for (AnnotationType annotation : sdoc.getAnnotations().values()) { + SDAnnotationType sa = (SDAnnotationType) annotation; + if (annotation.getInheritedTypes().isEmpty() && (sa.getInherits() != null) ) { + annotationInheritance.put(annotation, sa.getInherits()); + } + if (annotation.getDataType() == null) { + if (sa.getSdDocType() != null) { + StructDataType s = handleStruct(sa.getSdDocType()); + annotation.setDataType(s); + if ((sa.getInherits() != null)) { + structInheritance.put(s, "annotation." + sa.getInherits()); + } + } else if (sa.getInherits() != null) { + StructDataType s = new OwnedStructDataType("annotation." + annotation.getName(), sdoc.getName()); + if (anyParentsHavePayLoad(sa, sdoc)) { + annotation.setDataType(s); + addType(s); + } + structInheritance.put(s, "annotation." 
+ sa.getInherits()); + } + } else { + var dt = annotation.getDataType(); + if (dt instanceof StructDataType) { + handleStruct((StructDataType) dt); + } + } + } + for (Map.Entry<AnnotationType, String> e : annotationInheritance.entrySet()) { + e.getKey().inherit(targetDt.getAnnotationType(e.getValue())); + } + for (Map.Entry<StructDataType, String> e : structInheritance.entrySet()) { + StructDataType s = (StructDataType)targetDt.getDataType(e.getValue()); + if (s != null) { + e.getKey().inherit(s); + } + } + handleStruct(sdoc.getDocumentType().contentStruct()); + extractDataTypesFromFields(sdoc.fieldSet()); + } + + private void extractDataTypesFromFields(Collection<Field> fields) { + for (Field f : fields) { + DataType type = f.getDataType(); + if (testAddType(type)) { + extractNestedTypes(type); + addType(type); + } + } + } + + private void extractNestedTypes(DataType type) { + if (inProgress.containsKey(type)) { + return; + } + inProgress.put(type, this); + if (type instanceof StructDataType) { + StructDataType tmp = (StructDataType) type; + extractDataTypesFromFields(tmp.getFieldsThisTypeOnly()); + } else if (type instanceof CollectionDataType) { + CollectionDataType tmp = (CollectionDataType) type; + extractNestedTypes(tmp.getNestedType()); + addType(tmp.getNestedType()); + } else if (type instanceof MapDataType) { + MapDataType tmp = (MapDataType) type; + extractNestedTypes(tmp.getKeyType()); + extractNestedTypes(tmp.getValueType()); + addType(tmp.getKeyType()); + addType(tmp.getValueType()); + } else if (type instanceof TemporaryAnnotationReferenceDataType) { + throw new IllegalArgumentException(type.toString()); + } + } + + private boolean testAddType(DataType type) { return internalAddType(type, true); } + + private boolean addType(DataType type) { return internalAddType(type, false); } + + private boolean internalAddType(DataType type, boolean dryRun) { + DataType oldType = targetDt.getDataTypeRecursive(type.getId()); + if (oldType == null) { + if ( ! 
dryRun) { + targetDt.add(type); + } + return true; + } + if (oldType == type) { + return false; + } + if (targetDt.getDataType(type.getId()) == null) { + if ((oldType instanceof OwnedStructDataType) + && (type instanceof OwnedStructDataType)) + { + var oldOwned = (OwnedStructDataType) oldType; + var newOwned = (OwnedStructDataType) type; + if (newOwned.getOwnerName().equals(targetDt.getName()) && + ! oldOwned.getOwnerName().equals(targetDt.getName())) + { + if ( ! dryRun) { + targetDt.add(type); + } + return true; + } + } + } + if ((type instanceof StructDataType) && (oldType instanceof StructDataType)) { + StructDataType s = (StructDataType) type; + StructDataType os = (StructDataType) oldType; + if ((os.getFieldCount() == 0) && (s.getFieldCount() > os.getFieldCount())) { + if ( ! dryRun) { + targetDt.replace(type); + } + return true; + } + } + return false; + } + + + @SuppressWarnings("deprecation") + private void specialHandleAnnotationReference(Field field) { + DataType fieldType = specialHandleAnnotationReferenceRecurse(field.getName(), field.getDataType()); + if (fieldType == null) { + return; + } + field.setDataType(fieldType); // XXX deprecated + } + + private DataType specialHandleAnnotationReferenceRecurse(String fieldName, + DataType dataType) { + if (dataType instanceof TemporaryAnnotationReferenceDataType) { + TemporaryAnnotationReferenceDataType refType = (TemporaryAnnotationReferenceDataType)dataType; + if (refType.getId() != 0) { + return null; + } + AnnotationType target = targetDt.getAnnotationType(refType.getTarget()); + if (target == null) { + throw new RetryLaterException("Annotation '" + refType.getTarget() + "' in reference '" + fieldName + + "' does not exist."); + } + dataType = new AnnotationReferenceDataType(target); + addType(dataType); + return dataType; + } + else if (dataType instanceof MapDataType) { + MapDataType t = (MapDataType)dataType; + DataType valueType = specialHandleAnnotationReferenceRecurse(fieldName, t.getValueType()); + 
if (valueType == null) { + return null; + } + var mapType = new MapDataType(t.getKeyType(), valueType, t.getId()); + addType(mapType); + return mapType; + } + else if (dataType instanceof ArrayDataType) { + ArrayDataType t = (ArrayDataType) dataType; + DataType nestedType = specialHandleAnnotationReferenceRecurse(fieldName, t.getNestedType()); + if (nestedType == null) { + return null; + } + var lstType = new ArrayDataType(nestedType, t.getId()); + addType(lstType); + return lstType; + } + else if (dataType instanceof WeightedSetDataType) { + WeightedSetDataType t = (WeightedSetDataType) dataType; + DataType nestedType = specialHandleAnnotationReferenceRecurse(fieldName, t.getNestedType()); + if (nestedType == null) { + return null; + } + boolean c = t.createIfNonExistent(); + boolean r = t.removeIfZero(); + var lstType = new WeightedSetDataType(nestedType, c, r, t.getId()); + addType(lstType); + return lstType; + } + return null; + } + + @SuppressWarnings("deprecation") + private StructDataType handleStruct(SDDocumentType type) { + if (type.isStruct()) { + var st = type.getStruct(); + if (st.getName().equals(type.getName()) && + (st instanceof StructDataType) && + (! (st instanceof TemporaryUnknownType)) && + (! 
(st instanceof OwnedTemporaryType))) + { + return handleStruct((StructDataType) st); + } + } + StructDataType s = new OwnedStructDataType(type.getName(), targetDt.getName()); + for (Field f : type.getDocumentType().contentStruct().getFieldsThisTypeOnly()) { + specialHandleAnnotationReference(f); + s.addField(f); + } + for (StructDataType inherited : type.getDocumentType().contentStruct().getInheritedTypes()) { + s.inherit(inherited); + } + extractNestedTypes(s); + addType(s); + return s; + } + + private StructDataType handleStruct(StructDataType s) { + for (Field f : s.getFieldsThisTypeOnly()) { + specialHandleAnnotationReference(f); + } + extractNestedTypes(s); + addType(s); + return s; + } + + } + + private static Set<NewDocumentType.Name> convertDocumentReferencesToNames(Optional<DocumentReferences> documentReferences) { + if (!documentReferences.isPresent()) { + return Set.of(); + } + return documentReferences.get().referenceMap().values().stream() + .map(documentReference -> documentReference.targetSearch().getDocument()) + .map(documentType -> new NewDocumentType.Name(documentType.getName())) + .collect(Collectors.toCollection(() -> new LinkedHashSet<>())); + } + + private static Set<String> convertTemporaryImportedFieldsToNames(TemporaryImportedFields importedFields) { + if (importedFields == null) { + return Set.of(); + } + return Collections.unmodifiableSet(importedFields.fields().keySet()); + } + + public static class RetryLaterException extends IllegalArgumentException { + public RetryLaterException(String message) { + super(message); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/DocumentOnlySchema.java b/config-model/src/main/java/com/yahoo/schema/DocumentOnlySchema.java new file mode 100644 index 00000000000..f9005c7b775 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/DocumentOnlySchema.java @@ -0,0 +1,33 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema; + +import com.yahoo.config.application.api.ApplicationPackage; +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.config.application.api.FileRegistry; +import com.yahoo.config.model.api.ModelContext; +import com.yahoo.schema.document.SDDocumentType; + +/** + * A search that was derived from an sd file containing no search element(s), only + * document specifications, so the name of this is decided by parsing and adding the document instance. + * + * @author vegardh + */ +public class DocumentOnlySchema extends Schema { + + public DocumentOnlySchema(ApplicationPackage applicationPackage, + FileRegistry fileRegistry, + DeployLogger deployLogger, + ModelContext.Properties properties) { + super(applicationPackage, fileRegistry, deployLogger, properties); + } + + @Override + public void addDocument(SDDocumentType docType) { + if (getName() == null) { + setName(docType.getName()); + } + super.addDocument(docType); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/DocumentReference.java b/config-model/src/main/java/com/yahoo/schema/DocumentReference.java new file mode 100644 index 00000000000..048035ffef8 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/DocumentReference.java @@ -0,0 +1,28 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema; + +import com.yahoo.document.Field; + +/** + * Represents a document reference. Contains the document reference field and the search instance of the referred document. 
+ * + * @author bjorncs + */ +public class DocumentReference { + + private final Field referenceField; + private final Schema targetSchema; + + public DocumentReference(Field referenceField, Schema targetSchema) { + this.referenceField = referenceField; + this.targetSchema = targetSchema; + } + + public Field referenceField() { + return referenceField; + } + + public Schema targetSearch() { + return targetSchema; + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/DocumentReferenceResolver.java b/config-model/src/main/java/com/yahoo/schema/DocumentReferenceResolver.java new file mode 100644 index 00000000000..b3e06fd5e02 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/DocumentReferenceResolver.java @@ -0,0 +1,94 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema; + +import com.yahoo.document.Field; +import com.yahoo.documentmodel.NewDocumentReferenceDataType; +import com.yahoo.schema.document.SDDocumentType; +import com.yahoo.schema.document.SDField; + +import java.util.Collection; +import java.util.Map; +import java.util.stream.Stream; + +import static java.util.function.Function.identity; +import static java.util.stream.Collectors.toMap; + +/** + * Resolves all document references in the search definitions + * + * Iterates through all document fields having a {@link NewDocumentReferenceDataType} and uses {@link NewDocumentReferenceDataType#getTargetTypeName()} + * to determine the referenced document. This information is aggregated into a {@link DocumentReferences} object. 
+ * + * @author bjorncs + */ +public class DocumentReferenceResolver { + + private final Map<String, Schema> schemaMapping; + + public DocumentReferenceResolver(Collection<Schema> schemas) { + this.schemaMapping = createDocumentNameToSearchMapping(schemas); + } + + public void resolveReferences(SDDocumentType documentType) { + var references = new DocumentReferences(createFieldToDocumentReferenceMapping(documentType)); + documentType.setDocumentReferences(references); + } + + public void resolveInheritedReferences(SDDocumentType documentType) { + resolveInheritedReferencesRecursive(documentType, documentType.getInheritedTypes()); + } + + private void resolveInheritedReferencesRecursive(SDDocumentType documentType, + Collection<SDDocumentType> inheritedTypes) { + for (var inheritedType : inheritedTypes) { + documentType.getDocumentReferences().get().mergeFrom(inheritedType.getDocumentReferences().get()); + } + for (var inheritedType : inheritedTypes) { + resolveInheritedReferencesRecursive(documentType, inheritedType.getInheritedTypes()); + } + } + + private Map<String, DocumentReference> createFieldToDocumentReferenceMapping(SDDocumentType documentType) { + return fieldStream(documentType) + .filter(field -> field.getDataType() instanceof NewDocumentReferenceDataType) + .collect(toMap(Field::getName, this::createDocumentReference)); + } + + private DocumentReference createDocumentReference(Field field) { + if (!isAttribute(field)) { + throw new IllegalArgumentException( + String.format( + "The field '%s' is an invalid document reference. 
The field must be an attribute.", + field.getName())); + } + NewDocumentReferenceDataType reference = (NewDocumentReferenceDataType) field.getDataType(); + String targetDocumentName = getTargetDocumentName(reference); + Schema schema = schemaMapping.get(targetDocumentName); + if (schema == null) { + throw new IllegalArgumentException( + String.format("Invalid document reference '%s': " + + "Could not find document type '%s'", field.getName(), targetDocumentName)); + } + return new DocumentReference(field, schema); + } + + private static boolean isAttribute(Field field) { + SDField sdField = (SDField) field; // Ugly, but SDDocumentType only expose the fields as the super class Field + return sdField.doesAttributing(); + } + + private static Map<String, Schema> createDocumentNameToSearchMapping(Collection<Schema> schemaDefintions) { + return schemaDefintions.stream() + .filter(search -> search.getDocument() != null) + .collect(toMap(search -> search.getDocument().getName(), identity())); + } + + private static Stream<Field> fieldStream(SDDocumentType documentType) { + return documentType.getDocumentType().getFields().stream(); + } + + private static String getTargetDocumentName(NewDocumentReferenceDataType reference) { + return reference.getTargetTypeName(); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/DocumentReferences.java b/config-model/src/main/java/com/yahoo/schema/DocumentReferences.java new file mode 100644 index 00000000000..3583a5134e0 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/DocumentReferences.java @@ -0,0 +1,37 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema; + +import java.util.Collections; +import java.util.Iterator; +import java.util.Map; +import java.util.stream.Stream; + +/** + * Contains all document references for a document mapped by field name + * + * @author bjorncs + */ +public class DocumentReferences implements Iterable<Map.Entry<String, DocumentReference>> { + private final Map<String, DocumentReference> references; + + public DocumentReferences(Map<String, DocumentReference> references) { + this.references = references; + } + + public void mergeFrom(DocumentReferences other) { + references.putAll(other.references); + } + + @Override + public Iterator<Map.Entry<String, DocumentReference>> iterator() { + return Collections.unmodifiableSet(references.entrySet()).iterator(); + } + + public Map<String, DocumentReference> referenceMap() { + return Collections.unmodifiableMap(references); + } + + public Stream<Map.Entry<String, DocumentReference>> stream() { + return references.entrySet().stream(); + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/DocumentsOnlyRankProfile.java b/config-model/src/main/java/com/yahoo/schema/DocumentsOnlyRankProfile.java new file mode 100644 index 00000000000..ffd517cf241 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/DocumentsOnlyRankProfile.java @@ -0,0 +1,36 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema; + +import java.util.List; + +/** + * A rank profile which ignores all calls made to it which may fail in a document only setting. + * This is used by the search definition parser when it is requested to parse documents only, + * to avoid having to check for this in every method which adds to the rank profile. + * (And why do we ever want to parse documents only? Because it is used when generating Java classes + * from documents, where the full application package may not be available.) 
+ * + * @author bratseth + */ +public class DocumentsOnlyRankProfile extends RankProfile { + + /** Creates a rank profile by passing all the arguments on to the RankProfile constructor */ public DocumentsOnlyRankProfile(String name, Schema schema, RankProfileRegistry rankProfileRegistry) { + super(name, schema, rankProfileRegistry); + } + + @Override + public void setFirstPhaseRanking(String expression) { + // Ignore: The expression is deliberately dropped, see the class comment + } + + @Override + public void setSecondPhaseRanking(String expression) { + // Ignore: The expression is deliberately dropped, see the class comment + } + + @Override + public void addFunction(String name, List<String> arguments, String expression, boolean inline) { + // Ignore: The function is deliberately dropped, see the class comment + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/FeatureNames.java b/config-model/src/main/java/com/yahoo/schema/FeatureNames.java new file mode 100644 index 00000000000..0671903194f --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/FeatureNames.java @@ -0,0 +1,92 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema; + +import com.yahoo.searchlib.rankingexpression.Reference; + +import java.util.Optional; + +/** + * Utility methods for query, document and constant rank feature names + * + * @author bratseth + */ +public class FeatureNames { + + /** Returns a reference to the constant feature with the given name as argument, quoted if necessary */ public static Reference asConstantFeature(String constantName) { + return Reference.simple("constant", quoteIfNecessary(constantName)); + } + + /** Returns a reference to the attribute feature with the given name as argument. Note that the name is never quoted here */ public static Reference asAttributeFeature(String attributeName) { + return Reference.simple("attribute", attributeName); + } + + /** Returns a reference to the query feature with the given property name as argument, quoted if necessary */ public static Reference asQueryFeature(String propertyName) { + return Reference.simple("query", quoteIfNecessary(propertyName)); + } + + /** Returns true if the given reference is an attribute, constant or query feature */ + public static boolean isSimpleFeature(Reference reference) { + if ( ! 
reference.isSimple()) return false; + String name = reference.name(); + return name.equals("attribute") || name.equals("constant") || name.equals("query"); + } + + /** Returns true if this is a constant feature */ + public static boolean isConstantFeature(Reference reference) { + if ( ! isSimpleFeature(reference)) return false; + return reference.name().equals("constant"); + } + + /** Returns true if this is a query feature */ + public static boolean isQueryFeature(Reference reference) { + if ( ! isSimpleFeature(reference)) return false; + return reference.name().equals("query"); + } + + /** Returns true if this is an attribute feature */ + public static boolean isAttributeFeature(Reference reference) { + if ( ! isSimpleFeature(reference)) return false; + return reference.name().equals("attribute"); + } + + /** + * Returns the single argument of the given feature name, without any quotes, + * or empty if it is not a valid query, attribute or constant feature name + */ + public static Optional<String> argumentOf(String feature) { + Optional<Reference> reference = Reference.simple(feature); + if ( reference.isEmpty()) return Optional.empty(); + if ( ! ( reference.get().name().equals("attribute") || + reference.get().name().equals("constant") || + reference.get().name().equals("query"))) + return Optional.empty(); + + return Optional.of(reference.get().arguments().expressions().get(0).toString()); + } + + /** Returns s as-is if it does not need quotes, and s enclosed in double quotes otherwise. Any double quotes inside s are not escaped */ private static String quoteIfNecessary(String s) { + if (notNeedQuotes(s)) + return s; + else + return "\"" + s + "\""; + } + + /** Returns true if s is non-empty and consists only of ASCII letters, ASCII digits and underscores, plus dashes after the first character */ static boolean notNeedQuotes(String s) { + // Faster version of the regexp [A-Za-z0-9_][A-Za-z0-9_-]* + if (s.isEmpty()) return false; + if ( ! 
isValidFirst(s.charAt(0))) return false; + for (int i = 1; i < s.length(); i++) { + if (!isValidAny(s.charAt(i))) return false; + } + return true; + } + /** Returns true if c is an ASCII letter, an ASCII digit or an underscore */ private static boolean isValidFirst(char c) { + // [A-Za-z0-9_] + return (c == '_') || ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')) || ((c >= '0') && (c <= '9')); + } + /** Returns true if c is a dash or a valid first character */ private static boolean isValidAny(char c) { + // [A-Za-z0-9_-] + return c == '-' || isValidFirst(c); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/FieldOperationApplier.java b/config-model/src/main/java/com/yahoo/schema/FieldOperationApplier.java new file mode 100644 index 00000000000..b2f40b045ea --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/FieldOperationApplier.java @@ -0,0 +1,32 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema; + +import com.yahoo.document.Field; +import com.yahoo.schema.document.SDDocumentType; +import com.yahoo.schema.document.SDField; + +/** Calls applyOperations on the SDFields of document types which are not structs + * @author Einar M R Rosenvinge + */ +public class FieldOperationApplier { + + /** Applies operations to the fields of the given document type, unless it is a struct */ public void process(SDDocumentType sdoc) { + if (!sdoc.isStruct()) { + apply(sdoc); + } + } + + /** Applies operations to each field in the field set of the given type */ protected void apply(SDDocumentType type) { + for (Field field : type.fieldSet()) { + apply(field); + } + } + + /** Calls applyOperations on the given field if it is an SDField, any other field is left untouched */ protected void apply(Field field) { + if (field instanceof SDField) { + SDField sdField = (SDField) field; + sdField.applyOperations(); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/FieldOperationApplierForSearch.java b/config-model/src/main/java/com/yahoo/schema/FieldOperationApplierForSearch.java new file mode 100644 index 00000000000..b107dbaea59 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/FieldOperationApplierForSearch.java @@ -0,0 +1,23 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema; + +import com.yahoo.document.Field; +import com.yahoo.schema.document.SDDocumentType; + +/** Applies field operations to the extra fields of a schema, and leaves document types untouched + * @author Einar M R Rosenvinge + */ +public class FieldOperationApplierForSearch extends FieldOperationApplier { + + @Override + public void process(SDDocumentType sdoc) { + // Do nothing: Only the extra fields of a schema are handled by this, see process(Schema) + } + + /** Applies operations to each field in the extra field list of the given schema */ public void process(Schema schema) { + for (Field field : schema.extraFieldList()) { + apply(field); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/FieldOperationApplierForStructs.java b/config-model/src/main/java/com/yahoo/schema/FieldOperationApplierForStructs.java new file mode 100644 index 00000000000..1ec1de6a9c6 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/FieldOperationApplierForStructs.java @@ -0,0 +1,20 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema; + +import com.yahoo.schema.document.SDDocumentType; + +/** Applies field operations to the struct types found in a document type + * @author Einar M R Rosenvinge + */ +public class FieldOperationApplierForStructs extends FieldOperationApplier { + + /** Applies operations to the fields of each struct type among all the types of the given document type */ @Override + public void process(SDDocumentType sdoc) { + for (SDDocumentType type : sdoc.getAllTypes()) { + if (type.isStruct()) { + apply(type); + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/FieldSets.java b/config-model/src/main/java/com/yahoo/schema/FieldSets.java new file mode 100644 index 00000000000..0594056150c --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/FieldSets.java @@ -0,0 +1,77 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema; + +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Optional; + +import com.yahoo.schema.document.FieldSet; + +/** + * The field sets owned by a {@link Schema} + * Both built in and user defined. 
+ * + * @author vegardh + */ +public class FieldSets { + + /** The schema owning these field sets, if any. The schema it inherits, if any, supplies the inherited field sets */ private final Optional<Schema> owner; + private final Map<String, FieldSet> userFieldSets; + private final Map<String, FieldSet> builtInFieldSets; + + /** Creates an instance without any field sets of its own for the given owner */ public FieldSets(Optional<Schema> owner) { + this.owner = owner; + userFieldSets = new LinkedHashMap<>(); + builtInFieldSets = new LinkedHashMap<>(); + } + + /** + * Adds an entry to user field sets, creating entries as needed + * + * @param setName name of a field set + * @param field field to add to field set + */ + public void addUserFieldSetItem(String setName, String field) { + if (userFieldSets.get(setName) == null) { + // First entry in this set + userFieldSets.put(setName, new FieldSet(setName)); + } + userFieldSets.get(setName).addFieldName(field); + } + + /** + * Adds an entry to built in field sets, creating entries as needed + * + * @param setName name of a field set + * @param field field to add to field set + */ + public void addBuiltInFieldSetItem(String setName, String field) { + if (builtInFieldSets.get(setName) == null) { + // First entry in this set + builtInFieldSets.put(setName, new FieldSet(setName)); + } + builtInFieldSets.get(setName).addFieldName(field); + } + + /** Returns the built in field sets, unmodifiable. Sets inherited through the owner are included, but are replaced by sets of this having the same name */ + public Map<String, FieldSet> builtInFieldSets() { + if (owner.isEmpty() || owner.get().inherited().isEmpty()) return Collections.unmodifiableMap(builtInFieldSets); + if (builtInFieldSets.isEmpty()) return owner.get().inherited().get().fieldSets().builtInFieldSets(); + + var fieldSets = new LinkedHashMap<>(owner.get().inherited().get().fieldSets().builtInFieldSets()); + /* Sets of this replace inherited sets with the same name */ fieldSets.putAll(builtInFieldSets); + return Collections.unmodifiableMap(fieldSets); + } + + /** Returns the user defined field sets, unmodifiable. Sets inherited through the owner are included, but are replaced by sets of this having the same name */ + public Map<String, FieldSet> userFieldSets() { + if (owner.isEmpty() || owner.get().inherited().isEmpty()) return Collections.unmodifiableMap(userFieldSets); + if (userFieldSets.isEmpty()) return 
owner.get().inherited().get().fieldSets().userFieldSets(); + + var fieldSets = new LinkedHashMap<>(owner.get().inherited().get().fieldSets().userFieldSets()); + /* Sets of this replace inherited sets with the same name */ fieldSets.putAll(userFieldSets); + return Collections.unmodifiableMap(fieldSets); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/ImmutableSchema.java b/config-model/src/main/java/com/yahoo/schema/ImmutableSchema.java new file mode 100644 index 00000000000..c352d9c417f --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/ImmutableSchema.java @@ -0,0 +1,52 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema; + +import com.yahoo.config.application.api.ApplicationPackage; +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.config.model.api.ModelContext; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.schema.document.SDDocumentType; +import com.yahoo.schema.document.SDField; +import com.yahoo.searchlib.rankingexpression.Reference; +import com.yahoo.vespa.documentmodel.SummaryField; + +import java.io.Reader; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Stream; + +/** + * An interface containing the non-mutating methods of {@link Schema}. + * For description of the methods see {@link Schema}. + * + * @author bjorncs + */ +public interface ImmutableSchema { + + String getName(); + Optional<? 
extends ImmutableSchema> inherited(); + Index getIndex(String name); + ImmutableSDField getConcreteField(String name); + // TODO: Split in mutating/immutable by returning List<ImmutableSDField> + List<SDField> allConcreteFields(); + List<Index> getExplicitIndices(); + Reader getRankingExpression(String fileName); + ApplicationPackage applicationPackage(); + DeployLogger getDeployLogger(); + ModelContext.Properties getDeployProperties(); + Map<Reference, RankProfile.Constant> constants(); + LargeRankExpressions rankExpressionFiles(); + Map<String, OnnxModel> onnxModels(); + Stream<ImmutableSDField> allImportedFields(); + SDDocumentType getDocument(); + ImmutableSDField getField(String name); + + /** Returns a stream of the fields returned by allFieldsList() */ default Stream<ImmutableSDField> allFields() { + return allFieldsList().stream(); + } + List<ImmutableSDField> allFieldsList(); + + List<SummaryField> getSummaryFields(ImmutableSDField field); + +} diff --git a/config-model/src/main/java/com/yahoo/schema/ImportedFieldsEnumerator.java b/config-model/src/main/java/com/yahoo/schema/ImportedFieldsEnumerator.java new file mode 100644 index 00000000000..0df79b30298 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/ImportedFieldsEnumerator.java @@ -0,0 +1,31 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema; + +import com.yahoo.schema.document.SDDocumentType; + +import java.util.Collection; + +/** + * Enumerates and emplaces a set of all imported fields into a SDDocumentType from + * its corresponding Schema instance. 
+ */ +public class ImportedFieldsEnumerator { + + private final Collection<Schema> schemas; + + public ImportedFieldsEnumerator(Collection<Schema> schemas) { + this.schemas = schemas; + } + + /** Sets the temporary imported fields, if any, of the first schema whose document has the same name as the given document type on that document type. Does nothing if there is no such schema */ public void enumerateImportedFields(SDDocumentType documentType) { + var search = this.schemas.stream() + .filter(s -> s.getDocument() != null) + .filter(s -> s.getDocument().getName().equals(documentType.getName())) + .findFirst(); + if (search.isEmpty()) { + return; // No schema has a document with this name, so there are no imported fields + } + search.get().temporaryImportedFields().ifPresent(documentType::setTemporaryImportedFields); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/Index.java b/config-model/src/main/java/com/yahoo/schema/Index.java new file mode 100644 index 00000000000..190081cf80f --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/Index.java @@ -0,0 +1,200 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema; + +import com.yahoo.schema.document.BooleanIndexDefinition; +import com.yahoo.schema.document.HnswIndexParams; +import com.yahoo.schema.document.RankType; +import com.yahoo.schema.document.Stemming; + +import java.io.Serializable; +import java.util.Collections; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; + +/** + * An index definition in a search definition. + * Two indices are equal if they have the same name and the same settings, + * including alias settings (see equals). 
+ * + * @author bratseth + */ +public class Index implements Cloneable, Serializable { + + /** The index engine types, each with a name */ public enum Type { + + VESPA("vespa"); + private final String name; + Type(String name) { this.name = name; } + public String getName() { return name; } + + } + + // Please see hashCode, equals and copy when adding attributes to this + + /** The search definition-unique name of this index */ + private String name; + + /** The rank type of this index */ + private RankType rankType = null; + + /** Whether this index supports prefix search */ + private boolean prefix; + + /** The set of aliases (Strings) to this index name */ + private Set<String> aliases = new java.util.LinkedHashSet<>(1); + + /** + * The stemming setting of this field, or null to use the default. + * Default is determined by the owning search definition. + */ + private Stemming stemming = null; + + /** The index engine type, VESPA unless changed */ private Type type = Type.VESPA; + + /** The boolean index definition, or null if not set */ + private BooleanIndexDefinition boolIndex; + + /** The HNSW index parameters, or empty if not set */ private Optional<HnswIndexParams> hnswIndexParams = Optional.empty(); + + /** Whether the posting lists of this index field should have interleaved features (num occs, field length) in document id stream. 
*/ + private boolean interleavedFeatures = false; + + /** Creates an index with the given name which does not support prefix search */ public Index(String name) { + this(name, false); + } + + /** Creates an index with the given name and prefix search setting */ public Index(String name, boolean prefix) { + this.name = name; + this.prefix = prefix; + } + + /** Sets the name of this index */ public void setName(String name) { this.name = name; } + + /** Returns the name of this index */ public String getName() { return name; } + + /** Sets the rank type of this field */ + public void setRankType(RankType rankType) { this.rankType = rankType; } + + /** Returns the rank type of this field, or null if nothing is set */ + public RankType getRankType() { return rankType; } + + /** Returns the stemming setting of this index, may be null */ + public Stemming getStemming() { return stemming; } + + /** + * Returns the stemming setting of this index if it is set, and the stemming setting of the given schema otherwise, + * so this is only null if the schema returns null (NOTE(review): presumably it never does, confirm in Schema) + */ + public Stemming getStemming(Schema schema) { + if (stemming != null) + return stemming; + else + return schema.getStemming(); + } + + /** + * Sets how this field should be stemmed, or set to null to use the default. + */ + public void setStemming(Stemming stemming) { this.stemming = stemming; } + + /** Returns whether this index supports prefix search, default is false */ + public boolean isPrefix() { return prefix; } + + /** Sets whether this index supports prefix search */ + public void setPrefix(boolean prefix) { this.prefix=prefix; } + + /** Adds an alias to this index name */ + public void addAlias(String alias) { + aliases.add(alias); + } + + /** Returns a read-only iterator of the aliases (Strings) to this index name */ + public Iterator<String> aliasIterator() { + return Collections.unmodifiableSet(aliases).iterator(); + } + + /** Returns true if o is of the same class as this and has the same name and settings, where the aliases are included in the comparison */ @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Index index = (Index) o; + return prefix == index.prefix && + interleavedFeatures == index.interleavedFeatures && + Objects.equals(name, index.name) && + rankType == index.rankType && + Objects.equals(aliases, index.aliases) && + 
stemming == index.stemming && + type == index.type && + Objects.equals(boolIndex, index.boolIndex) && + Objects.equals(hnswIndexParams, index.hnswIndexParams); + } + + @Override + public int hashCode() { + return Objects.hash(name, rankType, prefix, aliases, stemming, type, boolIndex, hnswIndexParams, interleavedFeatures); + } + + /** Returns a description of this containing its name, rank type and prefix setting */ public String toString() { + String rankTypeName = rankType == null ? "(none)" : rankType.name(); + return "index '" + name + + "' [ranktype: " + rankTypeName + + ", prefix: " + prefix + "]"; + } + + /** Makes a copy of this index which has its own alias set. The other members are not copied, so e.g. the boolean index definition is shared with this */ + @Override + public Object clone() { + try { + Index copy = (Index)super.clone(); + copy.aliases = new LinkedHashSet<>(this.aliases); + return copy; + } + catch (CloneNotSupportedException e) { + throw new RuntimeException("Programming error",e); + } + } + + /** Returns the result of clone() as an Index */ public Index copy() { + return (Index)clone(); + } + + /** Returns the index engine type */ + public Type getType() { + return type; + } + + /** Sets the index engine type */ + public void setType(Type type) { + this.type = type; + } + + /** Returns the boolean index definition, or null if it is not set */ + public BooleanIndexDefinition getBooleanIndexDefiniton() { + return boolIndex; + } + + /** Sets the boolean index definition */ + public void setBooleanIndexDefiniton(BooleanIndexDefinition def) { + boolIndex = def; + } + + /** Returns the HNSW index parameters, or empty if they are not set */ public Optional<HnswIndexParams> getHnswIndexParams() { + return hnswIndexParams; + } + + /** Sets the HNSW index parameters. The argument cannot be null as Optional.of rejects null */ public void setHnswIndexParams(HnswIndexParams params) { + hnswIndexParams = Optional.of(params); + } + + /** Sets whether the posting lists of this index should have interleaved features */ public void setInterleavedFeatures(boolean value) { + interleavedFeatures = value; + } + + /** Returns whether the posting lists of this index should have interleaved features, default is false */ public boolean useInterleavedFeatures() { + return interleavedFeatures; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/LargeRankExpressions.java b/config-model/src/main/java/com/yahoo/schema/LargeRankExpressions.java new file mode 100644 index 00000000000..cfdd4729b97 --- /dev/null +++ 
b/config-model/src/main/java/com/yahoo/schema/LargeRankExpressions.java @@ -0,0 +1,51 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema; + +import com.yahoo.config.application.api.FileRegistry; +import java.util.Collection; +import java.util.Collections; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; + +/** A collection of rank expression bodies indexed by name, where each body is validated and registered in a file registry when it is first added */ public class LargeRankExpressions { + private final Map<String, RankExpressionBody> expressions = new ConcurrentHashMap<>(); + private final FileRegistry fileRegistry; + private final int limit; + + /** Creates an instance with a limit of 8192 */ public LargeRankExpressions(FileRegistry fileRegistry) { + this(fileRegistry, 8192); + } + /** Creates an instance with the given limit. The limit is not used by this class itself, it is only made available through limit() */ public LargeRankExpressions(FileRegistry fileRegistry, int limit) { + this.fileRegistry = fileRegistry; + this.limit = limit; + } + + /** Adds an expression. The first expression added under a name is validated and registered in the file registry. Adding another expression under the same name does nothing if its blob equals the blob of the first, and throws an IllegalArgumentException otherwise. Note that the expression is stored before it is validated, so it stays stored if validate or register throws */ public void add(RankExpressionBody expression) { + String name = expression.getName(); + RankExpressionBody prev = expressions.putIfAbsent(name, expression); + if (prev == null) { + expression.validate(); + expression.register(fileRegistry); + } else { + if ( ! prev.getBlob().equals(expression.getBlob())) { + throw new IllegalArgumentException("Rank expression '" + name + + "' defined twice. 
Previous blob with " + prev.getBlob().remaining() + " bytes, while current has " + expression.getBlob().remaining() + " bytes"); + } + } + } + /** Returns the limit given when this was created */ public int limit() { return limit; } + + /** Returns a read-only list of the rank expressions in this, sorted by their natural order */ + public Collection<RankExpressionBody> expressions() { + return expressions.values().stream().sorted().collect(Collectors.toUnmodifiableList()); + } + + // Note: Used by integration tests in internal repo + /** Returns a read-only map of the rank expressions in this indexed by name */ + public Map<String, RankExpressionBody> asMap() { + return Collections.unmodifiableMap(expressions); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/MapEvaluationTypeContext.java b/config-model/src/main/java/com/yahoo/schema/MapEvaluationTypeContext.java new file mode 100644 index 00000000000..c6c807f2dbb --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/MapEvaluationTypeContext.java @@ -0,0 +1,361 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema; + +import com.google.common.collect.ImmutableMap; +import com.yahoo.schema.expressiontransforms.OnnxModelTransformer; +import com.yahoo.schema.expressiontransforms.TokenTransformer; +import com.yahoo.searchlib.rankingexpression.ExpressionFunction; +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.Reference; +import com.yahoo.searchlib.rankingexpression.parser.ParseException; +import com.yahoo.searchlib.rankingexpression.rule.Arguments; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; +import com.yahoo.searchlib.rankingexpression.rule.FunctionReferenceContext; +import com.yahoo.searchlib.rankingexpression.rule.NameNode; +import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode; +import com.yahoo.tensor.TensorType; +import com.yahoo.tensor.evaluation.TypeContext; + +import java.util.ArrayDeque; +import java.util.Collections; +import java.util.Deque; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.stream.Collectors; + +/** + * A context which only contains type information. + * This returns empty tensor types (double) for unknown features which are not + * query, attribute or constant features, as we do not have information about which such + * features exist (but we know those that exist are doubles). + * + * This is not multithread safe. 
+ * + * @author bratseth + */ +public class MapEvaluationTypeContext extends FunctionReferenceContext implements TypeContext<Reference> { + + private final Optional<MapEvaluationTypeContext> parent; + + private final Map<Reference, TensorType> featureTypes = new HashMap<>(); + + private final Map<Reference, TensorType> resolvedTypes = new HashMap<>(); + + /** To avoid re-resolving diamond-shaped dependencies */ + private final Map<Reference, TensorType> globallyResolvedTypes; + + /** For invocation loop detection */ + private final Deque<Reference> currentResolutionCallStack; + + private final SortedSet<Reference> queryFeaturesNotDeclared; + private boolean tensorsAreUsed; + + MapEvaluationTypeContext(ImmutableMap<String, ExpressionFunction> functions, Map<Reference, TensorType> featureTypes) { + super(functions); + this.parent = Optional.empty(); + this.featureTypes.putAll(featureTypes); + this.currentResolutionCallStack = new ArrayDeque<>(); + this.queryFeaturesNotDeclared = new TreeSet<>(); + tensorsAreUsed = false; + globallyResolvedTypes = new HashMap<>(); + } + + private MapEvaluationTypeContext(Map<String, ExpressionFunction> functions, + Map<String, String> bindings, + Optional<MapEvaluationTypeContext> parent, + Map<Reference, TensorType> featureTypes, + Deque<Reference> currentResolutionCallStack, + SortedSet<Reference> queryFeaturesNotDeclared, + boolean tensorsAreUsed, + Map<Reference, TensorType> globallyResolvedTypes) { + super(functions, bindings); + this.parent = parent; + this.featureTypes.putAll(featureTypes); + this.currentResolutionCallStack = currentResolutionCallStack; + this.queryFeaturesNotDeclared = queryFeaturesNotDeclared; + this.tensorsAreUsed = tensorsAreUsed; + this.globallyResolvedTypes = globallyResolvedTypes; + } + + public void setType(Reference reference, TensorType type) { + featureTypes.put(reference, type); + queryFeaturesNotDeclared.remove(reference); + } + + public Map<Reference, TensorType> featureTypes() { return 
Collections.unmodifiableMap(featureTypes); } + + @Override + public TensorType getType(String reference) { + throw new UnsupportedOperationException("Not able to parse general references from string form"); + } + + public void forgetResolvedTypes() { + resolvedTypes.clear(); + } + + private boolean referenceCanBeResolvedGlobally(Reference reference) { + Optional<ExpressionFunction> function = functionInvocation(reference); + return function.isPresent() && function.get().arguments().size() == 0; + // are there other cases we would like to resolve globally? + } + + @Override + public TensorType getType(Reference reference) { + // computeIfAbsent without concurrent modification due to resolve adding more resolved entries: + boolean canBeResolvedGlobally = referenceCanBeResolvedGlobally(reference); + + TensorType resolvedType = resolvedTypes.get(reference); + if (resolvedType == null && canBeResolvedGlobally) { + resolvedType = globallyResolvedTypes.get(reference); + } + if (resolvedType != null) { + return resolvedType; + } + + resolvedType = resolveType(reference); + if (resolvedType == null) + return defaultTypeOf(reference); // Don't store fallback to default as we may know more later + resolvedTypes.put(reference, resolvedType); + if (resolvedType.rank() > 0) + tensorsAreUsed = true; + + if (canBeResolvedGlobally) { + globallyResolvedTypes.put(reference, resolvedType); + } + + return resolvedType; + } + + MapEvaluationTypeContext getParent(String forArgument, String boundTo) { + return parent.orElseThrow( + () -> new IllegalArgumentException("argument "+forArgument+" is bound to "+boundTo+" but there is no parent context")); + } + + String resolveBinding(String argument) { + String bound = getBinding(argument); + if (bound == null) { + return argument; + } + return getParent(argument, bound).resolveBinding(bound); + } + + private TensorType resolveType(Reference reference) { + if (currentResolutionCallStack.contains(reference)) + throw new 
IllegalArgumentException("Invocation loop: " + + currentResolutionCallStack.stream().map(Reference::toString).collect(Collectors.joining(" -> ")) + + " -> " + reference); + + // Bound to a function argument? + Optional<String> binding = boundIdentifier(reference); + if (binding.isPresent()) { + try { + // This is not pretty, but changing to bind expressions rather + // than their string values requires deeper changes + var expr = new RankingExpression(binding.get()); + var type = expr.type(getParent(reference.name(), binding.get())); + return type; + } catch (ParseException e) { + throw new IllegalArgumentException(e); + } + } + + try { + currentResolutionCallStack.addLast(reference); + + // A reference to an attribute, query or constant feature? + if (FeatureNames.isSimpleFeature(reference)) { + // The argument may be a local identifier bound to the actual value + String argument = reference.simpleArgument().get(); + String argumentBinding = resolveBinding(argument); + reference = Reference.simple(reference.name(), argumentBinding); + return featureTypes.get(reference); + } + + // A reference to a function? + Optional<ExpressionFunction> function = functionInvocation(reference); + if (function.isPresent()) { + var body = function.get().getBody(); + var child = this.withBindings(bind(function.get().arguments(), reference.arguments())); + var type = body.type(child); + return type; + } + + // A reference to an ONNX model? + Optional<TensorType> onnxFeatureType = onnxFeatureType(reference); + if (onnxFeatureType.isPresent()) { + return onnxFeatureType.get(); + } + + // A reference to a feature for transformer token input? + Optional<TensorType> transformerTokensFeatureType = transformerTokensFeatureType(reference); + if (transformerTokensFeatureType.isPresent()) { + return transformerTokensFeatureType.get(); + } + + // A reference to a feature which returns a tensor? 
+ Optional<TensorType> featureTensorType = tensorFeatureType(reference); + if (featureTensorType.isPresent()) { + return featureTensorType.get(); + } + + // A directly injected identifier? (Useful for stateless model evaluation) + if (reference.isIdentifier() && featureTypes.containsKey(reference)) { + return featureTypes.get(reference); + } + + // the name of a constant feature? + if (reference.isIdentifier()) { + Reference asConst = FeatureNames.asConstantFeature(reference.name()); + if (featureTypes.containsKey(asConst)) { + return featureTypes.get(asConst); + } + } + + // We do not know what this is - since we do not have complete knowledge about the match features + // in Java we must assume this is a match feature and return the double type - which is the type of + // all match features + return TensorType.empty; + } + finally { + currentResolutionCallStack.removeLast(); + } + } + + /** + * Returns the default type for this simple feature, or null if it does not have a default + */ + public TensorType defaultTypeOf(Reference reference) { + if ( ! FeatureNames.isSimpleFeature(reference)) + throw new IllegalArgumentException("This can only be called for simple references, not " + reference); + if (reference.name().equals("query")) { // we do not require all query features to be declared, only non-doubles + queryFeaturesNotDeclared.add(reference); + return TensorType.empty; + } + return null; + } + + /** + * Returns the binding if this reference is a simple identifier which is bound in this context. + * Returns empty otherwise. + */ + private Optional<String> boundIdentifier(Reference reference) { + if ( ! 
reference.arguments().isEmpty()) return Optional.empty(); + if ( reference.output() != null) return Optional.empty(); + return Optional.ofNullable(getBinding(reference.name())); + } + + private Optional<ExpressionFunction> functionInvocation(Reference reference) { + if (reference.output() != null) return Optional.empty(); + ExpressionFunction function = getFunctions().get(reference.name()); + if (function == null) return Optional.empty(); + if (function.arguments().size() != reference.arguments().size()) return Optional.empty(); + return Optional.of(function); + } + + private Optional<TensorType> onnxFeatureType(Reference reference) { + if ( ! reference.name().equals("onnxModel") && ! reference.name().equals("onnx")) + return Optional.empty(); + + if ( ! featureTypes.containsKey(reference)) { + String configOrFileName = reference.arguments().expressions().get(0).toString(); + + // Look up standardized format as added in RankProfile + String modelConfigName = OnnxModelTransformer.getModelConfigName(reference); + String modelOutput = OnnxModelTransformer.getModelOutput(reference, null); + + reference = new Reference("onnxModel", new Arguments(new ReferenceNode(modelConfigName)), modelOutput); + if ( ! featureTypes.containsKey(reference)) { + throw new IllegalArgumentException("Missing onnx-model config for '" + configOrFileName + "'"); + } + } + + return Optional.of(featureTypes.get(reference)); + } + + private Optional<TensorType> transformerTokensFeatureType(Reference reference) { + if ( ! reference.name().equals("tokenTypeIds") && + ! reference.name().equals("tokenInputIds") && + ! reference.name().equals("tokenAttentionMask")) + return Optional.empty(); + + if ( ! 
(reference.arguments().size() > 1)) + throw new IllegalArgumentException(reference.name() + " must have at least 2 arguments"); + + ExpressionNode size = reference.arguments().expressions().get(0); + return Optional.of(TokenTransformer.createTensorType(reference.name(), size)); + } + + /** + * There are two features which returns the (non-empty) tensor type: tensorFromLabels and tensorFromWeightedSet. + * This returns the type of those features if this is a reference to either of them, or empty otherwise. + */ + private Optional<TensorType> tensorFeatureType(Reference reference) { + if ( ! reference.name().equals("tensorFromLabels") && ! reference.name().equals("tensorFromWeightedSet")) + return Optional.empty(); + + if (reference.arguments().size() != 1 && reference.arguments().size() != 2) + throw new IllegalArgumentException(reference.name() + " must have one or two arguments"); + + ExpressionNode arg0 = reference.arguments().expressions().get(0); + if ( ! ( arg0 instanceof ReferenceNode) || ! FeatureNames.isSimpleFeature(((ReferenceNode)arg0).reference())) + throw new IllegalArgumentException("The first argument of " + reference.name() + + " must be a simple feature, not " + arg0); + + String dimension; + if (reference.arguments().size() > 1) { + ExpressionNode arg1 = reference.arguments().expressions().get(1); + if ( ( ! (arg1 instanceof ReferenceNode) || ! (((ReferenceNode)arg1).reference().isIdentifier())) + && + ( ! 
(arg1 instanceof NameNode))) + throw new IllegalArgumentException("The second argument of " + reference.name() + + " must be a dimension name, not " + arg1); + dimension = reference.arguments().expressions().get(1).toString(); + } + else { // default + dimension = ((ReferenceNode)arg0).reference().arguments().expressions().get(0).toString(); + } + + // TODO: Determine the type of the weighted set/vector and use that as value type + return Optional.of(new TensorType.Builder().mapped(dimension).build()); + } + + /** Binds the given list of formal arguments to their actual values */ + private Map<String, String> bind(List<String> formalArguments, + Arguments invocationArguments) { + Map<String, String> bindings = new HashMap<>(formalArguments.size()); + for (int i = 0; i < formalArguments.size(); i++) { + String identifier = invocationArguments.expressions().get(i).toString(); + bindings.put(formalArguments.get(i), identifier); + } + return bindings; + } + + /** + * Returns an unmodifiable view of the query features which was requested but for which we have no type info + * (such that they default to TensorType.empty), shared between all instances of this + * involved in resolving a particular rank profile. 
+ */ + public SortedSet<Reference> queryFeaturesNotDeclared() { + return Collections.unmodifiableSortedSet(queryFeaturesNotDeclared); + } + + /** Returns true if any feature across all instances involved in resolving this rank profile resolves to a tensor */ + public boolean tensorsAreUsed() { return tensorsAreUsed; } + + @Override + public MapEvaluationTypeContext withBindings(Map<String, String> bindings) { + return new MapEvaluationTypeContext(getFunctions(), + bindings, + Optional.of(this), + featureTypes, + currentResolutionCallStack, + queryFeaturesNotDeclared, + tensorsAreUsed, + globallyResolvedTypes); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/OnnxModel.java b/config-model/src/main/java/com/yahoo/schema/OnnxModel.java new file mode 100644 index 00000000000..26a0b3e595d --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/OnnxModel.java @@ -0,0 +1,120 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema; + +import com.yahoo.tensor.TensorType; +import com.yahoo.vespa.model.ml.OnnxModelInfo; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; + +/** + * A global ONNX model distributed using file distribution, similar to ranking constants. 
+ * + * @author lesters + */ +public class OnnxModel extends DistributableResource { + + private OnnxModelInfo modelInfo = null; + private final Map<String, String> inputMap = new HashMap<>(); + private final Map<String, String> outputMap = new HashMap<>(); + + private String statelessExecutionMode = null; + private Integer statelessInterOpThreads = null; + private Integer statelessIntraOpThreads = null; + + public OnnxModel(String name) { + super(name); + } + + public OnnxModel(String name, String fileName) { + super(name, fileName); + validate(); + } + + @Override + public void setUri(String uri) { + throw new IllegalArgumentException("URI for ONNX models are not currently supported"); + } + + public void addInputNameMapping(String onnxName, String vespaName) { + addInputNameMapping(onnxName, vespaName, true); + } + + public void addInputNameMapping(String onnxName, String vespaName, boolean overwrite) { + Objects.requireNonNull(onnxName, "Onnx name cannot be null"); + Objects.requireNonNull(vespaName, "Vespa name cannot be null"); + if (overwrite || ! inputMap.containsKey(onnxName)) { + inputMap.put(onnxName, vespaName); + } + } + + public void addOutputNameMapping(String onnxName, String vespaName) { + addOutputNameMapping(onnxName, vespaName, true); + } + + public void addOutputNameMapping(String onnxName, String vespaName, boolean overwrite) { + Objects.requireNonNull(onnxName, "Onnx name cannot be null"); + Objects.requireNonNull(vespaName, "Vespa name cannot be null"); + if (overwrite || ! 
outputMap.containsKey(onnxName)) { + outputMap.put(onnxName, vespaName); + } + } + + public void setModelInfo(OnnxModelInfo modelInfo) { + Objects.requireNonNull(modelInfo, "Onnx model info cannot be null"); + for (String onnxName : modelInfo.getInputs()) { + addInputNameMapping(onnxName, OnnxModelInfo.asValidIdentifier(onnxName), false); + } + for (String onnxName : modelInfo.getOutputs()) { + addOutputNameMapping(onnxName, OnnxModelInfo.asValidIdentifier(onnxName), false); + } + this.modelInfo = modelInfo; + } + + public Map<String, String> getInputMap() { return Collections.unmodifiableMap(inputMap); } + public Map<String, String> getOutputMap() { return Collections.unmodifiableMap(outputMap); } + + public String getDefaultOutput() { + return modelInfo != null ? modelInfo.getDefaultOutput() : ""; + } + + TensorType getTensorType(String onnxName, Map<String, TensorType> inputTypes) { + return modelInfo != null ? modelInfo.getTensorType(onnxName, inputTypes) : TensorType.empty; + } + + public void setStatelessExecutionMode(String executionMode) { + if ("parallel".equalsIgnoreCase(executionMode)) { + this.statelessExecutionMode = "parallel"; + } else if ("sequential".equalsIgnoreCase(executionMode)) { + this.statelessExecutionMode = "sequential"; + } + } + + public Optional<String> getStatelessExecutionMode() { + return Optional.ofNullable(statelessExecutionMode); + } + + public void setStatelessInterOpThreads(int interOpThreads) { + if (interOpThreads >= 0) { + this.statelessInterOpThreads = interOpThreads; + } + } + + public Optional<Integer> getStatelessInterOpThreads() { + return Optional.ofNullable(statelessInterOpThreads); + } + + public void setStatelessIntraOpThreads(int intraOpThreads) { + if (intraOpThreads >= 0) { + this.statelessIntraOpThreads = intraOpThreads; + } + } + + public Optional<Integer> getStatelessIntraOpThreads() { + return Optional.ofNullable(statelessIntraOpThreads); + } + +} diff --git 
a/config-model/src/main/java/com/yahoo/schema/RankExpressionBody.java b/config-model/src/main/java/com/yahoo/schema/RankExpressionBody.java new file mode 100644 index 00000000000..d383a25aecb --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/RankExpressionBody.java @@ -0,0 +1,32 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema; + +import com.yahoo.config.application.api.FileRegistry; + +import java.nio.ByteBuffer; + +import static java.util.Objects.requireNonNull; + +public class RankExpressionBody extends DistributableResource { + + private final ByteBuffer blob; + + public RankExpressionBody(String name, ByteBuffer blob) { + super(name, name + ".lz4", PathType.BLOB); + this.blob = requireNonNull(blob, "Blob cannot be null"); + } + + public ByteBuffer getBlob() { return blob; } + + public void validate() { + // Remove once pathType is final + if (getPathType() != PathType.BLOB) { + throw new IllegalArgumentException("PathType must be BLOB."); + } + } + + public void register(FileRegistry fileRegistry) { + register(fileRegistry, blob); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/RankProfile.java b/config-model/src/main/java/com/yahoo/schema/RankProfile.java new file mode 100644 index 00000000000..5479ecf323f --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/RankProfile.java @@ -0,0 +1,1526 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema; + +import ai.vespa.rankingexpression.importer.configmodelview.ImportedMlModels; +import com.google.common.collect.ImmutableMap; +import com.yahoo.config.application.api.ApplicationPackage; +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.path.Path; +import com.yahoo.search.query.profile.QueryProfileRegistry; +import com.yahoo.search.query.profile.types.FieldDescription; +import com.yahoo.search.query.profile.types.QueryProfileType; +import com.yahoo.search.query.ranking.Diversity; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.schema.document.SDDocumentType; +import com.yahoo.schema.expressiontransforms.ExpressionTransforms; +import com.yahoo.schema.expressiontransforms.RankProfileTransformContext; +import com.yahoo.schema.parser.ParseException; +import com.yahoo.searchlib.rankingexpression.ExpressionFunction; +import com.yahoo.searchlib.rankingexpression.FeatureList; +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.Reference; +import com.yahoo.searchlib.rankingexpression.rule.Arguments; +import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode; +import com.yahoo.tensor.Tensor; +import com.yahoo.tensor.TensorType; + +import java.io.IOException; +import java.io.Reader; +import java.io.Serializable; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.OptionalDouble; +import java.util.Set; +import java.util.function.Function; +import java.util.function.Predicate; +import java.util.function.Supplier; +import java.util.logging.Level; +import java.util.stream.Collectors; +import 
java.util.stream.Stream; + +/** + * Represents a rank profile - a named set of ranking settings + * + * @author bratseth + */ +public class RankProfile implements Cloneable { + + public final static String FIRST_PHASE = "firstphase"; + public final static String SECOND_PHASE = "secondphase"; + + /** The schema-unique name of this rank profile */ + private final String name; + + /** The schema owning this profile, or null if global (owned by a model) */ + private final ImmutableSchema schema; + + private final List<String> inheritedNames = new ArrayList<>(); + + /** The resolved inherited profiles, or null when not resolved. */ + private List<RankProfile> inherited; + + private MatchPhaseSettings matchPhaseSettings = null; + + protected Set<RankSetting> rankSettings = new java.util.LinkedHashSet<>(); + + /** The ranking expression to be used for first phase */ + private RankingExpressionFunction firstPhaseRanking = null; + + /** The ranking expression to be used for second phase */ + private RankingExpressionFunction secondPhaseRanking = null; + + /** Number of hits to be reranked in second phase, -1 means use default */ + private int rerankCount = -1; + + /** Mysterious attribute */ + private int keepRankCount = -1; + + private int numThreadsPerSearch = -1; + private int minHitsPerThread = -1; + private int numSearchPartitions = -1; + + private Double termwiseLimit = null; + private Double postFilterThreshold = null; + private Double approximateThreshold = null; + + /** The drop limit used to drop hits with rank score less than or equal to this value */ + private double rankScoreDropLimit = -Double.MAX_VALUE; + + private Set<ReferenceNode> summaryFeatures; + private String inheritedSummaryFeaturesProfileName; + + private Set<ReferenceNode> matchFeatures; + private String inheritedMatchFeaturesProfileName; + + private Set<ReferenceNode> rankFeatures; + + /** The properties of this - a multimap */ + private Map<String, List<RankProperty>> rankProperties = new 
LinkedHashMap<>(); + + private Boolean ignoreDefaultRankFeatures = null; + + private Map<String, RankingExpressionFunction> functions = new LinkedHashMap<>(); + // This cache must be invalidated every time modifications are done to 'functions'. + private CachedFunctions allFunctionsCached = null; + + private Map<Reference, Input> inputs = new LinkedHashMap<>(); + + private Map<Reference, Constant> constants = new LinkedHashMap<>(); + + private Map<String, OnnxModel> onnxModels = new LinkedHashMap<>(); + + private Set<String> filterFields = new HashSet<>(); + + private final RankProfileRegistry rankProfileRegistry; + + private final TypeSettings attributeTypes = new TypeSettings(); + + private List<ImmutableSDField> allFieldsList; + + private Boolean strict; + + private final ApplicationPackage applicationPackage; + private final DeployLogger deployLogger; + + /** + * Creates a new rank profile for a particular schema + * + * @param name the name of the new profile + * @param schema the schema owning this profile + * @param rankProfileRegistry the {@link com.yahoo.schema.RankProfileRegistry} to use for storing + * and looking up rank profiles. 
+ */ + public RankProfile(String name, Schema schema, RankProfileRegistry rankProfileRegistry) { + this.name = Objects.requireNonNull(name, "name cannot be null"); + this.schema = Objects.requireNonNull(schema, "schema cannot be null"); + this.rankProfileRegistry = rankProfileRegistry; + this.applicationPackage = schema.applicationPackage(); + this.deployLogger = schema.getDeployLogger(); + } + + /** + * Creates a global rank profile + * + * @param name the name of the new profile + */ + public RankProfile(String name, ApplicationPackage applicationPackage, DeployLogger deployLogger, + RankProfileRegistry rankProfileRegistry) { + this.name = Objects.requireNonNull(name, "name cannot be null"); + this.schema = null; + this.rankProfileRegistry = rankProfileRegistry; + this.applicationPackage = applicationPackage; + this.deployLogger = deployLogger; + } + + public String name() { return name; } + + /** Returns the search definition owning this, or null if it is global */ + public ImmutableSchema schema() { return schema; } + + /** Returns the application this is part of */ + public ApplicationPackage applicationPackage() { + return applicationPackage; + } + + private Stream<ImmutableSDField> allFields() { + if (schema == null) return Stream.empty(); + if (allFieldsList == null) { + allFieldsList = schema.allFieldsList(); + } + return allFieldsList.stream(); + } + + private Stream<ImmutableSDField> allImportedFields() { + return schema != null ? schema.allImportedFields() : Stream.empty(); + } + + /** + * Returns whether type checking should fail if this profile accesses query features that are + * not defined in query profile types. + * + * Default is false. + */ + public boolean isStrict() { + Boolean declaredStrict = declaredStrict(); + if (declaredStrict != null) return declaredStrict; + return false; + } + + /** Returns the strict value declared in this or any parent profile. 
*/ + public Boolean declaredStrict() { + if (strict != null) return strict; + return uniquelyInherited(p -> p.declaredStrict(), "strict").orElse(null); + } + + public void setStrict(Boolean strict) { + this.strict = strict; + } + + /** + * Adds a profile to those inherited by this. + * The profile must belong to this schema (directly or by inheritance). + */ + public void inherit(String inheritedName) { + inherited = null; + inheritedNames.add(inheritedName); + } + + /** Returns the names of the profiles this inherits, if any. */ + public List<String> inheritedNames() { return Collections.unmodifiableList(inheritedNames); } + + /** Returns the rank profiles inherited by this. */ + private List<RankProfile> inherited() { + if (inheritedNames.isEmpty()) return List.of(); + if (inherited != null) return inherited; + + inherited = resolveInheritedProfiles(schema); + List<String> children = new ArrayList<>(); + children.add(createFullyQualifiedName()); + inherited.forEach(profile -> verifyNoInheritanceCycle(children, profile)); + return inherited; + } + + private String createFullyQualifiedName() { + return (schema != null) + ? (schema.getName() + "." + name()) + : name(); + } + + private void verifyNoInheritanceCycle(List<String> children, RankProfile parent) { + children.add(parent.createFullyQualifiedName()); + String root = children.get(0); + if (root.equals(parent.createFullyQualifiedName())) + throw new IllegalArgumentException("There is a cycle in the inheritance for rank-profile '" + root + "' = " + children); + for (RankProfile parentInherited : parent.inherited()) + verifyNoInheritanceCycle(children, parentInherited); + } + + private List<RankProfile> resolveInheritedProfiles(ImmutableSchema schema) { + List<RankProfile> inherited = new ArrayList<>(); + for (String inheritedName : inheritedNames) { + RankProfile inheritedProfile = schema == null + ? 
rankProfileRegistry.getGlobal(inheritedName) + : resolveInheritedProfile(schema, inheritedName); + if (inheritedProfile == null) + throw new IllegalArgumentException("rank-profile '" + name() + "' inherits '" + inheritedName + + "', but this is not found in " + + ((schema() != null) ? schema() : " global rank profiles")); + inherited.add(inheritedProfile); + } + return inherited; + } + + private RankProfile resolveInheritedProfile(ImmutableSchema schema, String inheritedName) { + SDDocumentType documentType = schema.getDocument(); + if (documentType != null) { + if (name.equals(inheritedName)) { + // If you seemingly inherit yourself, you are actually referencing a rank-profile in one of your inherited schemas + for (SDDocumentType baseType : documentType.getInheritedTypes()) { + RankProfile resolvedFromBase = rankProfileRegistry.resolve(baseType, inheritedName); + if (resolvedFromBase != null) return resolvedFromBase; + } + } + return rankProfileRegistry.resolve(documentType, inheritedName); + } + return rankProfileRegistry.get(schema.getName(), inheritedName); + } + + /** Returns whether this profile inherits (directly or indirectly) the given profile name. 
*/ + public boolean inherits(String name) { + for (RankProfile inheritedProfile : inherited()) { + if (inheritedProfile.name().equals(name)) return true; + if (inheritedProfile.inherits(name)) return true; + } + return false; + } + + public void setMatchPhaseSettings(MatchPhaseSettings settings) { + settings.checkValid(); + this.matchPhaseSettings = settings; + } + + public MatchPhaseSettings getMatchPhaseSettings() { + if (matchPhaseSettings != null) return matchPhaseSettings; + return uniquelyInherited(p -> p.getMatchPhaseSettings(), "match phase settings").orElse(null); + } + + /** Returns the uniquely determined property, where non-empty is defined as non-null */ + private <T> Optional<T> uniquelyInherited(Function<RankProfile, T> propertyRetriever, + String propertyDescription) { + return uniquelyInherited(propertyRetriever, p -> p != null, propertyDescription); + } + + /** + * Returns the property retrieved by the given function, if it is only present in a single unique variant + * among all profiled inherited by this, or empty if not present. + * Note that for properties that don't implement a values-based equals this reverts to the stricter condition that + * only one inherited profile can define a non-empty value at all. 
+ * + * @throws IllegalArgumentException if the inherited profiles defines multiple different values of the property + */ + private <T> Optional<T> uniquelyInherited(Function<RankProfile, T> propertyRetriever, + Predicate<T> nonEmptyValueFilter, + String propertyDescription) { + Set<T> uniqueProperties = inherited().stream() + .map(p -> propertyRetriever.apply(p)) + .filter(p -> nonEmptyValueFilter.test(p)) + .collect(Collectors.toSet()); + if (uniqueProperties.isEmpty()) return Optional.empty(); + if (uniqueProperties.size() == 1) return Optional.of(uniqueProperties.stream().findAny().get()); + throw new IllegalArgumentException("Only one of the profiles inherited by " + this + " can contain " + + propertyDescription + ", but it is present in multiple"); + } + + public void addRankSetting(RankSetting rankSetting) { + rankSettings.add(rankSetting); + } + + public void addRankSetting(String fieldName, RankSetting.Type type, Object value) { + addRankSetting(new RankSetting(fieldName, type, value)); + } + + /** + * Returns the a rank setting of a field, or null if there is no such rank setting in this profile + * + * @param field the field whose settings to return + * @param type the type that the field is required to be + * @return the rank setting found, or null + */ + RankSetting getDeclaredRankSetting(String field, RankSetting.Type type) { + for (Iterator<RankSetting> i = declaredRankSettingIterator(); i.hasNext(); ) { + RankSetting setting = i.next(); + if (setting.getFieldName().equals(field) && setting.getType() == type) { + return setting; + } + } + return null; + } + + /** + * Returns a rank setting of field or index, or null if there is no such rank setting in this profile or one it + * inherits + * + * @param field the field whose settings to return + * @param type the type that the field is required to be + * @return the rank setting found, or null + */ + public RankSetting getRankSetting(String field, RankSetting.Type type) { + RankSetting rankSetting = 
getDeclaredRankSetting(field, type); + if (rankSetting != null) return rankSetting; + + return uniquelyInherited(p -> p.getRankSetting(field, type), "rank setting " + type + " on " + field).orElse(null); + } + + /** + * Returns the rank settings in this rank profile + * + * @return an iterator for the declared rank setting + */ + public Iterator<RankSetting> declaredRankSettingIterator() { + return Collections.unmodifiableSet(rankSettings).iterator(); + } + + /** + * Returns all settings in this profile or any profile it inherits + * + * @return an iterator for all rank settings of this + */ + public Iterator<RankSetting> rankSettingIterator() { + return rankSettings().iterator(); + } + + /** + * Returns a snapshot of the rank settings of this and everything it inherits. + * Changes to the returned set will not be reflected in this rank profile. + */ + public Set<RankSetting> rankSettings() { + Set<RankSetting> settings = new LinkedHashSet<>(); + for (RankProfile inheritedProfile : inherited()) { + for (RankSetting setting : inheritedProfile.rankSettings()) { + if (settings.contains(setting)) + throw new IllegalArgumentException(setting + " is present in " + inheritedProfile + " inherited by " + + this + ", but is also present in another profile inherited by it"); + settings.add(setting); + } + } + + // TODO: Here we do things in the wrong order to not break tests. Reverse this. 
+ Set<RankSetting> finalSettings = new LinkedHashSet<>(rankSettings); + finalSettings.addAll(settings); + return finalSettings; + } + + public void add(Constant constant) { + constants.put(constant.name(), constant); + } + + /** Returns an unmodifiable view of the constants declared in this */ + public Map<Reference, Constant> declaredConstants() { return Collections.unmodifiableMap(constants); } + + /** Returns an unmodifiable view of the constants available in this */ + public Map<Reference, Constant> constants() { + Map<Reference, Constant> allConstants = new HashMap<>(); + for (var inheritedProfile : inherited()) { + for (var constant : inheritedProfile.constants().values()) { + if (allConstants.containsKey(constant.name())) + throw new IllegalArgumentException(constant + "' is present in " + + inheritedProfile + " inherited by " + + this + ", but is also present in another profile inherited by it"); + allConstants.put(constant.name(), constant); + } + } + + if (schema != null) + allConstants.putAll(schema.constants()); + allConstants.putAll(constants); + return allConstants; + } + + public void add(OnnxModel model) { + onnxModels.put(model.getName(), model); + } + + /** Returns an unmodifiable map of the onnx models declared in this. */ + public Map<String, OnnxModel> declaredOnnxModels() { return onnxModels; } + + /** Returns an unmodifiable map of the onnx models available in this. 
*/ + public Map<String, OnnxModel> onnxModels() { + Map<String, OnnxModel> allModels = new HashMap<>(); + for (var inheritedProfile : inherited()) { + for (var model : inheritedProfile.onnxModels().values()) { + if (allModels.containsKey(model.getName())) + throw new IllegalArgumentException(model + "' is present in " + + inheritedProfile + " inherited by " + + this + ", but is also present in another profile inherited by it"); + allModels.put(model.getName(), model); + } + } + + if (schema != null) + allModels.putAll(schema.onnxModels()); + allModels.putAll(onnxModels); + return allModels; + } + + public void addAttributeType(String attributeName, String attributeType) { + attributeTypes.addType(attributeName, attributeType); + } + + public Map<String, String> getAttributeTypes() { + return attributeTypes.getTypes(); + } + + /** + * Returns the ranking expression to use by this. This expression must not be edited. + * Returns null if no expression is set. + */ + public RankingExpression getFirstPhaseRanking() { + RankingExpressionFunction function = getFirstPhase(); + if (function == null) return null; + return function.function.getBody(); + } + + public RankingExpressionFunction getFirstPhase() { + if (firstPhaseRanking != null) return firstPhaseRanking; + return uniquelyInherited(p -> p.getFirstPhase(), "first-phase expression").orElse(null); + } + + void setFirstPhaseRanking(RankingExpression rankingExpression) { + this.firstPhaseRanking = new RankingExpressionFunction(new ExpressionFunction(FIRST_PHASE, Collections.emptyList(), rankingExpression), false); + } + + public void setFirstPhaseRanking(String expression) { + try { + firstPhaseRanking = new RankingExpressionFunction(parseRankingExpression(FIRST_PHASE, Collections.emptyList(), expression), false); + } catch (ParseException e) { + throw new IllegalArgumentException("Illegal first phase ranking function", e); + } + } + + /** + * Returns the ranking expression to use by this. 
This expression must not be edited. + * Returns null if no expression is set. + */ + public RankingExpression getSecondPhaseRanking() { + RankingExpressionFunction function = getSecondPhase(); + if (function == null) return null; + return function.function().getBody(); + } + + public RankingExpressionFunction getSecondPhase() { + if (secondPhaseRanking != null) return secondPhaseRanking; + return uniquelyInherited(p -> p.getSecondPhase(), "second-phase expression").orElse(null); + } + + public void setSecondPhaseRanking(String expression) { + try { + secondPhaseRanking = new RankingExpressionFunction(parseRankingExpression(SECOND_PHASE, Collections.emptyList(), expression), false); + } + catch (ParseException e) { + throw new IllegalArgumentException("Illegal second phase ranking function", e); + } + } + + // TODO: Below we have duplicate methods for summary and match features: Encapsulate this in a single parametrized + // class instead (and probably make rank features work the same). + + /** + * Sets the name this should inherit the summary features of. + * Without setting this, this will either have the summary features of the single parent setting them, + * or if summary features are set in this, only have the summary features in this. + * With this set the resulting summary features of this will be the superset of those defined in this and + * the final (with inheritance included) summary features of the given parent. + * The profile must be one which is directly inherited by this. + */ + public void setInheritedSummaryFeatures(String parentProfile) { + if ( ! inheritedNames().contains(parentProfile)) + throw new IllegalArgumentException("This can only inherit the summary features of a directly inherited profile, '" + + ", but attempting to inherit '" + parentProfile); + this.inheritedSummaryFeaturesProfileName = parentProfile; + } + + /** + * Sets the name of a profile this should inherit the match features of. 
+ * Without setting this, this will either have the match features of the single parent setting them, + * or if match features are set in this, only have the match features in this. + * With this set the resulting match features of this will be the superset of those defined in this and + * the final (with inheritance included) match features of the given parent. + * The profile must be one which which is directly inherited by this. + * + */ + public void setInheritedMatchFeatures(String parentProfile) { + if ( ! inheritedNames().contains(parentProfile)) + throw new IllegalArgumentException("This can only inherit the match features of a directly inherited profile, '" + + ", but attempting to inherit '" + parentProfile); + this.inheritedMatchFeaturesProfileName = parentProfile; + } + + /** Returns a read-only view of the summary features to use in this profile. This is never null */ + public Set<ReferenceNode> getSummaryFeatures() { + if (inheritedSummaryFeaturesProfileName != null && summaryFeatures != null) { + Set<ReferenceNode> combined = new HashSet<>(); + RankProfile inherited = inherited().stream() + .filter(p -> p.name().equals(inheritedSummaryFeaturesProfileName)) + .findAny() + .orElseThrow(); + combined.addAll(inherited.getSummaryFeatures()); + combined.addAll(summaryFeatures); + return Collections.unmodifiableSet(combined); + } + if (summaryFeatures != null) return Collections.unmodifiableSet(summaryFeatures); + return uniquelyInherited(p -> p.getSummaryFeatures(), f -> ! f.isEmpty(), "summary features") + .orElse(Set.of()); + } + + /** Returns a read-only view of the match features to use in this profile. 
This is never null */ + public Set<ReferenceNode> getMatchFeatures() { + if (inheritedMatchFeaturesProfileName != null && matchFeatures != null) { + Set<ReferenceNode> combined = new HashSet<>(); + RankProfile inherited = inherited().stream() + .filter(p -> p.name().equals(inheritedMatchFeaturesProfileName)) + .findAny() + .orElseThrow(); + combined.addAll(inherited.getMatchFeatures()); + combined.addAll(matchFeatures); + return Collections.unmodifiableSet(combined); + } + if (matchFeatures != null) return Collections.unmodifiableSet(matchFeatures); + return uniquelyInherited(p -> p.getMatchFeatures(), f -> ! f.isEmpty(), "match features") + .orElse(Set.of()); + } + + private void addSummaryFeature(ReferenceNode feature) { + if (summaryFeatures == null) + summaryFeatures = new LinkedHashSet<>(); + summaryFeatures.add(feature); + } + + private void addMatchFeature(ReferenceNode feature) { + if (matchFeatures == null) + matchFeatures = new LinkedHashSet<>(); + matchFeatures.add(feature); + } + + /** Adds the content of the given feature list to the internal list of summary features. */ + public void addSummaryFeatures(FeatureList features) { + for (ReferenceNode feature : features) { + addSummaryFeature(feature); + } + } + + /** Adds the content of the given feature list to the internal list of match features. */ + public void addMatchFeatures(FeatureList features) { + for (ReferenceNode feature : features) { + addMatchFeature(feature); + } + } + + /** Returns a read-only view of the rank features to use in this profile. This is never null */ + public Set<ReferenceNode> getRankFeatures() { + if (rankFeatures != null) return Collections.unmodifiableSet(rankFeatures); + return uniquelyInherited(p -> p.getRankFeatures(), f -> ! 
f.isEmpty(), "rank-features") + .orElse(Set.of()); + } + + /** Adds the given feature to the rank features of this, creating the rank feature set on first use. */ + private void addRankFeature(ReferenceNode feature) { + if (rankFeatures == null) + rankFeatures = new LinkedHashSet<>(); + rankFeatures.add(feature); + } + + /** + * Adds the content of the given feature list to the internal list of rank features. + * + * @param features The features to add. + */ + public void addRankFeatures(FeatureList features) { + for (ReferenceNode feature : features) { + addRankFeature(feature); + } + } + + /** Returns a read only flattened list view of the rank properties to use in this profile. This is never null. */ + public List<RankProperty> getRankProperties() { + List<RankProperty> properties = new ArrayList<>(); + for (List<RankProperty> propertyList : getRankPropertyMap().values()) { + properties.addAll(propertyList); + } + return Collections.unmodifiableList(properties); + } + + /** Returns a read only map view of the rank properties to use in this profile. This is never null. */ + public Map<String, List<RankProperty>> getRankPropertyMap() { + if (rankProperties.size() == 0 && inherited().isEmpty()) return Map.of(); + if (inherited().isEmpty()) return Collections.unmodifiableMap(rankProperties); + + var inheritedProperties = uniquelyInherited(p -> p.getRankPropertyMap(), m -> ! 
m.isEmpty(), "rank-properties") + .orElse(Map.of()); + if (rankProperties.isEmpty()) return inheritedProperties; + + // Neither is null + Map<String, List<RankProperty>> combined = new LinkedHashMap<>(inheritedProperties); + combined.putAll(rankProperties); // Don't combine values across inherited properties + return Collections.unmodifiableMap(combined); + } + + public void addRankProperty(String name, String parameter) { + addRankProperty(new RankProperty(name, parameter)); + } + + private void addRankProperty(RankProperty rankProperty) { + // Just the usual multimap semantics here + rankProperties.computeIfAbsent(rankProperty.getName(), (String key) -> new ArrayList<>(1)).add(rankProperty); + } + + public void setRerankCount(int rerankCount) { this.rerankCount = rerankCount; } + + public int getRerankCount() { + if (rerankCount >= 0) return rerankCount; + return uniquelyInherited(p -> p.getRerankCount(), c -> c >= 0, "rerank-count").orElse(-1); + } + + public void setNumThreadsPerSearch(int numThreads) { this.numThreadsPerSearch = numThreads; } + + public int getNumThreadsPerSearch() { + if (numThreadsPerSearch >= 0) return numThreadsPerSearch; + return uniquelyInherited(p -> p.getNumThreadsPerSearch(), n -> n >= 0, "num-threads-per-search") + .orElse(-1); + } + + public void setMinHitsPerThread(int minHits) { this.minHitsPerThread = minHits; } + + /** Returns the min hits per thread set in this if non-negative, otherwise the value found through uniquelyInherited, or -1 if none */ + public int getMinHitsPerThread() { + if (minHitsPerThread >= 0) return minHitsPerThread; + return uniquelyInherited(p -> p.getMinHitsPerThread(), n -> n >= 0, "min-hits-per-thread").orElse(-1); + } + + public void setNumSearchPartitions(int numSearchPartitions) { this.numSearchPartitions = numSearchPartitions; } + + public int getNumSearchPartitions() { + if (numSearchPartitions >= 0) return numSearchPartitions; + return uniquelyInherited(p -> p.getNumSearchPartitions(), n -> n >= 0, "num-search-partitions").orElse(-1); + } + + public void setTermwiseLimit(double termwiseLimit) { this.termwiseLimit = termwiseLimit; } 
+ public void setPostFilterThreshold(double threshold) { this.postFilterThreshold = threshold; } + public void setApproximateThreshold(double threshold) { this.approximateThreshold = threshold; } + + public OptionalDouble getTermwiseLimit() { + if (termwiseLimit != null) return OptionalDouble.of(termwiseLimit); + return uniquelyInherited(p -> p.getTermwiseLimit(), l -> l.isPresent(), "termwise-limit") + .orElse(OptionalDouble.empty()); + } + + public OptionalDouble getPostFilterThreshold() { + if (postFilterThreshold != null) { + return OptionalDouble.of(postFilterThreshold); + } + return uniquelyInherited(p -> p.getPostFilterThreshold(), l -> l.isPresent(), "post-filter-threshold").orElse(OptionalDouble.empty()); + } + + public OptionalDouble getApproximateThreshold() { + if (approximateThreshold != null) { + return OptionalDouble.of(approximateThreshold); + } + return uniquelyInherited(p -> p.getApproximateThreshold(), l -> l.isPresent(), "approximate-threshold").orElse(OptionalDouble.empty()); + } + + /** Whether we should ignore the default rank features. 
Set to null to use inherited */ + public void setIgnoreDefaultRankFeatures(Boolean ignoreDefaultRankFeatures) { + this.ignoreDefaultRankFeatures = ignoreDefaultRankFeatures; + } + + public Boolean getIgnoreDefaultRankFeatures() { + if (ignoreDefaultRankFeatures != null) return ignoreDefaultRankFeatures; + return uniquelyInherited(p -> p.getIgnoreDefaultRankFeatures(), "ignore-default-rank-features").orElse(false); + } + + public void setKeepRankCount(int rerankArraySize) { this.keepRankCount = rerankArraySize; } + + public int getKeepRankCount() { + if (keepRankCount >= 0) return keepRankCount; + return uniquelyInherited(p -> p.getKeepRankCount(), c -> c >= 0, "keep-rank-count").orElse(-1); + } + + public void setRankScoreDropLimit(double rankScoreDropLimit) { this.rankScoreDropLimit = rankScoreDropLimit; } + + public double getRankScoreDropLimit() { + if (rankScoreDropLimit > -Double.MAX_VALUE) return rankScoreDropLimit; + return uniquelyInherited(p -> p.getRankScoreDropLimit(), c -> c > -Double.MAX_VALUE, "rank.score-drop-limit") + .orElse(rankScoreDropLimit); + } + + public void addFunction(String name, List<String> arguments, String expression, boolean inline) { + try { + addFunction(parseRankingExpression(name, arguments, expression), inline); + } + catch (ParseException e) { + throw new IllegalArgumentException("Could not parse function '" + name + "'", e); + } + } + + /** Adds a function and returns it */ + public RankingExpressionFunction addFunction(ExpressionFunction function, boolean inline) { + RankingExpressionFunction rankingExpressionFunction = new RankingExpressionFunction(function, inline); + if (functions.containsKey(function.getName())) { + deployLogger.log(Level.WARNING, "Function '" + function.getName() + "' is defined twice " + + "in rank profile '" + this.name + "'"); + } + functions.put(function.getName(), rankingExpressionFunction); + allFunctionsCached = null; + return rankingExpressionFunction; + } + + /** + * Adds the type of an input 
feature consumed by this profile. + * All inputs must either be declared through this or in query profile types, + * otherwise they are assumes to be scalars. + */ + public void addInput(Reference reference, Input input) { + if (inputs.containsKey(reference)) { + Input existing = inputs().get(reference); + if (! input.equals(existing)) + throw new IllegalArgumentException("Duplicate input: Has both " + input + " and existing"); + } + inputs.put(reference, input); + } + + /** Returns the inputs of this, which also includes all inputs of the parents of this. */ + // This is less restrictive than most other constructs in allowing inputs to be defined in all parent profiles + // because inputs are tied closer to functions than the profile itself. + public Map<Reference, Input> inputs() { + if (inputs.isEmpty() && inherited().isEmpty()) return Map.of(); + if (inherited().isEmpty()) return Collections.unmodifiableMap(inputs); + + // Combine + Map<Reference, Input> allInputs = new LinkedHashMap<>(); + for (var inheritedProfile : inherited()) { + for (var input : inheritedProfile.inputs().entrySet()) { + Input existing = allInputs.get(input.getKey()); + if (existing != null && ! 
existing.equals(input.getValue())) + throw new IllegalArgumentException(this + " inherits " + inheritedProfile + " which contains " + + input.getValue() + ", but this input is already defined as " + + existing + " in another profile this inherits"); + allInputs.put(input.getKey(), input.getValue()); + } + } + allInputs.putAll(inputs); + return Collections.unmodifiableMap(allInputs); + } + + public static class MutateOperation { + public enum Phase { on_match, on_first_phase, on_second_phase, on_summary} + final Phase phase; + final String attribute; + final String operation; + public MutateOperation(Phase phase, String attribute, String operation) { + this.phase = phase; + this.attribute = attribute; + this.operation = operation; + } + } + private final List<MutateOperation> mutateOperations = new ArrayList<>(); + + public void addMutateOperation(MutateOperation op) { + mutateOperations.add(op); + String prefix = "vespa.mutate." + op.phase.toString(); + addRankProperty(prefix + ".attribute", op.attribute); + addRankProperty(prefix + ".operation", op.operation); + } + public void addMutateOperation(MutateOperation.Phase phase, String attribute, String operation) { + addMutateOperation(new MutateOperation(phase, attribute, operation)); + } + public List<MutateOperation> getMutateOperations() { return mutateOperations; } + + public RankingExpressionFunction findFunction(String name) { + RankingExpressionFunction function = functions.get(name); + if (function != null) return function; + return uniquelyInherited(p -> p.findFunction(name), "function '" + name + "'").orElse(null); + } + + /** Returns an unmodifiable snapshot of the functions in this */ + public Map<String, RankingExpressionFunction> getFunctions() { + updateCachedFunctions(); + return allFunctionsCached.allRankingExpressionFunctions; + } + private ImmutableMap<String, ExpressionFunction> getExpressionFunctions() { + updateCachedFunctions(); + return allFunctionsCached.allExpressionFunctions; + } + private 
void updateCachedFunctions() { + if (needToUpdateFunctionCache()) { + allFunctionsCached = new CachedFunctions(gatherAllFunctions()); + } + } + + private Map<String, RankingExpressionFunction> gatherAllFunctions() { + if (functions.isEmpty() && inherited().isEmpty()) return Map.of(); + if (inherited().isEmpty()) return Collections.unmodifiableMap(new LinkedHashMap<>(functions)); + + // Combine + Map<String, RankingExpressionFunction> allFunctions = new LinkedHashMap<>(); + for (var inheritedProfile : inherited()) { + for (var function : inheritedProfile.getFunctions().entrySet()) { + if (allFunctions.containsKey(function.getKey())) + throw new IllegalArgumentException(this + " inherits " + inheritedProfile + " which contains " + + function.getValue() + ", but this function is already " + + "defined in another profile this inherits"); + allFunctions.put(function.getKey(), function.getValue()); + } + } + allFunctions.putAll(functions); + return Collections.unmodifiableMap(allFunctions); + } + + private boolean needToUpdateFunctionCache() { + if (inherited().stream().anyMatch(profile -> profile.needToUpdateFunctionCache())) return true; + return allFunctionsCached == null; + } + + public Set<String> filterFields() { return filterFields; } + + /** Returns all filter fields in this profile and any profile it inherits. */ + public Set<String> allFilterFields() { + Set<String> inheritedFilterFields = uniquelyInherited(p -> p.allFilterFields(), fields -> ! 
fields.isEmpty(), + "filter fields").orElse(Set.of()); + + if (inheritedFilterFields.isEmpty()) return Collections.unmodifiableSet(filterFields); + + Set<String> combined = new LinkedHashSet<>(inheritedFilterFields); + combined.addAll(filterFields()); + return combined; + } + + private ExpressionFunction parseRankingExpression(String name, List<String> arguments, String expression) throws ParseException { + if (expression.trim().length() == 0) + throw new ParseException("Encountered an empty ranking expression in " + name() + ", " + name + "."); + + try (Reader rankingExpressionReader = openRankingExpressionReader(name, expression.trim())) { + return new ExpressionFunction(name, arguments, new RankingExpression(name, rankingExpressionReader)); + } + catch (com.yahoo.searchlib.rankingexpression.parser.ParseException e) { + ParseException exception = new ParseException("Could not parse ranking expression '" + expression.trim() + + "' in " + name() + ", " + name + "."); + throw (ParseException)exception.initCause(e); + } + catch (IOException e) { + throw new RuntimeException("IOException parsing ranking expression '" + name + "'", e); + } + } + + private static String extractFileName(String expression) { + String fileName = expression.substring("file:".length()).trim(); + if ( ! fileName.endsWith(ApplicationPackage.RANKEXPRESSION_NAME_SUFFIX)) + fileName = fileName + ApplicationPackage.RANKEXPRESSION_NAME_SUFFIX; + + return fileName; + } + + private Reader openRankingExpressionReader(String expName, String expression) { + if (!expression.startsWith("file:")) return new StringReader(expression); + + String fileName = extractFileName(expression); + Path.fromString(fileName); // No ".." 
+ if (fileName.contains("/")) // See ticket 4102122 + throw new IllegalArgumentException("In " + name() + ", " + expName + ", ranking references file '" + + fileName + "' in a different directory, which is not supported."); + + return schema.getRankingExpression(fileName); + } + + /** Shallow clones this */ + @Override + public RankProfile clone() { + try { + RankProfile clone = (RankProfile)super.clone(); + clone.rankSettings = new LinkedHashSet<>(this.rankSettings); + clone.matchPhaseSettings = this.matchPhaseSettings; // hmm? + clone.summaryFeatures = summaryFeatures != null ? new LinkedHashSet<>(this.summaryFeatures) : null; + clone.matchFeatures = matchFeatures != null ? new LinkedHashSet<>(this.matchFeatures) : null; + clone.rankFeatures = rankFeatures != null ? new LinkedHashSet<>(this.rankFeatures) : null; + clone.rankProperties = new LinkedHashMap<>(this.rankProperties); + clone.inputs = new LinkedHashMap<>(this.inputs); + clone.functions = new LinkedHashMap<>(this.functions); + clone.allFunctionsCached = null; + clone.filterFields = new HashSet<>(this.filterFields); + clone.constants = new HashMap<>(this.constants); + return clone; + } + catch (CloneNotSupportedException e) { + throw new RuntimeException("Won't happen", e); + } + } + + /** + * Returns a copy of this where the content is optimized for execution. + * Compiled profiles should never be modified. 
+ */ + public RankProfile compile(QueryProfileRegistry queryProfiles, ImportedMlModels importedModels) { + try { + RankProfile compiled = this.clone(); + compiled.compileThis(queryProfiles, importedModels); + return compiled; + } + catch (IllegalArgumentException e) { + throw new IllegalArgumentException("Rank profile '" + name() + "' is invalid", e); + } + } + + private void compileThis(QueryProfileRegistry queryProfiles, ImportedMlModels importedModels) { + checkNameCollisions(getFunctions(), constants()); + ExpressionTransforms expressionTransforms = new ExpressionTransforms(); + + Map<Reference, TensorType> featureTypes = featureTypes(); + // Function compiling first pass: compile inline functions without resolving other functions + Map<String, RankingExpressionFunction> inlineFunctions = + compileFunctions(this::getInlineFunctions, queryProfiles, featureTypes, importedModels, Collections.emptyMap(), expressionTransforms); + + firstPhaseRanking = compile(this.getFirstPhase(), queryProfiles, featureTypes, importedModels, constants(), inlineFunctions, expressionTransforms); + secondPhaseRanking = compile(this.getSecondPhase(), queryProfiles, featureTypes, importedModels, constants(), inlineFunctions, expressionTransforms); + + // Function compiling second pass: compile all functions and insert previously compiled inline functions + // TODO: This merges all functions from inherited profiles too and erases inheritance information. Not good. 
+ functions = compileFunctions(this::getFunctions, queryProfiles, featureTypes, importedModels, inlineFunctions, expressionTransforms); + allFunctionsCached = null; + } + + private void checkNameCollisions(Map<String, RankingExpressionFunction> functions, Map<Reference, Constant> constants) { + for (var functionEntry : functions.entrySet()) { + if (constants.containsKey(FeatureNames.asConstantFeature(functionEntry.getKey()))) + throw new IllegalArgumentException("Cannot have both a constant and function named '" + + functionEntry.getKey() + "'"); + } + } + + private Map<String, RankingExpressionFunction> getInlineFunctions() { + return getFunctions().entrySet().stream().filter(x -> x.getValue().inline()) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + } + + private Map<String, RankingExpressionFunction> compileFunctions(Supplier<Map<String, RankingExpressionFunction>> functions, + QueryProfileRegistry queryProfiles, + Map<Reference, TensorType> featureTypes, + ImportedMlModels importedModels, + Map<String, RankingExpressionFunction> inlineFunctions, + ExpressionTransforms expressionTransforms) { + Map<String, RankingExpressionFunction> compiledFunctions = new LinkedHashMap<>(); + Map.Entry<String, RankingExpressionFunction> entry; + // Compile all functions. Why iterate in such a complicated way? + // Because some functions (imported models adding generated functions) may add other functions during compiling. 
+ // A straightforward iteration will either miss those functions, or may cause a ConcurrentModificationException + while (null != (entry = findUncompiledFunction(functions.get(), compiledFunctions.keySet()))) { + RankingExpressionFunction rankingExpressionFunction = entry.getValue(); + RankingExpressionFunction compiled = compile(rankingExpressionFunction, queryProfiles, featureTypes, + importedModels, constants(), inlineFunctions, + expressionTransforms); + compiledFunctions.put(entry.getKey(), compiled); + } + return compiledFunctions; + } + + private static Map.Entry<String, RankingExpressionFunction> findUncompiledFunction(Map<String, RankingExpressionFunction> functions, + Set<String> compiledFunctionNames) { + for (Map.Entry<String, RankingExpressionFunction> entry : functions.entrySet()) { + if ( ! compiledFunctionNames.contains(entry.getKey())) + return entry; + } + return null; + } + + private RankingExpressionFunction compile(RankingExpressionFunction function, + QueryProfileRegistry queryProfiles, + Map<Reference, TensorType> featureTypes, + ImportedMlModels importedModels, + Map<Reference, Constant> constants, + Map<String, RankingExpressionFunction> inlineFunctions, + ExpressionTransforms expressionTransforms) { + if (function == null) return null; + + RankProfileTransformContext context = new RankProfileTransformContext(this, + queryProfiles, + featureTypes, + importedModels, + constants, + inlineFunctions); + RankingExpression expression = expressionTransforms.transform(function.function().getBody(), context); + for (Map.Entry<String, String> rankProperty : context.rankProperties().entrySet()) { + addRankProperty(rankProperty.getKey(), rankProperty.getValue()); + } + return function.withExpression(expression); + } + + /** + * Creates a context containing the type information of all constants, attributes and query profiles + * referable from this rank profile. 
+ */ + public MapEvaluationTypeContext typeContext(QueryProfileRegistry queryProfiles) { + return typeContext(queryProfiles, featureTypes()); + } + + public MapEvaluationTypeContext typeContext() { return typeContext(new QueryProfileRegistry()); } + + private Map<Reference, TensorType> featureTypes() { + Map<Reference, TensorType> featureTypes = inputs().values().stream() + .collect(Collectors.toMap(input -> input.name(), input -> input.type())); + allFields().forEach(field -> addAttributeFeatureTypes(field, featureTypes)); + allImportedFields().forEach(field -> addAttributeFeatureTypes(field, featureTypes)); + return featureTypes; + } + + public MapEvaluationTypeContext typeContext(QueryProfileRegistry queryProfiles, + Map<Reference, TensorType> featureTypes) { + MapEvaluationTypeContext context = new MapEvaluationTypeContext(getExpressionFunctions(), featureTypes); + + constants().forEach((k, v) -> context.setType(k, v.type())); + + // Add query features from all rank profile types + for (QueryProfileType queryProfileType : queryProfiles.getTypeRegistry().allComponents()) { + for (FieldDescription field : queryProfileType.declaredFields().values()) { + TensorType type = field.getType().asTensorType(); + Optional<Reference> feature = Reference.simple(field.getName()); + if ( feature.isEmpty() || ! feature.get().name().equals("query")) continue; + if (featureTypes.containsKey(feature.get())) continue; // Explicit feature types (from inputs) overrides + + TensorType existingType = context.getType(feature.get()); + if ( ! 
Objects.equals(existingType, context.defaultTypeOf(feature.get()))) + type = existingType.dimensionwiseGeneralizationWith(type).orElseThrow( () -> + new IllegalArgumentException(queryProfileType + " contains query feature " + feature.get() + + " with type " + field.getType().asTensorType() + + ", but this is already defined in another query profile with type " + + context.getType(feature.get()))); + context.setType(feature.get(), type); + } + } + + // Add output types for ONNX models + for (var model : onnxModels().values()) { + Arguments args = new Arguments(new ReferenceNode(model.getName())); + Map<String, TensorType> inputTypes = resolveOnnxInputTypes(model, context); + + TensorType defaultOutputType = model.getTensorType(model.getDefaultOutput(), inputTypes); + context.setType(new Reference("onnxModel", args, null), defaultOutputType); + + for (Map.Entry<String, String> mapping : model.getOutputMap().entrySet()) { + TensorType type = model.getTensorType(mapping.getKey(), inputTypes); + context.setType(new Reference("onnxModel", args, mapping.getValue()), type); + } + } + return context; + } + + private Map<String, TensorType> resolveOnnxInputTypes(OnnxModel model, MapEvaluationTypeContext context) { + Map<String, TensorType> inputTypes = new HashMap<>(); + for (String onnxInputName : model.getInputMap().keySet()) { + resolveOnnxInputType(onnxInputName, model, context).ifPresent(type -> inputTypes.put(onnxInputName, type)); + } + return inputTypes; + } + + private Optional<TensorType> resolveOnnxInputType(String onnxInputName, OnnxModel model, MapEvaluationTypeContext context) { + String source = model.getInputMap().get(onnxInputName); + if (source != null) { + // Source is either a simple reference (query/attribute/constant/rankingExpression)... 
+ Optional<Reference> reference = Reference.simple(source); + if (reference.isPresent()) { + if (reference.get().name().equals("rankingExpression") && reference.get().simpleArgument().isPresent()) { + source = reference.get().simpleArgument().get(); // look up function below + } else { + return Optional.of(context.getType(reference.get())); + } + } + // ... or a function + ExpressionFunction func = context.getFunction(source); + if (func != null) { + return Optional.of(func.getBody().type(context)); + } + } + return Optional.empty(); // if this context does not contain this input + } + + private void addAttributeFeatureTypes(ImmutableSDField field, Map<Reference, TensorType> featureTypes) { + Attribute attribute = field.getAttribute(); + field.getAttributes().forEach((k, a) -> { + String name = k; + if (attribute == a) // this attribute should take the fields name + name = field.getName(); // switch to that - it is separate for imported fields + featureTypes.put(FeatureNames.asAttributeFeature(name), + a.tensorType().orElse(TensorType.empty)); + }); + } + + @Override + public String toString() { + return "rank profile '" + name() + "'"; + } + + /** + * A rank setting. The identity of a rank setting is its field name and type (not value). + * A rank setting is immutable. 
+ */ + public static class RankSetting implements Serializable { + + private final String fieldName; + + private final Type type; + + /** The rank value */ + private final Object value; + + public enum Type { + + RANKTYPE("rank-type"), + LITERALBOOST("literal-boost"), + WEIGHT("weight"), + PREFERBITVECTOR("preferbitvector",true); + + private final String name; + + /** True if this setting really pertains to an index, not a field within an index */ + private final boolean isIndexLevel; + + Type(String name) { + this(name,false); + } + + Type(String name,boolean isIndexLevel) { + this.name = name; + this.isIndexLevel=isIndexLevel; + } + + /** True if this setting really pertains to an index, not a field within an index */ + public boolean isIndexLevel() { return isIndexLevel; } + + /** Returns the name of this type */ + public String getName() { + return name; + } + + @Override + public String toString() { + return "type " + name; + } + + } + + public RankSetting(String fieldName, RankSetting.Type type, Object value) { + this.fieldName = fieldName; + this.type = type; + this.value = value; + } + + public String getFieldName() { return fieldName; } + + public Type getType() { return type; } + + public Object getValue() { return value; } + + /** Returns the value as an int, or a negative value if it is not an integer */ + public int getIntValue() { + if (value instanceof Integer) { + return ((Integer)value); + } + else { + return -1; + } + } + + @Override + public int hashCode() { + return fieldName.hashCode() + 17 * type.hashCode(); + } + + @Override + public boolean equals(Object object) { + if (!(object instanceof RankSetting)) { + return false; + } + RankSetting other = (RankSetting)object; + return + fieldName.equals(other.fieldName) && + type.equals(other.type); + } + + @Override + public String toString() { + return type + " setting " + fieldName + ": " + value; + } + + } + + /** A rank property. 
Rank properties are Value Objects */ + public static class RankProperty implements Serializable { + + private final String name; + private final String value; + + public RankProperty(String name, String value) { + this.name = name; + this.value = value; + } + + public String getName() { return name; } + + public String getValue() { return value; } + + @Override + public int hashCode() { + return name.hashCode() + 17 * value.hashCode(); + } + + @Override + public boolean equals(Object object) { + if (! (object instanceof RankProperty)) return false; + RankProperty other=(RankProperty)object; + return (other.name.equals(this.name) && other.value.equals(this.value)); + } + + @Override + public String toString() { + return name + " = " + value; + } + + } + + /** A function in a rank profile */ + public static class RankingExpressionFunction { + + private ExpressionFunction function; + + /** True if this should be inlined into calling expressions. Useful for very cheap functions. */ + private final boolean inline; + + RankingExpressionFunction(ExpressionFunction function, boolean inline) { + this.function = function; + this.inline = inline; + } + + public void setReturnType(TensorType type) { + this.function = function.withReturnType(type); + } + + public ExpressionFunction function() { return function; } + + public boolean inline() { + return inline && function.arguments().isEmpty(); // only inline no-arg functions; + } + + RankingExpressionFunction withExpression(RankingExpression expression) { + return new RankingExpressionFunction(function.withBody(expression), inline); + } + + @Override + public String toString() { + return function.toString(); + } + + } + + public static final class DiversitySettings { + + private String attribute = null; + private int minGroups = 0; + private double cutoffFactor = 10; + private Diversity.CutoffStrategy cutoffStrategy = Diversity.CutoffStrategy.loose; + + public void setAttribute(String value) { attribute = value; } + public void 
setMinGroups(int value) { minGroups = value; } + public void setCutoffFactor(double value) { cutoffFactor = value; } + public void setCutoffStrategy(Diversity.CutoffStrategy strategy) { cutoffStrategy = strategy; } + public String getAttribute() { return attribute; } + public int getMinGroups() { return minGroups; } + public double getCutoffFactor() { return cutoffFactor; } + public Diversity.CutoffStrategy getCutoffStrategy() { return cutoffStrategy; } + + void checkValid() { + if (attribute == null || attribute.isEmpty()) { + throw new IllegalArgumentException("'diversity' did not set non-empty diversity attribute name."); + } + if (minGroups <= 0) { + throw new IllegalArgumentException("'diversity' did not set min-groups > 0"); + } + if (cutoffFactor < 1.0) { + throw new IllegalArgumentException("diversity.cutoff.factor must be larger or equal to 1.0."); + } + } + } + + public static class MatchPhaseSettings { + + private String attribute = null; + private boolean ascending = false; + private int maxHits = 0; // try to get this many hits before degrading the match phase + private double maxFilterCoverage = 0.2; // Max coverage of original corpus that will trigger the filter. 
+ private DiversitySettings diversity = null; + private double evaluationPoint = 0.20; + private double prePostFilterTippingPoint = 1.0; + + public void setDiversity(DiversitySettings value) { + value.checkValid(); + diversity = value; + } + + public void setAscending(boolean value) { ascending = value; } + public void setAttribute(String value) { attribute = value; } + public void setMaxHits(int value) { maxHits = value; } + public void setMaxFilterCoverage(double value) { maxFilterCoverage = value; } + public void setEvaluationPoint(double evaluationPoint) { this.evaluationPoint = evaluationPoint; } + public void setPrePostFilterTippingPoint(double prePostFilterTippingPoint) { this.prePostFilterTippingPoint = prePostFilterTippingPoint; } + + public boolean getAscending() { return ascending; } + public String getAttribute() { return attribute; } + public int getMaxHits() { return maxHits; } + public double getMaxFilterCoverage() { return maxFilterCoverage; } + public DiversitySettings getDiversity() { return diversity; } + public double getEvaluationPoint() { return evaluationPoint; } + public double getPrePostFilterTippingPoint() { return prePostFilterTippingPoint; } + + public void checkValid() { + if (attribute == null) { + throw new IllegalArgumentException("match-phase did not set any attribute"); + } + if (! 
(maxHits > 0)) { + throw new IllegalArgumentException("match-phase did not set max-hits > 0"); + } + } + + } + + public static class TypeSettings { + + private final Map<String, String> types = new HashMap<>(); + + void addType(String name, String type) { + types.put(name, type); + } + + public Map<String, String> getTypes() { + return Collections.unmodifiableMap(types); + } + + } + + public static final class Input { + + private final Reference name; + private final TensorType type; + private final Optional<Tensor> defaultValue; + + public Input(Reference name, TensorType type, Optional<Tensor> defaultValue) { + this.name = name; + this.type = type; + this.defaultValue = defaultValue; + } + + public Reference name() { return name; } + public TensorType type() { return type; } + public Optional<Tensor> defaultValue() { return defaultValue; } + + @Override + public boolean equals(Object o) { + if (o == this) return true; + if ( ! (o instanceof Input)) return false; + Input other = (Input)o; + if ( ! other.name().equals(this.name())) return false; + if ( ! other.type().equals(this.type())) return false; + if ( ! other.defaultValue().equals(this.defaultValue())) return false; + return true; + } + + @Override + public int hashCode() { + return Objects.hash(name, type, defaultValue); + } + + @Override + public String toString() { + return "input '" + name + "' " + type + + (defaultValue().isPresent() ? 
":" + defaultValue.get().toAbbreviatedString() : ""); + } + + } + + public static final class Constant { + + private final Reference name; + private final TensorType type; + + // One of these are non-empty + private final Optional<Tensor> value; + private final Optional<String> valuePath; + + // Always set only if valuePath is set + private final Optional<DistributableResource.PathType> pathType; + + public Constant(Reference name, Tensor value) { + this(name, value.type(), Optional.of(value), Optional.empty(), Optional.empty()); + } + + public Constant(Reference name, TensorType type, String valuePath) { + this(name, type, Optional.empty(), Optional.of(valuePath), Optional.of(DistributableResource.PathType.FILE)); + } + + public Constant(Reference name, TensorType type, String valuePath, DistributableResource.PathType pathType) { + this(name, type, Optional.empty(), Optional.of(valuePath), Optional.of(pathType)); + } + + private Constant(Reference name, TensorType type, Optional<Tensor> value, + Optional<String> valuePath, Optional<DistributableResource.PathType> pathType) { + this.name = Objects.requireNonNull(name); + this.type = Objects.requireNonNull(type); + this.value = Objects.requireNonNull(value); + this.valuePath = Objects.requireNonNull(valuePath); + this.pathType = Objects.requireNonNull(pathType); + + if (type.dimensions().stream().anyMatch(d -> d.isIndexed() && d.size().isEmpty())) + throw new IllegalArgumentException("Illegal type of constant " + name + " type " + type + + ": Dense tensor dimensions must have a size"); + } + + public Reference name() { return name; } + public TensorType type() { return type; } + + /** Returns the value of this, if its path is empty. */ + public Optional<Tensor> value() { return value; } + + /** Returns the path to the value of this, if its value is empty. */ + public Optional<String> valuePath() { return valuePath; } + + /** Returns the path type, if valuePath is set. 
*/ + public Optional<DistributableResource.PathType> pathType() { return pathType; } + + @Override + public boolean equals(Object o) { + if (o == this) return true; + if ( ! (o instanceof Constant)) return false; + Constant other = (Constant)o; + if ( ! other.name().equals(this.name())) return false; + if ( ! other.type().equals(this.type())) return false; + if ( ! other.value().equals(this.value())) return false; + if ( ! other.valuePath().equals(this.valuePath())) return false; + if ( ! other.pathType().equals(this.pathType())) return false; + return true; + } + + @Override + public int hashCode() { + return Objects.hash(name, type, value, valuePath, pathType); + } + + @Override + public String toString() { + return "constant '" + name + "' " + type + ":" + + (value().isPresent() ? value.get().toAbbreviatedString() : " file:" + valuePath.get()); + } + + } + + private static class CachedFunctions { + + private final Map<String, RankingExpressionFunction> allRankingExpressionFunctions; + + private final ImmutableMap<String, ExpressionFunction> allExpressionFunctions; + + CachedFunctions(Map<String, RankingExpressionFunction> functions) { + allRankingExpressionFunctions = functions; + ImmutableMap.Builder<String,ExpressionFunction> mapBuilder = new ImmutableMap.Builder<>(); + for (var entry : functions.entrySet()) { + ExpressionFunction function = entry.getValue().function(); + mapBuilder.put(function.getName(), function); + } + allExpressionFunctions = mapBuilder.build(); + } + + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/RankProfileRegistry.java b/config-model/src/main/java/com/yahoo/schema/RankProfileRegistry.java new file mode 100644 index 00000000000..06ffc934b2d --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/RankProfileRegistry.java @@ -0,0 +1,138 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema; + +import com.yahoo.schema.document.SDDocumentType; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Mapping from name to {@link RankProfile} as well as a reverse mapping of {@link RankProfile} to {@link Schema}. + * Having both of these mappings consolidated here make it easier to remove dependencies on these mappings at + * run time, since it is essentially only used when building rank profile config at deployment time. + * + * Global rank profiles are represented by the Search key null. + * + * @author Ulf Lilleengen + */ +public class RankProfileRegistry { + + private final Map<String, Map<String, RankProfile>> rankProfiles = new LinkedHashMap<>(); + private static final String globalRankProfilesKey = "[global]"; + + /* These rank profiles can be overridden: 'default' rank profile, as that is documented to work. And 'unranked'. */ + static final Set<String> overridableRankProfileNames = new HashSet<>(Arrays.asList("default", "unranked")); + + public static RankProfileRegistry createRankProfileRegistryWithBuiltinRankProfiles(Schema schema) { + RankProfileRegistry rankProfileRegistry = new RankProfileRegistry(); + rankProfileRegistry.add(new DefaultRankProfile(schema, rankProfileRegistry)); + rankProfileRegistry.add(new UnrankedRankProfile(schema, rankProfileRegistry)); + return rankProfileRegistry; + } + + private String extractName(ImmutableSchema search) { + return search != null ? search.getName() : globalRankProfilesKey; + } + + /** Adds a rank profile to this registry */ + public void add(RankProfile rankProfile) { + String schemaName = extractName(rankProfile.schema()); + if ( ! 
rankProfiles.containsKey(schemaName)) { + rankProfiles.put(schemaName, new LinkedHashMap<>()); + } + checkForDuplicate(rankProfile); + rankProfiles.get(schemaName).put(rankProfile.name(), rankProfile); + } + + private void checkForDuplicate(RankProfile rankProfile) { + String rankProfileName = rankProfile.name(); + RankProfile existingRankProfileWithSameName = rankProfiles.get(extractName(rankProfile.schema())).get(rankProfileName); + if (existingRankProfileWithSameName == null) return; + + if ( ! overridableRankProfileNames.contains(rankProfileName)) { + throw new IllegalArgumentException("Duplicate rank profile '" + rankProfileName + "' in " + + rankProfile.schema()); + } + } + + /** + * Returns a named rank profile, null if the search definition doesn't have one with the given name + * + * @param schema the {@link Schema} that owns the rank profile + * @param name the name of the rank profile + * @return the RankProfile to return. + */ + public RankProfile get(String schema, String name) { + Map<String, RankProfile> profiles = rankProfiles.get(schema); + if (profiles == null) return null; + return profiles.get(name); + } + + public RankProfile get(ImmutableSchema schema, String name) { + var profile = get(schema.getName(), name); + if (profile != null) return profile; + if (schema.inherited().isPresent()) return get(schema.inherited().get(), name); + return null; + } + + public RankProfile getGlobal(String name) { + Map<String, RankProfile> profiles = rankProfiles.get(globalRankProfilesKey); + if (profiles == null) return null; + return profiles.get(name); + } + + public RankProfile resolve(SDDocumentType docType, String name) { + RankProfile rankProfile = get(docType.getName(), name); + if (rankProfile != null) return rankProfile; + for (var parent : docType.getInheritedTypes()) { + RankProfile parentProfile = resolve(parent, name); + if (parentProfile != null) return parentProfile; + } + return get(globalRankProfilesKey, name); + } + + /** + * Rank profiles 
that are collected across clusters. + * + * @return a set of global {@link RankProfile} instances + */ + public Collection<RankProfile> all() { + List<RankProfile> all = new ArrayList<>(); + for (var entry : rankProfiles.values()) { + all.addAll(entry.values()); + } + return all; + } + + /** + * Retrieve all rank profiles for a schema + * + * @param schema the schema to fetch rank profiles for, or null for the global ones + * @return a collection of {@link RankProfile} instances + */ + public Collection<RankProfile> rankProfilesOf(ImmutableSchema schema) { + String key = schema == null ? globalRankProfilesKey : schema.getName(); + + if ( ! rankProfiles.containsKey(key)) return List.of(); + + var profiles = new LinkedHashMap<>(rankProfiles.get(key)); + // Add all profiles in inherited schemas, unless they are already present (overridden) + while (schema != null && schema.inherited().isPresent()) { + schema = schema.inherited().get(); + var inheritedProfiles = rankProfiles.get(schema.getName()); + if (inheritedProfiles != null) { + for (Map.Entry<String, RankProfile> inheritedProfile : inheritedProfiles.entrySet()) { + profiles.putIfAbsent(inheritedProfile.getKey(), inheritedProfile.getValue()); + } + } + } + return profiles.values(); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/SDDocumentTypeOrderer.java b/config-model/src/main/java/com/yahoo/schema/SDDocumentTypeOrderer.java new file mode 100644 index 00000000000..64bca9367d8 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/SDDocumentTypeOrderer.java @@ -0,0 +1,136 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.*; +import com.yahoo.document.annotation.AnnotationReferenceDataType; +import com.yahoo.documentmodel.NewDocumentReferenceDataType; +import com.yahoo.documentmodel.NewDocumentType; +import com.yahoo.schema.document.SDDocumentType; +import com.yahoo.schema.document.TemporarySDDocumentType; + +import java.util.*; +import java.util.logging.Level; + +/** + * @author Einar M R Rosenvinge + */ +public class SDDocumentTypeOrderer { + + private final Map<DataTypeName, SDDocumentType> createdSDTypes = new LinkedHashMap<>(); + private final Set<Object> seenTypes = Collections.newSetFromMap(new IdentityHashMap<>()); + List<SDDocumentType> processingOrder = new LinkedList<>(); + private final DeployLogger deployLogger; + + public SDDocumentTypeOrderer(List<SDDocumentType> sdTypes, DeployLogger deployLogger) { + this.deployLogger = deployLogger; + for (SDDocumentType type : sdTypes) { + createdSDTypes.put(type.getDocumentName(), type); + } + } + + List<SDDocumentType> getOrdered() { return processingOrder; } + + public void process() { + for (SDDocumentType type : createdSDTypes.values()) { + process(type, type); + } + } + + private void process(SDDocumentType docOrStruct, SDDocumentType owningDocument) { + resolveAndProcessInheritedTemporaryTypes(docOrStruct, owningDocument); + if (seenTypes.contains(docOrStruct)) { + return; + } + seenTypes.add(docOrStruct); + for (Field field : docOrStruct.fieldSet()) { + var type = field.getDataType(); + String typeName = type.getName(); + if (!seenTypes.contains(type)) { + seenTypes.add(type); + //we haven't seen this before, do it + visit(type, owningDocument); + } + } + processingOrder.add(docOrStruct); + } + + private void resolveAndProcessInheritedTemporaryTypes(SDDocumentType type, SDDocumentType owningDocument) { + List<DataTypeName> toReplace = new ArrayList<>(); + for (SDDocumentType sdoc : type.getInheritedTypes()) { + if 
(sdoc instanceof TemporarySDDocumentType) { + toReplace.add(sdoc.getDocumentName()); + } + } + for (DataTypeName name : toReplace) { + SDDocumentType inherited; + if (type.isStruct()) { + inherited = owningDocument.allTypes().get(new NewDocumentType.Name(name.getName())); + if (inherited == null) throw new IllegalArgumentException("Struct '" + name + "' not found in " + owningDocument); + process(inherited, owningDocument); + } + else { + inherited = createdSDTypes.get(name); + if (inherited == null) { + throw new IllegalArgumentException("document " + type.getName() + + " inherits from unavailable document " + name); + } + process(inherited, inherited); + } + type.inherit(inherited); + } + } + + private SDDocumentType find(String name) { + SDDocumentType sdDocType = createdSDTypes.get(new DataTypeName(name)); + if (sdDocType != null) { + return sdDocType; + } + for(SDDocumentType sdoc : createdSDTypes.values()) { + for (SDDocumentType stype : sdoc.getTypes()) { + if (stype.getName().equals(name)) { + return stype; + } + } + } + return null; + } + + private void visit(DataType type, SDDocumentType owningDocument) { + if (type instanceof StructuredDataType) { + StructuredDataType structType = (StructuredDataType) type; + SDDocumentType sdDocType = owningDocument.getType(structType.getName()); + if (sdDocType == null) { + sdDocType = find(structType.getName()); + } + if (sdDocType == null) { + throw new IllegalArgumentException("Could not find struct '" + type.getName() + "'"); + } + process(sdDocType, owningDocument); + return; + } + + if (type instanceof MapDataType) { + MapDataType mType = (MapDataType) type; + visit(mType.getValueType(), owningDocument); + visit(mType.getKeyType(), owningDocument); + } else if (type instanceof WeightedSetDataType) { + WeightedSetDataType wType = (WeightedSetDataType) type; + visit(wType.getNestedType(), owningDocument); + } else if (type instanceof CollectionDataType) { + CollectionDataType cType = (CollectionDataType) type; + 
visit(cType.getNestedType(), owningDocument); + } else if (type instanceof AnnotationReferenceDataType) { + //do nothing + } else if (type instanceof PrimitiveDataType) { + //do nothing + } else if (type instanceof TensorDataType) { + //do nothing + } else if (type instanceof NewDocumentReferenceDataType) { + //do nothing + } else { + deployLogger.logApplicationPackage(Level.WARNING, "Unknown type : " + type); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/Schema.java b/config-model/src/main/java/com/yahoo/schema/Schema.java new file mode 100644 index 00000000000..c733b6012f9 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/Schema.java @@ -0,0 +1,754 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema; + +import com.yahoo.config.application.api.ApplicationPackage; +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.config.application.api.FileRegistry; +import com.yahoo.config.model.api.ModelContext; +import com.yahoo.config.model.application.provider.BaseDeployLogger; +import com.yahoo.config.model.deploy.TestProperties; +import com.yahoo.document.DataTypeName; +import com.yahoo.document.Field; +import com.yahoo.schema.derived.SummaryClass; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.schema.document.ImportedField; +import com.yahoo.schema.document.ImportedFields; +import com.yahoo.schema.document.SDDocumentType; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.document.Stemming; +import com.yahoo.schema.document.TemporaryImportedFields; +import com.yahoo.schema.document.annotation.SDAnnotationType; +import com.yahoo.searchlib.rankingexpression.Reference; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; + +import java.io.Reader; +import java.util.ArrayList; +import 
java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import java.util.TreeMap; +import java.util.logging.Level; +import java.util.stream.Stream; + +/** + * A schema contains a document type, additional fields, rank profiles and document summaries. + * + * @author bratseth + */ +// TODO: Make a class owned by this, for each of these responsibilities: +// Managing indexes, managing attributes, managing summary classes. +// Ensure that after the processing step, all implicit instances of the above types are explicitly represented +public class Schema implements ImmutableSchema { + + private static final String SD_DOC_FIELD_NAME = "sddocname"; + private static final List<String> RESERVED_NAMES = List.of( + "index", "index_url", "summary", "attribute", "select_input", "host", SummaryClass.DOCUMENT_ID_FIELD, + "position", "split_foreach", "tokenize", "if", "else", "switch", "case", SD_DOC_FIELD_NAME, "relevancy"); + + /** The unique name of this schema */ + private String name; + + /** The application package this is constructed from */ + private final ApplicationPackage applicationPackage; + + /** The name of the schema this should inherit all the content of, if any */ + private final Optional<String> inherited; + + /** True if this doesn't define a search, just a document type */ + private final boolean documentsOnly; + + private Boolean rawAsBase64 = null; + + /** The stemming setting of this schema. Default is BEST. 
*/ + private Stemming stemming = null; + + private final FieldSets fieldSets = new FieldSets(Optional.of(this)); + + /** The document contained in this schema */ + private SDDocumentType documentType; + + /** The extra fields of this schema */ + private final Map<String, SDField> fields = new LinkedHashMap<>(); + + private final Map<String, Index> indices = new LinkedHashMap<>(); + + /** The explicitly defined summaries of this schema. _Must_ preserve order. */ + private final Map<String, DocumentSummary> summaries = new LinkedHashMap<>(); + + /** External rank expression files of this */ + private final LargeRankExpressions largeRankExpressions; + + /** Constants that will be available in all rank profiles. */ + // TODO: Remove on Vespa 9: Should always be in a rank profile + private final Map<Reference, RankProfile.Constant> constants = new LinkedHashMap<>(); + + // TODO: Remove on Vespa 9: Should always be in a rank profile + private final Map<String, OnnxModel> onnxModels = new LinkedHashMap<>(); + + /** All imported fields of this (and parent schemas) */ + // TODO: Use empty, not optional + // TODO: Merge this and importedFields + private final Optional<TemporaryImportedFields> temporaryImportedFields = Optional.of(new TemporaryImportedFields(this)); + /** The resulting processed field */ + private Optional<ImportedFields> importedFields = Optional.empty(); + + private final DeployLogger deployLogger; + private final ModelContext.Properties properties; + + private Application owner; + + /** Testing only */ + public Schema(String name, ApplicationPackage applicationPackage) { + this(name, applicationPackage, Optional.empty(), null, new BaseDeployLogger(), new TestProperties()); + } + + public Schema(String name, + ApplicationPackage applicationPackage, + FileRegistry fileRegistry, + DeployLogger deployLogger, + ModelContext.Properties properties) { + this(name, applicationPackage, Optional.empty(), fileRegistry, deployLogger, properties); + } + + /** + * 
Creates a schema + * + * @param name of the schema + * @param inherited the schema this inherits, if any + */ + public Schema(String name, + ApplicationPackage applicationPackage, + Optional<String> inherited, + FileRegistry fileRegistry, + DeployLogger deployLogger, + ModelContext.Properties properties) { + this(inherited, applicationPackage, fileRegistry, deployLogger, properties, false); + this.name = Objects.requireNonNull(name, "A schema must have a name"); + } + + protected Schema(ApplicationPackage applicationPackage, FileRegistry fileRegistry, + DeployLogger deployLogger, ModelContext.Properties properties) { + this(Optional.empty(), applicationPackage, fileRegistry, deployLogger, properties, true); + } + + private Schema(Optional<String> inherited, + ApplicationPackage applicationPackage, + FileRegistry fileRegistry, + DeployLogger deployLogger, + ModelContext.Properties properties, + boolean documentsOnly) { + this.inherited = inherited; + this.applicationPackage = applicationPackage; + this.deployLogger = deployLogger; + this.properties = properties; + this.documentsOnly = documentsOnly; + largeRankExpressions = new LargeRankExpressions(fileRegistry); + } + + /** + * Assigns the owner of this + * + * @throws IllegalStateException if an owner is already assigned + */ + public void setOwner(Application owner) { + if (this.owner != null) + throw new IllegalStateException("Cannot reassign the owner of " + this); + this.owner = owner; + } + + protected void setName(String name) { this.name = name; } + + @Override + public String getName() {return name; } + + /** Returns true if this only defines a document type, not a full schema */ + public boolean isDocumentsOnly() { + return documentsOnly; + } + + @Override + public Optional<Schema> inherited() { + return inherited.map(name -> owner.schemas().get(name)); + } + + /** + * Returns true if 'raw' fields shall be presented as base64 in summary + * Note that this is temporary and will disappear on Vespa 8 as it 
will become default, and only option. + * + * @return true if raw shall be encoded as base64 in summary + */ + public boolean isRawAsBase64() { + if (rawAsBase64 != null) return rawAsBase64; + // TODO Vespa 8: flip default: + if (inherited.isEmpty()) return false; + return requireInherited().isRawAsBase64(); + } + + public void enableRawAsBase64(boolean value) { rawAsBase64 = value; } + + /** + * Sets the stemming default of fields. Default is ALL + * + * @param stemming set default stemming for this searchdefinition + * @throws NullPointerException if this is attempted set to null + */ + public void setStemming(Stemming stemming) { + this.stemming = Objects.requireNonNull(stemming, "Stemming cannot be null"); + } + + /** Returns whether fields should be stemmed by default or not. Default is BEST. This is never null. */ + public Stemming getStemming() { + if (stemming != null) return stemming; + if (inherited.isEmpty()) return Stemming.BEST; + return requireInherited().getStemming(); + } + + /** + * Adds a document type which is defined in this search definition + * + * @param document the document type to add + */ + public void addDocument(SDDocumentType document) { + if (documentType != null) { + throw new IllegalArgumentException("Schema cannot have more than one document"); + } + documentType = document; + } + + @Override + public LargeRankExpressions rankExpressionFiles() { return largeRankExpressions; } + + public void add(RankProfile.Constant constant) { + constants.put(constant.name(), constant); + } + + /** Returns an unmodifiable map of the constants declared in this. */ + public Map<Reference, RankProfile.Constant> declaredConstants() { return constants; } + + /** Returns an unmodifiable map of the constants available in this. 
*/ + @Override + public Map<Reference, RankProfile.Constant> constants() { + if (inherited().isEmpty()) return Collections.unmodifiableMap(constants); + if (constants.isEmpty()) return inherited().get().constants(); + + Map<Reference, RankProfile.Constant> allConstants = new LinkedHashMap<>(inherited().get().constants()); + allConstants.putAll(constants); + return allConstants; + } + + public void add(OnnxModel model) { + onnxModels.put(model.getName(), model); + } + + /** Returns an unmodifiable map of the onnx models declared in this. */ + public Map<String, OnnxModel> declaredOnnxModels() { return onnxModels; } + + /** Returns an unmodifiable map of the onnx models available in this. */ + @Override + public Map<String, OnnxModel> onnxModels() { + if (inherited().isEmpty()) return Collections.unmodifiableMap(onnxModels); + if (onnxModels.isEmpty()) return inherited().get().onnxModels(); + + Map<String, OnnxModel> allModels = new LinkedHashMap<>(inherited().get().onnxModels()); + allModels.putAll(onnxModels); + return allModels; + } + + public Optional<TemporaryImportedFields> temporaryImportedFields() { + return temporaryImportedFields; + } + + public Optional<ImportedFields> importedFields() { + return importedFields; + } + + public void setImportedFields(ImportedFields importedFields) { + this.importedFields = Optional.of(importedFields); + } + + @Override + public Stream<ImmutableSDField> allImportedFields() { + return importedFields + .map(fields -> fields.fields().values().stream()) + .orElse(Stream.empty()) + .map(field -> field.asImmutableSDField()); + } + + @Override + public ImmutableSDField getField(String name) { + ImmutableSDField field = getConcreteField(name); + if (field != null) return field; + return allImportedFields() + .filter(f -> f.getName().equals(name)) + .findFirst() + .orElse(null); + } + + @Override + public List<ImmutableSDField> allFieldsList() { + List<ImmutableSDField> all = new ArrayList<>(); + all.addAll(extraFieldList()); + for 
(Field field : documentType.fieldSet()) { + all.add((ImmutableSDField) field); + } + if (importedFields.isPresent()) { + for (ImportedField imported : importedFields.get().fields().values()) { + all.add(imported.asImmutableSDField()); + } + } + return all; + } + + /** + * Gets a document from this search definition + * + * @param name the name of the document to return + * @return the contained or used document type, or null if there is no such document + */ + public SDDocumentType getDocument(String name) { + if (documentType != null && name.equals(documentType.getName())) { + return documentType; + } + return null; + } + + /** + * @return true if the document has been added. + */ + public boolean hasDocument() { + return documentType != null; + } + + /** + * @return The document in this search. + */ + @Override + public SDDocumentType getDocument() { + return documentType; + } + + /** + * Returns a list of all the fields of this search definition, that is all fields in all documents, in the documents + * they inherit, and all extra fields. 
The caller receives ownership to the list - subsequent changes to it will not + * impact this + */ + @Override + public List<SDField> allConcreteFields() { + List<SDField> allFields = new ArrayList<>(); + allFields.addAll(extraFieldList()); + for (Field field : documentType.fieldSet()) { + allFields.add((SDField)field); + } + return allFields; + } + + /** + * Returns the content of a ranking expression file + */ + @Override + public Reader getRankingExpression(String fileName) { + return applicationPackage.getRankingExpression(fileName); + } + + public Application application() { return owner; } + + @Override + public ApplicationPackage applicationPackage() { return applicationPackage; } + + @Override + public DeployLogger getDeployLogger() { return deployLogger; } + + @Override + public ModelContext.Properties getDeployProperties() { return properties; } + + /** + * Returns a field defined in this search definition or one if its documents. Fields in this search definition takes + * precedence over document fields having the same name + * + * @param name of the field + * @return the SDField representing the field + */ + @Override + public SDField getConcreteField(String name) { + SDField field = getExtraField(name); + if (field != null) return field; + + return (SDField) documentType.getField(name); + } + + /** + * Returns a field defined in one of the documents of this search definition. + * This does not include the extra fields defined outside the document + * (those accessible through the getExtraField() method). 
+ * + * @param name the name of the field to return + * @return the named field, or null if not found + */ + public SDField getDocumentField(String name) { + return (SDField) documentType.getField(name); + } + + /** + * Adds an extra field of this search definition not contained in a document + * + * @param field to add to the schemas list of external fields + */ + public void addExtraField(SDField field) { + if (fields.containsKey(field.getName())) { + deployLogger.logApplicationPackage(Level.WARNING, "Duplicate field " + field.getName() + " in search definition " + getName()); + } else { + field.setIsExtraField(true); + fields.put(field.getName(), field); + } + } + + public Collection<SDField> extraFieldList() { + if (inherited.isEmpty()) return fields.values(); + var fields = new HashSet<>(requireInherited().extraFieldList()); + fields.addAll(this.fields.values()); + return fields; + } + + public Collection<SDField> allExtraFields() { + Map<String, SDField> extraFields = new TreeMap<>(); + if (inherited.isPresent()) + requireInherited().allExtraFields().forEach(field -> extraFields.put(field.getName(), field)); + for (Field field : documentType.fieldSet()) { + SDField sdField = (SDField) field; + if (sdField.isExtraField()) { + extraFields.put(sdField.getName(), sdField); + } + } + for (SDField field : extraFieldList()) { + extraFields.put(field.getName(), field); + } + return extraFields.values(); + } + + /** + * Returns a field by name, or null if it is not present + * + * @param fieldName the name of the external field to get + * @return the SDField of this name + */ + public SDField getExtraField(String fieldName) { + SDField field = fields.get(fieldName); + if (field != null) return field; + if (inherited.isEmpty()) return null; + return requireInherited().getExtraField(fieldName); + } + + /** + * Adds an explicitly defined index to this search definition + * + * @param index the index to add + */ + public void addIndex(Index index) { + 
indices.put(index.getName(), index); + } + + /** + * Returns an index, or null if no index with this name has had some <b>explicit settings</b> applied. Even if + * this returns null, the index may be implicitly defined by an indexing statement. This will return the + * index whether it is defined on this schema or on one of its fields. + * + * @param name the name of the index to get + * @return the index requested + */ + @Override + public Index getIndex(String name) { + List<Index> sameIndices = new ArrayList<>(1); + + getSchemaIndex(name).ifPresent(sameIndices::add); + + for (ImmutableSDField field : allConcreteFields()) { + if (field.getIndex(name) != null) + sameIndices.add(field.getIndex(name)); + } + if (sameIndices.size() == 0) return null; + if (sameIndices.size() == 1) return sameIndices.get(0); + return consolidateIndices(sameIndices); + } + + /** Returns the schema level index of this name, in this or any inherited schema, if any */ + Optional<Index> getSchemaIndex(String name) { + if (indices.containsKey(name)) return Optional.of(indices.get(name)); + if (inherited.isPresent()) return requireInherited().getSchemaIndex(name); + return Optional.empty(); + } + + public boolean existsIndex(String name) { + if (indices.get(name) != null) + return true; + if (inherited.isPresent() && requireInherited().existsIndex(name)) + return true; + for (ImmutableSDField field : allConcreteFields()) { + if (field.existsIndex(name)) + return true; + } + return false; + } + + /** + * Consolidates a set of index settings for the same index into one + * + * @param indices the list of indexes to consolidate + * @return the consolidated index + */ + private Index consolidateIndices(List<Index> indices) { + Index first = indices.get(0); + Index consolidated = new Index(first.getName()); + consolidated.setRankType(first.getRankType()); + consolidated.setType(first.getType()); + for (Index current : indices) { + if (current.isPrefix()) { + consolidated.setPrefix(true); + } + if 
(current.useInterleavedFeatures()) { + consolidated.setInterleavedFeatures(true); + } + + if (consolidated.getRankType() == null) { + consolidated.setRankType(current.getRankType()); + } else { + if (current.getRankType() != null && consolidated.getRankType() != current.getRankType()) + deployLogger.logApplicationPackage(Level.WARNING, "Conflicting rank type settings for " + + first.getName() + " in " + this + ", using " + + consolidated.getRankType()); + } + + for (Iterator<String> j = current.aliasIterator(); j.hasNext();) { + consolidated.addAlias(j.next()); + } + } + return consolidated; + } + + /** All explicitly defined indices, both on this schema itself (returned first) and all its fields */ + @Override + public List<Index> getExplicitIndices() { + List<Index> allIndices = new ArrayList<>(indices.values()); + + if (inherited.isPresent()) { + for (Index inheritedIndex : requireInherited().getExplicitIndices()) { + if ( ! indices.containsKey(inheritedIndex.getName())) // child redefinitions shadows parents + allIndices.add(inheritedIndex); + } + } + + for (ImmutableSDField field : allConcreteFields()) + allIndices.addAll(field.getIndices().values()); + + return Collections.unmodifiableList(allIndices); + } + + /** Adds an explicitly defined summary to this search definition */ + public void addSummary(DocumentSummary summary) { + summaries.put(summary.getName(), summary); + } + + /** + * Returns a summary class defined by this search definition, or null if no summary with this name is defined. + * The default summary, named "default" is always present. + */ + public DocumentSummary getSummary(String name) { + var summary = summaries.get(name); + if (summary != null) return summary; + if (inherited.isEmpty()) return null; + return requireInherited().getSummary(name); + } + + /** + * Returns the first explicit instance found of a summary field with this name, or null if not present (implicitly + * or explicitly) in any summary class. 
+ */ + public SummaryField getSummaryField(String name) { + for (DocumentSummary summary : summaries.values()) { + SummaryField summaryField = summary.getSummaryField(name); + if (summaryField != null) { + return summaryField; + } + } + if (inherited.isEmpty()) return null; + return requireInherited().getSummaryField(name); + } + + /** + * Returns the first explicit instance found of a summary field with this name, or null if not present explicitly in + * any summary class + * + * @param name the name of the explicit summary field to get. + * @return the SummaryField found. + */ + public SummaryField getExplicitSummaryField(String name) { + for (DocumentSummary summary : summaries.values()) { + SummaryField summaryField = summary.getSummaryField(name); + if (summaryField != null && !summaryField.isImplicit()) + return summaryField; + } + if (inherited.isEmpty()) return null; + return requireInherited().getExplicitSummaryField(name); + } + + /** + * Summaries defined by fields of this search definition. The default summary, named "default", is always the first + * one in the returned iterator. + */ + public Map<String, DocumentSummary> getSummaries() { + // Shortcuts + if (inherited.isEmpty()) return summaries; + if (summaries.isEmpty()) return requireInherited().getSummaries(); + + var allSummaries = new LinkedHashMap<>(requireInherited().getSummaries()); + allSummaries.putAll(summaries); + return allSummaries; + } + + /** Returns the summaries defines in this only, not any that are inherited. */ + public Map<String, DocumentSummary> getSummariesInThis() { return Collections.unmodifiableMap(summaries); } + + /** + * Returns all summary fields, of all document summaries, which has the given field as source. + * The list becomes owned by the receiver. 
+ * + * @param field the source field + * @return the list of summary fields found + */ + @Override + public List<SummaryField> getSummaryFields(ImmutableSDField field) { + List<SummaryField> summaryFields = inherited.isPresent() + ? requireInherited().getSummaryFields(field) + : new java.util.ArrayList<>(); + for (DocumentSummary documentSummary : summaries.values()) { + for (SummaryField summaryField : documentSummary.getSummaryFields().values()) { + if (summaryField.hasSource(field.getName())) { + boolean wanted = true; + for (var already : summaryFields) { + if (summaryField == already) wanted = false; + } + if (wanted) { + summaryFields.add(summaryField); + } + } + } + } + return summaryFields; + } + + /** + * Returns one summary field for each summary field name. If there are multiple summary fields with the same + * name, the last one will be used. Multiple fields of the same name should all have the same content in a valid + * search definition, except from the destination set. So this method can be used for all summary handling except + * processing the destination set. The map becomes owned by the receiver. + */ + public Map<String, SummaryField> getUniqueNamedSummaryFields() { + Map<String, SummaryField> summaryFields = inherited.isPresent() ? 
requireInherited().getUniqueNamedSummaryFields() + : new java.util.LinkedHashMap<>(); + for (DocumentSummary documentSummary : summaries.values()) { + for (SummaryField summaryField : documentSummary.getSummaryFields().values()) { + summaryFields.put(summaryField.getName(), summaryField); + } + } + return summaryFields; + } + + /** Returns the first occurrence of an attribute having this name, or null if none */ + public Attribute getAttribute(String name) { + for (ImmutableSDField field : allConcreteFields()) { + Attribute attribute = field.getAttributes().get(name); + if (attribute != null) { + return attribute; + } + } + return null; + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof Schema)) { + return false; + } + + Schema other = (Schema)o; + return getName().equals(other.getName()); + } + + @Override + public int hashCode() { + return name.hashCode(); + } + + @Override + public String toString() { + return "schema '" + getName() + "'"; + } + + public boolean isAccessingDiskSummary(SummaryField field) { + if (!field.getTransform().isInMemory()) return true; + if (field.getSources().size() == 0) return isAccessingDiskSummary(getName()); + for (SummaryField.Source source : field.getSources()) { + if (isAccessingDiskSummary(source.getName())) + return true; + } + return false; + } + + private boolean isAccessingDiskSummary(String source) { + SDField field = getConcreteField(source); + if (field == null) return false; + if (field.doesSummarying() && !field.doesAttributing()) return true; + return false; + } + + public FieldSets fieldSets() { return fieldSets; } + + /** Returns the schema inherited by this, or throws if none */ + private Schema requireInherited() { return owner.schemas().get(inherited.get()); } + + /** + * For adding structs defined in document scope + * + * @param dt the struct to add + * @return self, for chaining + */ + public Schema addType(SDDocumentType dt) { + documentType.addType(dt); // TODO This is a very very 
dirty thing. It must go + return this; + } + + public Schema addAnnotation(SDAnnotationType dt) { + documentType.addAnnotation(dt); + return this; + } + + public void validate(DeployLogger logger) { + if (inherited.isPresent()) { + if (! owner.schemas().containsKey(inherited.get())) + throw new IllegalArgumentException(this + " inherits '" + inherited.get() + + "', but this schema does not exist"); + + // Require schema and document type inheritance to be consistent to keep things simple + // And require it to be explicit so we have the option to support other possibilities later + var parentDocument = owner.schemas().get(inherited.get()).getDocument(); + if ( ! getDocument().inheritedTypes().containsKey(new DataTypeName(parentDocument.getName()))) + throw new IllegalArgumentException(this + " inherits '" + inherited.get() + + "', but its document type does not inherit the parent's document type"); + } + for (var summary : summaries.values()) + summary.validate(logger); + } + + /** Returns true if the given field name is a reserved name */ + public static boolean isReservedName(String name) { + return RESERVED_NAMES.contains(name); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/TemporarySDTypeResolver.java b/config-model/src/main/java/com/yahoo/schema/TemporarySDTypeResolver.java new file mode 100644 index 00000000000..b1ce6f5eb4f --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/TemporarySDTypeResolver.java @@ -0,0 +1,79 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.document.SDDocumentType; +import com.yahoo.schema.document.TemporarySDDocumentType; + +import java.util.Collection; +import java.util.LinkedList; +import java.util.List; +import java.util.logging.Level; + +/** + * @author arnej + */ +public class TemporarySDTypeResolver { + + private final DeployLogger deployLogger; + private final Collection<Schema> toProcess; + private final List<SDDocumentType> docTypes = new LinkedList<>(); + + public TemporarySDTypeResolver(Collection<Schema> schemas, DeployLogger deployLogger) { + this.deployLogger = deployLogger; + this.toProcess = schemas; + } + + private SDDocumentType findDocType(String name) { + assert(name != null); + for (var doc : docTypes) { + if (doc.getName().equals(name)) { + return doc; + } + } + deployLogger.logApplicationPackage(Level.WARNING, "No document type in application matching name: "+name); + return null; + } + + public void process() { + docTypes.add(SDDocumentType.VESPA_DOCUMENT); + for (Schema schema : toProcess) { + if (schema.hasDocument()) { + docTypes.add(schema.getDocument()); + } + } + // first, fix inheritance + for (SDDocumentType doc : docTypes) { + for (SDDocumentType inherited : doc.getInheritedTypes()) { + if (inherited instanceof TemporarySDDocumentType) { + var actual = findDocType(inherited.getName()); + if (actual != null) { + doc.inherit(actual); + } else { + deployLogger.logApplicationPackage(Level.WARNING, "Unresolved inherit '"+inherited.getName() +"' for document "+doc.getName()); + } + } + } + } + // next, check owned types (structs only?) 
+ for (SDDocumentType doc : docTypes) { + for (SDDocumentType owned : doc.getTypes()) { + if (owned instanceof TemporarySDDocumentType) { + deployLogger.logApplicationPackage(Level.WARNING, "Schema '"+doc.getName()+"' owned type '"+owned.getName()+"' is temporary, should not happen"); + continue; + } + for (SDDocumentType inherited : owned.getInheritedTypes()) { + if (inherited instanceof TemporarySDDocumentType) { + var actual = doc.getType(inherited.getName()); + if (actual != null) { + owned.inherit(actual); + } else { + deployLogger.logApplicationPackage(Level.WARNING, "Unresolved inherit '"+inherited.getName() +"' for type '"+owned.getName()+"' in document "+doc.getName()); + } + } + } + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/UnrankedRankProfile.java b/config-model/src/main/java/com/yahoo/schema/UnrankedRankProfile.java new file mode 100644 index 00000000000..6c1f5fc8731 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/UnrankedRankProfile.java @@ -0,0 +1,28 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema; + +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.parser.ParseException; + +/** + * A low-cost ranking profile to use for watcher queries etc. 
+ * + * @author Vegard Havdal + */ +public class UnrankedRankProfile extends RankProfile { + + public UnrankedRankProfile(Schema schema, RankProfileRegistry rankProfileRegistry) { + super("unranked", schema, rankProfileRegistry); + try { + RankingExpression exp = new RankingExpression("value(0)"); + this.setFirstPhaseRanking(exp); + } catch (ParseException e) { + throw new IllegalArgumentException("Could not parse the ranking expression 'value(0)' when setting up " + + "the 'unranked' rank profile"); + } + this.setIgnoreDefaultRankFeatures(true); + this.setKeepRankCount(0); + this.setRerankCount(0); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/AttributeFields.java b/config-model/src/main/java/com/yahoo/schema/derived/AttributeFields.java new file mode 100644 index 00000000000..99f73a75669 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/AttributeFields.java @@ -0,0 +1,324 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.derived; + +import com.yahoo.config.subscription.ConfigInstanceUtil; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.Case; +import com.yahoo.schema.document.Dictionary; +import com.yahoo.schema.document.GeoPos; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.schema.document.Ranking; +import com.yahoo.schema.document.Sorting; +import com.yahoo.vespa.config.search.AttributesConfig; +import com.yahoo.vespa.indexinglanguage.expressions.ToPositionExpression; + +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isArrayOfSimpleStruct; +import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isMapOfPrimitiveType; +import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isMapOfSimpleStruct; +import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isSupportedComplexField; + +/** + * The set of all attribute fields defined by a search definition + * + * @author bratseth + */ +public class AttributeFields extends Derived implements AttributesConfig.Producer { + + public enum FieldSet {ALL, FAST_ACCESS} + + private Map<String, Attribute> attributes = new java.util.LinkedHashMap<>(); + private Map<String, Attribute> importedAttributes = new java.util.LinkedHashMap<>(); + + /** Whether this has any position attribute */ + private boolean hasPosition = false; + + public static final AttributeFields empty = new AttributeFields(null); + + public AttributeFields(Schema schema) { + if (schema != null) + derive(schema); + } + + /** Derives everything from a field */ + @Override + protected void derive(ImmutableSDField field, Schema schema) { + if (unsupportedFieldType(field)) { + return; // Ignore complex struct and map 
fields for indexed search (only supported for streaming search) + } + if (isArrayOfSimpleStruct(field)) { + deriveArrayOfSimpleStruct(field); + } else if (isMapOfSimpleStruct(field)) { + deriveMapOfSimpleStruct(field); + } else if (isMapOfPrimitiveType(field)) { + deriveMapOfPrimitiveType(field); + } else { + deriveAttributes(field); + } + } + + private static boolean unsupportedFieldType(ImmutableSDField field) { + return (field.usesStructOrMap() && + !isSupportedComplexField(field) && + !GeoPos.isAnyPos(field)); + } + + /** Returns an attribute by name, or null if it doesn't exist */ + public Attribute getAttribute(String attributeName) { + return attributes.get(attributeName); + } + + public boolean containsAttribute(String attributeName) { + return getAttribute(attributeName) != null; + } + + /** Derives one attribute. TODO: Support non-default named attributes */ + private void deriveAttributes(ImmutableSDField field) { + if (field.isImportedField()) { + deriveImportedAttributes(field); + return; + } + for (Attribute fieldAttribute : field.getAttributes().values()) { + deriveAttribute(field, fieldAttribute); + } + + if (field.containsExpression(ToPositionExpression.class)) { + // TODO: Move this check to processing and remove this + if (hasPosition) { + throw new IllegalArgumentException("Can not specify more than one set of position attributes per field: " + field.getName()); + } + hasPosition = true; + } + } + + private void applyRanking(ImmutableSDField field, Attribute attribute) { + Ranking ranking = field.getRanking(); + if (ranking != null && ranking.isFilter()) { + attribute.setEnableBitVectors(true); + attribute.setEnableOnlyBitVector(true); + } + } + + private void deriveAttribute(ImmutableSDField field, Attribute fieldAttribute) { + Attribute attribute = getAttribute(fieldAttribute.getName()); + if (attribute == null) { + attributes.put(fieldAttribute.getName(), fieldAttribute); + attribute = getAttribute(fieldAttribute.getName()); + } + 
applyRanking(field, attribute); + } + + private void deriveImportedAttributes(ImmutableSDField field) { + for (Attribute attribute : field.getAttributes().values()) { + if (!importedAttributes.containsKey(field.getName())) { + importedAttributes.put(field.getName(), attribute); + } + } + } + + private void deriveArrayOfSimpleStruct(ImmutableSDField field) { + for (ImmutableSDField structField : field.getStructFields()) { + deriveAttributeAsArrayType(structField); + } + } + + private void deriveAttributeAsArrayType(ImmutableSDField field) { + if (field.isImportedField()) { + deriveImportedAttributes(field); + return; + } + Attribute attribute = field.getAttributes().get(field.getName()); + if (attribute != null) { + applyRanking(field, attribute); + attributes.put(attribute.getName(), attribute.convertToArray()); + } + } + + private void deriveMapOfSimpleStruct(ImmutableSDField field) { + deriveAttributeAsArrayType(field.getStructField("key")); + deriveMapValueField(field.getStructField("value")); + } + + private void deriveMapValueField(ImmutableSDField valueField) { + for (ImmutableSDField structField : valueField.getStructFields()) { + deriveAttributeAsArrayType(structField); + } + } + + private void deriveMapOfPrimitiveType(ImmutableSDField field) { + deriveAttributeAsArrayType(field.getStructField("key")); + deriveAttributeAsArrayType(field.getStructField("value")); + } + + /** Returns a read only attribute iterator */ + public Iterator attributeIterator() { + return attributes().iterator(); + } + + public Collection<Attribute> attributes() { + return Collections.unmodifiableCollection(attributes.values()); + } + + public Collection<Attribute> structFieldAttributes(String baseFieldName) { + String structPrefix = baseFieldName + "."; + return attributes().stream() + .filter(attribute -> attribute.getName().startsWith(structPrefix)) + .collect(Collectors.toList()); + } + + public String toString() { + return "attributes " + getName(); + } + + @Override + 
protected String getDerivedName() { + return "attributes"; + } + + @SuppressWarnings("removal") // TODO Vespa 8: remove + private Map<String, AttributesConfig.Attribute.Builder> toMap(List<AttributesConfig.Attribute.Builder> ls) { + Map<String, AttributesConfig.Attribute.Builder> ret = new LinkedHashMap<>(); + for (AttributesConfig.Attribute.Builder builder : ls) { + ret.put((String) ConfigInstanceUtil.getField(builder, "name"), builder); + } + return ret; + } + + @Override + public void getConfig(AttributesConfig.Builder builder) { + //TODO This is just to get some exporting tests to work, Should be undone and removed + getConfig(builder, FieldSet.ALL, 77777, false); + } + + private boolean isAttributeInFieldSet(Attribute attribute, FieldSet fs) { + return (fs == FieldSet.ALL) || ((fs == FieldSet.FAST_ACCESS) && attribute.isFastAccess()); + } + + private AttributesConfig.Attribute.Builder getConfig(String attrName, Attribute attribute, boolean imported) { + AttributesConfig.Attribute.Builder aaB = new AttributesConfig.Attribute.Builder() + .name(attrName) + .datatype(AttributesConfig.Attribute.Datatype.Enum.valueOf(attribute.getType().getExportAttributeTypeName())) + .collectiontype(AttributesConfig.Attribute.Collectiontype.Enum.valueOf(attribute.getCollectionType().getName())); + if (attribute.isRemoveIfZero()) { + aaB.removeifzero(true); + } + if (attribute.isCreateIfNonExistent()) { + aaB.createifnonexistent(true); + } + aaB.enablebitvectors(attribute.isEnabledBitVectors()); + aaB.enableonlybitvector(attribute.isEnabledOnlyBitVector()); + if (attribute.isFastSearch() || attribute.isFastRank()) { + // TODO make a separate fastrank flag in config instead of overloading fastsearch + aaB.fastsearch(true); + } + if (attribute.isFastAccess()) { + aaB.fastaccess(true); + } + if (attribute.isMutable()) { + aaB.ismutable(true); + } + if (attribute.isHuge()) { + aaB.huge(true); + } + if (attribute.isPaged()) { + aaB.paged(true); + } + if 
(attribute.getSorting().isDescending()) { + aaB.sortascending(false); + } + if (attribute.getSorting().getFunction() != Sorting.Function.UCA) { + aaB.sortfunction(AttributesConfig.Attribute.Sortfunction.Enum.valueOf(attribute.getSorting().getFunction().toString())); + } + if (attribute.getSorting().getStrength() != Sorting.Strength.PRIMARY) { + aaB.sortstrength(AttributesConfig.Attribute.Sortstrength.Enum.valueOf(attribute.getSorting().getStrength().toString())); + } + if (!attribute.getSorting().getLocale().isEmpty()) { + aaB.sortlocale(attribute.getSorting().getLocale()); + } + aaB.arity(attribute.arity()); + aaB.lowerbound(attribute.lowerBound()); + aaB.upperbound(attribute.upperBound()); + aaB.densepostinglistthreshold(attribute.densePostingListThreshold()); + if (attribute.tensorType().isPresent()) { + aaB.tensortype(attribute.tensorType().get().toString()); + } + aaB.imported(imported); + var dma = attribute.distanceMetric(); + aaB.distancemetric(AttributesConfig.Attribute.Distancemetric.Enum.valueOf(dma.toString())); + if (attribute.hnswIndexParams().isPresent()) { + var ib = new AttributesConfig.Attribute.Index.Builder(); + var params = attribute.hnswIndexParams().get(); + ib.hnsw.enabled(true); + ib.hnsw.maxlinkspernode(params.maxLinksPerNode()); + ib.hnsw.neighborstoexploreatinsert(params.neighborsToExploreAtInsert()); + ib.hnsw.multithreadedindexing(params.multiThreadedIndexing()); + aaB.index(ib); + } + Dictionary dictionary = attribute.getDictionary(); + if (dictionary != null) { + aaB.dictionary.type(convert(dictionary.getType())); + aaB.dictionary.match(convert(dictionary.getMatch())); + } + aaB.match(convertMatch(attribute.getCase())); + return aaB; + } + + private static AttributesConfig.Attribute.Dictionary.Type.Enum convert(Dictionary.Type type) { + switch (type) { + case BTREE: + return AttributesConfig.Attribute.Dictionary.Type.BTREE; + case HASH: + return AttributesConfig.Attribute.Dictionary.Type.HASH; + case BTREE_AND_HASH: + return 
AttributesConfig.Attribute.Dictionary.Type.BTREE_AND_HASH; + } + return AttributesConfig.Attribute.Dictionary.Type.BTREE; + } + private static AttributesConfig.Attribute.Dictionary.Match.Enum convert(Case type) { + switch (type) { + case CASED: + return AttributesConfig.Attribute.Dictionary.Match.CASED; + case UNCASED: + return AttributesConfig.Attribute.Dictionary.Match.UNCASED; + } + return AttributesConfig.Attribute.Dictionary.Match.UNCASED; + } + private static AttributesConfig.Attribute.Match.Enum convertMatch(Case type) { + switch (type) { + case CASED: + return AttributesConfig.Attribute.Match.CASED; + case UNCASED: + return AttributesConfig.Attribute.Match.UNCASED; + } + return AttributesConfig.Attribute.Match.UNCASED; + } + + public void getConfig(AttributesConfig.Builder builder, FieldSet fs, long maxUnCommittedMemory, boolean enableBitVectors) { + for (Attribute attribute : attributes.values()) { + if (isAttributeInFieldSet(attribute, fs)) { + AttributesConfig.Attribute.Builder attrBuilder = getConfig(attribute.getName(), attribute, false); + attrBuilder.maxuncommittedmemory(maxUnCommittedMemory); + if (enableBitVectors && attribute.isFastSearch()) { + attrBuilder.enablebitvectors(true); + } + builder.attribute(attrBuilder); + } + } + if (fs == FieldSet.ALL) { + for (Map.Entry<String, Attribute> entry : importedAttributes.entrySet()) { + AttributesConfig.Attribute.Builder attrBuilder = getConfig(entry.getKey(), entry.getValue(), true); + attrBuilder.maxuncommittedmemory(maxUnCommittedMemory); + builder.attribute(attrBuilder); + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/Derived.java b/config-model/src/main/java/com/yahoo/schema/derived/Derived.java new file mode 100644 index 00000000000..9943a02a2f2 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/Derived.java @@ -0,0 +1,141 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.derived; + +import com.yahoo.config.ConfigInstance; +import com.yahoo.config.ConfigInstance.Builder; +import com.yahoo.document.Field; +import com.yahoo.io.IOUtils; +import com.yahoo.schema.Index; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.schema.document.SDDocumentType; +import com.yahoo.schema.document.SDField; +import com.yahoo.text.StringUtilities; + +import java.io.IOException; +import java.io.Writer; +import java.lang.reflect.Method; +import java.util.List; + +/** + * Superclass of all derived configurations + * + * @author bratseth + */ +public abstract class Derived implements Exportable { + + private String name; + + public Derived() { + this(""); + } + + public Derived(String name) { + this.name = name; + } + + public String getName() { return name; } + + protected final void setName(String name) { this.name = name; } + + /** + * Derives the content of this configuration. This + * default calls derive(Document) for each document + * and derive(SDField) for each search definition level field + * AND sets the name of this to the name of the input search definition + */ + protected void derive(Schema schema) { + setName(schema.getName()); + derive(schema.getDocument(), schema); + for (Index index : schema.getExplicitIndices()) + derive(index, schema); + for (SDField field : schema.allExtraFields()) + derive(field, schema); + schema.allImportedFields().forEach(importedField -> derive(importedField, schema)); + } + + + /** + * Derives the content of this configuration. This + * default calls derive(SDField) for each document field + */ + protected void derive(SDDocumentType document, Schema schema) { + for (Field field : document.fieldSet()) { + SDField sdField = (SDField) field; + if ( ! sdField.isExtraField()) { + derive(sdField, schema); + } + } + } + + /** + * Derives the content of this configuration. This + * default does nothing. 
+ */ + protected void derive(ImmutableSDField field, Schema schema) {} + + /** + * Derives the content of this configuration. This + * default does nothing. + */ + protected void derive(Index index, Schema schema) { + } + + protected abstract String getDerivedName(); + + /** Returns the value of getName if true, the given number as a string otherwise */ + protected String getIndex(int number, boolean labels) { + return labels ? getName() : String.valueOf(number); + } + + /** + * Exports this derived configuration to its .cfg file + * in toDirectory + * + * @param toDirectory the directory to export to, or null + * + */ + public final void export(String toDirectory) throws IOException { + Writer writer = null; + try { + String fileName = getDerivedName() + ".cfg"; + if (toDirectory != null) + writer = IOUtils.createWriter(toDirectory + "/" + fileName,false); + try { + exportBuilderConfig(writer); + } catch (ReflectiveOperationException | SecurityException | IllegalArgumentException e) { + throw new RuntimeException(e); + } + } + finally { + if (writer != null) IOUtils.closeWriter(writer); + } + } + + /** + * Checks what this is a producer of, instantiate that and export to writer + */ + // TODO move to ReflectionUtil, and move that to unexported pkg + private void exportBuilderConfig(Writer writer) throws ReflectiveOperationException, SecurityException, IllegalArgumentException, IOException { + for (Class<?> intf : getClass().getInterfaces()) { + if (ConfigInstance.Producer.class.isAssignableFrom(intf)) { + Class<?> configClass = intf.getEnclosingClass(); + String builderClassName = configClass.getCanonicalName()+"$Builder"; + Class<?> builderClass = Class.forName(builderClassName); + ConfigInstance.Builder builder = (Builder) builderClass.getDeclaredConstructor().newInstance(); + Method getConfig = getClass().getMethod("getConfig", builderClass); + getConfig.invoke(this, builder); + ConfigInstance inst = (ConfigInstance) 
configClass.getConstructor(builderClass).newInstance(builder); + List<String> payloadL = ConfigInstance.serialize(inst); + String payload = StringUtilities.implodeMultiline(payloadL); + writer.write(payload); + } + } + } + + @Override + public String getFileName() { + return getDerivedName() + ".cfg"; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java b/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java new file mode 100644 index 00000000000..0f5721bbab3 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/DerivedConfiguration.java @@ -0,0 +1,212 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.config.ConfigInstance; +import com.yahoo.config.model.deploy.DeployState; +import com.yahoo.document.config.DocumenttypesConfig; +import com.yahoo.document.config.DocumentmanagerConfig; +import com.yahoo.io.IOUtils; +import com.yahoo.protect.Validator; +import com.yahoo.search.query.profile.QueryProfileRegistry; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.derived.validation.Validation; +import com.yahoo.vespa.config.search.AttributesConfig; +import com.yahoo.vespa.config.search.core.RankingConstantsConfig; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.io.IOException; +import java.io.Writer; + +/** + * A set of all derived configuration of a schema. Use this as a facade to individual configurations when + * necessary. 
+ * + * @author bratseth + */ +public class DerivedConfiguration implements AttributesConfig.Producer { + + private final Schema schema; + private Summaries summaries; + private SummaryMap summaryMap; + private Juniperrc juniperrc; + private AttributeFields attributeFields; + private RankProfileList rankProfileList; + private IndexingScript indexingScript; + private IndexInfo indexInfo; + private SchemaInfo schemaInfo; + private VsmFields streamingFields; + private VsmSummary streamingSummary; + private IndexSchema indexSchema; + private ImportedFields importedFields; + private final QueryProfileRegistry queryProfiles; + private final long maxUncommittedMemory; + private final boolean enableBitVectors; + + /** + * Creates a complete derived configuration from a search definition. + * Only used in tests. + * + * @param schema the search to derive a configuration from. Derived objects will be snapshots, but this argument is + * live. Which means that this object will be inconsistent when the given search definition is later + * modified. + * @param rankProfileRegistry a {@link com.yahoo.schema.RankProfileRegistry} + */ + public DerivedConfiguration(Schema schema, RankProfileRegistry rankProfileRegistry) { + this(schema, rankProfileRegistry, new QueryProfileRegistry()); + } + + DerivedConfiguration(Schema schema, RankProfileRegistry rankProfileRegistry, QueryProfileRegistry queryProfiles) { + this(schema, new DeployState.Builder().rankProfileRegistry(rankProfileRegistry).queryProfiles(queryProfiles).build()); + } + + /** + * Creates a complete derived configuration snapshot from a schema. + * + * @param schema the schema to derive a configuration from. Derived objects will be snapshots, but this + * argument is live. Which means that this object will be inconsistent if the given + * schema is later modified. 
+ */ + public DerivedConfiguration(Schema schema, DeployState deployState) { + Validator.ensureNotNull("Schema", schema); + this.schema = schema; + this.queryProfiles = deployState.getQueryProfiles().getRegistry(); + this.maxUncommittedMemory = deployState.getProperties().featureFlags().maxUnCommittedMemory(); + this.enableBitVectors = deployState.getProperties().featureFlags().enableBitVectors(); + if ( ! schema.isDocumentsOnly()) { + streamingFields = new VsmFields(schema); + streamingSummary = new VsmSummary(schema); + } + if ( ! schema.isDocumentsOnly()) { + attributeFields = new AttributeFields(schema); + summaries = new Summaries(schema, deployState.getDeployLogger(), deployState.getProperties().featureFlags()); + summaryMap = new SummaryMap(schema); + juniperrc = new Juniperrc(schema); + rankProfileList = new RankProfileList(schema, schema.rankExpressionFiles(), attributeFields, deployState); + indexingScript = new IndexingScript(schema); + indexInfo = new IndexInfo(schema); + schemaInfo = new SchemaInfo(schema, deployState.rankProfileRegistry(), summaries, summaryMap); + indexSchema = new IndexSchema(schema); + importedFields = new ImportedFields(schema); + } + Validation.validate(this, schema); + } + + /** + * Exports a complete set of configuration-server format config files. 
+ * + * @param toDirectory the directory to export to, current dir if null + * @throws IOException if exporting fails, some files may still be created + */ + public void export(String toDirectory) throws IOException { + if (!schema.isDocumentsOnly()) { + summaries.export(toDirectory); + summaryMap.export(toDirectory); + juniperrc.export(toDirectory); + attributeFields.export(toDirectory); + streamingFields.export(toDirectory); + streamingSummary.export(toDirectory); + indexSchema.export(toDirectory); + rankProfileList.export(toDirectory); + indexingScript.export(toDirectory); + indexInfo.export(toDirectory); + importedFields.export(toDirectory); + schemaInfo.export(toDirectory); + } + } + + public static void exportDocuments(DocumentmanagerConfig.Builder documentManagerCfg, String toDirectory) throws IOException { + exportCfg(new DocumentmanagerConfig(documentManagerCfg), toDirectory + "/" + "documentmanager.cfg"); + } + + public static void exportDocuments(DocumenttypesConfig.Builder documentTypesCfg, String toDirectory) throws IOException { + exportCfg(new DocumenttypesConfig(documentTypesCfg), toDirectory + "/" + "documenttypes.cfg"); + } + + public static void exportQueryProfiles(QueryProfileRegistry queryProfileRegistry, String toDirectory) throws IOException { + exportCfg(new QueryProfiles(queryProfileRegistry, (level, message) -> {}).getConfig(), toDirectory + "/" + "query-profiles.cfg"); + } + + public void exportConstants(String toDirectory) throws IOException { + RankingConstantsConfig.Builder b = new RankingConstantsConfig.Builder(); + rankProfileList.getConfig(b); + exportCfg(b.build(), toDirectory + "/" + "ranking-constants.cfg"); + } + + private static void exportCfg(ConfigInstance instance, String fileName) throws IOException { + Writer writer = null; + try { + writer = IOUtils.createWriter(fileName, false); + writer.write(instance.toString()); + writer.write("\n"); + } finally { + if (writer != null) { + IOUtils.closeWriter(writer); + } + } + } + + 
+    /** Returns the summaries of this. */
+    public Summaries getSummaries() { return summaries; }
+
+    /** Returns the attribute fields of this. */
+    public AttributeFields getAttributeFields() { return attributeFields; }
+
+    /** Fills in the attributes config using the field set {@code AttributeFields.FieldSet.ALL}. */
+    @Override
+    public void getConfig(AttributesConfig.Builder builder) {
+        getConfig(builder, AttributeFields.FieldSet.ALL);
+    }
+
+    /**
+     * Fills in the attributes config by delegating to the attribute fields of this,
+     * passing on the max uncommitted memory and bit vector settings held by this.
+     *
+     * @param builder the config builder to fill in
+     * @param fs the field set handed on to the attribute fields
+     */
+    public void getConfig(AttributesConfig.Builder builder, AttributeFields.FieldSet fs) {
+        attributeFields.getConfig(builder, fs, maxUncommittedMemory, enableBitVectors);
+    }
+
+    /** Returns the indexing script of this. */
+    public IndexingScript getIndexingScript() { return indexingScript; }
+
+    /** Returns the index info of this. */
+    public IndexInfo getIndexInfo() { return indexInfo; }
+
+    /** Returns the schema info of this. */
+    public SchemaInfo getSchemaInfo() { return schemaInfo; }
+
+    /** Replaces the indexing script of this. */
+    public void setIndexingScript(IndexingScript script) { this.indexingScript = script; }
+
+    /** Returns the schema of this. */
+    public Schema getSchema() { return schema; }
+
+    /** Returns the rank profile list of this. */
+    public RankProfileList getRankProfileList() { return rankProfileList; }
+
+    /** Returns the streaming summary of this. */
+    public VsmSummary getVsmSummary() { return streamingSummary; }
+
+    /** Returns the streaming fields of this. */
+    public VsmFields getVsmFields() { return streamingFields; }
+
+    /** Returns the index schema of this. */
+    public IndexSchema getIndexSchema() { return indexSchema; }
+
+    /** Returns the juniperrc of this. */
+    public Juniperrc getJuniperrc() { return juniperrc; }
+
+    /** Returns the summary map of this. */
+    public SummaryMap getSummaryMap() { return summaryMap; }
+
+    /** Returns the imported fields of this. */
+    public ImportedFields getImportedFields() { return importedFields; }
+
+    /** Returns the query profile registry of this. */
+    public QueryProfileRegistry getQueryProfiles() { return queryProfiles; }
+
+}
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/Deriver.java b/config-model/src/main/java/com/yahoo/schema/derived/Deriver.java
new file mode 100644
index 00000000000..44bea43a8e3
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/derived/Deriver.java
@@ -0,0 +1,48 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.derived;
+import com.yahoo.document.config.DocumenttypesConfig;
+import com.yahoo.document.config.DocumentmanagerConfig;
+import com.yahoo.schema.ApplicationBuilder;
+import com.yahoo.schema.parser.ParseException;
+import com.yahoo.vespa.configmodel.producers.DocumentManager;
+import com.yahoo.vespa.configmodel.producers.DocumentTypes;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Facade for deriving configs from schemas
+ *
+ * @author bratseth
+ */
+public class Deriver {
+
+    /**
+     * Creates an application builder, adds each of the given schema files to it in order,
+     * and builds it.
+     *
+     * @param schemas the schema files to add, each passed to {@code addSchemaFile}
+     * @return the builder, after {@code build(true)} has been invoked on it
+     * @throws IllegalArgumentException wrapping the cause if adding a schema file
+     *         fails with a parse or IO exception
+     */
+    public static ApplicationBuilder getSchemaBuilder(List<String> schemas) {
+        ApplicationBuilder schemaBuilder = new ApplicationBuilder();
+        for (String schemaFile : schemas)
+            addSchemaFile(schemaFile, schemaBuilder);
+        schemaBuilder.build(true);
+        return schemaBuilder;
+    }
+
+    /** Returns the document manager config derived from a single schema file. */
+    public static DocumentmanagerConfig.Builder getDocumentManagerConfig(String sd) {
+        List<String> single = Collections.singletonList(sd);
+        return getDocumentManagerConfig(single);
+    }
+
+    /** Returns the document manager config derived from the model built from the given schema files. */
+    public static DocumentmanagerConfig.Builder getDocumentManagerConfig(List<String> schemas) {
+        DocumentManager producer = new DocumentManager();
+        return producer.produce(getSchemaBuilder(schemas).getModel(), new DocumentmanagerConfig.Builder());
+    }
+
+    /** Returns the document types config derived from a single schema file. */
+    public static DocumenttypesConfig.Builder getDocumentTypesConfig(String schema) {
+        List<String> single = Collections.singletonList(schema);
+        return getDocumentTypesConfig(single);
+    }
+
+    /** Returns the document types config derived from the model built from the given schema files. */
+    public static DocumenttypesConfig.Builder getDocumentTypesConfig(List<String> schemas) {
+        DocumentTypes producer = new DocumentTypes();
+        return producer.produce(getSchemaBuilder(schemas).getModel(), new DocumenttypesConfig.Builder());
+    }
+
+    /** Adds one schema file to the builder, rethrowing checked failures as IllegalArgumentException. */
+    private static void addSchemaFile(String schemaFile, ApplicationBuilder schemaBuilder) {
+        try {
+            schemaBuilder.addSchemaFile(schemaFile);
+        } catch (ParseException | IOException e) {
+            throw new IllegalArgumentException(e);
+        }
+    }
+
+}
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/Exportable.java b/config-model/src/main/java/com/yahoo/schema/derived/Exportable.java
new file mode 100644
index 00000000000..4fccfb5d9f8
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/derived/Exportable.java
@@ -0,0 +1,26 @@
+// Copyright Yahoo. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.derived;
+
+/**
+ * Classes exportable to configurations
+ *
+ * @author bratseth
+ */
+public interface Exportable {
+
+    /**
+     * Exports the configuration of this object.
+     *
+     * @param toDirectory the directory to export to, does not write to disk if null
+     * @throws java.io.IOException if exporting fails, some files may still be created
+     */
+    void export(String toDirectory) throws java.io.IOException;
+
+    /**
+     * Returns the (short) name of the exported file.
+     *
+     * @return a String with the (short) name of the exported file
+     */
+    String getFileName();
+
+}
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/FieldRankSettings.java b/config-model/src/main/java/com/yahoo/schema/derived/FieldRankSettings.java
new file mode 100644
index 00000000000..ccb25df031c
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/derived/FieldRankSettings.java
@@ -0,0 +1,75 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.derived;
+
+import com.yahoo.collections.Pair;
+
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * The rank settings of a field used for native rank features.
+ *
+ * @author geirst
+ */
+public class FieldRankSettings {
+
+    private final String fieldName;
+
+    /** The tables of this field, keyed by table type name, in insertion order */
+    private final Map<String, NativeTable> tables = new LinkedHashMap<>();
+
+    public FieldRankSettings(String fieldName) {
+        this.fieldName = fieldName;
+    }
+
+    /**
+     * Adds a table to this, unless a table of the same type name is already present,
+     * in which case the call is silently ignored.
+     */
+    public void addTable(NativeTable table) {
+        // TODO: Throw? (when a table of this type is already registered)
+        tables.putIfAbsent(table.getType().getName(), table);
+    }
+
+    /** Returns whether the table is a field match table or a proximity table */
+    public static boolean isIndexFieldTable(NativeTable table) {
+        if (isFieldMatchTable(table)) return true;
+        return isProximityTable(table);
+    }
+
+    /** Returns whether the table is an attribute match table */
+    public static boolean isAttributeFieldTable(NativeTable table) {
+        return isAttributeMatchTable(table);
+    }
+
+    private static boolean isFieldMatchTable(NativeTable table) {
+        if (table.getType().equals(NativeTable.Type.FIRST_OCCURRENCE)) return true;
+        return table.getType().equals(NativeTable.Type.OCCURRENCE_COUNT);
+    }
+
+    private static boolean isAttributeMatchTable(NativeTable table) {
+        return table.getType().equals(NativeTable.Type.WEIGHT);
+    }
+
+    private static boolean isProximityTable(NativeTable table) {
+        if (table.getType().equals(NativeTable.Type.PROXIMITY)) return true;
+        return table.getType().equals(NativeTable.Type.REVERSE_PROXIMITY);
+    }
+
+    /**
+     * Returns one rank property per table of this whose type is recognized, in table insertion order.
+     * The property name is the feature prefix, the table type name and the field name, joined by dots,
+     * and the value is the table name.
+     */
+    public List<Pair<String, String>> deriveRankProperties() {
+        List<Pair<String, String>> derived = new ArrayList<>();
+        for (NativeTable table : tables.values()) {
+            if (isFieldMatchTable(table))
+                derived.add(rankProperty("nativeFieldMatch.", table));
+            if (isAttributeMatchTable(table))
+                derived.add(rankProperty("nativeAttributeMatch.", table));
+            if (isProximityTable(table))
+                derived.add(rankProperty("nativeProximity.", table));
+        }
+        return derived;
+    }
+
+    /** Creates the property (prefix + type name + "." + field name, table name) for a table */
+    private Pair<String, String> rankProperty(String featurePrefix, NativeTable table) {
+        return new Pair<>(featurePrefix + table.getType().getName() + "." + fieldName, table.getName());
+    }
+
+    @Override
+    public String toString() {
+        return "rank settings of field " + fieldName;
+    }
+
+}
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/FieldResultTransform.java b/config-model/src/main/java/com/yahoo/schema/derived/FieldResultTransform.java
new file mode 100644
index 00000000000..99b2925d714
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/derived/FieldResultTransform.java
@@ -0,0 +1,57 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root.
+package com.yahoo.schema.derived;
+
+import com.yahoo.vespa.documentmodel.SummaryTransform;
+
+/**
+ * The result transformation of a named field
+ *
+ * @author bratseth
+ */
+public class FieldResultTransform {
+
+    private final String fieldName;
+
+    /** The only mutable part of this, see {@link #setTransform} */
+    private SummaryTransform transform;
+
+    private final String argument;
+
+    public FieldResultTransform(String fieldName, SummaryTransform transform, String argument) {
+        this.fieldName = fieldName;
+        this.transform = transform;
+        this.argument = argument;
+    }
+
+    public String getFieldName() { return fieldName; }
+
+    public SummaryTransform getTransform() { return transform; }
+
+    public void setTransform(SummaryTransform transform) { this.transform = transform; }
+
+    /** Returns the argument of this (used as input to the backend docsum rewriter) */
+    public String getArgument() { return argument; }
+
+    /** Combines the hashes of the field name, the transform (weight 11) and the argument (weight 17) */
+    @Override
+    public int hashCode() {
+        int hash = fieldName.hashCode();
+        hash += 11 * transform.hashCode();
+        hash += 17 * argument.hashCode();
+        return hash;
+    }
+
+    /** Two transforms are equal when field name, transform and argument are all equal */
+    @Override
+    public boolean equals(Object o) {
+        if ( ! (o instanceof FieldResultTransform)) return false;
+        FieldResultTransform that = (FieldResultTransform)o;
+
+        if ( ! this.fieldName.equals(that.fieldName)) return false;
+        if ( ! this.transform.equals(that.transform)) return false;
+        return this.argument.equals(that.argument);
+    }
+
+    /** The argument is only included in the string form when it differs from the field name */
+    @Override
+    public String toString() {
+        String sourceString = argument.equals(fieldName) ? "" : " (argument: " + argument + ")";
+        return "field " + fieldName + ": " + transform + sourceString;
+    }
+
+}
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/FileDistributedConstants.java b/config-model/src/main/java/com/yahoo/schema/derived/FileDistributedConstants.java
new file mode 100644
index 00000000000..05f6be2f6f1
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/derived/FileDistributedConstants.java
@@ -0,0 +1,87 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root.
+package com.yahoo.schema.derived;
+
+import com.yahoo.config.application.api.FileRegistry;
+import com.yahoo.schema.DistributableResource;
+import com.yahoo.schema.RankProfile;
+import com.yahoo.tensor.TensorType;
+import com.yahoo.vespa.config.search.core.RankingConstantsConfig;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+/**
+ * Constant values for ranking/model execution tied to a rank profile,
+ * to be distributed as files.
+ *
+ * @author bratseth
+ */
+public class FileDistributedConstants {
+
+    private final Map<String, DistributableConstant> constants;
+
+    /**
+     * Creates this from the constants that have a value path; constants without one are skipped.
+     * Each included constant is validated and registered with the given file registry.
+     */
+    public FileDistributedConstants(FileRegistry fileRegistry, Collection<RankProfile.Constant> constants) {
+        Map<String, DistributableConstant> byName = new LinkedHashMap<>();
+        for (RankProfile.Constant constant : constants) {
+            if (constant.valuePath().isPresent()) {
+                DistributableConstant distributable = toDistributable(constant);
+                distributable.validate();
+                distributable.register(fileRegistry);
+                byName.put(distributable.getName(), distributable);
+            }
+        }
+        this.constants = Collections.unmodifiableMap(byName);
+    }
+
+    /** Creates the distributable form of a constant which is known to have a value path */
+    private static DistributableConstant toDistributable(RankProfile.Constant constant) {
+        return new DistributableConstant(constant.name().simpleArgument().get(),
+                                         constant.type(),
+                                         constant.valuePath().get(),
+                                         constant.pathType().get());
+    }
+
+    /** Returns a read-only map of the constants in this indexed by name. */
+    public Map<String, DistributableConstant> asMap() { return constants; }
+
+    /** Adds one constant entry (name, file reference and type) to the builder per constant in this */
+    public void getConfig(RankingConstantsConfig.Builder builder) {
+        constants.values().forEach(constant ->
+                builder.constant(new RankingConstantsConfig.Constant.Builder()
+                                         .name(constant.getName())
+                                         .fileref(constant.getFileReference())
+                                         .type(constant.getType())));
+    }
+
+    /** A distributable resource which also carries the tensor type of the constant */
+    public static class DistributableConstant extends DistributableResource {
+
+        private final TensorType tensorType;
+
+        /** Creates a constant with path type {@code PathType.FILE} */
+        public DistributableConstant(String name, TensorType type, String fileName) {
+            this(name, type, fileName, PathType.FILE);
+        }
+
+        /** Creates a constant and validates it */
+        public DistributableConstant(String name, TensorType type, String fileName, PathType pathType) {
+            super(name, fileName, pathType);
+            this.tensorType = type;
+            validate();
+        }
+
+        public TensorType getTensorType() { return tensorType; }
+
+        /** Returns the string form of the tensor type */
+        public String getType() { return tensorType.toString(); }
+
+        /**
+         * Runs the superclass validation, then requires a tensor type in which
+         * every indexed dimension has a size.
+         *
+         * @throws IllegalArgumentException if the type is missing or has an indexed dimension without a size
+         */
+        @Override
+        public void validate() {
+            super.validate();
+            if (tensorType == null)
+                throw new IllegalArgumentException("Ranking constant '" + getName() + "' must have a type.");
+            boolean unboundIndexedDimension =
+                    tensorType.dimensions().stream().anyMatch(d -> d.isIndexed() && d.size().isEmpty());
+            if (unboundIndexedDimension)
+                throw new IllegalArgumentException("Illegal type in field " + getName() + " type " + tensorType +
+                                                   ": Dense tensor dimensions must have a size");
+        }
+
+        @Override
+        public String toString() {
+            return super.toString() + "' of type '" + tensorType + "'";
+        }
+
+    }
+
+}
+
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/FileDistributedOnnxModels.java b/config-model/src/main/java/com/yahoo/schema/derived/FileDistributedOnnxModels.java
new file mode 100644
index 00000000000..b5c3909c78c
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/derived/FileDistributedOnnxModels.java
@@ -0,0 +1,60 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.derived;
+
+import com.yahoo.config.application.api.FileRegistry;
+import com.yahoo.schema.OnnxModel;
+import com.yahoo.vespa.config.search.core.OnnxModelsConfig;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.logging.Logger;
+
+/**
+ * ONNX models distributed as files.
+ *
+ * @author bratseth
+ */
+public class FileDistributedOnnxModels {
+
+    private static final Logger log = Logger.getLogger(FileDistributedOnnxModels.class.getName());
+
+    private final Map<String, OnnxModel> models;
+
+    /** Validates each model, registers it with the file registry, and keeps it indexed by name */
+    public FileDistributedOnnxModels(FileRegistry fileRegistry, Collection<OnnxModel> models) {
+        Map<String, OnnxModel> byName = new LinkedHashMap<>();
+        for (OnnxModel model : models) {
+            model.validate();
+            model.register(fileRegistry);
+            byName.put(model.getName(), model);
+        }
+        this.models = Collections.unmodifiableMap(byName);
+    }
+
+    /** Returns a read-only map of the models in this indexed by name. */
+    public Map<String, OnnxModel> asMap() { return models; }
+
+    /**
+     * Adds one model entry to the builder per model in this.
+     * A model whose file reference is the empty string is logged as a warning and left out.
+     */
+    public void getConfig(OnnxModelsConfig.Builder builder) {
+        for (OnnxModel model : models.values()) {
+            if ("".equals(model.getFileReference())) {
+                log.warning("Illegal file reference " + model); // Let tests pass ... we should find a better way
+                continue;
+            }
+            builder.model(toConfig(model));
+        }
+    }
+
+    /** Creates the config builder of a single model; the stateless settings are only set when present */
+    private static OnnxModelsConfig.Model.Builder toConfig(OnnxModel model) {
+        OnnxModelsConfig.Model.Builder modelBuilder = new OnnxModelsConfig.Model.Builder();
+        modelBuilder.dry_run_on_setup(true);
+        modelBuilder.name(model.getName());
+        modelBuilder.fileref(model.getFileReference());
+        model.getInputMap().forEach((name, source) ->
+                modelBuilder.input(new OnnxModelsConfig.Model.Input.Builder().name(name).source(source)));
+        model.getOutputMap().forEach((name, as) ->
+                modelBuilder.output(new OnnxModelsConfig.Model.Output.Builder().name(name).as(as)));
+        if (model.getStatelessExecutionMode().isPresent())
+            modelBuilder.stateless_execution_mode(model.getStatelessExecutionMode().get());
+        if (model.getStatelessInterOpThreads().isPresent())
+            modelBuilder.stateless_interop_threads(model.getStatelessInterOpThreads().get());
+        if (model.getStatelessIntraOpThreads().isPresent())
+            modelBuilder.stateless_intraop_threads(model.getStatelessIntraOpThreads().get());
+        return modelBuilder;
+    }
+
+}
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/ImportedFields.java b/config-model/src/main/java/com/yahoo/schema/derived/ImportedFields.java
new file mode 100644
index 00000000000..fa3f49f06d5
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/derived/ImportedFields.java
@@ -0,0 +1,105 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.derived;
+
+import com.yahoo.schema.Schema;
+import com.yahoo.schema.document.Attribute;
+import com.yahoo.schema.document.GeoPos;
+import com.yahoo.schema.document.ImmutableSDField;
+import com.yahoo.schema.document.ImportedComplexField;
+import com.yahoo.schema.document.ImportedField;
+import com.yahoo.vespa.config.search.ImportedFieldsConfig;
+
+import java.util.Optional;
+
+import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isArrayOfSimpleStruct;
+import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isMapOfPrimitiveType;
+import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isMapOfSimpleStruct;
+
+/**
+ * This class derives imported fields from a schema and produces imported-fields.cfg as needed by the search backend.
+ *
+ * @author geirst
+ */
+public class ImportedFields extends Derived implements ImportedFieldsConfig.Producer {
+
+    private Optional<com.yahoo.schema.document.ImportedFields> importedFields = Optional.empty();
+
+    public ImportedFields(Schema schema) {
+        derive(schema);
+    }
+
+    /** Picks up the imported fields of the schema, if it has any */
+    @Override
+    protected void derive(Schema schema) {
+        importedFields = schema.importedFields();
+    }
+
+    @Override
+    protected String getDerivedName() {
+        return "imported-fields";
+    }
+
+    /** Adds the attributes of all imported fields to the builder; does nothing when there are none */
+    @Override
+    public void getConfig(ImportedFieldsConfig.Builder builder) {
+        importedFields.ifPresent(imported ->
+                imported.fields().forEach((name, field) -> considerField(builder, field)));
+    }
+
+    /** A field name containing a dot is taken to denote a nested field */
+    private static boolean isNestedFieldName(String fieldName) {
+        return fieldName.indexOf('.') != -1;
+    }
+
+    private static void considerField(ImportedFieldsConfig.Builder builder, ImportedField field) {
+        if ( ! (field instanceof ImportedComplexField)) {
+            considerSimpleField(builder, field);
+            return;
+        }
+        considerComplexField(builder, (ImportedComplexField) field);
+    }
+
+    /**
+     * Handles a complex field according to the type of its target field.
+     * The checks are made in order and only the first matching one applies;
+     * a target of any other type is ignored.
+     */
+    private static void considerComplexField(ImportedFieldsConfig.Builder builder, ImportedComplexField field) {
+        ImmutableSDField targetField = field.targetField();
+        if (GeoPos.isAnyPos(targetField)) return; // no action needed
+
+        if (isArrayOfSimpleStruct(targetField)) {
+            considerNestedFields(builder, field);
+            return;
+        }
+        if (isMapOfSimpleStruct(targetField)) {
+            considerSimpleField(builder, field.getNestedField("key"));
+            considerNestedFields(builder, field.getNestedField("value"));
+            return;
+        }
+        if (isMapOfPrimitiveType(targetField)) {
+            considerSimpleField(builder, field.getNestedField("key"));
+            considerSimpleField(builder, field.getNestedField("value"));
+        }
+    }
+
+    /** Considers each nested field of a complex field as a simple field; other fields are ignored */
+    private static void considerNestedFields(ImportedFieldsConfig.Builder builder, ImportedField field) {
+        if ( ! (field instanceof ImportedComplexField)) return;
+
+        ((ImportedComplexField) field).getNestedFields()
+                                      .forEach(nestedField -> considerSimpleField(builder, nestedField));
+    }
+
+    /**
+     * Adds an attribute for the field if its target is an attribute: a target with a nested (dotted) name
+     * must have an attribute object, any other target must do attributing.
+     */
+    private static void considerSimpleField(ImportedFieldsConfig.Builder builder, ImportedField field) {
+        ImmutableSDField targetField = field.targetField();
+        boolean targetIsAttribute = isNestedFieldName(targetField.getName())
+                                    ? targetField.getAttribute() != null
+                                    : targetField.doesAttributing();
+        if (targetIsAttribute)
+            builder.attribute.add(createAttributeBuilder(field));
+    }
+
+    /** Creates the attribute entry naming the imported field, its reference field and its target field */
+    private static ImportedFieldsConfig.Attribute.Builder createAttributeBuilder(ImportedField field) {
+        ImportedFieldsConfig.Attribute.Builder attributeBuilder = new ImportedFieldsConfig.Attribute.Builder();
+        attributeBuilder.name(field.fieldName());
+        attributeBuilder.referencefield(field.reference().referenceField().getName());
+        attributeBuilder.targetfield(field.targetField().getName());
+        return attributeBuilder;
+    }
+
+}
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/Index.java b/config-model/src/main/java/com/yahoo/schema/derived/Index.java
new file mode 100644
index 00000000000..3b5e617d3dc
--- /dev/null
+++ 
b/config-model/src/main/java/com/yahoo/schema/derived/Index.java
@@ -0,0 +1,64 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.derived;
+
+import com.yahoo.document.CollectionDataType;
+import com.yahoo.document.DataType;
+import com.yahoo.document.NumericDataType;
+import com.yahoo.document.datatypes.*;
+
+/**
+ * A type of an index structure
+ *
+ * @author bratseth
+ */
+public class Index {
+
+    /** The index type enumeration */
+    public static class Type {
+
+        public static final Type TEXT = new Type("text");
+        public static final Type INT64 = new Type("long");
+        public static final Type BOOLEANTREE = new Type("booleantree");
+
+        private final String name;
+
+        private Type(String name) {
+            this.name = name;
+        }
+
+        @Override
+        public int hashCode() {
+            return name.hashCode();
+        }
+
+        public String getName() { return name; }
+
+        /** Types are equal when their names are equal */
+        @Override
+        public boolean equals(Object other) {
+            if (other instanceof Type)
+                return this.name.equals(((Type)other).name);
+            return false;
+        }
+
+        @Override
+        public String toString() {
+            return "type: " + name;
+        }
+
+    }
+
+    /**
+     * Returns the index type to use for a field type.
+     * A collection type which matches none of the other cases gets the index type of its nested type.
+     *
+     * @throws IllegalArgumentException if no index type is known for the field type
+     */
+    public static Type convertType(DataType fieldType) {
+        FieldValue fval = fieldType.createFieldValue();
+        if (fieldType instanceof NumericDataType) return Type.INT64;
+        if (fval instanceof StringFieldValue) return Type.TEXT;
+        if (fval instanceof Raw || fval instanceof PredicateFieldValue) return Type.BOOLEANTREE;
+        if (fieldType instanceof CollectionDataType)
+            return convertType(((CollectionDataType) fieldType).getNestedType());
+
+        throw new IllegalArgumentException("Don't know which index type to " +
+                                           "convert " + fieldType + " to");
+    }
+}
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java b/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java
new file mode 100644
index 
00000000000..4887ad52974 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java @@ -0,0 +1,595 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.document.CollectionDataType; +import com.yahoo.document.DataType; +import com.yahoo.document.Field; +import com.yahoo.document.MapDataType; +import com.yahoo.document.NumericDataType; +import com.yahoo.document.PrimitiveDataType; +import com.yahoo.document.StructuredDataType; +import com.yahoo.schema.Index; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.BooleanIndexDefinition; +import com.yahoo.schema.document.Case; +import com.yahoo.schema.document.FieldSet; +import com.yahoo.schema.document.GeoPos; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.schema.document.Matching; +import com.yahoo.schema.document.MatchType; +import com.yahoo.schema.document.Stemming; +import com.yahoo.schema.processing.ExactMatch; +import com.yahoo.schema.processing.NGramMatch; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.search.config.IndexInfoConfig; + +import java.util.Map; +import java.util.Optional; +import java.util.Set; + +/** + * Per-index commands which should be applied to queries prior to searching + * + * @author bratseth + */ +public class IndexInfo extends Derived implements IndexInfoConfig.Producer { + + private static final String CMD_ATTRIBUTE = "attribute"; + private static final String CMD_DEFAULT_POSITION = "default-position"; + private static final String CMD_DYNTEASER = "dynteaser"; + private static final String CMD_FULLURL = "fullurl"; + private static final String CMD_HIGHLIGHT = "highlight"; + private static final String CMD_INDEX = "index"; + private static final String CMD_LOWERCASE = "lowercase"; + private static final String CMD_NORMALIZE = "normalize"; + private 
static final String CMD_STEM = "stem"; + private static final String CMD_URLHOST = "urlhost"; + private static final String CMD_WORD = "word"; + private static final String CMD_PLAIN_TOKENS = "plain-tokens"; + private static final String CMD_MULTIVALUE = "multivalue"; + private static final String CMD_FAST_SEARCH = "fast-search"; + private static final String CMD_PREDICATE = "predicate"; + private static final String CMD_PREDICATE_BOUNDS = "predicate-bounds"; + private static final String CMD_NUMERICAL = "numerical"; + private static final String CMD_PHRASE_SEGMENTING = "phrase-segmenting"; + private final Set<IndexCommand> commands = new java.util.LinkedHashSet<>(); + private final Map<String, String> aliases = new java.util.LinkedHashMap<>(); + private final Map<String, FieldSet> fieldSets; + private Schema schema; + + public IndexInfo(Schema schema) { + this.fieldSets = schema.fieldSets().userFieldSets(); + addIndexCommand("sddocname", CMD_INDEX); + addIndexCommand("sddocname", CMD_WORD); + derive(schema); + } + + @Override + protected void derive(Schema schema) { + super.derive(schema); // Derive per field + this.schema = schema; + // Populate fieldsets with actual field objects, bit late to do that here but + for (FieldSet fs : fieldSets.values()) { + for (String fieldName : fs.getFieldNames()) { + fs.fields().add(schema.getField(fieldName)); + } + } + // Must follow, because index settings overrides field settings + for (Index index : schema.getExplicitIndices()) { + derive(index, schema); + } + + // Commands for summary fields + // TODO: Move to fieldinfo and implement differently. 
This is not right + for (SummaryField summaryField : schema.getUniqueNamedSummaryFields().values()) { + if (summaryField.getTransform().isTeaser()) { + addIndexCommand(summaryField.getName(), CMD_DYNTEASER); + } + if (summaryField.getTransform().isBolded()) { + addIndexCommand(summaryField.getName(), CMD_HIGHLIGHT); + } + } + } + + private static boolean isPositionField(ImmutableSDField field) { + return GeoPos.isAnyPos(field); + } + + @Override + protected void derive(ImmutableSDField field, Schema schema) { + derive(field, schema, false); + } + + protected void derive(ImmutableSDField field, Schema schema, boolean inPosition) { + if (field.getDataType().equals(DataType.PREDICATE)) { + addIndexCommand(field, CMD_PREDICATE); + Index index = field.getIndex(field.getName()); + if (index != null) { + BooleanIndexDefinition options = index.getBooleanIndexDefiniton(); + if (options.hasLowerBound() || options.hasUpperBound()) { + addIndexCommand(field.getName(), CMD_PREDICATE_BOUNDS + " [" + + (options.hasLowerBound() ? Long.toString(options.getLowerBound()) : "") + ".." + + (options.hasUpperBound() ? 
Long.toString(options.getUpperBound()) : "") + "]"); + } + } + } + + // Field level aliases + for (Map.Entry<String, String> e : field.getAliasToName().entrySet()) { + String alias = e.getKey(); + String name = e.getValue(); + addIndexAlias(alias, name); + } + boolean isPosition = isPositionField(field); + if (field.usesStructOrMap()) { + for (ImmutableSDField structField : field.getStructFields()) { + derive(structField, schema, isPosition); // Recursion + } + } + + if (isPosition) { + addIndexCommand(field.getName(), CMD_DEFAULT_POSITION); + } + + addIndexCommand(field, CMD_INDEX); // List the indices + + if (needLowerCase(field)) { + addIndexCommand(field, CMD_LOWERCASE); + } + + if (field.getDataType().isMultivalue()) { + addIndexCommand(field, CMD_MULTIVALUE); + } + + Attribute attribute = field.getAttribute(); + if ((field.doesAttributing() || (attribute != null && !inPosition)) && !field.doesIndexing()) { + addIndexCommand(field.getName(), CMD_ATTRIBUTE); + if (attribute != null && attribute.isFastSearch()) + addIndexCommand(field.getName(), CMD_FAST_SEARCH); + } else if (field.doesIndexing()) { + if (stemSomehow(field, schema)) { + addIndexCommand(field, stemCmd(field, schema), new StemmingOverrider(this, schema)); + } + if (normalizeAccents(field)) { + addIndexCommand(field, CMD_NORMALIZE); + } + if (field.getMatching() == null || field.getMatching().getType().equals(MatchType.TEXT)) { + addIndexCommand(field, CMD_PLAIN_TOKENS); + } + } + + if (isUriField(field)) { + addUriIndexCommands(field); + } + + if (field.getDataType().getPrimitiveType() instanceof NumericDataType) { + addIndexCommand(field, CMD_NUMERICAL); + } + + // Explicit commands + for (String command : field.getQueryCommands()) { + addIndexCommand(field, command); + } + + } + + private static boolean isAnyChildString(DataType dataType) { + PrimitiveDataType primitive = dataType.getPrimitiveType(); + if (primitive == PrimitiveDataType.STRING) return true; + if (primitive != null) return false; 
+ if (dataType instanceof StructuredDataType) { + StructuredDataType structured = (StructuredDataType) dataType; + for (Field field : structured.getFields()) { + if (isAnyChildString(field.getDataType())) return true; + } + } else if (dataType instanceof MapDataType) { + MapDataType mapType = (MapDataType) dataType; + return isAnyChildString(mapType.getKeyType()) || isAnyChildString(mapType.getValueType()); + } + return false; + } + + private static boolean needLowerCase(ImmutableSDField field) { + return field.doesIndexing() + || field.doesLowerCasing() + || ((field.doesAttributing() || (field.getAttribute() != null)) + && isAnyChildString(field.getDataType()) + && field.getMatching().getCase().equals(Case.UNCASED)); + } + + static String stemCmd(ImmutableSDField field, Schema schema) { + return CMD_STEM + ":" + field.getStemming(schema).toStemMode(); + } + + private boolean stemSomehow(ImmutableSDField field, Schema schema) { + if (field.getStemming(schema).equals(Stemming.NONE)) return false; + return isTypeOrNested(field, DataType.STRING); + } + + private boolean normalizeAccents(ImmutableSDField field) { + return field.getNormalizing().doRemoveAccents() && isTypeOrNested(field, DataType.STRING); + } + + private boolean isTypeOrNested(ImmutableSDField field, DataType type) { + return field.getDataType().equals(type) || field.getDataType().equals(DataType.getArray(type)) || + field.getDataType().equals(DataType.getWeightedSet(type)); + } + + private boolean isUriField(ImmutableSDField field) { + DataType fieldType = field.getDataType(); + if (DataType.URI.equals(fieldType)) { + return true; + } + if (fieldType instanceof CollectionDataType && + DataType.URI.equals(((CollectionDataType)fieldType).getNestedType())) + { + return true; + } + return false; + } + + private void addUriIndexCommands(ImmutableSDField field) { + String fieldName = field.getName(); + addIndexCommand(fieldName, CMD_FULLURL); + addIndexCommand(fieldName, CMD_LOWERCASE); + 
addIndexCommand(fieldName + "." + fieldName, CMD_FULLURL); + addIndexCommand(fieldName + "." + fieldName, CMD_LOWERCASE); + addIndexCommand(fieldName + ".path", CMD_FULLURL); + addIndexCommand(fieldName + ".path", CMD_LOWERCASE); + addIndexCommand(fieldName + ".query", CMD_FULLURL); + addIndexCommand(fieldName + ".query", CMD_LOWERCASE); + addIndexCommand(fieldName + ".hostname", CMD_URLHOST); + addIndexCommand(fieldName + ".hostname", CMD_LOWERCASE); + + // XXX hack + Index index = field.getIndex("hostname"); + if (index != null) { + addIndexCommand(index, CMD_URLHOST); + } + } + + /** + * Sets a command for all indices of a field + */ + private void addIndexCommand(Index index, String command) { + addIndexCommand(index.getName(), command); + } + + /** + * Sets a command for all indices of a field + */ + private void addIndexCommand(ImmutableSDField field, String command) { + addIndexCommand(field, command, null); + } + + /** + * Sets a command for all indices of a field + */ + private void addIndexCommand(ImmutableSDField field, String command, IndexOverrider overrider) { + if (overrider == null || !overrider.override(field.getName(), command, field)) { + addIndexCommand(field.getName(), command); + } + } + + private void addIndexCommand(String indexName, String command) { + commands.add(new IndexCommand(indexName, command)); + } + + private void addIndexAlias(String alias, String indexName) { + aliases.put(alias, indexName); + } + + /** + * Returns whether a particular command is present in this index info + */ + public boolean hasCommand(String indexName, String command) { + return commands.contains(new IndexCommand(indexName, command)); + } + + private boolean notInCommands(String index) { + for (IndexCommand command : commands) { + if (command.getIndex().equals(index)) { + return false; + } + } + return true; + } + + @Override + public void getConfig(IndexInfoConfig.Builder builder) { + IndexInfoConfig.Indexinfo.Builder iiB = new 
IndexInfoConfig.Indexinfo.Builder(); + iiB.name(getName()); + for (IndexCommand command : commands) { + iiB.command( + new IndexInfoConfig.Indexinfo.Command.Builder() + .indexname(command.getIndex()) + .command(command.getCommand())); + } + // Make user defined field sets searchable + for (FieldSet fieldSet : fieldSets.values()) { + if (notInCommands(fieldSet.getName())) { + addFieldSetCommands(iiB, fieldSet); + } + } + + for (Map.Entry<String, String> e : aliases.entrySet()) { + iiB.alias( + new IndexInfoConfig.Indexinfo.Alias.Builder() + .alias(e.getKey()) + .indexname(e.getValue())); + } + builder.indexinfo(iiB); + } + + // TODO: Move this to the FieldSetSettings processor (and rename it) as that already has to look at this. + private void addFieldSetCommands(IndexInfoConfig.Indexinfo.Builder iiB, FieldSet fieldSet) { + for (String qc : fieldSet.queryCommands()) + iiB.command(new IndexInfoConfig.Indexinfo.Command.Builder().indexname(fieldSet.getName()).command(qc)); + boolean anyIndexing = false; + boolean anyAttributing = false; + boolean anyLowerCasing = false; + boolean anyStemming = false; + boolean anyNormalizing = false; + String phraseSegmentingCommand = null; + String stemmingCommand = null; + Matching fieldSetMatching = fieldSet.getMatching(); // null if no explicit matching + // First a pass over the fields to read some params to decide field settings implicitly: + for (ImmutableSDField field : fieldSet.fields()) { + if (field.doesIndexing()) { + anyIndexing = true; + } + if (field.doesAttributing()) { + anyAttributing = true; + } + if (needLowerCase(field)) { + anyLowerCasing = true; + } + if (stemming(field)) { + anyStemming = true; + stemmingCommand = CMD_STEM + ":" + getEffectiveStemming(field).toStemMode(); + } + if (field.getNormalizing().doRemoveAccents()) { + anyNormalizing = true; + } + if (fieldSetMatching == null && field.getMatching().getType() != Matching.defaultType) { + fieldSetMatching = field.getMatching(); + } + Optional<String> 
explicitPhraseSegmentingCommand = field.getQueryCommands().stream().filter(c -> c.startsWith(CMD_PHRASE_SEGMENTING)).findFirst(); + if (explicitPhraseSegmentingCommand.isPresent()) { + phraseSegmentingCommand = explicitPhraseSegmentingCommand.get(); + } + } + if (anyIndexing && anyAttributing && fieldSet.getMatching() == null) { + // We have both attributes and indexes and no explicit match setting -> + // use default matching as that at least works if the data in the attribute consists + // of single tokens only. + fieldSetMatching = new Matching(); + } + if (anyLowerCasing) { + iiB.command( + new IndexInfoConfig.Indexinfo.Command.Builder() + .indexname(fieldSet.getName()) + .command(CMD_LOWERCASE)); + } + if (hasMultiValueField(fieldSet)) { + iiB.command( + new IndexInfoConfig.Indexinfo.Command.Builder() + .indexname(fieldSet.getName()) + .command(CMD_MULTIVALUE)); + } + if (anyIndexing) { + iiB.command( + new IndexInfoConfig.Indexinfo.Command.Builder() + .indexname(fieldSet.getName()) + .command(CMD_INDEX)); + if ( ! 
isExactMatch(fieldSetMatching)) { + if (fieldSetMatching == null || fieldSetMatching.getType().equals(MatchType.TEXT)) { + iiB.command( + new IndexInfoConfig.Indexinfo.Command.Builder() + .indexname(fieldSet.getName()) + .command(CMD_PLAIN_TOKENS)); + } + if (anyStemming) { + iiB.command( + new IndexInfoConfig.Indexinfo.Command.Builder() + .indexname(fieldSet.getName()) + .command(stemmingCommand)); + } + if (anyNormalizing) + iiB.command( + new IndexInfoConfig.Indexinfo.Command.Builder() + .indexname(fieldSet.getName()) + .command(CMD_NORMALIZE)); + if (phraseSegmentingCommand != null) + iiB.command( + new IndexInfoConfig.Indexinfo.Command.Builder() + .indexname(fieldSet.getName()) + .command(phraseSegmentingCommand)); + } + } else { + // Assume only attribute fields + iiB + .command( + new IndexInfoConfig.Indexinfo.Command.Builder() + .indexname(fieldSet.getName()) + .command(CMD_ATTRIBUTE)) + .command( + new IndexInfoConfig.Indexinfo.Command.Builder() + .indexname(fieldSet.getName()) + .command(CMD_INDEX)); + } + if (fieldSetMatching != null) { + // Explicit matching set on fieldset + if (fieldSetMatching.getType().equals(MatchType.EXACT)) { + String term = fieldSetMatching.getExactMatchTerminator(); + if (term==null) term=ExactMatch.DEFAULT_EXACT_TERMINATOR; + iiB.command( + new IndexInfoConfig.Indexinfo.Command.Builder() + .indexname(fieldSet.getName()) + .command("exact "+term)); + } else if (fieldSetMatching.getType().equals(MatchType.WORD)) { + iiB.command( + new IndexInfoConfig.Indexinfo.Command.Builder() + .indexname(fieldSet.getName()) + .command(CMD_WORD)); + } else if (fieldSetMatching.getType().equals(MatchType.GRAM)) { + iiB.command( + new IndexInfoConfig.Indexinfo.Command.Builder() + .indexname(fieldSet.getName()) + .command("ngram "+(fieldSetMatching.getGramSize()>0 ? 
fieldSetMatching.getGramSize() : NGramMatch.DEFAULT_GRAM_SIZE))); + } else if (fieldSetMatching.getType().equals(MatchType.TEXT)) { + + } + } + } + + private boolean hasMultiValueField(FieldSet fieldSet) { + for (ImmutableSDField field : fieldSet.fields()) { + if (field.getDataType().isMultivalue()) + return true; + } + return false; + } + + private Stemming getEffectiveStemming(ImmutableSDField field) { + Stemming active = field.getStemming(schema); + if (field.getIndex(field.getName()) != null) { + if (field.getIndex(field.getName()).getStemming()!=null) { + active = field.getIndex(field.getName()).getStemming(); + } + } + if (active != null) { + return active; + } + return Stemming.BEST; // assume default + } + + private boolean stemming(ImmutableSDField field) { + if (field.getStemming() != null) { + return !field.getStemming().equals(Stemming.NONE); + } + if (schema.getStemming() == Stemming.NONE) return false; + if (field.isImportedField()) return false; + if (field.getIndex(field.getName())==null) return true; + if (field.getIndex(field.getName()).getStemming()==null) return true; + return !(field.getIndex(field.getName()).getStemming().equals(Stemming.NONE)); + } + + private boolean isExactMatch(Matching m) { + if (m == null) return false; + if (m.getType().equals(MatchType.EXACT)) return true; + if (m.getType().equals(MatchType.WORD)) return true; + return false; + } + + @Override + protected String getDerivedName() { + return "index-info"; + } + + /** + * An index command. Null commands are also represented, to detect consistency issues. This is an (immutable) value + * object. 
+ */ + public static class IndexCommand { + + private String index; + + private String command; + + public IndexCommand(String index, String command) { + this.index = index; + this.command = command; + } + + public String getIndex() { + return index; + } + + public String getCommand() { + return command; + } + + /** + * Returns true if this is the null command (do nothing) + */ + public boolean isNull() { + return command.equals(""); + } + + public int hashCode() { + return index.hashCode() + 17 * command.hashCode(); + } + + public boolean equals(Object object) { + if (!(object instanceof IndexCommand)) { + return false; + } + + IndexCommand other = (IndexCommand)object; + return + other.index.equals(this.index) && + other.command.equals(this.command); + } + + public String toString() { + return "index command " + command + " on index " + index; + } + + } + + /** + * A command which may override the command setting of a field for a particular index + */ + private static abstract class IndexOverrider { + + protected IndexInfo owner; + + public IndexOverrider(IndexInfo owner) { + this.owner = owner; + } + + /** + * Override the setting of this index for this field, returns true if overriden, false if this index should be + * set according to the field + */ + public abstract boolean override(String indexName, String command, ImmutableSDField field); + + } + + private static class StemmingOverrider extends IndexOverrider { + + private Schema schema; + + public StemmingOverrider(IndexInfo owner, Schema schema) { + super(owner); + this.schema = schema; + } + + public boolean override(String indexName, String command, ImmutableSDField field) { + if (schema == null) { + return false; + } + + Index index = schema.getIndex(indexName); + if (index == null) { + return false; + } + + Stemming indexStemming = index.getStemming(); + if (indexStemming == null) { + return false; + } + + if (Stemming.NONE.equals(indexStemming)) { + // Add nothing + } else { + 
owner.addIndexCommand(indexName, CMD_STEM + ":" + indexStemming.toStemMode()); + } + return true; + } + + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/IndexSchema.java b/config-model/src/main/java/com/yahoo/schema/derived/IndexSchema.java new file mode 100644 index 00000000000..7f6c824b979 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/IndexSchema.java @@ -0,0 +1,245 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.document.ArrayDataType; +import com.yahoo.document.DataType; +import com.yahoo.document.Field; +import com.yahoo.document.StructuredDataType; +import com.yahoo.document.TensorDataType; +import com.yahoo.document.WeightedSetDataType; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.BooleanIndexDefinition; +import com.yahoo.schema.document.FieldSet; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.vespa.config.search.IndexschemaConfig; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +/** + * Deriver of indexschema config containing information of all text index fields with name and data type. 
+ * + * @author geirst + */ +public class IndexSchema extends Derived implements IndexschemaConfig.Producer { + + private final List<IndexField> fields = new ArrayList<>(); + private final Map<String, FieldCollection> collections = new LinkedHashMap<>(); + private final Map<String, FieldSet> fieldSets = new LinkedHashMap<>(); + + public IndexSchema(Schema schema) { + fieldSets.putAll(schema.fieldSets().userFieldSets()); + derive(schema); + } + + public boolean containsField(String fieldName) { + return fields.stream().anyMatch(field -> field.getName().equals(fieldName)); + } + + @Override + protected void derive(Schema schema) { + super.derive(schema); + } + + private boolean isTensorField(ImmutableSDField field) { + return field.getDataType() instanceof TensorDataType; + } + + private void deriveIndexFields(ImmutableSDField field, Schema schema) { + // Note: Indexes for tensor fields are NOT part of the index schema for text fields. + if ((!field.doesIndexing() && !field.isIndexStructureField()) || + isTensorField(field)) + { + return; + } + List<Field> lst = flattenField(field.asField()); + if (lst.isEmpty()) { + return; + } + String fieldName = field.getName(); + for (Field flatField : lst) { + deriveIndexFields(flatField, schema); + } + if (lst.size() > 1) { + FieldSet fieldSet = new FieldSet(fieldName); + for (Field flatField : lst) { + fieldSet.addFieldName(flatField.getName()); + } + fieldSets.put(fieldName, fieldSet); + } + } + + private void deriveIndexFields(Field field, Schema schema) { + IndexField toAdd = new IndexField(field.getName(), Index.convertType(field.getDataType()), field.getDataType()); + com.yahoo.schema.Index definedIndex = schema.getIndex(field.getName()); + if (definedIndex != null) { + toAdd.setIndexSettings(definedIndex); + } + fields.add(toAdd); + addFieldToCollection(field.getName(), field.getName()); // implicit + } + + private FieldCollection getCollection(String collectionName) { + FieldCollection retval = 
collections.get(collectionName); + if (retval == null) { + collections.put(collectionName, new FieldCollection(collectionName)); + return collections.get(collectionName); + } + return retval; + } + + private void addFieldToCollection(String fieldName, String collectionName) { + FieldCollection collection = getCollection(collectionName); + collection.fields.add(fieldName); + } + + @Override + protected void derive(ImmutableSDField field, Schema schema) { + if (field.usesStructOrMap()) { + return; // unsupported + } + deriveIndexFields(field, schema); + } + + @Override + protected String getDerivedName() { + return "indexschema"; + } + + @Override + public void getConfig(IndexschemaConfig.Builder icB) { + for (int i = 0; i < fields.size(); ++i) { + IndexField f = fields.get(i); + IndexschemaConfig.Indexfield.Builder ifB = new IndexschemaConfig.Indexfield.Builder() + .name(f.getName()) + .datatype(IndexschemaConfig.Indexfield.Datatype.Enum.valueOf(f.getType())) + .prefix(f.hasPrefix()) + .phrases(f.hasPhrases()) + .positions(f.hasPositions()) + .interleavedfeatures(f.useInterleavedFeatures()); + if (!f.getCollectionType().equals("SINGLE")) { + ifB.collectiontype(IndexschemaConfig.Indexfield.Collectiontype.Enum.valueOf(f.getCollectionType())); + } + icB.indexfield(ifB); + } + for (FieldSet fieldSet : fieldSets.values()) { + IndexschemaConfig.Fieldset.Builder fsB = new IndexschemaConfig.Fieldset.Builder() + .name(fieldSet.getName()); + for (String f : fieldSet.getFieldNames()) { + fsB.field(new IndexschemaConfig.Fieldset.Field.Builder() + .name(f)); + } + icB.fieldset(fsB); + } + } + + @SuppressWarnings("deprecation") + static List<Field> flattenField(Field field) { + DataType fieldType = field.getDataType(); + if (fieldType.getPrimitiveType() != null){ + return Collections.singletonList(field); + } + if (fieldType instanceof ArrayDataType) { + List<Field> ret = new LinkedList<>(); + Field innerField = new Field(field.getName(), 
((ArrayDataType)fieldType).getNestedType()); + for (Field flatField : flattenField(innerField)) { + ret.add(new Field(flatField.getName(), DataType.getArray(flatField.getDataType()))); + } + return ret; + } + if (fieldType instanceof StructuredDataType) { + List<Field> ret = new LinkedList<>(); + String fieldName = field.getName(); + for (Field childField : ((StructuredDataType)fieldType).getFields()) { + for (Field flatField : flattenField(childField)) { + ret.add(new Field(fieldName + "." + flatField.getName(), flatField)); + } + } + return ret; + } + throw new UnsupportedOperationException(fieldType.getName()); + } + + public List<IndexField> getFields() { + return fields; + } + + /** + * Representation of an index field with name and data type. + */ + public static class IndexField { + private String name; + private Index.Type type; + private com.yahoo.schema.Index.Type sdType; // The index type in "user intent land" + private DataType sdFieldType; + private boolean prefix = false; + private boolean phrases = false; // TODO dead, but keep a while to ensure config compatibility? + private boolean positions = true;// TODO dead, but keep a while to ensure config compatibility? + private BooleanIndexDefinition boolIndex = null; + // Whether the posting lists of this index field should have interleaved features (num occs, field length) in document id stream. 
+ private boolean interleavedFeatures = false; + + public IndexField(String name, Index.Type type, DataType sdFieldType) { + this.name = name; + this.type = type; + this.sdFieldType = sdFieldType; + } + public void setIndexSettings(com.yahoo.schema.Index index) { + if (type.equals(Index.Type.TEXT)) { + prefix = index.isPrefix(); + interleavedFeatures = index.useInterleavedFeatures(); + } + sdType = index.getType(); + boolIndex = index.getBooleanIndexDefiniton(); + } + public String getName() { return name; } + public Index.Type getRawType() { return type; } + public String getType() { + return type.equals(Index.Type.INT64) + ? "INT64" : "STRING"; + } + public String getCollectionType() { + return (sdFieldType == null) + ? "SINGLE" + : (sdFieldType instanceof WeightedSetDataType) + ? "WEIGHTEDSET" + : (sdFieldType instanceof ArrayDataType) + ? "ARRAY" + : "SINGLE"; + } + public boolean hasPrefix() { return prefix; } + public boolean hasPhrases() { return phrases; } + public boolean hasPositions() { return positions; } + public boolean useInterleavedFeatures() { return interleavedFeatures; } + + public BooleanIndexDefinition getBooleanIndexDefinition() { + return boolIndex; + } + + /** + * The user set index type + * @return the type + */ + public com.yahoo.schema.Index.Type getSdType() { + return sdType; + } + } + + /** + * Representation of a collection of fields (aka index, physical view). 
+ */ + @SuppressWarnings({ "UnusedDeclaration" }) + private static class FieldCollection { + + private final String name; + private final List<String> fields = new ArrayList<>(); + + FieldCollection(String name) { + this.name = name; + } + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java b/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java new file mode 100644 index 00000000000..6dae89bf692 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/IndexingScript.java @@ -0,0 +1,197 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.GeoPos; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.vespa.configdefinition.IlscriptsConfig; +import com.yahoo.vespa.configdefinition.IlscriptsConfig.Ilscript.Builder; +import com.yahoo.vespa.indexinglanguage.ExpressionVisitor; +import com.yahoo.vespa.indexinglanguage.expressions.ClearStateExpression; +import com.yahoo.vespa.indexinglanguage.expressions.Expression; +import com.yahoo.vespa.indexinglanguage.expressions.GuardExpression; +import com.yahoo.vespa.indexinglanguage.expressions.InputExpression; +import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression; +import com.yahoo.vespa.indexinglanguage.expressions.PassthroughExpression; +import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; +import com.yahoo.vespa.indexinglanguage.expressions.SetLanguageExpression; +import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression; +import com.yahoo.vespa.indexinglanguage.expressions.ZCurveExpression; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * An indexing language script derived 
from a search definition. An indexing script contains a set of indexing + * statements, organized in a composite structure of indexing code snippets. + * + * @author bratseth + */ +public final class IndexingScript extends Derived implements IlscriptsConfig.Producer { + + private final List<String> docFields = new ArrayList<>(); + private final List<Expression> expressions = new ArrayList<>(); + private List<ImmutableSDField> fieldsSettingLanguage; + + public IndexingScript(Schema schema) { + derive(schema); + } + + @Override + protected void derive(Schema schema) { + fieldsSettingLanguage = fieldsSettingLanguage(schema); + if (fieldsSettingLanguage.size() == 1) // Assume this language should be used for all fields + addExpression(fieldsSettingLanguage.get(0).getIndexingScript()); + super.derive(schema); + } + + @Override + protected void derive(ImmutableSDField field, Schema schema) { + if (field.isImportedField()) return; + + if (field.hasFullIndexingDocprocRights()) + docFields.add(field.getName()); + + if (field.usesStructOrMap() && ! GeoPos.isAnyPos(field)) { + return; // unsupported + } + + if (fieldsSettingLanguage.size() == 1 && fieldsSettingLanguage.get(0).equals(field)) + return; // Already added + + addExpression(field.getIndexingScript()); + } + + private void addExpression(ScriptExpression expression) { + if ( expression.isEmpty()) return; + expressions.add(new StatementExpression(new ClearStateExpression(), new GuardExpression(expression))); + } + + private List<ImmutableSDField> fieldsSettingLanguage(Schema schema) { + return schema.allFieldsList().stream() + .filter(field -> ! 
field.isImportedField()) + .filter(field -> field.containsExpression(SetLanguageExpression.class)) + .collect(Collectors.toList()); + } + + public Iterable<Expression> expressions() { + return Collections.unmodifiableCollection(expressions); + } + + @Override + public String getDerivedName() { + return "ilscripts"; + } + + @Override + public void getConfig(IlscriptsConfig.Builder configBuilder) { + IlscriptsConfig.Ilscript.Builder ilscriptBuilder = new IlscriptsConfig.Ilscript.Builder(); + ilscriptBuilder.doctype(getName()); + ilscriptBuilder.docfield(docFields); + addContentInOrder(ilscriptBuilder); + configBuilder.ilscript(ilscriptBuilder); + } + + private void addContentInOrder(IlscriptsConfig.Ilscript.Builder ilscriptBuilder) { + ArrayList<Expression> later = new ArrayList<>(); + Set<String> touchedFields = new HashSet<>(); + for (Expression expression : expressions) { + if (modifiesSelf(expression) && ! setsLanguage(expression)) + later.add(expression); + else + ilscriptBuilder.content(expression.toString()); + + FieldScanVisitor fieldFetcher = new FieldScanVisitor(); + fieldFetcher.visit(expression); + touchedFields.addAll(fieldFetcher.touchedFields()); + } + for (Expression exp : later) + ilscriptBuilder.content(exp.toString()); + generateSyntheticStatementsForUntouchedFields(ilscriptBuilder, touchedFields); + } + + private void generateSyntheticStatementsForUntouchedFields(Builder ilscriptBuilder, Set<String> touchedFields) { + Set<String> fieldsWithSyntheticStatements = new HashSet<>(docFields); + fieldsWithSyntheticStatements.removeAll(touchedFields); + List<String> orderedFields = new ArrayList<>(fieldsWithSyntheticStatements); + Collections.sort(orderedFields); + for (String fieldName : orderedFields) { + StatementExpression copyField = new StatementExpression(new InputExpression(fieldName), + new PassthroughExpression(fieldName)); + ilscriptBuilder.content(copyField.toString()); + } + } + + private boolean setsLanguage(Expression expression) { + 
SetsLanguageVisitor visitor = new SetsLanguageVisitor(); + visitor.visit(expression); + return visitor.setsLanguage; + } + + private boolean modifiesSelf(Expression expression) { + ModifiesSelfVisitor visitor = new ModifiesSelfVisitor(); + visitor.visit(expression); + return visitor.modifiesSelf(); + } + + private static class ModifiesSelfVisitor extends ExpressionVisitor { + + private String inputField = null; + private String outputField = null; + + public boolean modifiesSelf() { return outputField != null && outputField.equals(inputField); } + + @Override + protected void doVisit(Expression expression) { + if (modifiesSelf()) return; + + if (expression instanceof InputExpression) { + inputField = ((InputExpression) expression).getFieldName(); + } + if (expression instanceof OutputExpression) { + outputField = ((OutputExpression) expression).getFieldName(); + } + } + } + + private static class SetsLanguageVisitor extends ExpressionVisitor { + + boolean setsLanguage = false; + + @Override + protected void doVisit(Expression expression) { + if (expression instanceof SetLanguageExpression) + setsLanguage = true; + } + + } + + private static class FieldScanVisitor extends ExpressionVisitor { + List<String> touchedFields = new ArrayList<String>(); + List<String> candidates = new ArrayList<String>(); + + @Override + protected void doVisit(Expression exp) { + if (exp instanceof OutputExpression) { + touchedFields.add(((OutputExpression) exp).getFieldName()); + } + if (exp instanceof InputExpression) { + candidates.add(((InputExpression) exp).getFieldName()); + } + if (exp instanceof ZCurveExpression) { + touchedFields.addAll(candidates); + } + } + + Collection<String> touchedFields() { + Collection<String> output = touchedFields; + touchedFields = null; // deny re-use to try and avoid obvious bugs + return output; + } + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/Juniperrc.java 
b/config-model/src/main/java/com/yahoo/schema/derived/Juniperrc.java new file mode 100644 index 00000000000..162efbb25b4 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/Juniperrc.java @@ -0,0 +1,62 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.schema.Schema; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.config.search.summary.JuniperrcConfig; + +import java.util.Set; + +/** + * Generated juniperrc-config for controlling juniper. + * + * @author Simon Thoresen Hult + */ +public class Juniperrc extends Derived implements JuniperrcConfig.Producer { + + // List of all fields that should be bolded. + private Set<String> boldingFields = new java.util.LinkedHashSet<>(); + + /** + * Constructs a new juniper rc instance for a given search object. This will derive the configuration automatically, + * so there is no need to call {@link #derive(Schema)}. + * + * @param schema The search model to use for deriving. + */ + public Juniperrc(Schema schema) { + derive(schema); + } + + // Inherit doc from Derived. + @Override + protected void derive(Schema schema) { + super.derive(schema); + for (SummaryField summaryField : schema.getUniqueNamedSummaryFields().values()) { + if (summaryField.getTransform() == SummaryTransform.BOLDED) { + boldingFields.add(summaryField.getName()); + } + } + } + + // Inherit doc from Derived. 
+ @Override + protected String getDerivedName() { + return "juniperrc"; + } + + @Override + public void getConfig(JuniperrcConfig.Builder builder) { + if (boldingFields.size() != 0) { + builder.prefix(true); + for (String name : boldingFields) { + builder.override(new JuniperrcConfig.Override.Builder() + .fieldname(name) + .length(65536) + .max_matches(1) + .min_length(8192) + .surround_max(65536)); + } + } + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/NativeRankTypeDefinition.java b/config-model/src/main/java/com/yahoo/schema/derived/NativeRankTypeDefinition.java new file mode 100644 index 00000000000..7d558ea51cc --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/NativeRankTypeDefinition.java @@ -0,0 +1,44 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.schema.document.RankType; + +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +/** + * The definition of a rank type used for native rank features. 
+ * + * @author geirst + */ +public class NativeRankTypeDefinition { + + /** The type this defines */ + private RankType type; + + /** The rank tables of this rank type */ + private List<NativeTable> rankTables = new java.util.ArrayList<>(); + + public NativeRankTypeDefinition(RankType type) { + this.type = type; + } + + public RankType getType() { + return type; + } + + public void addTable(NativeTable table) { + rankTables.add(table); + } + + /** Returns an unmodifiable list of the tables in this type definition */ + public Iterator<NativeTable> rankSettingIterator() { + return Collections.unmodifiableList(rankTables).iterator(); + } + + public String toString() { + return "native definition of rank type '" + type + "'"; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/NativeRankTypeDefinitionSet.java b/config-model/src/main/java/com/yahoo/schema/derived/NativeRankTypeDefinitionSet.java new file mode 100644 index 00000000000..65e68181b5b --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/NativeRankTypeDefinitionSet.java @@ -0,0 +1,93 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.schema.document.RankType; + +import java.util.Collections; +import java.util.Map; + +/** + * A set of rank type definitions used for native rank features. 
+ * + * @author geirst + */ +public class NativeRankTypeDefinitionSet { + + /** The name of this rank definition set */ + private String name; + + /** The unmodifiable rank type implementations in this set */ + private final Map<RankType, NativeRankTypeDefinition> typeDefinitions; + + /** Returns the default rank type (about) */ + public static RankType getDefaultRankType() { return RankType.ABOUT; } + + public NativeRankTypeDefinitionSet(String name) { + this.name = name; + + Map<RankType, NativeRankTypeDefinition> typeDefinitions = new java.util.LinkedHashMap<>(); + typeDefinitions.put(RankType.IDENTITY, createIdentityRankType(RankType.IDENTITY)); + typeDefinitions.put(RankType.ABOUT, createAboutRankType(RankType.ABOUT)); + typeDefinitions.put(RankType.TAGS, createTagsRankType(RankType.TAGS)); + typeDefinitions.put(RankType.EMPTY, createEmptyRankType(RankType.EMPTY)); + this.typeDefinitions = Collections.unmodifiableMap(typeDefinitions); + } + + private NativeRankTypeDefinition createEmptyRankType(RankType type) { + NativeRankTypeDefinition rank = new NativeRankTypeDefinition(type); + rank.addTable(new NativeTable(NativeTable.Type.FIRST_OCCURRENCE, "linear(0,0)")); + rank.addTable(new NativeTable(NativeTable.Type.OCCURRENCE_COUNT, "linear(0,0)")); + rank.addTable(new NativeTable(NativeTable.Type.PROXIMITY, "linear(0,0)")); + rank.addTable(new NativeTable(NativeTable.Type.REVERSE_PROXIMITY, "linear(0,0)")); + rank.addTable(new NativeTable(NativeTable.Type.WEIGHT, "linear(0,0)")); + return rank; + } + + private NativeRankTypeDefinition createAboutRankType(RankType type) { + NativeRankTypeDefinition rank = new NativeRankTypeDefinition(type); + rank.addTable(new NativeTable(NativeTable.Type.FIRST_OCCURRENCE, "expdecay(8000,12.50)")); + rank.addTable(new NativeTable(NativeTable.Type.OCCURRENCE_COUNT, "loggrowth(1500,4000,19)")); + rank.addTable(new NativeTable(NativeTable.Type.PROXIMITY, "expdecay(500,3)")); + rank.addTable(new 
NativeTable(NativeTable.Type.REVERSE_PROXIMITY, "expdecay(400,3)")); + rank.addTable(new NativeTable(NativeTable.Type.WEIGHT, "linear(1,0)")); + return rank; + } + + private NativeRankTypeDefinition createIdentityRankType(RankType type) { + NativeRankTypeDefinition rank = new NativeRankTypeDefinition(type); + rank.addTable(new NativeTable(NativeTable.Type.FIRST_OCCURRENCE, "expdecay(100,12.50)")); + rank.addTable(new NativeTable(NativeTable.Type.OCCURRENCE_COUNT, "loggrowth(1500,4000,19)")); + rank.addTable(new NativeTable(NativeTable.Type.PROXIMITY, "expdecay(5000,3)")); + rank.addTable(new NativeTable(NativeTable.Type.REVERSE_PROXIMITY, "expdecay(3000,3)")); + rank.addTable(new NativeTable(NativeTable.Type.WEIGHT, "linear(1,0)")); + return rank; + } + + private NativeRankTypeDefinition createTagsRankType(RankType type) { + NativeRankTypeDefinition rank = new NativeRankTypeDefinition(type); + rank.addTable(new NativeTable(NativeTable.Type.FIRST_OCCURRENCE, "expdecay(8000,12.50)")); + rank.addTable(new NativeTable(NativeTable.Type.OCCURRENCE_COUNT, "loggrowth(1500,4000,19)")); + rank.addTable(new NativeTable(NativeTable.Type.PROXIMITY, "expdecay(500,3)")); + rank.addTable(new NativeTable(NativeTable.Type.REVERSE_PROXIMITY, "expdecay(400,3)")); + rank.addTable(new NativeTable(NativeTable.Type.WEIGHT, "loggrowth(38,50,1)")); + return rank; + } + + /** + * Returns a rank type definition if given an existing rank type name, + * or null if given a rank type which has no native implementation (meaning somebody forgot to add it), + */ + public NativeRankTypeDefinition getRankTypeDefinition(RankType type) { + if (type == RankType.DEFAULT) + type = getDefaultRankType(); + return typeDefinitions.get(type); + } + + /** Returns an unmodifiable map of the type definitions in this */ + public Map<RankType, NativeRankTypeDefinition> types() { return typeDefinitions; } + + public String toString() { + return "native rank type definitions " + name; + } + +} diff --git 
a/config-model/src/main/java/com/yahoo/schema/derived/NativeTable.java b/config-model/src/main/java/com/yahoo/schema/derived/NativeTable.java new file mode 100644 index 00000000000..6eff2487bca --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/NativeTable.java @@ -0,0 +1,72 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +/** + * A named rank table of a certain type. + * + * @author geirst + */ +public class NativeTable { + + private String name; + + private Type type; + + /** A table type enumeration */ + public static class Type { + + public static Type FIRST_OCCURRENCE = new Type("firstOccurrenceTable"); + public static Type OCCURRENCE_COUNT = new Type("occurrenceCountTable"); + public static Type WEIGHT = new Type("weightTable"); + public static Type PROXIMITY = new Type("proximityTable"); + public static Type REVERSE_PROXIMITY = new Type("reverseProximityTable"); + + private String name; + + private Type(String name) { + this.name = name; + } + + public String getName() { return name; } + + public boolean equals(Object object) { + if (!(object instanceof Type)) { + return false; + } + Type other = (Type)object; + return this.name.equals(other.name); + } + + public int hashCode() { + return name.hashCode(); + } + + public String toString() { + return getName(); + } + } + + public NativeTable(Type type, String name) { + this.type = type; + this.name = name; + } + + public Type getType() { return type; } + + public String getName() { return name; } + + public int hashCode() { + return type.hashCode() + 17*name.hashCode(); + } + + public boolean equals(Object object) { + if (! 
(object instanceof NativeTable)) return false; + NativeTable other = (NativeTable)object; + return other.getName().equals(this.getName()) && other.getType().equals(this.getType()); + } + + public String toString() { + return getType() + ": " + getName(); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/RankProfileList.java b/config-model/src/main/java/com/yahoo/schema/derived/RankProfileList.java new file mode 100644 index 00000000000..98815410876 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/RankProfileList.java @@ -0,0 +1,210 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import ai.vespa.rankingexpression.importer.configmodelview.ImportedMlModels; +import com.yahoo.config.model.api.ModelContext; +import com.yahoo.config.model.deploy.DeployState; +import com.yahoo.search.query.profile.QueryProfileRegistry; +import com.yahoo.schema.LargeRankExpressions; +import com.yahoo.schema.OnnxModel; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.searchlib.rankingexpression.Reference; +import com.yahoo.vespa.config.search.RankProfilesConfig; +import com.yahoo.schema.RankProfile; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.config.search.core.OnnxModelsConfig; +import com.yahoo.vespa.config.search.core.RankingConstantsConfig; +import com.yahoo.vespa.config.search.core.RankingExpressionsConfig; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; + +/** + * The derived rank profiles of a schema + * + * @author bratseth + */ +public class RankProfileList extends Derived implements RankProfilesConfig.Producer { + + private final 
Map<String, RawRankProfile> rankProfiles;
    private final FileDistributedConstants constants;
    private final LargeRankExpressions largeRankExpressions;
    private final FileDistributedOnnxModels onnxModels;

    /** A rank profile list containing no rank profiles */
    public static final RankProfileList empty = new RankProfileList();

    private RankProfileList() {
        constants = new FileDistributedConstants(null, List.of());
        largeRankExpressions = new LargeRankExpressions(null);
        onnxModels = new FileDistributedOnnxModels(null, List.of());
        rankProfiles = Map.of();
    }

    /**
     * Creates a rank profile list
     *
     * @param schema the schema this is a rank profile from, or null for profiles which do not belong to a schema
     * @param largeRankExpressions the collection which receives expressions too large to be sent as properties
     * @param attributeFields the attribute fields to create a ranking for
     * @param deployState the deploy state supplying the rank profile registry, query profiles,
     *                    imported models, properties, executor and file registry
     */
    public RankProfileList(Schema schema,
                           LargeRankExpressions largeRankExpressions,
                           AttributeFields attributeFields,
                           DeployState deployState) {
        setName(schema == null ? "default" : schema.getName());
        this.largeRankExpressions = largeRankExpressions;
        this.rankProfiles = deriveRankProfiles(schema, attributeFields, deployState);
        this.constants = deriveFileDistributedConstants(schema, rankProfiles.values(), deployState);
        this.onnxModels = deriveFileDistributedOnnxModels(schema, rankProfiles.values(), deployState);
    }

    /**
     * Returns whether the given profile can be derived now: It inherits nothing, or everything it inherits
     * is already derived, or it belongs to a schema and all the names it inherits resolve in the registry.
     */
    private boolean areDependenciesReady(RankProfile rank, RankProfileRegistry registry, Set<String> processedProfiles) {
        return rank.inheritedNames().isEmpty() ||
               processedProfiles.containsAll(rank.inheritedNames()) ||
               (rank.schema() != null && rank.inheritedNames().stream().allMatch(name -> registry.resolve(rank.schema().getDocument(), name) != null));
    }

    /**
     * Derives the raw rank profiles of the given schema (or of no schema, if null), in rounds:
     * Each round derives, in parallel, all the profiles whose dependencies are ready.
     *
     * @throws IllegalArgumentException if some profiles never become ready, such that no progress can be made
     */
    private Map<String, RawRankProfile> deriveRankProfiles(Schema schema,
                                                           AttributeFields attributeFields,
                                                           DeployState deployState) {
        Map<String, RawRankProfile> rawRankProfiles = new LinkedHashMap<>();
        if (schema != null) { // profiles belonging to a schema have a default profile
            RawRankProfile rawRank = new RawRankProfile(deployState.rankProfileRegistry().get(schema, "default"),
                                                        largeRankExpressions,
                                                        deployState.getQueryProfiles().getRegistry(),
                                                        deployState.getImportedModels(),
                                                        attributeFields,
                                                        deployState.getProperties());
            rawRankProfiles.put(rawRank.getName(), rawRank);
        }

        Map<String, RankProfile> remaining = new LinkedHashMap<>();
        deployState.rankProfileRegistry().rankProfilesOf(schema).forEach(rank -> remaining.put(rank.name(), rank));
        remaining.remove("default");
        while (!remaining.isEmpty()) {
            List<RankProfile> ready = new ArrayList<>();
            remaining.forEach((name, profile) -> {
                if (areDependenciesReady(profile, deployState.rankProfileRegistry(), rawRankProfiles.keySet()))
                    ready.add(profile);
            });
            // Without this check a round which finds nothing ready would repeat forever,
            // as 'remaining' only shrinks by the profiles which were ready
            if (ready.isEmpty())
                throw new IllegalArgumentException("Could not derive rank profiles " + remaining.keySet() +
                                                   (schema != null ? " in " + schema : "") +
                                                   ": The rank profiles they inherit could not be resolved");
            rawRankProfiles.putAll(processRankProfiles(ready,
                                                       deployState.getQueryProfiles().getRegistry(),
                                                       deployState.getImportedModels(),
                                                       attributeFields,
                                                       deployState.getProperties(),
                                                       deployState.getExecutor()));
            ready.forEach(rank -> remaining.remove(rank.name()));
        }
        return rawRankProfiles;
    }

    /**
     * Derives the given profiles concurrently on the given executor and waits for all of them.
     *
     * @return the derived profiles by name, in the order of the given list
     * @throws IllegalStateException if deriving a profile fails, or this thread is interrupted while waiting
     */
    private Map<String, RawRankProfile> processRankProfiles(List<RankProfile> profiles,
                                                            QueryProfileRegistry queryProfiles,
                                                            ImportedMlModels importedModels,
                                                            AttributeFields attributeFields,
                                                            ModelContext.Properties deployProperties,
                                                            ExecutorService executor) {
        Map<String, Future<RawRankProfile>> futureRawRankProfiles = new LinkedHashMap<>();
        for (RankProfile profile : profiles) {
            futureRawRankProfiles.put(profile.name(), executor.submit(() -> new RawRankProfile(profile, largeRankExpressions, queryProfiles, importedModels,
                                                                                                attributeFields, deployProperties)));
        }
        try {
            Map<String, RawRankProfile> rawRankProfiles = new LinkedHashMap<>();
            for (Future<RawRankProfile> rawFuture : futureRawRankProfiles.values()) {
                RawRankProfile rawRank = rawFuture.get();
                rawRankProfiles.put(rawRank.getName(), rawRank);
            }
            return rawRankProfiles;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // keep the interrupt visible to callers further up
            throw new IllegalStateException(e);
        } catch (ExecutionException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Collects the constants which have a value path, from the schema (if any) and from all
     * the given compiled rank profiles.
     */
    private static FileDistributedConstants deriveFileDistributedConstants(Schema schema,
                                                                           Collection<RawRankProfile> rankProfiles,
                                                                           DeployState deployState) {
        Map<Reference, RankProfile.Constant> allFileConstants = new HashMap<>();
        addFileConstants(schema != null ? schema.constants().values() : List.of(),
                         allFileConstants,
                         schema != null ? schema.toString() : "[global]");
        for (var profile : rankProfiles)
            addFileConstants(profile.compiled().constants().values(), allFileConstants, profile.toString());
        return new FileDistributedConstants(deployState.getFileRegistry(), allFileConstants.values());
    }

    /**
     * Adds the constants of source which have a value path to destination.
     *
     * @throws IllegalArgumentException if destination already has a different constant with the same name
     */
    private static void addFileConstants(Collection<RankProfile.Constant> source,
                                         Map<Reference, RankProfile.Constant> destination,
                                         String sourceName) {
        for (var constant : source) {
            if (constant.valuePath().isEmpty()) continue;
            var existing = destination.get(constant.name());
            if ( existing != null && ! constant.equals(existing)) {
                throw new IllegalArgumentException("Duplicate constants: " + sourceName + " have " + constant +
                                                   ", but we already have " + existing +
                                                   ": Value reference constants must be unique across all rank profiles/models");
            }
            destination.put(constant.name(), constant);
        }
    }

    /** Collects the ONNX models of the schema (if any) and of all the given compiled rank profiles. */
    private static FileDistributedOnnxModels deriveFileDistributedOnnxModels(Schema schema,
                                                                             Collection<RawRankProfile> rankProfiles,
                                                                             DeployState deployState) {
        Map<String, OnnxModel> allModels = new LinkedHashMap<>();
        addOnnxModels(schema != null ? schema.onnxModels().values() : List.of(),
                      allModels,
                      schema != null ? schema.toString() : "[global]");
        for (var profile : rankProfiles)
            addOnnxModels(profile.compiled().onnxModels().values(), allModels, profile.toString());
        return new FileDistributedOnnxModels(deployState.getFileRegistry(), allModels.values());
    }

    /**
     * Adds the models of source to destination.
     *
     * @throws IllegalArgumentException if destination already has a different model with the same name
     */
    private static void addOnnxModels(Collection<OnnxModel> source,
                                      Map<String, OnnxModel> destination,
                                      String sourceName) {
        for (var model : source) {
            var existing = destination.get(model.getName());
            if ( existing != null && ! model.equals(existing)) {
                throw new IllegalArgumentException("Duplicate onnx model: " + sourceName + " have " + model +
                                                   ", but we already have " + existing +
                                                   ": Onnx models must be unique across all rank profiles/models");
            }
            destination.put(model.getName(), model);
        }
    }

    /** Returns the derived rank profiles of this, by name */
    public Map<String, RawRankProfile> getRankProfiles() { return rankProfiles; }
    public FileDistributedConstants constants() { return constants; }
    public FileDistributedOnnxModels getOnnxModels() { return onnxModels; }

    @Override
    public String getDerivedName() { return "rank-profiles"; }

    @Override
    public void getConfig(RankProfilesConfig.Builder builder) {
        for (RawRankProfile rank : rankProfiles.values() ) {
            rank.getConfig(builder);
        }
    }

    public void getConfig(RankingExpressionsConfig.Builder builder) {
        largeRankExpressions.expressions().forEach((expr) -> builder.expression.add(new RankingExpressionsConfig.Expression.Builder().name(expr.getName()).fileref(expr.getFileReference())));
    }

    public void getConfig(RankingConstantsConfig.Builder builder) {
        constants.getConfig(builder);
    }

    public void getConfig(OnnxModelsConfig.Builder builder) {
        onnxModels.getConfig(builder);
    }

}
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java b/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java
new file mode 100644
index 00000000000..a8a9b4c8755
--- /dev/null
+++
b/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java
@@ -0,0 +1,524 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.derived;

import ai.vespa.rankingexpression.importer.configmodelview.ImportedMlModels;
import com.google.common.collect.ImmutableList;
import com.yahoo.collections.Pair;
import com.yahoo.compress.Compressor;
import com.yahoo.config.model.api.ModelContext;
import com.yahoo.search.query.profile.QueryProfileRegistry;
import com.yahoo.schema.FeatureNames;
import com.yahoo.schema.OnnxModel;
import com.yahoo.schema.LargeRankExpressions;
import com.yahoo.schema.RankExpressionBody;
import com.yahoo.schema.document.RankType;
import com.yahoo.schema.RankProfile;
import com.yahoo.schema.expressiontransforms.OnnxModelTransformer;
import com.yahoo.searchlib.rankingexpression.ExpressionFunction;
import com.yahoo.searchlib.rankingexpression.RankingExpression;
import com.yahoo.searchlib.rankingexpression.Reference;
import com.yahoo.searchlib.rankingexpression.parser.ParseException;
import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode;
import com.yahoo.searchlib.rankingexpression.rule.SerializationContext;
import com.yahoo.tensor.TensorType;
import com.yahoo.vespa.config.search.RankProfilesConfig;

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.OptionalDouble;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * A rank profile derived from a schema, containing exactly the features available natively in the server.
 * The derived properties are computed once, in the constructor, and kept in compressed form;
 * they are decompressed each time they are read.
 *
 * @author bratseth
 */
public class RawRankProfile implements RankProfilesConfig.Producer {

    /** A reusable compressor with default settings */
    private static final Compressor compressor = new Compressor();

    /** Separates a property key from its value in the serialized form which is compressed */
    private static final String keyEndMarker = "\r=";
    /** Terminates a property value in the serialized form which is compressed */
    private static final String valueEndMarker = "\r\n";

    private final String name;
    private final Compressor.Compression compressedProperties;

    /** The compiled profile this is created from. */
    private final RankProfile compiled;

    /**
     * Creates a raw rank profile from the given rank profile.
     * This compiles the given profile and derives and compresses all its properties.
     */
    public RawRankProfile(RankProfile rankProfile, LargeRankExpressions largeExpressions,
                          QueryProfileRegistry queryProfiles, ImportedMlModels importedModels,
                          AttributeFields attributeFields, ModelContext.Properties deployProperties) {
        this.name = rankProfile.name();
        compiled = rankProfile.compile(queryProfiles, importedModels);
        compressedProperties = compress(new Deriver(compiled, attributeFields, deployProperties, queryProfiles)
                                                .derive(largeExpressions));
    }

    /** Returns the compiled rank profile this was derived from */
    public RankProfile compiled() { return compiled; }

    /**
     * Serializes the properties as key, key end marker, value, value end marker for each property in order,
     * and compresses the UTF-8 bytes of the result.
     */
    private Compressor.Compression compress(List<Pair<String, String>> properties) {
        StringBuilder b = new StringBuilder();
        for (Pair<String, String> property : properties)
            b.append(property.getFirst()).append(keyEndMarker).append(property.getSecond()).append(valueEndMarker);
        return compressor.compress(b.toString().getBytes(StandardCharsets.UTF_8));
    }

    /**
     * The inverse of {@link #compress}: Decompresses and splits on the key and value end markers.
     * NOTE(review): a key or value containing one of the marker sequences would not round-trip,
     * and a missing marker makes indexOf return -1 - presumably neither occurs for derived properties; confirm.
     */
    private List<Pair<String, String>> decompress(Compressor.Compression compression) {
        String propertiesString = new String(compressor.decompress(compression), StandardCharsets.UTF_8);
        if (propertiesString.isEmpty()) return ImmutableList.of();

        ImmutableList.Builder<Pair<String, String>> properties = new ImmutableList.Builder<>();
        for (int pos = 0; pos < propertiesString.length();) {
            int keyEndPos = propertiesString.indexOf(keyEndMarker, pos);
            String key = propertiesString.substring(pos, keyEndPos);
            pos = keyEndPos + keyEndMarker.length();
            int valueEndPos = propertiesString.indexOf(valueEndMarker, pos);
            String value = propertiesString.substring(pos, valueEndPos);
            pos = valueEndPos + valueEndMarker.length();
            properties.add(new Pair<>(key, value));
        }
        return properties.build();
    }

    public String getName() { return name; }

    /** Adds all the properties of this, in order, as fef properties of the given rank profile builder */
    private void getRankProperties(RankProfilesConfig.Rankprofile.Builder b) {
        RankProfilesConfig.Rankprofile.Fef.Builder fefB = new RankProfilesConfig.Rankprofile.Fef.Builder();
        for (Pair<String, String> p : decompress(compressedProperties))
            fefB.property(new RankProfilesConfig.Rankprofile.Fef.Property.Builder().name(p.getFirst()).value(p.getSecond()));
        b.fef(fefB);
    }

    /**
     * Returns the properties of this as an unmodifiable list.
     * Note: This method is expensive, as it decompresses the properties on each call.
     */
    public List<Pair<String, String>> configProperties() { return decompress(compressedProperties); }

    @Override
    public void getConfig(RankProfilesConfig.Builder builder) {
        RankProfilesConfig.Rankprofile.Builder b = new RankProfilesConfig.Rankprofile.Builder().name(getName());
        getRankProperties(b);
        builder.rankprofile(b);
    }

    /** Returns " rank profile [name]". Note that the returned string starts with a space. */
    @Override
    public String toString() {
        return " rank profile " + name;
    }

    /**
     * Derives the list of properties of a compiled rank profile.
     * The constructor collects settings from the profile, and {@link #derive} turns them into properties.
     */
    private static class Deriver {

        /** Settings per field name, in the order fields were first encountered */
        private final Map<String, FieldRankSettings> fieldRankSettings = new java.util.LinkedHashMap<>();
        private final Set<ReferenceNode> summaryFeatures;
        private final Set<ReferenceNode> matchFeatures;
        /** The rank features of the compiled profile. Unlike summary and match features this is not a copy. */
        private final Set<ReferenceNode> rankFeatures;
        /** Backend feature name to the user's name of the feature, for features which were replaced by functions */
        private final Map<String, String> featureRenames = new java.util.LinkedHashMap<>();
        private final List<RankProfile.RankProperty> rankProperties;

        /**
         * Rank properties for weight settings to make these available to feature executors
         */
        private final List<RankProfile.RankProperty> boostAndWeightRankProperties = new ArrayList<>();

        private final boolean ignoreDefaultRankFeatures;
        private final RankProfile.MatchPhaseSettings matchPhaseSettings;
        private final int rerankCount;
        private final int keepRankCount;
        private final int numThreadsPerSearch;
        private final int minHitsPerThread;
        private final int numSearchPartitions;
        private final double termwiseLimit;
        private final OptionalDouble postFilterThreshold;
        private final OptionalDouble approximateThreshold;
        private final double rankScoreDropLimit;
        private final boolean mapBackRankingExpressionFeatures;

        /**
         * The rank type definitions used to derive settings for the native rank features
         */
        private final NativeRankTypeDefinitionSet nativeRankTypeDefinitions = new NativeRankTypeDefinitionSet("default");
        private final Map<String, String> attributeTypes;
        private final Map<Reference, RankProfile.Input> inputs;
        private final Set<String> filterFields = new java.util.LinkedHashSet<>();
        private final String rankprofileName;

        // Not final: derive() replaces these if the rank properties contain first or second phase expressions
        private RankingExpression firstPhaseRanking;
        private RankingExpression secondPhaseRanking;

        /**
         * Creates a deriver from the given compiled rank profile.
         * The derive* calls at the end add to the state collected above them, so the statement order matters.
         */
        Deriver(RankProfile compiled,
                AttributeFields attributeFields,
                ModelContext.Properties deployProperties,
                QueryProfileRegistry queryProfiles) {
            rankprofileName = compiled.name();
            attributeTypes = compiled.getAttributeTypes();
            inputs = compiled.inputs();
            firstPhaseRanking = compiled.getFirstPhaseRanking();
            secondPhaseRanking = compiled.getSecondPhaseRanking();
            summaryFeatures = new LinkedHashSet<>(compiled.getSummaryFeatures());
            matchFeatures = new LinkedHashSet<>(compiled.getMatchFeatures());
            rankFeatures = compiled.getRankFeatures();
            rerankCount = compiled.getRerankCount();
            matchPhaseSettings = compiled.getMatchPhaseSettings();
            numThreadsPerSearch = compiled.getNumThreadsPerSearch();
            minHitsPerThread = compiled.getMinHitsPerThread();
            numSearchPartitions = compiled.getNumSearchPartitions();
            termwiseLimit = compiled.getTermwiseLimit().orElse(deployProperties.featureFlags().defaultTermwiseLimit());
            postFilterThreshold = compiled.getPostFilterThreshold();
            approximateThreshold = compiled.getApproximateThreshold();
            keepRankCount = compiled.getKeepRankCount();
            rankScoreDropLimit = compiled.getRankScoreDropLimit();
            mapBackRankingExpressionFeatures = deployProperties.featureFlags().avoidRenamingSummaryFeatures();
            ignoreDefaultRankFeatures = compiled.getIgnoreDefaultRankFeatures();
            rankProperties = new ArrayList<>(compiled.getRankProperties());

            Map<String, RankProfile.RankingExpressionFunction> functions = compiled.getFunctions();
            List<ExpressionFunction> functionExpressions = functions.values().stream().map(f -> f.function()).collect(Collectors.toList());
            Map<String, String> functionProperties = new LinkedHashMap<>();
            // This one context is shared by all the serialization below, which adds function serializations to it
            SerializationContext functionSerializationContext = new SerializationContext(functionExpressions,
                                                                                         Map.of(),
                                                                                         compiled.typeContext(queryProfiles));

            if (firstPhaseRanking != null) {
                functionProperties.putAll(firstPhaseRanking.getRankProperties(functionSerializationContext));
            }
            if (secondPhaseRanking != null) {
                functionProperties.putAll(secondPhaseRanking.getRankProperties(functionSerializationContext));
            }

            derivePropertiesAndFeaturesFromFunctions(functions, functionProperties, functionSerializationContext);
            deriveOnnxModelFunctionsAndFeatures(compiled);

            deriveRankTypeSetting(compiled, attributeFields);
            deriveFilterFields(compiled);
            deriveWeightProperties(compiled);
        }

        /** Adds all the filter fields of the given profile to the filter fields of this */
        private void deriveFilterFields(RankProfile rp) {
            filterFields.addAll(rp.allFilterFields());
        }

        /**
         * Replaces summary and match features which are functions by their backend references,
         * serializes all functions, and adds the resulting function properties to the rank properties.
         * Does nothing if there are no functions.
         */
        private void derivePropertiesAndFeaturesFromFunctions(Map<String, RankProfile.RankingExpressionFunction> functions,
                                                              Map<String, String> functionProperties,
                                                              SerializationContext functionContext) {
            if (functions.isEmpty()) return;

            replaceFunctionFeatures(summaryFeatures, functionContext);
            replaceFunctionFeatures(matchFeatures, functionContext);

            // First phase, second phase and summary features should add all required functions to the context.
            // However, we need to add any functions not referenced in those anyway for model-evaluation.
            deriveFunctionProperties(functions, functionProperties, functionContext);

            for (Map.Entry<String, String> e : functionProperties.entrySet()) {
                rankProperties.add(new RankProfile.RankProperty(e.getKey(), e.getValue()));
            }
        }

        /**
         * Serializes each function which is not already serialized in the context, together with its argument types
         * and its return type when present, and then adds all serialized functions of the context to functionProperties.
         */
        private void deriveFunctionProperties(Map<String, RankProfile.RankingExpressionFunction> functions,
                                              Map<String, String> functionProperties,
                                              SerializationContext context) {
            for (Map.Entry<String, RankProfile.RankingExpressionFunction> e : functions.entrySet()) {
                String propertyName = RankingExpression.propertyName(e.getKey());
                if (context.serializedFunctions().containsKey(propertyName)) continue;

                String expressionString = e.getValue().function().getBody().getRoot().toString(context).toString();

                context.addFunctionSerialization(propertyName, expressionString);
                for (Map.Entry<String, TensorType> argumentType : e.getValue().function().argumentTypes().entrySet())
                    context.addArgumentTypeSerialization(e.getKey(), argumentType.getKey(), argumentType.getValue());
                if (e.getValue().function().returnType().isPresent())
                    context.addFunctionTypeSerialization(e.getKey(), e.getValue().function().returnType().get());
                // else if (e.getValue().function().arguments().isEmpty()) TODO: Enable this check when we resolve all types
                //     throw new IllegalStateException("Type of function '" + e.getKey() + "' is not resolved");
            }
            functionProperties.putAll(context.serializedFunctions());
        }

        /**
         * Replaces each feature in the given set which names a function in the context by a
         * "rankingExpression(name)" reference having the same arguments and output.
         * The replacements are added after the features which are kept, so replaced features move last.
         */
        private void replaceFunctionFeatures(Set<ReferenceNode> features, SerializationContext context) {
            if (features == null) return;
            Map<String, ReferenceNode> functionFeatures = new LinkedHashMap<>();
            for (Iterator<ReferenceNode> i = features.iterator(); i.hasNext(); ) {
                ReferenceNode referenceNode = i.next();
                // Is the feature a function?
                ExpressionFunction function = context.getFunction(referenceNode.getName());
                if (function != null) {
                    String propertyName = RankingExpression.propertyName(referenceNode.getName());
                    String expressionString = function.getBody().getRoot().toString(context).toString();
                    context.addFunctionSerialization(propertyName, expressionString);
                    ReferenceNode backendReferenceNode = new ReferenceNode("rankingExpression(" + referenceNode.getName() + ")",
                                                                           referenceNode.getArguments().expressions(),
                                                                           referenceNode.getOutput());
                    if (mapBackRankingExpressionFeatures) {
                        // tell backend to map back to the name the user expects:
                        featureRenames.put(backendReferenceNode.toString(), referenceNode.toString());
                    }
                    functionFeatures.put(referenceNode.getName(), backendReferenceNode);
                    i.remove(); // Will add the expanded one in next block
                }
            }
            // Then, replace the features that were functions
            for (Map.Entry<String, ReferenceNode> e : functionFeatures.entrySet()) {
                features.add(e.getValue());
            }
        }

        /** Adds a "vespa.fieldweight.[field]" property for each weight setting of the given profile */
        private void deriveWeightProperties(RankProfile rankProfile) {

            for (RankProfile.RankSetting setting : rankProfile.rankSettings()) {
                if (setting.getType() != RankProfile.RankSetting.Type.WEIGHT) continue;
                boostAndWeightRankProperties.add(new RankProfile.RankProperty("vespa.fieldweight." + setting.getFieldName(),
                                                                              String.valueOf(setting.getIntValue())));
            }
        }

        /**
         * Adds the type boosts from a rank profile
         */
        private void deriveRankTypeSetting(RankProfile rankProfile, AttributeFields attributeFields) {
            for (Iterator<RankProfile.RankSetting> i = rankProfile.rankSettingIterator(); i.hasNext(); ) {
                RankProfile.RankSetting setting = i.next();
                if (setting.getType() != RankProfile.RankSetting.Type.RANKTYPE) continue;

                deriveNativeRankTypeSetting(setting.getFieldName(), (RankType) setting.getValue(), attributeFields,
                                            hasDefaultRankTypeSetting(rankProfile, setting.getFieldName()));
            }
        }

        /**
         * Adds the native rank tables of the given rank type to the settings of the given field,
         * unless the field uses the default setting.
         *
         * @throws IllegalArgumentException if the rank type has no native definition
         */
        private void deriveNativeRankTypeSetting(String fieldName, RankType rankType, AttributeFields attributeFields,
                                                 boolean isDefaultSetting) {
            if (isDefaultSetting) return;

            NativeRankTypeDefinition definition = nativeRankTypeDefinitions.getRankTypeDefinition(rankType);
            if (definition == null) throw new IllegalArgumentException("In field '" + fieldName + "': " +
                                                                       rankType + " is known but has no implementation. " +
                                                                       "Supported rank types: " +
                                                                       nativeRankTypeDefinitions.types().keySet());

            FieldRankSettings settings = deriveFieldRankSettings(fieldName);
            for (Iterator<NativeTable> i = definition.rankSettingIterator(); i.hasNext(); ) {
                NativeTable table = i.next();
                // only add index field tables if we are processing an index field and
                // only add attribute field tables if we are processing an attribute field
                if ((FieldRankSettings.isIndexFieldTable(table) && attributeFields.getAttribute(fieldName) == null) ||
                    (FieldRankSettings.isAttributeFieldTable(table) && attributeFields.getAttribute(fieldName) != null)) {
                    settings.addTable(table);
                }
            }
        }

        /** Returns whether the given field has a rank type setting in the given profile whose value is the default rank type */
        private boolean hasDefaultRankTypeSetting(RankProfile rankProfile, String fieldName) {
            RankProfile.RankSetting setting =
                    rankProfile.getRankSetting(fieldName, RankProfile.RankSetting.Type.RANKTYPE);
            return setting != null && setting.getValue().equals(RankType.DEFAULT);
        }

        /** Returns the rank settings of the given field, creating and registering them if not already present */
        private FieldRankSettings deriveFieldRankSettings(String fieldName) {
            FieldRankSettings settings = fieldRankSettings.get(fieldName);
            if (settings == null) {
                settings = new FieldRankSettings(fieldName);
                fieldRankSettings.put(fieldName, settings);
            }
            return settings;
        }

        /**
         * Derives the properties this produces.
         * The properties are returned in the order they are added below.
         * Values longer than the limit of largeRankExpressions which belong to a ranking script
         * are added to largeRankExpressions and replaced by a reference to it.
         *
         * @throws IllegalArgumentException if a first or second phase expression among the rank properties
         *                                  cannot be parsed, or there are 1000000 or more properties
         */
        public List<Pair<String, String>> derive(LargeRankExpressions largeRankExpressions) {
            List<Pair<String, String>> properties = new ArrayList<>();
            for (RankProfile.RankProperty property : rankProperties) {
                if (RankingExpression.propertyName(RankProfile.FIRST_PHASE).equals(property.getName())) {
                    // Could have been set by function expansion. Set expressions, then skip this property.
                    try {
                        firstPhaseRanking = new RankingExpression(property.getValue());
                    } catch (ParseException e) {
                        throw new IllegalArgumentException("Could not parse first phase expression", e);
                    }
                }
                else if (RankingExpression.propertyName(RankProfile.SECOND_PHASE).equals(property.getName())) {
                    try {
                        secondPhaseRanking = new RankingExpression(property.getValue());
                    } catch (ParseException e) {
                        throw new IllegalArgumentException("Could not parse second phase expression", e);
                    }
                }
                else {
                    properties.add(new Pair<>(property.getName(), property.getValue()));
                }
            }
            properties.addAll(deriveRankingPhaseRankProperties(firstPhaseRanking, RankProfile.FIRST_PHASE));
            properties.addAll(deriveRankingPhaseRankProperties(secondPhaseRanking, RankProfile.SECOND_PHASE));
            for (FieldRankSettings settings : fieldRankSettings.values()) {
                properties.addAll(settings.deriveRankProperties());
            }
            for (RankProfile.RankProperty property : boostAndWeightRankProperties) {
                properties.add(new Pair<>(property.getName(), property.getValue()));
            }
            for (ReferenceNode feature : summaryFeatures) {
                properties.add(new Pair<>("vespa.summary.feature", feature.toString()));
            }
            for (ReferenceNode feature : matchFeatures) {
                properties.add(new Pair<>("vespa.match.feature", feature.toString()));
            }
            for (ReferenceNode feature : rankFeatures) {
                properties.add(new Pair<>("vespa.dump.feature", feature.toString()));
            }
            // Each rename is two consecutive properties with the same key: first the backend name, then the user's name
            for (var entry : featureRenames.entrySet()) {
                properties.add(new Pair<>("vespa.feature.rename", entry.getKey()));
                properties.add(new Pair<>("vespa.feature.rename", entry.getValue()));
            }
            if (numThreadsPerSearch > 0) {
                properties.add(new Pair<>("vespa.matching.numthreadspersearch", numThreadsPerSearch + ""));
            }
            if (minHitsPerThread > 0) {
                properties.add(new Pair<>("vespa.matching.minhitsperthread", minHitsPerThread + ""));
            }
            if (numSearchPartitions >= 0) {
                properties.add(new Pair<>("vespa.matching.numsearchpartitions", numSearchPartitions + ""));
            }
            if (termwiseLimit < 1.0) {
                properties.add(new Pair<>("vespa.matching.termwise_limit", termwiseLimit + ""));
            }
            if (postFilterThreshold.isPresent()) {
                properties.add(new Pair<>("vespa.matching.global_filter.upper_limit", String.valueOf(postFilterThreshold.getAsDouble())));
            }
            if (approximateThreshold.isPresent()) {
                properties.add(new Pair<>("vespa.matching.global_filter.lower_limit", String.valueOf(approximateThreshold.getAsDouble())));
            }
            if (matchPhaseSettings != null) {
                properties.add(new Pair<>("vespa.matchphase.degradation.attribute", matchPhaseSettings.getAttribute()));
                properties.add(new Pair<>("vespa.matchphase.degradation.ascendingorder", matchPhaseSettings.getAscending() + ""));
                properties.add(new Pair<>("vespa.matchphase.degradation.maxhits", matchPhaseSettings.getMaxHits() + ""));
                properties.add(new Pair<>("vespa.matchphase.degradation.maxfiltercoverage", matchPhaseSettings.getMaxFilterCoverage() + ""));
                properties.add(new Pair<>("vespa.matchphase.degradation.samplepercentage", matchPhaseSettings.getEvaluationPoint() + ""));
                properties.add(new Pair<>("vespa.matchphase.degradation.postfiltermultiplier", matchPhaseSettings.getPrePostFilterTippingPoint() + ""));
                RankProfile.DiversitySettings diversitySettings = matchPhaseSettings.getDiversity();
                if (diversitySettings != null) {
                    properties.add(new Pair<>("vespa.matchphase.diversity.attribute", diversitySettings.getAttribute()));
                    properties.add(new Pair<>("vespa.matchphase.diversity.mingroups", String.valueOf(diversitySettings.getMinGroups())));
                    properties.add(new Pair<>("vespa.matchphase.diversity.cutoff.factor", String.valueOf(diversitySettings.getCutoffFactor())));
                    properties.add(new Pair<>("vespa.matchphase.diversity.cutoff.strategy", String.valueOf(diversitySettings.getCutoffStrategy())));
                }
            }
            if (rerankCount > -1) {
                properties.add(new Pair<>("vespa.hitcollector.heapsize", rerankCount + ""));
            }
            if (keepRankCount > -1) {
                properties.add(new Pair<>("vespa.hitcollector.arraysize", keepRankCount + ""));
            }
            if (rankScoreDropLimit > -Double.MAX_VALUE) {
                properties.add(new Pair<>("vespa.hitcollector.rankscoredroplimit", rankScoreDropLimit + ""));
            }
            if (ignoreDefaultRankFeatures) {
                properties.add(new Pair<>("vespa.dump.ignoredefaultfeatures", String.valueOf(true)));
            }
            for (String fieldName : filterFields) {
                properties.add(new Pair<>("vespa.isfilterfield." + fieldName, String.valueOf(true)));
            }
            for (Map.Entry<String, String> attributeType : attributeTypes.entrySet()) {
                properties.add(new Pair<>("vespa.type.attribute." + attributeType.getKey(), attributeType.getValue()));
            }

            // Only inputs which are query features produce properties
            for (var input : inputs.values()) {
                if (FeatureNames.isQueryFeature(input.name())) {
                    if (input.type().rank() > 0) // Proton does not like representing the double type as a rank 0 tensor
                        properties.add(new Pair<>("vespa.type.query." + input.name().arguments().expressions().get(0),
                                                  input.type().toString()));
                    if (input.defaultValue().isPresent()) {
                        properties.add(new Pair<>(input.name().toString(),
                                                  input.type().rank() == 0 ?
                                                  String.valueOf(input.defaultValue().get().asDouble()) :
                                                  input.defaultValue().get().toString(true, false)));
                    }
                }
            }
            if (properties.size() >= 1000000) throw new IllegalArgumentException("Too many rank properties");
            distributeLargeExpressionsAsFiles(properties, largeRankExpressions);
            return properties;
        }

        /**
         * Replaces, in place, each property whose value is longer than the limit of largeRankExpressions and whose
         * name yields a script name: The value is added to largeRankExpressions under the name
         * "[rank profile name].[script name]", and the property is replaced by one referring to that name.
         * Properties whose name yields no script name are left as they are, whatever their length.
         */
        private void distributeLargeExpressionsAsFiles(List<Pair<String, String>> properties, LargeRankExpressions largeRankExpressions) {
            for (ListIterator<Pair<String, String>> iter = properties.listIterator(); iter.hasNext();) {
                Pair<String, String> property = iter.next();
                String expression = property.getSecond();
                if (expression.length() > largeRankExpressions.limit()) {
                    String propertyName = property.getFirst();
                    String functionName = RankingExpression.extractScriptName(propertyName);
                    if (functionName != null) {
                        String mangledName = rankprofileName + "." + functionName;
                        largeRankExpressions.add(new RankExpressionBody(mangledName, ByteBuffer.wrap(expression.getBytes(StandardCharsets.UTF_8))));
                        iter.set(new Pair<>(RankingExpression.propertyExpressionName(functionName), mangledName));
                    }
                }
            }
        }

        /**
         * Returns the properties of a ranking phase: If the expression is a single reference, just
         * "vespa.rank.[phase]" pointing to it, otherwise "vespa.rank.[phase]" pointing to a ranking expression
         * feature plus the property holding the expression. An expression with an empty name is named by the phase.
         * Returns an empty list if the expression is null.
         */
        private List<Pair<String, String>> deriveRankingPhaseRankProperties(RankingExpression expression, String phase) {
            List<Pair<String, String>> properties = new ArrayList<>();
            if (expression == null) return properties;

            String name = expression.getName();
            if ("".equals(name))
                name = phase;

            if (expression.getRoot() instanceof ReferenceNode) {
                properties.add(new Pair<>("vespa.rank." + phase, expression.getRoot().toString()));
            } else {
                properties.add(new Pair<>("vespa.rank." + phase, "rankingExpression(" + name + ")"));
                properties.add(new Pair<>(RankingExpression.propertyName(name), expression.getRoot().toString()));
            }
            return properties;
        }

        /** Rewrites ONNX model inputs and features. Does nothing if the profile has no schema or no ONNX models. */
        private void deriveOnnxModelFunctionsAndFeatures(RankProfile rankProfile) {
            if (rankProfile.schema() == null) return;
            if (rankProfile.onnxModels().isEmpty()) return;
            replaceOnnxFunctionInputs(rankProfile);
            replaceImplicitOnnxConfigFeatures(summaryFeatures, rankProfile);
            replaceImplicitOnnxConfigFeatures(matchFeatures, rankProfile);
        }

        /**
         * Remaps each ONNX model input whose source is the name of a function in the profile to
         * "rankingExpression([function name])".
         * NOTE(review): this changes the mapping of a model while iterating over its input map - presumably safe
         * because only keys already present are remapped; confirm against OnnxModel.
         */
        private void replaceOnnxFunctionInputs(RankProfile rankProfile) {
            Set<String> functionNames = rankProfile.getFunctions().keySet();
            if (functionNames.isEmpty()) return;
            for (OnnxModel onnxModel: rankProfile.onnxModels().values()) {
                for (Map.Entry<String, String> mapping : onnxModel.getInputMap().entrySet()) {
                    String source = mapping.getValue();
                    if (functionNames.contains(source)) {
                        onnxModel.addInputNameMapping(mapping.getKey(), "rankingExpression(" + source + ")");
                    }
                }
            }
        }

        /**
         * Replaces each feature which OnnxModelTransformer transforms into a different node by that node.
         * NOTE(review): the replacements are gathered in a HashSet, so the order in which they are added back
         * is the iteration order of that set rather than the original feature order - confirm this is acceptable.
         */
        private void replaceImplicitOnnxConfigFeatures(Set<ReferenceNode> features, RankProfile rankProfile) {
            if (features == null || features.isEmpty()) return;
            Set<ReferenceNode> replacedFeatures = new HashSet<>();
            for (Iterator<ReferenceNode> i = features.iterator(); i.hasNext(); ) {
                ReferenceNode referenceNode = i.next();
                ReferenceNode replacedNode = (ReferenceNode) OnnxModelTransformer.transformFeature(referenceNode, rankProfile);
                if (referenceNode != replacedNode) {
                    replacedFeatures.add(replacedNode);
                    i.remove();
                }
            }
            features.addAll(replacedFeatures);
        }

    }

}
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/SchemaInfo.java b/config-model/src/main/java/com/yahoo/schema/derived/SchemaInfo.java
new file mode 100644
index 00000000000..18c6f335787
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/derived/SchemaInfo.java
@@ -0,0 +1,129 @@
// Copyright Yahoo.
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.search.config.SchemaInfoConfig; +import com.yahoo.schema.RankProfile; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.searchlib.rankingexpression.Reference; +import com.yahoo.vespa.documentmodel.SummaryTransform; + +import java.util.Collection; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * Information about a schema. + * + * @author bratseth + */ +public final class SchemaInfo extends Derived implements SchemaInfoConfig.Producer { + + private final Schema schema; + + // Info about profiles needed in memory after build. + // The rank profile registry itself is not kept around due to its size. + private final Map<String, RankProfileInfo> rankProfiles; + + private final Summaries summaries; + private final SummaryMap summaryMap; + + public SchemaInfo(Schema schema, RankProfileRegistry rankProfileRegistry, + Summaries summaries, SummaryMap summaryMap) { + this.schema = schema; + this.rankProfiles = Collections.unmodifiableMap(toRankProfiles(rankProfileRegistry.rankProfilesOf(schema))); + this.summaries = summaries; + this.summaryMap = summaryMap; + } + + public String name() { return schema.getName(); } + + @Override + public String getDerivedName() { return "schema-info"; } + + public Schema fullSchema() { return schema; } + + public Map<String, RankProfileInfo> rankProfiles() { return rankProfiles; } + + private Map<String, RankProfileInfo> toRankProfiles(Collection<RankProfile> rankProfiles) { + Map<String, RankProfileInfo> rankProfileInfos = new LinkedHashMap<>(); + rankProfiles.forEach(profile -> rankProfileInfos.put(profile.name(), new RankProfileInfo(profile))); + return rankProfileInfos; + } + + @Override + public void getConfig(SchemaInfoConfig.Builder builder) { + var schemaBuilder = new SchemaInfoConfig.Schema.Builder(); 
+ schemaBuilder.name(schema.getName()); + addSummaryConfig(schemaBuilder); + addRankProfilesConfig(schemaBuilder); + builder.schema(schemaBuilder); + } + + private void addSummaryConfig(SchemaInfoConfig.Schema.Builder schemaBuilder) { + for (var summary : summaries.asList()) { + var summaryBuilder = new SchemaInfoConfig.Schema.Summaryclass.Builder(); + summaryBuilder.name(summary.getName()); + for (var field : summary.fields().values()) { + var fieldsBuilder = new SchemaInfoConfig.Schema.Summaryclass.Fields.Builder(); + fieldsBuilder.name(field.getName()) + .type(field.getType().getName()) + .dynamic(isDynamic(field.getName())); + summaryBuilder.fields(fieldsBuilder); + } + schemaBuilder.summaryclass(summaryBuilder); + } + } + + /** Returns whether the given field is a dynamic summary field. */ + private boolean isDynamic(String fieldName) { + if (summaryMap == null) return false; // not know for streaming, but also not used + + var fieldTransform = summaryMap.resultTransforms().get(fieldName); + if (fieldTransform == null) return false; + // TODO: Move this into SummaryTransform and call it something else than "dynamic" + return fieldTransform.getTransform().isDynamic() || + fieldTransform.getTransform() == SummaryTransform.MATCHED_ELEMENTS_FILTER || + fieldTransform.getTransform() == SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER; + } + + private void addRankProfilesConfig(SchemaInfoConfig.Schema.Builder schemaBuilder) { + for (RankProfileInfo rankProfile : rankProfiles().values()) { + var rankProfileConfig = new SchemaInfoConfig.Schema.Rankprofile.Builder(); + rankProfileConfig.name(rankProfile.name()); + rankProfileConfig.hasSummaryFeatures(rankProfile.hasSummaryFeatures()); + rankProfileConfig.hasRankFeatures(rankProfile.hasRankFeatures()); + for (var input : rankProfile.inputs().entrySet()) { + var inputConfig = new SchemaInfoConfig.Schema.Rankprofile.Input.Builder(); + inputConfig.name(input.getKey().toString()); + 
inputConfig.type(input.getValue().type().toString()); + rankProfileConfig.input(inputConfig); + } + schemaBuilder.rankprofile(rankProfileConfig); + } + } + + /** A store of a *small* (in memory) amount of rank profile info. */ + public static final class RankProfileInfo { + + private final String name; + private final boolean hasSummaryFeatures; + private final boolean hasRankFeatures; + private final Map<Reference, RankProfile.Input> inputs; + + public RankProfileInfo(RankProfile profile) { + this.name = profile.name(); + this.hasSummaryFeatures = ! profile.getSummaryFeatures().isEmpty(); + this.hasRankFeatures = ! profile.getRankFeatures().isEmpty(); + this.inputs = profile.inputs(); + } + + public String name() { return name; } + public boolean hasSummaryFeatures() { return hasSummaryFeatures; } + public boolean hasRankFeatures() { return hasRankFeatures; } + public Map<Reference, RankProfile.Input> inputs() { return inputs; } + + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/SearchOrderer.java b/config-model/src/main/java/com/yahoo/schema/derived/SearchOrderer.java new file mode 100644 index 00000000000..3bab808beff --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/SearchOrderer.java @@ -0,0 +1,123 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.document.DataTypeName; +import com.yahoo.schema.DocumentReference; +import com.yahoo.schema.DocumentReferences; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDDocumentType; + +import java.util.*; + +/** + * <p>A class which can reorder a list of search definitions such that any supertype + * always preceed any subtype. Subject to this condition the given order + * is preserved (the minimal reordering is done).</p> + * + * <p>This class is <b>not</b> multithread safe. 
Only one ordering must be done + * at the time in any instance.</p> + * + * @author bratseth + * @author bjorncs + */ +public class SearchOrderer { + + /** A map from DataTypeName to the Search defining them */ + private final Map<DataTypeName, Schema> documentNameToSearch = new HashMap<>(); + + /** + * Reorders the given list of search definitions such that any supertype + * always preceed any subtype. Subject to this condition the given order + * is preserved (the minimal reordering is done). + * + * @return a new list containing the same search instances in the right order + */ + public List<Schema> order(List<Schema> unordered) { + // Description above state that the original order should be preserved, except for the dependency constraint. + // Yet we botch that guarantee by sorting the list... + unordered.sort(Comparator.comparing(Schema::getName)); + + // No, this is not a fast algorithm... + indexOnDocumentName(unordered); + List<Schema> ordered = new ArrayList<>(unordered.size()); + List<Schema> moveOutwards = new ArrayList<>(); + for (Schema schema : unordered) { + if (allDependenciesAlreadyEmitted(ordered, schema)) { + addOrdered(ordered, schema, moveOutwards); + } + else { + moveOutwards.add(schema); + } + } + + // Any leftovers means we have search definitions with undefined inheritants. + // This is warned about elsewhere. 
+ ordered.addAll(moveOutwards); + + documentNameToSearch.clear(); + return ordered; + } + + private void addOrdered(List<Schema> ordered, Schema schema, List<Schema> moveOutwards) { + ordered.add(schema); + Schema eligibleMove; + do { + eligibleMove = removeFirstEntryWithFullyEmittedDependencies(moveOutwards, ordered); + if (eligibleMove != null) { + ordered.add(eligibleMove); + } + } while (eligibleMove != null); + } + + /** Removes and returns the first search from the move list which can now be added, or null if none */ + private Schema removeFirstEntryWithFullyEmittedDependencies(List<Schema> moveOutwards, List<Schema> ordered) { + for (Schema move : moveOutwards) { + if (allDependenciesAlreadyEmitted(ordered, move)) { + moveOutwards.remove(move); + return move; + } + } + return null; + } + + private boolean allDependenciesAlreadyEmitted(List<Schema> alreadyOrdered, Schema schema) { + if (schema.getDocument() == null) { + return true; + } + SDDocumentType document = schema.getDocument(); + return allInheritedDependenciesEmitted(alreadyOrdered, document) && allReferenceDependenciesEmitted(alreadyOrdered, document); + } + + private boolean allInheritedDependenciesEmitted(List<Schema> alreadyOrdered, SDDocumentType document) { + for (SDDocumentType sdoc : document.getInheritedTypes() ) { + DataTypeName inheritedName = sdoc.getDocumentName(); + if ("document".equals(inheritedName.getName())) { + continue; + } + Schema inheritedSchema = documentNameToSearch.get(inheritedName); + if (!alreadyOrdered.contains(inheritedSchema)) { + return false; + } + } + return true; + } + + private static boolean allReferenceDependenciesEmitted(List<Schema> alreadyOrdered, SDDocumentType document) { + DocumentReferences documentReferences = document.getDocumentReferences() + .orElseThrow(() -> new IllegalStateException("Missing document references. 
Should have been processed by now.")); + return documentReferences.stream() + .map(Map.Entry::getValue) + .map(DocumentReference::targetSearch) + .allMatch(alreadyOrdered::contains); + } + + private void indexOnDocumentName(List<Schema> schemas) { + documentNameToSearch.clear(); + for (Schema schema : schemas) { + if (schema.getDocument() != null) { + documentNameToSearch.put(schema.getDocument().getDocumentName(), schema); + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/Summaries.java b/config-model/src/main/java/com/yahoo/schema/derived/Summaries.java new file mode 100644 index 00000000000..2b41fbb3b1a --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/Summaries.java @@ -0,0 +1,51 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.config.model.api.ModelContext; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.config.search.SummaryConfig; + +import java.util.ArrayList; +import java.util.List; + +/** + * A list of derived summaries + * + * @author bratseth + */ +public class Summaries extends Derived implements SummaryConfig.Producer { + + private final boolean useV8GeoPositions; + private final List<SummaryClass> summaries; + + public Summaries(Schema schema, DeployLogger deployLogger, ModelContext.FeatureFlags featureFlags) { + super(); + this.useV8GeoPositions = featureFlags.useV8GeoPositions(); + + // Make sure the default is first + List<SummaryClass> summaries = new ArrayList<>(); + summaries.add(new SummaryClass(schema, schema.getSummary("default"), deployLogger)); + for (DocumentSummary summary : schema.getSummaries().values()) { + if (!summary.getName().equals("default")) + summaries.add(new SummaryClass(schema, summary, deployLogger)); + } + this.summaries = 
List.copyOf(summaries); + } + + public List<SummaryClass> asList() { return summaries; } + + @Override + protected String getDerivedName() { return "summary"; } + + @Override + public void getConfig(SummaryConfig.Builder builder) { + builder.defaultsummaryid(summaries.isEmpty() ? -1 : summaries.get(0).hashCode()); + builder.usev8geopositions(useV8GeoPositions); + for (SummaryClass summaryClass : summaries) { + builder.classes(summaryClass.getSummaryClassConfig()); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/SummaryClass.java b/config-model/src/main/java/com/yahoo/schema/derived/SummaryClass.java new file mode 100644 index 00000000000..193c6893203 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/SummaryClass.java @@ -0,0 +1,133 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.DataType; +import com.yahoo.prelude.fastsearch.DocsumDefinitionSet; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.config.search.SummaryConfig; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; + +import java.util.Collections; +import java.util.Map; +import java.util.logging.Level; + +/** + * A summary derived from a search definition. + * Each summary definition have at least one summary, the default + * which has the same name as the search definition. 
+ * + * @author bratseth + */ +public class SummaryClass extends Derived { + + public static final String DOCUMENT_ID_FIELD = "documentid"; + + private final int id; + + /** True if this summary class needs to access summary information on disk */ + private boolean accessingDiskSummary = false; + private final boolean rawAsBase64; + private final boolean omitSummaryFeatures; + + /** The summary fields of this indexed by name */ + private final Map<String, SummaryClassField> fields; + + private final DeployLogger deployLogger; + + /** + * Creates a summary class from a search definition summary + * + * @param deployLogger a {@link DeployLogger} + */ + public SummaryClass(Schema schema, DocumentSummary summary, DeployLogger deployLogger) { + super(summary.getName()); + this.deployLogger = deployLogger; + this.rawAsBase64 = schema.isRawAsBase64(); + this.omitSummaryFeatures = summary.omitSummaryFeatures(); + Map<String, SummaryClassField> fields = new java.util.LinkedHashMap<>(); + deriveFields(schema, summary, fields); + deriveImplicitFields(summary, fields); + this.fields = Collections.unmodifiableMap(fields); + this.id = deriveId(summary.getName(), fields); + } + + public int id() { return id; } + + /** MUST be called after all other fields are added */ + private void deriveImplicitFields(DocumentSummary summary, Map<String, SummaryClassField> fields) { + if (summary.getName().equals("default")) { + addField(SummaryClass.DOCUMENT_ID_FIELD, DataType.STRING, fields); + } + } + + private void deriveFields(Schema schema, DocumentSummary summary, Map<String, SummaryClassField> fields) { + for (SummaryField summaryField : summary.getSummaryFields().values()) { + if (!accessingDiskSummary && schema.isAccessingDiskSummary(summaryField)) { + accessingDiskSummary = true; + } + addField(summaryField.getName(), summaryField.getDataType(), summaryField.getTransform(), fields); + } + } + + private void addField(String name, DataType type, Map<String, SummaryClassField> fields) { 
+ addField(name, type, null, fields); + } + + private void addField(String name, DataType type, + SummaryTransform transform, + Map<String, SummaryClassField> fields) { + if (fields.containsKey(name)) { + SummaryClassField sf = fields.get(name); + if ( SummaryClassField.convertDataType(type, transform, rawAsBase64) != sf.getType()) { + deployLogger.logApplicationPackage(Level.WARNING, "Conflicting definition of field " + name + + ". " + "Declared as type " + sf.getType() + " and " + type); + } + } else { + fields.put(name, new SummaryClassField(name, type, transform, rawAsBase64)); + } + } + + public Map<String, SummaryClassField> fields() { return fields; } + + private static int deriveId(String name, Map<String, SummaryClassField> fields) { + int hash = name.hashCode(); + int number = 1; + for (var field : fields.values()) { + hash += number++ * (field.getName().hashCode() + + 17 * field.getType().getName().hashCode()); + } + hash = Math.abs(hash); + if (hash == DocsumDefinitionSet.SLIME_MAGIC_ID) + hash++; + return hash; + } + + public SummaryConfig.Classes.Builder getSummaryClassConfig() { + SummaryConfig.Classes.Builder classBuilder = new SummaryConfig.Classes.Builder(); + classBuilder. + id(id). + name(getName()). + omitsummaryfeatures(omitSummaryFeatures); + for (SummaryClassField field : fields.values() ) { + classBuilder.fields(new SummaryConfig.Classes.Fields.Builder(). + name(field.getName()). 
+ type(field.getType().getName())); + } + return classBuilder; + } + + @Override + public int hashCode() { return id; } + + @Override + protected String getDerivedName() { return "summary"; } + + @Override + public String toString() { + return "summary class '" + getName() + "'"; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/SummaryClassField.java b/config-model/src/main/java/com/yahoo/schema/derived/SummaryClassField.java new file mode 100644 index 00000000000..f042054a0b5 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/SummaryClassField.java @@ -0,0 +1,132 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.document.CollectionDataType; +import com.yahoo.document.DataType; +import com.yahoo.document.MapDataType; +import com.yahoo.documentmodel.NewDocumentReferenceDataType; +import com.yahoo.document.datatypes.BoolFieldValue; +import com.yahoo.document.datatypes.ByteFieldValue; +import com.yahoo.document.datatypes.DoubleFieldValue; +import com.yahoo.document.datatypes.FieldValue; +import com.yahoo.document.datatypes.Float16FieldValue; +import com.yahoo.document.datatypes.FloatFieldValue; +import com.yahoo.document.datatypes.IntegerFieldValue; +import com.yahoo.document.datatypes.LongFieldValue; +import com.yahoo.document.datatypes.PredicateFieldValue; +import com.yahoo.document.datatypes.Raw; +import com.yahoo.document.datatypes.StringFieldValue; +import com.yahoo.document.datatypes.Struct; +import com.yahoo.document.datatypes.TensorFieldValue; +import com.yahoo.vespa.documentmodel.SummaryTransform; + +/** + * A summary field derived from a search definition + * + * @author bratseth + */ +public class SummaryClassField { + + private final String name; + + private final Type type; + + /** The summary field type enumeration */ + public enum Type { + + BOOL("bool"), + BYTE("byte"), + SHORT("short"), + 
INTEGER("integer"), + INT64("int64"), + FLOAT16("float16"), + FLOAT("float"), + DOUBLE("double"), + STRING("string"), + DATA("data"), + RAW("raw"), + LONGSTRING("longstring"), + LONGDATA("longdata"), + XMLSTRING("xmlstring"), + FEATUREDATA("featuredata"), + JSONSTRING("jsonstring"), + TENSOR("tensor"); + + private final String name; + + Type(String name) { + this.name = name; + } + + /** Returns the name of this type */ + public String getName() { + return name; + } + + public String toString() { + return "type: " + name; + } + } + + public SummaryClassField(String name, DataType type, SummaryTransform transform, boolean rawAsBase64) { + this.name = name; + this.type = convertDataType(type, transform, rawAsBase64); + } + + public String getName() { return name; } + + public Type getType() { return type; } + + /** Converts to the right summary field type from a field datatype and a transform*/ + public static Type convertDataType(DataType fieldType, SummaryTransform transform, boolean rawAsBase64) { + FieldValue fval = fieldType.createFieldValue(); + if (fval instanceof StringFieldValue) { + if (transform != null && transform.equals(SummaryTransform.RANKFEATURES)) { + return Type.FEATUREDATA; + } else if (transform != null && transform.equals(SummaryTransform.SUMMARYFEATURES)) { + return Type.FEATUREDATA; + } else { + return Type.LONGSTRING; + } + } else if (fval instanceof IntegerFieldValue) { + return Type.INTEGER; + } else if (fval instanceof LongFieldValue) { + return Type.INT64; + } else if (fval instanceof Float16FieldValue) { + return Type.FLOAT16; + } else if (fval instanceof FloatFieldValue) { + return Type.FLOAT; + } else if (fval instanceof DoubleFieldValue) { + return Type.DOUBLE; + } else if (fval instanceof BoolFieldValue) { + return Type.BOOL; + } else if (fval instanceof ByteFieldValue) { + return Type.BYTE; + } else if (fval instanceof Raw) { + return rawAsBase64 ? 
Type.RAW : Type.DATA; + } else if (fval instanceof Struct) { + return Type.JSONSTRING; + } else if (fval instanceof PredicateFieldValue) { + return Type.STRING; + } else if (fval instanceof TensorFieldValue) { + return Type.TENSOR; + } else if (fieldType instanceof CollectionDataType) { + if (transform != null && transform.equals(SummaryTransform.POSITIONS)) { + return Type.XMLSTRING; + } else { + return Type.JSONSTRING; + } + } else if (fieldType instanceof MapDataType) { + return Type.JSONSTRING; + } else if (fieldType instanceof NewDocumentReferenceDataType) { + return Type.LONGSTRING; + } else { + throw new IllegalArgumentException("Don't know which summary type to convert " + fieldType + " to"); + } + } + + public String toString() { + return "summary class field " + name; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/SummaryMap.java b/config-model/src/main/java/com/yahoo/schema/derived/SummaryMap.java new file mode 100644 index 00000000000..df9174a12ed --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/SummaryMap.java @@ -0,0 +1,120 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived; + +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.vespa.config.search.SummarymapConfig; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; + +import java.util.Collections; +import java.util.Map; + +/** + * A summary map (describing search-time summary field transformations) + * derived from a Schema. 
+ * + * @author bratseth + */ +public class SummaryMap extends Derived implements SummarymapConfig.Producer { + + private final Map<String, FieldResultTransform> resultTransforms = new java.util.LinkedHashMap<>(); + + /** Creates a summary map from a search definition */ + SummaryMap(Schema schema) { + derive(schema); + } + + protected void derive(Schema schema) { + for (DocumentSummary documentSummary : schema.getSummaries().values()) { + derive(documentSummary); + } + super.derive(schema); + } + + @Override + protected void derive(ImmutableSDField field, Schema schema) { + } + + private void derive(DocumentSummary documentSummary) { + for (SummaryField summaryField : documentSummary.getSummaryFields().values()) { + if (summaryField.getTransform()== SummaryTransform.NONE) continue; + + if (summaryField.getTransform()==SummaryTransform.ATTRIBUTE || + summaryField.getTransform()==SummaryTransform.DISTANCE || + summaryField.getTransform()==SummaryTransform.GEOPOS || + summaryField.getTransform()==SummaryTransform.POSITIONS || + summaryField.getTransform()==SummaryTransform.MATCHED_ELEMENTS_FILTER || + summaryField.getTransform()==SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER) + { + resultTransforms.put(summaryField.getName(), new FieldResultTransform(summaryField.getName(), + summaryField.getTransform(), + summaryField.getSingleSource())); + } else { + // Note: Currently source mapping is handled in the indexing statement, + // by creating a summary field for each of the values + // This works, but is suboptimal. 
We could consolidate to a minimal set and + // use the right value from the minimal set as the third parameter here, + // and add "override" commands to multiple static values + resultTransforms.put(summaryField.getName(), new FieldResultTransform(summaryField.getName(), + summaryField.getTransform(), + summaryField.getName())); + } + } + } + + /** Returns a read-only iterator of the FieldResultTransforms of this summary map */ + public Map<String, FieldResultTransform> resultTransforms() { + return Collections.unmodifiableMap(resultTransforms); + } + + protected String getDerivedName() { return "summarymap"; } + + /** Returns the command name of a transform */ + private String getCommand(SummaryTransform transform) { + if (transform == SummaryTransform.DISTANCE) + return "absdist"; + else if (transform.isDynamic()) + return "dynamicteaser"; + else + return transform.getName(); + } + + /** + * Does this summary command name stand for a dynamic transform? + * We need this because some model information is shared through configs instead of model - see usage + * A dynamic transform needs the query to perform its computations. 
+ */ + // TODO/Note: "dynamic" here means something else than in SummaryTransform + public static boolean isDynamicCommand(String commandName) { + return (commandName.equals("dynamicteaser") || + commandName.equals(SummaryTransform.MATCHED_ELEMENTS_FILTER.getName()) || + commandName.equals(SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER.getName())); + } + + @Override + public void getConfig(SummarymapConfig.Builder builder) { + builder.defaultoutputclass(-1); + for (FieldResultTransform frt : resultTransforms.values()) { + SummarymapConfig.Override.Builder oB = new SummarymapConfig.Override.Builder() + .field(frt.getFieldName()) + .command(getCommand(frt.getTransform())); + if (frt.getTransform().isDynamic() || + frt.getTransform().equals(SummaryTransform.ATTRIBUTE) || + frt.getTransform().equals(SummaryTransform.DISTANCE) || + frt.getTransform().equals(SummaryTransform.GEOPOS) || + frt.getTransform().equals(SummaryTransform.POSITIONS) || + frt.getTransform().equals(SummaryTransform.TEXTEXTRACTOR) || + frt.getTransform().equals(SummaryTransform.MATCHED_ELEMENTS_FILTER) || + frt.getTransform().equals(SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER)) + { + oB.arguments(frt.getArgument()); + } else { + oB.arguments(""); + } + builder.override(oB); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/VsmFields.java b/config-model/src/main/java/com/yahoo/schema/derived/VsmFields.java new file mode 100644 index 00000000000..c8679b6166c --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/VsmFields.java @@ -0,0 +1,313 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.derived; + +import com.yahoo.document.CollectionDataType; +import com.yahoo.document.DataType; +import com.yahoo.document.NumericDataType; +import com.yahoo.documentmodel.NewDocumentReferenceDataType; +import com.yahoo.document.datatypes.BoolFieldValue; +import com.yahoo.document.datatypes.FieldValue; +import com.yahoo.document.datatypes.PredicateFieldValue; +import com.yahoo.document.datatypes.Raw; +import com.yahoo.document.datatypes.StringFieldValue; +import com.yahoo.document.datatypes.TensorFieldValue; +import com.yahoo.schema.FieldSets; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.FieldSet; +import com.yahoo.schema.document.GeoPos; +import com.yahoo.schema.document.Matching; +import com.yahoo.schema.document.MatchType; +import com.yahoo.schema.document.SDDocumentType; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.config.search.vsm.VsmfieldsConfig; + +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * Vertical streaming matcher field specification + */ +public class VsmFields extends Derived implements VsmfieldsConfig.Producer { + + private final Map<String, StreamingField> fields=new LinkedHashMap<>(); + private final Map<String, StreamingDocumentType> doctypes=new LinkedHashMap<>(); + + public VsmFields(Schema schema) { + addSearchdefinition(schema); + } + + private void addSearchdefinition(Schema schema) { + derive(schema); + } + + @Override + protected void derive(SDDocumentType document, Schema schema) { + super.derive(document, schema); + StreamingDocumentType docType=getDocumentType(document.getName()); + if (docType == null) { + docType = new StreamingDocumentType(document.getName(), schema.fieldSets()); + doctypes.put(document.getName(), docType); + } + for (Object o : document.fieldSet()) { + derive(docType, (SDField) o); + } + } + + protected void derive(StreamingDocumentType document, SDField field) { + if (field.usesStructOrMap()) { + if (GeoPos.isAnyPos(field)) { 
+ StreamingField streamingField = new StreamingField(field); + addField(streamingField.getName(), streamingField); + addFieldToIndices(document, field.getName(), streamingField); + } + for (SDField structField : field.getStructFields()) { + derive(document, structField); // Recursion + } + } else { + if (! (field.doesIndexing() || field.doesSummarying() || field.doesAttributing()) ) + return; + + StreamingField streamingField = new StreamingField(field); + addField(streamingField.getName(),streamingField); + deriveIndices(document, field, streamingField); + } + } + + private void deriveIndices(StreamingDocumentType document, SDField field, StreamingField streamingField) { + if (field.doesIndexing()) { + addFieldToIndices(document, field.getName(), streamingField); + } else if (field.doesAttributing()) { + for (String indexName : field.getAttributes().keySet()) { + addFieldToIndices(document, indexName, streamingField); + } + } + } + + private void addFieldToIndices(StreamingDocumentType document, String indexName, StreamingField streamingField) { + if (indexName.contains(".")) { + addFieldToIndices(document, indexName.substring(0,indexName.lastIndexOf(".")), streamingField); // Recursion + } + document.addIndexField(indexName, streamingField.getName()); + } + + private void addField(String name, StreamingField field) { + fields.put(name, field); + } + + /** Returns a streaming index, or null if there is none with this name */ + public StreamingDocumentType getDocumentType(String name) { + return doctypes.get(name); + } + + public String getDerivedName() { + return "vsmfields"; + } + + @Override + public void getConfig(VsmfieldsConfig.Builder vsB) { + for (StreamingField streamingField : fields.values()) { + vsB.fieldspec(streamingField.getFieldSpecConfig()); + } + for (StreamingDocumentType streamingDocType : doctypes.values()) { + vsB.documenttype(streamingDocType.getDocTypeConfig()); + } + } + + private static class StreamingField { + + private final String name; 
+ + /** Whether this field does prefix matching by default */ + private final Matching matching; + + /** The type of this field */ + private final Type type; + + private final boolean isAttribute; + + /** The streaming field type enumeration */ + public static class Type { + + public static Type INT8 = new Type("int8","INT8"); + public static Type INT16 = new Type("int16","INT16"); + public static Type INT32 = new Type("int32","INT32"); + public static Type INT64 = new Type("int64","INT64"); + public static Type FLOAT16 = new Type("float16", "FLOAT16"); + public static Type FLOAT = new Type("float","FLOAT"); + public static Type DOUBLE = new Type("double","DOUBLE"); + public static Type STRING = new Type("string","AUTOUTF8"); + public static Type BOOL = new Type("bool","BOOL"); + public static Type UNSEARCHABLESTRING = new Type("string","NONE"); + public static Type GEO_POSITION = new Type("position", "GEOPOS"); + + private String name; + + private String searchMethod; + + private Type(String name, String searchMethod) { + this.name = name; + this.searchMethod = searchMethod; + } + + @Override + public int hashCode() { + return name.hashCode(); + } + + /** Returns the name of this type */ + public String getName() { return name; } + + public String getSearchMethod() { return searchMethod; } + + @Override + public boolean equals(Object other) { + if ( ! 
(other instanceof Type)) return false; + return this.name.equals(((Type)other).name); + } + + @Override + public String toString() { + return "type: " + name; + } + + } + + public StreamingField(SDField field) { + this(field.getName(), field.getDataType(), field.getMatching(), field.doesAttributing()); + } + + private StreamingField(String name, DataType sourceType, Matching matching, boolean isAttribute) { + this.name = name; + this.type = convertType(sourceType); + this.matching = matching; + this.isAttribute = isAttribute; + } + + /** Converts to the right index type from a field datatype */ + private static Type convertType(DataType fieldType) { + FieldValue fval = fieldType.createFieldValue(); + if (fieldType.equals(DataType.FLOAT16)) { + return Type.FLOAT16; + } else if (fieldType.equals(DataType.FLOAT)) { + return Type.FLOAT; + } else if (fieldType.equals(DataType.LONG)) { + return Type.INT64; + } else if (fieldType.equals(DataType.DOUBLE)) { + return Type.DOUBLE; + } else if (fieldType.equals(DataType.BOOL)) { + return Type.BOOL; + } else if (fieldType.equals(DataType.BYTE)) { + return Type.INT8; + } else if (GeoPos.isAnyPos(fieldType)) { + return Type.GEO_POSITION; + } else if (fieldType instanceof NumericDataType) { + return Type.INT32; + } else if (fval instanceof StringFieldValue) { + return Type.STRING; + } else if (fval instanceof BoolFieldValue) { + return Type.BOOL; + } else if (fval instanceof Raw) { + return Type.STRING; + } else if (fval instanceof PredicateFieldValue) { + return Type.UNSEARCHABLESTRING; + } else if (fval instanceof TensorFieldValue) { + return Type.UNSEARCHABLESTRING; + } else if (fieldType instanceof CollectionDataType) { + return convertType(((CollectionDataType) fieldType).getNestedType()); + } else if (fieldType instanceof NewDocumentReferenceDataType) { + return Type.UNSEARCHABLESTRING; + } else { + throw new IllegalArgumentException("Don't know which streaming field type to convert " + + fieldType + " to"); + } + } + + 
public String getName() { return name; } + + public VsmfieldsConfig.Fieldspec.Builder getFieldSpecConfig() { + VsmfieldsConfig.Fieldspec.Builder fB = new VsmfieldsConfig.Fieldspec.Builder(); + String matchingName = matching.getType().getName(); + if (matching.getType().equals(MatchType.TEXT)) + matchingName = ""; + if (matching.getType() != MatchType.EXACT) { + if (matching.isPrefix()) { + matchingName = "prefix"; + } else if (matching.isSubstring()) { + matchingName = "substring"; + } else if (matching.isSuffix()) { + matchingName = "suffix"; + } + } + if (type != Type.STRING) { + matchingName = ""; + } + fB.name(getName()) + .searchmethod(VsmfieldsConfig.Fieldspec.Searchmethod.Enum.valueOf(type.getSearchMethod())) + .arg1(matchingName) + .fieldtype(isAttribute + ? VsmfieldsConfig.Fieldspec.Fieldtype.ATTRIBUTE + : VsmfieldsConfig.Fieldspec.Fieldtype.INDEX); + if (matching.maxLength() != null) { + fB.maxlength(matching.maxLength()); + } + return fB; + } + + @Override + public boolean equals(Object o) { + if (o.getClass().equals(getClass())) { + StreamingField sf = (StreamingField)o; + return name.equals(sf.name) && + matching.equals(sf.matching) && + type.equals(sf.type); + } + return false; + } + + @Override public int hashCode() { + return java.util.Objects.hash(name, matching, type); + } + + } + + private static class StreamingDocumentType { + + private final String name; + private final Map<String, FieldSet> fieldSets = new LinkedHashMap<>(); + private final Map<String, FieldSet> userFieldSets; + + public StreamingDocumentType(String name, FieldSets fieldSets) { + this.name=name; + userFieldSets = fieldSets.userFieldSets(); + } + + public VsmfieldsConfig.Documenttype.Builder getDocTypeConfig() { + VsmfieldsConfig.Documenttype.Builder dtB = new VsmfieldsConfig.Documenttype.Builder(); + dtB.name(name); + Map<String, FieldSet> all = new LinkedHashMap<>(); + all.putAll(fieldSets); + all.putAll(userFieldSets); + for (Map.Entry<String, FieldSet> e : all.entrySet()) { 
+ VsmfieldsConfig.Documenttype.Index.Builder indB = new VsmfieldsConfig.Documenttype.Index.Builder(); + indB.name(e.getValue().getName()); + for (String field : e.getValue().getFieldNames()) { + indB.field(new VsmfieldsConfig.Documenttype.Index.Field.Builder().name(field)); + } + dtB.index(indB); + } + return dtB; + } + + public String getName() { return name; } + + public void addIndexField(String indexName, String fieldName) { + FieldSet fs = fieldSets.get(indexName); + if (fs == null) { + fs = new FieldSet(indexName); + fieldSets.put(indexName, fs); + } + fs.addFieldName(fieldName); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/VsmSummary.java b/config-model/src/main/java/com/yahoo/schema/derived/VsmSummary.java new file mode 100644 index 00000000000..30ae9c97268 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/VsmSummary.java @@ -0,0 +1,109 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.schema.derived;

import com.yahoo.schema.Schema;
import com.yahoo.schema.document.GeoPos;
import com.yahoo.schema.document.SDDocumentType;
import com.yahoo.schema.document.SDField;
import com.yahoo.vespa.documentmodel.DocumentSummary;
import com.yahoo.vespa.documentmodel.SummaryField;
import com.yahoo.vespa.config.search.vsm.VsmsummaryConfig;

import java.util.*;

/**
 * Vertical streaming matcher summary specification: for the fields of the "default" document
 * summary which need it, the names of the fields each summary field is produced from.
 *
 * @author bratseth
 */
public class VsmSummary extends Derived implements VsmsummaryConfig.Producer {

    /** Summary field to the names of the fields it is mapped from, in the order they were derived */
    private final Map<SummaryField, List<String>> summaryMap = new LinkedHashMap<>(1);

    public VsmSummary(Schema schema) {
        derive(schema);
    }

    @Override
    protected void derive(Schema schema) {
        // Use the default class, as it is the superset
        DocumentSummary defaultSummary = schema.getSummary("default");
        derive(schema, defaultSummary);
    }

    /** Adds a map entry for each field of the given summary which should be mapped. Does nothing if it is null. */
    private void derive(Schema schema, DocumentSummary documentSummary) {
        if (documentSummary == null) return;

        for (SummaryField summaryField : documentSummary.getSummaryFields().values()) {
            List<String> sourceNames = toStringList(summaryField.sourceIterator());
            if ( ! doMapField(schema, summaryField)) continue;

            // Position fields are mapped from themselves rather than from their declared sources
            SDField concreteField = schema.getConcreteField(summaryField.getName());
            boolean isPosition = concreteField != null && GeoPos.isAnyPos(concreteField);
            summaryMap.put(summaryField,
                           isPosition ? Collections.singletonList(summaryField.getName()) : sourceNames);
        }
    }

    /**
     * Returns whether the given summary field should get an entry in the field map.
     * A struct or map field is left out when its sources are exactly its sub-fields and all of
     * those do summarying. A non-struct field is left out when it has the same name as the summary field.
     *
     * @param summaryField a {@link SummaryField}
     */
    private boolean doMapField(Schema schema, SummaryField summaryField) {
        String summaryName = summaryField.getName();
        SDField sdField = schema.getConcreteField(summaryName);
        SDDocumentType document = schema.getDocument();

        if (sdField == null) return true;
        if (document != null && document.getField(summaryName) == sdField) return true;
        if (summaryField.getVsmCommand().equals(SummaryField.VsmCommand.FLATTENJUNIPER)) return true;
        if ( ! sdField.usesStructOrMap()) return ! sdField.getName().equals(summaryName);
        if (summaryField.getSourceCount() != sdField.getStructFields().size()) return true;

        for (SummaryField.Source source : summaryField.getSources()) {
            SDField probe = new SDField(schema.getDocument(), source.getName(), sdField.getDataType());
            if ( ! sdField.getStructFields().contains(probe)) return true; // equals() uses just name

            SDField structField = sdField.getStructField(source.getName());
            if (structField != null && ! structField.doesSummarying()) return true;
        }
        // The sources in the summary field are the same as the sub-fields in the SD field,
        // and all sub-fields do summarying: don't map.
        return false;
    }

    /** Drains the given iterator into a list of the source names, keeping iteration order */
    private List<String> toStringList(Iterator<SummaryField.Source> sources) {
        List<String> names = new ArrayList<>();
        sources.forEachRemaining(source -> names.add(source.getName()));
        return names;
    }

    @Override
    public String getDerivedName() {
        return "vsmsummary";
    }

    /** Emits one fieldmap per derived entry: summary name, source document fields, and the vsm command */
    @Override
    public void getConfig(VsmsummaryConfig.Builder vB) {
        summaryMap.forEach((summaryField, sourceNames) -> {
            VsmsummaryConfig.Fieldmap.Builder fieldmap = new VsmsummaryConfig.Fieldmap.Builder();
            fieldmap.summary(summaryField.getName());
            for (String sourceName : sourceNames) {
                fieldmap.document(new VsmsummaryConfig.Fieldmap.Document.Builder().field(sourceName));
            }
            String command = summaryField.getVsmCommand().toString();
            fieldmap.command(VsmsummaryConfig.Fieldmap.Command.Enum.valueOf(command));
            vB.fieldmap(fieldmap);
        });
    }

}
+package com.yahoo.schema.derived.validation; + +import com.yahoo.schema.document.SDDocumentType; +import com.yahoo.schema.Schema; +import com.yahoo.schema.derived.DerivedConfiguration; +import com.yahoo.schema.derived.IndexingScript; +import com.yahoo.vespa.indexinglanguage.ExpressionVisitor; +import com.yahoo.vespa.indexinglanguage.expressions.Expression; +import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression; + +/** + * @author Mathias M Lidal + */ +public class IndexStructureValidator extends Validator { + + public IndexStructureValidator(DerivedConfiguration config, Schema schema) { + super(config, schema); + } + + public void validate() { + IndexingScript script = config.getIndexingScript(); + for (Expression exp : script.expressions()) { + new OutputVisitor(schema.getDocument(), exp).visit(exp); + } + } + + private static class OutputVisitor extends ExpressionVisitor { + + final SDDocumentType docType; + final Expression exp; + + public OutputVisitor(SDDocumentType docType, Expression exp) { + this.docType = docType; + this.exp = exp; + } + + @Override + protected void doVisit(Expression exp) { + if (!(exp instanceof OutputExpression)) return; + + String fieldName = ((OutputExpression)exp).getFieldName(); + if (docType.getField(fieldName) != null) return; + + throw new IllegalArgumentException("Indexing expression '" + this.exp + "' refers to field '" + + fieldName + "' which does not exist in the index structure."); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/validation/Validation.java b/config-model/src/main/java/com/yahoo/schema/derived/validation/Validation.java new file mode 100644 index 00000000000..dba4dce49f0 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/validation/Validation.java @@ -0,0 +1,12 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.derived.validation; + +import com.yahoo.schema.Schema; +import com.yahoo.schema.derived.DerivedConfiguration; + +public class Validation { + + public static void validate(DerivedConfiguration config, Schema schema) { + new IndexStructureValidator(config, schema).validate(); + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/derived/validation/Validator.java b/config-model/src/main/java/com/yahoo/schema/derived/validation/Validator.java new file mode 100644 index 00000000000..bf0f007841c --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/derived/validation/Validator.java @@ -0,0 +1,22 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.derived.validation; + +import com.yahoo.schema.Schema; +import com.yahoo.schema.derived.DerivedConfiguration; + +/** + * @author mathiasm + */ +public abstract class Validator { + + protected DerivedConfiguration config; + protected Schema schema; + + protected Validator(DerivedConfiguration config, Schema schema) { + this.config = config; + this.schema = schema; + } + + public abstract void validate(); + +} diff --git a/config-model/src/main/java/com/yahoo/schema/document/Attribute.java b/config-model/src/main/java/com/yahoo/schema/document/Attribute.java new file mode 100644 index 00000000000..f2279a52855 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/document/Attribute.java @@ -0,0 +1,435 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.schema.document;

import com.yahoo.document.ArrayDataType;
import com.yahoo.document.CollectionDataType;
import com.yahoo.document.DataType;
import com.yahoo.document.DocumentType;
import com.yahoo.document.PrimitiveDataType;
import com.yahoo.documentmodel.NewDocumentReferenceDataType;
import com.yahoo.document.StructuredDataType;
import com.yahoo.document.TensorDataType;
import com.yahoo.document.WeightedSetDataType;
import com.yahoo.document.datatypes.BoolFieldValue;
import com.yahoo.document.datatypes.ByteFieldValue;
import com.yahoo.document.datatypes.DoubleFieldValue;
import com.yahoo.document.datatypes.FieldValue;
import com.yahoo.document.datatypes.FloatFieldValue;
import com.yahoo.document.datatypes.IntegerFieldValue;
import com.yahoo.document.datatypes.LongFieldValue;
import com.yahoo.document.datatypes.PredicateFieldValue;
import com.yahoo.document.datatypes.Raw;
import com.yahoo.document.datatypes.Float16FieldValue;
import com.yahoo.document.datatypes.StringFieldValue;
import com.yahoo.document.datatypes.TensorFieldValue;
import com.yahoo.tensor.TensorType;

import java.io.Serializable;
import java.util.function.Supplier;
import java.util.LinkedHashSet;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;

/**
 * A search-time document attribute (per-document in-memory value).
 * This belongs to the field defining the attribute.
 *
 * @author bratseth
 */
public final class Attribute implements Cloneable, Serializable {

    public enum DistanceMetric { EUCLIDEAN, ANGULAR, GEODEGREES, INNERPRODUCT, HAMMING }

    // Remember to change hashCode and equals when you add new fields.
    // hashCode must only use fields which are compared by equals/isCompatible.

    private String name;

    private Type type;
    private CollectionType collectionType;

    private boolean removeIfZero = false;
    private boolean createIfNonExistent = false;
    private boolean enableBitVectors = false;
    private boolean enableOnlyBitVector = false;

    private boolean fastRank = false;
    private boolean fastSearch = false;
    private boolean fastAccess = false;
    private boolean huge = false;
    private boolean mutable = false;
    private boolean paged = false;
    private int arity = BooleanIndexDefinition.DEFAULT_ARITY;
    private long lowerBound = BooleanIndexDefinition.DEFAULT_LOWER_BOUND;
    private long upperBound = BooleanIndexDefinition.DEFAULT_UPPER_BOUND;
    private double densePostingListThreshold = BooleanIndexDefinition.DEFAULT_DENSE_POSTING_LIST_THRESHOLD;

    /** This is set if the type of this is TENSOR */
    private Optional<TensorType> tensorType = Optional.empty();

    /** This is set if the type of this is REFERENCE */
    private final Optional<StructuredDataType> referenceDocumentType;

    private Optional<DistanceMetric> distanceMetric = Optional.empty();

    private Optional<HnswIndexParams> hnswIndexParams = Optional.empty();

    private boolean isPosition = false;
    private final Sorting sorting = new Sorting();

    /** The aliases for this attribute */
    private final Set<String> aliases = new LinkedHashSet<>();

    private Dictionary dictionary = null;
    private Case casing = Case.UNCASED;

    /**
     * True if this attribute should be returned during first pass of search.
     * Null means make the default decision for this kind of attribute
     */
    private Boolean prefetch = null;

    /** The attribute type enumeration */
    public enum Type {
        BYTE("byte", "INT8"),
        SHORT("short", "INT16"),
        INTEGER("integer", "INT32"),
        LONG("long", "INT64"),
        FLOAT16("float16", "FLOAT16"),
        FLOAT("float", "FLOAT"),
        DOUBLE("double", "DOUBLE"),
        STRING("string", "STRING"),
        BOOL("bool", "BOOL"),
        PREDICATE("predicate", "PREDICATE"),
        TENSOR("tensor", "TENSOR"),
        REFERENCE("reference", "REFERENCE");

        private final String myName; // different from what name() returns.
        private final String exportAttributeTypeName;

        Type(String name, String exportAttributeTypeName) {
            this.myName = name;
            this.exportAttributeTypeName = exportAttributeTypeName;
        }

        public String getName() { return myName; }
        public String getExportAttributeTypeName() { return exportAttributeTypeName; }

        @Override
        public String toString() {
            return "type: " + myName;
        }
    }

    /** The attribute collection type enumeration */
    public enum CollectionType {

        SINGLE("SINGLE"),
        ARRAY("ARRAY"),
        WEIGHTEDSET ("WEIGHTEDSET");

        private final String name;

        CollectionType(String name) {
            this.name = name;
        }

        public String getName() { return name; }

        @Override
        public String toString() {
            return "collectiontype: " + name;
        }

    }

    /** Creates an attribute with default settings */
    public Attribute(String name, DataType fieldType) {
        this(name, convertDataType(fieldType), convertCollectionType(fieldType), convertTensorType(fieldType), convertTargetType(fieldType));
        // The weighted set flags are carried over from the field type; they are false for all other types
        setRemoveIfZero(fieldType instanceof WeightedSetDataType && ((WeightedSetDataType)fieldType).removeIfZero());
        setCreateIfNonExistent(fieldType instanceof WeightedSetDataType && ((WeightedSetDataType)fieldType).createIfNonExistent());
    }

    public Attribute(String name, Type type, CollectionType collectionType) {
        this(name, type, collectionType, Optional.empty(), Optional.empty());
    }

    public Attribute(String name,
                     Type type,
                     CollectionType collectionType,
                     Optional<TensorType> tensorType,
                     Optional<StructuredDataType> referenceDocumentType) {
        this.name = name;
        setType(type);
        setCollectionType(collectionType);
        this.tensorType = tensorType;
        this.referenceDocumentType = referenceDocumentType;
    }

    /** Returns a clone of this with the collection type set to ARRAY */
    public Attribute convertToArray() {
        Attribute result = clone();
        result.collectionType = CollectionType.ARRAY;
        return result;
    }

    /**
     * <p>Returns whether this attribute should be included in the "attributeprefetch" summary
     * which is returned to the Qrs by prefetchAttributes, used by blending, uniquing etc.</p>
     *
     * <p>An explicitly set prefetch value is returned as-is. Otherwise single value attributes
     * are prefetched and multi value attributes are not.</p>
     */
    public boolean isPrefetch() {
        if (prefetch != null) return prefetch.booleanValue();
        return CollectionType.SINGLE.equals(collectionType);
    }

    /** Returns the prefetch value of this, null if the default is used. */
    public Boolean getPrefetchValue() { return prefetch; }

    public boolean isRemoveIfZero() { return removeIfZero; }
    public boolean isCreateIfNonExistent() { return createIfNonExistent; }
    public boolean isEnabledBitVectors() { return enableBitVectors; }
    public boolean isEnabledOnlyBitVector() { return enableOnlyBitVector; }
    public boolean isFastSearch() { return fastSearch; }
    public boolean isFastRank() { return fastRank; }
    public boolean isFastAccess() { return fastAccess; }
    public boolean isHuge() { return huge; }
    public boolean isPaged() { return paged; }
    public boolean isPosition() { return isPosition; }
    public boolean isMutable() { return mutable; }

    public int arity() { return arity; }
    public long lowerBound() { return lowerBound; }
    public long upperBound() { return upperBound; }
    public double densePostingListThreshold() { return densePostingListThreshold; }
    public Optional<TensorType> tensorType() { return tensorType; }
    public Optional<StructuredDataType> referenceDocumentType() { return referenceDocumentType; }

    public static final DistanceMetric DEFAULT_DISTANCE_METRIC = DistanceMetric.EUCLIDEAN;

    /** Returns the distance metric set on this, or the default metric if none is set */
    public DistanceMetric distanceMetric() {
        return distanceMetric.orElse(DEFAULT_DISTANCE_METRIC);
    }
    public Optional<HnswIndexParams> hnswIndexParams() { return hnswIndexParams; }

    public Sorting getSorting() { return sorting; }
    public Dictionary getDictionary() { return dictionary; }
    public Case getCase() { return casing; }

    public void setRemoveIfZero(boolean remove) { this.removeIfZero = remove; }
    public void setCreateIfNonExistent(boolean create) { this.createIfNonExistent = create; }

    /**
     * Sets whether this should be included in the "attributeprefetch" document summary.
     * True or false to override default, null to use default
     */
    public void setPrefetch(Boolean prefetch) { this.prefetch = prefetch; }
    public void setEnableBitVectors(boolean enableBitVectors) { this.enableBitVectors = enableBitVectors; }
    public void setEnableOnlyBitVector(boolean enableOnlyBitVector) { this.enableOnlyBitVector = enableOnlyBitVector; }

    /**
     * Sets fast-rank on this.
     *
     * @throws IllegalArgumentException if this has no tensor type, or its tensor type has no mapped dimension
     *                                  (regardless of the value given)
     */
    public void setFastRank(boolean value) {
        Supplier<IllegalArgumentException> badGen = () -> new IllegalArgumentException("fast-rank is only valid for tensor attributes, invalid for: "+this);
        var tt = tensorType.orElseThrow(badGen);
        for (var dim : tt.dimensions()) {
            if (dim.isMapped()) {
                this.fastRank = value;
                return;
            }
        }
        throw badGen.get();
    }
    public void setFastSearch(boolean fastSearch) { this.fastSearch = fastSearch; }
    public void setHuge(boolean huge) { this.huge = huge; }
    public void setPaged(boolean paged) { this.paged = paged; }
    public void setFastAccess(boolean fastAccess) { this.fastAccess = fastAccess; }
    public void setPosition(boolean position) { this.isPosition = position; }
    public void setMutable(boolean mutable) { this.mutable = mutable; }
    public void setArity(int arity) { this.arity = arity; }
    public void setLowerBound(long lowerBound) { this.lowerBound = lowerBound; }
    public void setUpperBound(long upperBound) { this.upperBound = upperBound; }
    public void setDensePostingListThreshold(double threshold) { this.densePostingListThreshold = threshold; }
    public void setTensorType(TensorType tensorType) { this.tensorType = Optional.of(tensorType); }
    public void setDistanceMetric(DistanceMetric metric) { this.distanceMetric = Optional.of(metric); }
    public void setHnswIndexParams(HnswIndexParams params) { this.hnswIndexParams = Optional.of(params); }
    public void setDictionary(Dictionary dictionary) { this.dictionary = dictionary; }
    public void setCase(Case casing) { this.casing = casing; }

    public String getName() { return name; }
    public Type getType() { return type; }
    public CollectionType getCollectionType() { return collectionType; }

    public void setName(String name) { this.name = name; }
    private void setType(Type type) { this.type = type; }
    public void setCollectionType(CollectionType type) { this.collectionType = type; }

    /**
     * Converts to the right attribute type from a field datatype.
     * Collection types are converted by their nested type.
     *
     * @throws IllegalArgumentException if there is no attribute type for the given field type
     */
    public static Type convertDataType(DataType fieldType) {
        if (fieldType instanceof NewDocumentReferenceDataType) {
            return Type.REFERENCE;
        } else if (fieldType instanceof CollectionDataType) {
            return convertDataType(((CollectionDataType) fieldType).getNestedType());
        }
        FieldValue fval = fieldType.createFieldValue();
        if (fval instanceof StringFieldValue) {
            return Type.STRING;
        } else if (fval instanceof IntegerFieldValue) {
            return Type.INTEGER;
        } else if (fval instanceof LongFieldValue) {
            return Type.LONG;
        } else if (fval instanceof FloatFieldValue) {
            return Type.FLOAT;
        } else if (fval instanceof DoubleFieldValue) {
            return Type.DOUBLE;
        } else if (fval instanceof BoolFieldValue) {
            return Type.BOOL;
        } else if (fval instanceof Float16FieldValue) {
            return Type.FLOAT16;
        } else if (fval instanceof ByteFieldValue) {
            return Type.BYTE;
        } else if (fval instanceof Raw) {
            return Type.BYTE;
        } else if (fval instanceof PredicateFieldValue) {
            return Type.PREDICATE;
        } else if (fval instanceof TensorFieldValue) {
            return Type.TENSOR;
        } else {
            throw new IllegalArgumentException("Don't know which attribute type to convert "
                                               + fieldType + " [" + fieldType.getClass() + "] to");
        }
    }

    /**
     * Converts to the right attribute collection type from a field datatype
     *
     * @throws IllegalArgumentException if the field type is not one of the supported kinds
     */
    private static CollectionType convertCollectionType(DataType fieldType) {
        if (fieldType instanceof ArrayDataType) {
            return CollectionType.ARRAY;
        } else if (fieldType instanceof WeightedSetDataType) {
            return CollectionType.WEIGHTEDSET;
        } else if (fieldType instanceof TensorDataType) {
            return CollectionType.SINGLE;
        } else if (fieldType instanceof PrimitiveDataType) {
            return CollectionType.SINGLE;
        } else if (fieldType instanceof NewDocumentReferenceDataType) {
            return CollectionType.SINGLE;
        } else {
            throw new IllegalArgumentException("Field " + fieldType + " not supported in convertCollectionType");
        }
    }

    /** Returns the tensor type of the given field type, or empty if it is not a tensor data type */
    private static Optional<TensorType> convertTensorType(DataType fieldType) {
        if ( ! ( fieldType instanceof TensorDataType)) return Optional.empty();
        return Optional.of(((TensorDataType)fieldType).getTensorType());
    }

    /** Returns the target type of the given field type, or empty if it is not a reference data type */
    private static Optional<StructuredDataType> convertTargetType(DataType fieldType) {
        return Optional.of(fieldType)
                       .filter(NewDocumentReferenceDataType.class::isInstance)
                       .map(NewDocumentReferenceDataType.class::cast)
                       .map(NewDocumentReferenceDataType::getTargetType);
    }

    /** Converts to the right field type from an attribute type */
    private DataType toDataType(Type attributeType) {
        switch (attributeType) {
            case STRING : return DataType.STRING;
            case INTEGER: return DataType.INT;
            case LONG: return DataType.LONG;
            case FLOAT16: return DataType.FLOAT16;
            case FLOAT: return DataType.FLOAT;
            case DOUBLE: return DataType.DOUBLE;
            case BOOL: return DataType.BOOL;
            case BYTE: return DataType.BYTE;
            case PREDICATE: return DataType.PREDICATE;
            case TENSOR: return DataType.getTensor(tensorType.orElseThrow(IllegalStateException::new));
            case REFERENCE: return createReferenceDataType();
            default: throw new IllegalArgumentException("Unknown attribute type " + attributeType);
        }
    }

    @SuppressWarnings("deprecation")
    private DataType createReferenceDataType() {
        if (referenceDocumentType.isEmpty()) {
            throw new IllegalStateException("Referenced document type is not set");
        }
        StructuredDataType type = referenceDocumentType.get();
        if (type instanceof DocumentType) {
            return new NewDocumentReferenceDataType((DocumentType) type);
        } else {
            return NewDocumentReferenceDataType.forDocumentName(type.getName());
        }
    }

    /** Returns the field data type corresponding to the type and collection type of this */
    public DataType getDataType() {
        DataType dataType = toDataType(type);
        if (collectionType == Attribute.CollectionType.ARRAY) {
            return DataType.getArray(dataType);
        } else if (collectionType == Attribute.CollectionType.WEIGHTEDSET) {
            return DataType.getWeightedSet(dataType, createIfNonExistent, removeIfZero);
        } else {
            return dataType;
        }
    }

    /**
     * Hashes exactly the fields compared by {@link #equals(Object)} (the name, plus everything
     * checked by {@link #isCompatible(Attribute)}), such that equal attributes always have equal hash codes.
     * Fields which equals ignores (such as fastAccess and isPosition) must not be included here.
     */
    @Override
    public int hashCode() {
        return Objects.hash(
                name, type, collectionType, isPrefetch(), removeIfZero, createIfNonExistent,
                enableBitVectors, enableOnlyBitVector, fastSearch, huge, mutable, paged,
                sorting, dictionary, tensorType, referenceDocumentType, distanceMetric, hnswIndexParams);
    }

    @Override
    public boolean equals(Object object) {
        if (! (object instanceof Attribute)) return false;

        Attribute other = (Attribute)object;
        if (!this.name.equals(other.name)) return false;
        return isCompatible(other);
    }

    /** Returns whether these attributes describes the same entity, even if they have different names */
    public boolean isCompatible(Attribute other) {
        if (! this.type.equals(other.type)) return false;
        if (! this.collectionType.equals(other.collectionType)) return false;
        if (this.isPrefetch() != other.isPrefetch()) return false;
        if (this.removeIfZero != other.removeIfZero) return false;
        if (this.createIfNonExistent != other.createIfNonExistent) return false;
        if (this.enableBitVectors != other.enableBitVectors) return false;
        if (this.enableOnlyBitVector != other.enableOnlyBitVector) return false;
        if (this.fastSearch != other.fastSearch) return false;
        if (this.huge != other.huge) return false;
        if (this.mutable != other.mutable) return false;
        if (this.paged != other.paged) return false;
        if (! this.sorting.equals(other.sorting)) return false;
        if (! Objects.equals(dictionary, other.dictionary)) return false;
        if (! Objects.equals(tensorType, other.tensorType)) return false;
        if (! Objects.equals(referenceDocumentType, other.referenceDocumentType)) return false;
        if (! Objects.equals(distanceMetric, other.distanceMetric)) return false;
        if (! Objects.equals(hnswIndexParams, other.hnswIndexParams)) return false;

        return true;
    }

    /**
     * Returns a field-by-field copy of this.
     * NOTE(review): super.clone() copies references, so the sorting, aliases and dictionary
     * instances are shared with the original - confirm callers do not rely on independent copies.
     */
    @Override
    public Attribute clone() {
        try {
            return (Attribute)super.clone();
        }
        catch (CloneNotSupportedException e) {
            // Cannot happen as this implements Cloneable; keep the cause in case it ever does
            throw new RuntimeException("Programming error", e);
        }
    }

    @Override
    public String toString() {
        return "attribute '" + name + "' (" + type + ")";
    }

    public Set<String> getAliases() {
        return aliases;
    }

}
/**
 * Holds the optional settings of a predicate field: arity, lower and upper bound, and the
 * dense posting list threshold. All but the arity fall back to a default when not set.
 *
 * @author lesters
 */
public final class BooleanIndexDefinition {

    public static final int DEFAULT_ARITY = 8;
    public static final long DEFAULT_UPPER_BOUND = Long.MAX_VALUE;
    public static final long DEFAULT_LOWER_BOUND = Long.MIN_VALUE;
    public static final double DEFAULT_DENSE_POSTING_LIST_THRESHOLD = 0.4;

    private final OptionalInt arity; // mandatory field value
    private final OptionalLong lowerBound;
    private final OptionalLong upperBound;
    private final OptionalDouble densePostingListThreshold;

    /** Creates a definition from boxed optionals, converting each to its primitive optional counterpart */
    public BooleanIndexDefinition(Optional<Integer> arity,
                                  Optional<Long> lowerBound,
                                  Optional<Long> upperBound,
                                  Optional<Double> densePLT)
    {
        this(arity.map(OptionalInt::of).orElseGet(OptionalInt::empty),
             lowerBound.map(OptionalLong::of).orElseGet(OptionalLong::empty),
             upperBound.map(OptionalLong::of).orElseGet(OptionalLong::empty),
             densePLT.map(OptionalDouble::of).orElseGet(OptionalDouble::empty));
    }

    public BooleanIndexDefinition(OptionalInt arity, OptionalLong lowerBound,
                                  OptionalLong upperBound, OptionalDouble densePostingListThreshold) {
        this.arity = arity;
        this.lowerBound = lowerBound;
        this.upperBound = upperBound;
        this.densePostingListThreshold = densePostingListThreshold;
    }

    /**
     * Returns the arity.
     *
     * @throws java.util.NoSuchElementException if no arity is set; check {@link #hasArity()} first
     */
    public int getArity() {
        return arity.getAsInt();
    }

    public boolean hasArity() {
        return arity.isPresent();
    }

    /** Returns the lower bound, or {@link #DEFAULT_LOWER_BOUND} if not set */
    public long getLowerBound() {
        return hasLowerBound() ? lowerBound.getAsLong() : DEFAULT_LOWER_BOUND;
    }

    public boolean hasLowerBound() {
        return lowerBound.isPresent();
    }

    /** Returns the upper bound, or {@link #DEFAULT_UPPER_BOUND} if not set */
    public long getUpperBound() {
        return hasUpperBound() ? upperBound.getAsLong() : DEFAULT_UPPER_BOUND;
    }

    public boolean hasUpperBound() {
        return upperBound.isPresent();
    }

    /** Returns the dense posting list threshold, or {@link #DEFAULT_DENSE_POSTING_LIST_THRESHOLD} if not set */
    public double getDensePostingListThreshold() {
        return hasDensePostingListThreshold() ? densePostingListThreshold.getAsDouble()
                                              : DEFAULT_DENSE_POSTING_LIST_THRESHOLD;
    }

    public boolean hasDensePostingListThreshold() {
        return densePostingListThreshold.isPresent();
    }

    @Override
    public String toString() {
        StringBuilder b = new StringBuilder("BooleanIndexDefinition [arity=").append(arity);
        b.append(", lowerBound=").append(lowerBound);
        b.append(", upperBound=").append(upperBound);
        b.append(", densePostingListThreshold=").append(densePostingListThreshold);
        return b.append("]").toString();
    }

}
+ * @author baldersheim + */ +public enum Case { + CASED("cased"), + UNCASED("uncased"); + private String name; + Case(String name) { this.name = name; } + public String getName() { return name;} +} diff --git a/config-model/src/main/java/com/yahoo/schema/document/ComplexAttributeFieldUtils.java b/config-model/src/main/java/com/yahoo/schema/document/ComplexAttributeFieldUtils.java new file mode 100644 index 00000000000..993bf16405a --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/document/ComplexAttributeFieldUtils.java @@ -0,0 +1,123 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.document; + +import com.yahoo.document.ArrayDataType; +import com.yahoo.document.DataType; +import com.yahoo.document.MapDataType; +import com.yahoo.document.StructDataType; + +/** + * Utils used to check whether a complex field supports being represented as struct field attributes. + * + * Currently we support: + * - array of simple struct + * - map of primitive type to simple struct + * - map of primitive type to primitive type + * + * A simple struct can contain fields of any type, but only fields of primitive type can be defined as + * struct field attributes in the complex field using the simple struct. 
+ * + * @author geirst + */ +public class ComplexAttributeFieldUtils { + + public static boolean isSupportedComplexField(ImmutableSDField field) { + return (isArrayOfSimpleStruct(field) || + isMapOfSimpleStruct(field) || + isMapOfPrimitiveType(field)); + } + + public static boolean isArrayOfSimpleStruct(ImmutableSDField field) { + if (field.getDataType() instanceof ArrayDataType) { + ArrayDataType arrayType = (ArrayDataType)field.getDataType(); + return isStructWithPrimitiveStructFieldAttributes(arrayType.getNestedType(), field); + } else { + return false; + } + } + + public static boolean isMapOfSimpleStruct(ImmutableSDField field) { + if (field.getDataType() instanceof MapDataType) { + MapDataType mapType = (MapDataType)field.getDataType(); + return isPrimitiveType(mapType.getKeyType()) && + isStructWithPrimitiveStructFieldAttributes(mapType.getValueType(), + field.getStructField("value")); + } else { + return false; + } + } + + public static boolean isMapOfPrimitiveType(ImmutableSDField field) { + if (field.getDataType() instanceof MapDataType) { + MapDataType mapType = (MapDataType)field.getDataType(); + return isPrimitiveType(mapType.getKeyType()) && + isPrimitiveType(mapType.getValueType()); + } else { + return false; + } + } + + private static boolean isStructWithPrimitiveStructFieldAttributes(DataType type, ImmutableSDField field) { + if (type instanceof StructDataType && ! 
GeoPos.isPos(type)) { + for (ImmutableSDField structField : field.getStructFields()) { + Attribute attribute = structField.getAttributes().get(structField.getName()); + if (attribute != null) { + if (!isPrimitiveType(attribute)) { + return false; + } + } else if (structField.wasConfiguredToDoAttributing()) { + if (!isPrimitiveType(structField.getDataType())) { + return false; + } + } + } + return true; + } else { + return false; + } + } + + public static boolean isPrimitiveType(Attribute attribute) { + return attribute.getCollectionType().equals(Attribute.CollectionType.SINGLE) && + isPrimitiveType(attribute.getDataType()); + } + + public static boolean isPrimitiveType(DataType dataType) { + return dataType.equals(DataType.BYTE) || + dataType.equals(DataType.INT) || + dataType.equals(DataType.LONG) || + dataType.equals(DataType.FLOAT) || + dataType.equals(DataType.DOUBLE) || + dataType.equals(DataType.STRING); + } + + public static boolean isComplexFieldWithOnlyStructFieldAttributes(ImmutableSDField field) { + if (isArrayOfSimpleStruct(field)) { + return hasOnlyStructFieldAttributes(field); + } else if (isMapOfSimpleStruct(field)) { + return hasSingleAttribute(field.getStructField("key")) && + hasOnlyStructFieldAttributes(field.getStructField("value")); + } else if (isMapOfPrimitiveType(field)) { + return hasSingleAttribute(field.getStructField("key")) && + hasSingleAttribute(field.getStructField("value")); + } + return false; + } + + private static boolean hasOnlyStructFieldAttributes(ImmutableSDField field) { + for (ImmutableSDField structField : field.getStructFields()) { + if (!hasSingleAttribute(structField)) { + return false; + } + } + return true; + } + + private static boolean hasSingleAttribute(ImmutableSDField field) { + if (field.getAttributes().size() != 1) { + return false; + } + return (field.getAttributes().get(field.getName()) != null); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/document/Dictionary.java 
b/config-model/src/main/java/com/yahoo/schema/document/Dictionary.java new file mode 100644 index 00000000000..4744547c778 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/document/Dictionary.java @@ -0,0 +1,35 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.schema.document; + +/** + * Represents settings for dictionary control + * + * @author baldersheim + */ +public class Dictionary { + public enum Type { BTREE, HASH, BTREE_AND_HASH }; + private Type type = null; + private Case casing= null; + + public void updateType(Type type) { + if (this.type == null) { + this.type = type; + } else if ((this.type == Type.BTREE) && (type == Type.HASH)) { + this.type = Type.BTREE_AND_HASH; + } else if ((this.type == Type.HASH) && (type == Type.BTREE)) { + this.type = Type.BTREE_AND_HASH; + } else { + throw new IllegalArgumentException("Can not combine previous dictionary setting " + this.type + + " with current " + type); + } + } + public void updateMatch(Case casing) { + if (this.casing != null) { + throw new IllegalArgumentException("dictionary match mode has already been set to " + this.casing); + } + this.casing = casing; + } + public Type getType() { return (type != null) ? type : Type.BTREE; } + public Case getMatch() { return (casing != null) ? casing : Case.UNCASED; } +} diff --git a/config-model/src/main/java/com/yahoo/schema/document/FieldSet.java b/config-model/src/main/java/com/yahoo/schema/document/FieldSet.java new file mode 100644 index 00000000000..e62e784d7b3 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/document/FieldSet.java @@ -0,0 +1,41 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.schema.document;

import java.util.LinkedHashSet;
import java.util.Set;
import java.util.TreeSet;

import static java.util.Comparator.comparing;

/**
 * Searchable collection of fields.
 *
 * @author baldersheim
 */
public class FieldSet {

    private final String name;
    private final Set<String> queryCommands = new LinkedHashSet<>();
    private final Set<String> fieldNames = new TreeSet<>();
    // Ordered by the Field representation of each member
    private final Set<ImmutableSDField> fields = new TreeSet<>(comparing(ImmutableSDField::asField));
    private Matching matching;  // null until setMatching is called

    public FieldSet(String name) {
        this.name = name;
    }

    public String getName() {
        return name;
    }

    /** Adds a field name to this set and returns this for chaining. */
    public FieldSet addFieldName(String field) {
        fieldNames.add(field);
        return this;
    }

    /** Returns the live, sorted set of field names. */
    public Set<String> getFieldNames() {
        return fieldNames;
    }

    /** Returns the live set of fields. */
    public Set<ImmutableSDField> fields() {
        return fields;
    }

    /** Returns the live set of query commands, in insertion order. */
    public Set<String> queryCommands() {
        return queryCommands;
    }

    public void setMatching(Matching matching) {
        this.matching = matching;
    }

    /** Returns the matching set on this, or null if none has been set. */
    public Matching getMatching() {
        return matching;
    }

    @Override
    public String toString() {
        return "fieldset '" + name + "'";
    }

}

// ---- Patch residue: header of the next file in this diff, kept verbatim (it continues on the following line) ----
// diff --git a/config-model/src/main/java/com/yahoo/schema/document/GeoPos.java b/config-model/src/main/java/com/yahoo/schema/document/GeoPos.java
// new file mode 100644
// index 00000000000..829555d88c6
// --- /dev/null
// +++ b/config-model/src/main/java/com/yahoo/schema/document/GeoPos.java
// @@ -0,0 +1,26 @@
// +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
// +package com.yahoo.schema.document;
// +
// +import com.yahoo.document.DataType;
// +import com.yahoo.document.PositionDataType;
// +
// +/**
// + * Common utilities for recognizing fields with the built-in "position" datatype,
// + * possibly in array form.
+ * @author arnej + */ +public class GeoPos { + static public boolean isPos(DataType type) { + return PositionDataType.INSTANCE.equals(type); + } + static public boolean isPosArray(DataType type) { + return DataType.getArray(PositionDataType.INSTANCE).equals(type); + } + static public boolean isAnyPos(DataType type) { + return isPos(type) || isPosArray(type); + } + + static public boolean isPos(ImmutableSDField field) { return isPos(field.getDataType()); } + static public boolean isPosArray(ImmutableSDField field) { return isPosArray(field.getDataType()); } + static public boolean isAnyPos(ImmutableSDField field) { return isAnyPos(field.getDataType()); } +} diff --git a/config-model/src/main/java/com/yahoo/schema/document/HnswIndexParams.java b/config-model/src/main/java/com/yahoo/schema/document/HnswIndexParams.java new file mode 100644 index 00000000000..cc427356c78 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/document/HnswIndexParams.java @@ -0,0 +1,76 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.document; + +import java.util.Optional; + +/** + * Configuration parameters for a hnsw index used together with a 1-dimensional indexed tensor for approximate nearest neighbor search. 
/**
 * Configuration parameters for a hnsw index used together with a 1-dimensional indexed tensor
 * for approximate nearest neighbor search.
 *
 * @author geirst
 */
public class HnswIndexParams {

    public static final int DEFAULT_MAX_LINKS_PER_NODE = 16;
    public static final int DEFAULT_NEIGHBORS_TO_EXPLORE_AT_INSERT = 200;

    private final Optional<Integer> maxLinksPerNode;
    private final Optional<Integer> neighborsToExploreAtInsert;
    private final Optional<Boolean> multiThreadedIndexing;

    /** Collects the values that have been explicitly set and builds params holding only those. */
    public static class Builder {

        private Optional<Integer> maxLinksPerNode = Optional.empty();
        private Optional<Integer> neighborsToExploreAtInsert = Optional.empty();
        private Optional<Boolean> multiThreadedIndexing = Optional.empty();

        public void setMaxLinksPerNode(int value) {
            maxLinksPerNode = Optional.of(value);
        }

        public void setNeighborsToExploreAtInsert(int value) {
            neighborsToExploreAtInsert = Optional.of(value);
        }

        public void setMultiThreadedIndexing(boolean value) {
            multiThreadedIndexing = Optional.of(value);
        }

        public HnswIndexParams build() {
            return new HnswIndexParams(maxLinksPerNode, neighborsToExploreAtInsert, multiThreadedIndexing);
        }

    }

    /** Creates params where no value is set, such that all getters return their default. */
    public HnswIndexParams() {
        this(Optional.empty(), Optional.empty(), Optional.empty());
    }

    public HnswIndexParams(Optional<Integer> maxLinksPerNode,
                           Optional<Integer> neighborsToExploreAtInsert,
                           Optional<Boolean> multiThreadedIndexing) {
        this.maxLinksPerNode = maxLinksPerNode;
        this.neighborsToExploreAtInsert = neighborsToExploreAtInsert;
        this.multiThreadedIndexing = multiThreadedIndexing;
    }

    /**
     * Creates a new instance where values from the given parameter instance are used where they are present,
     * otherwise we use values from this. Returns this unchanged if the given optional is empty.
     */
    public HnswIndexParams overrideFrom(Optional<HnswIndexParams> other) {
        return other.map(overriding ->
                                 new HnswIndexParams(overriding.maxLinksPerNode.or(() -> maxLinksPerNode),
                                                     overriding.neighborsToExploreAtInsert.or(() -> neighborsToExploreAtInsert),
                                                     overriding.multiThreadedIndexing.or(() -> multiThreadedIndexing)))
                    .orElse(this);
    }

    /** Returns the value set, or {@link #DEFAULT_MAX_LINKS_PER_NODE} if none. */
    public int maxLinksPerNode() {
        return maxLinksPerNode.orElse(DEFAULT_MAX_LINKS_PER_NODE);
    }

    /** Returns the value set, or {@link #DEFAULT_NEIGHBORS_TO_EXPLORE_AT_INSERT} if none. */
    public int neighborsToExploreAtInsert() {
        return neighborsToExploreAtInsert.orElse(DEFAULT_NEIGHBORS_TO_EXPLORE_AT_INSERT);
    }

    /** Returns the value set, or true if none. */
    public boolean multiThreadedIndexing() {
        return multiThreadedIndexing.orElse(true);
    }

}

// ---- Next file in this diff ----
// diff --git a/config-model/src/main/java/com/yahoo/schema/document/ImmutableImportedComplexSDField.java b/config-model/src/main/java/com/yahoo/schema/document/ImmutableImportedComplexSDField.java
// new file mode 100644
// index 00000000000..553b5b4d940
// --- /dev/null
// +++ b/config-model/src/main/java/com/yahoo/schema/document/ImmutableImportedComplexSDField.java
// @@ -0,0 +1,29 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.document;

import java.util.Collection;

import static java.util.stream.Collectors.toList;

/**
 * Wraps {@link ImportedComplexField} as {@link ImmutableSDField}.
 */
public class ImmutableImportedComplexSDField extends ImmutableImportedSDField {

    private final ImportedComplexField importedComplexField;

    public ImmutableImportedComplexSDField(ImportedComplexField importedField) {
        super(importedField);
        importedComplexField = importedField;
    }

    /** Returns the nested field with the given name wrapped as an immutable field, or null if there is none. */
    @Override
    public ImmutableSDField getStructField(String name) {
        ImportedField nested = importedComplexField.getNestedField(name);
        if (nested == null) return null;
        return nested.asImmutableSDField();
    }

    /** Returns all nested fields, each wrapped as an immutable field. */
    @Override
    public Collection<? extends ImmutableSDField> getStructFields() {
        return importedComplexField.getNestedFields().stream()
                                   .map(ImportedField::asImmutableSDField)
                                   .collect(toList());
    }

}

// ---- Patch residue: header of the next file in this diff, kept verbatim (it continues on the following line) ----
// diff --git a/config-model/src/main/java/com/yahoo/schema/document/ImmutableImportedSDField.java b/config-model/src/main/java/com/yahoo/schema/document/ImmutableImportedSDField.java
// new file mode 100644
// index 00000000000..335942de99d
// --- /dev/null
// +++ b/config-model/src/main/java/com/yahoo/schema/document/ImmutableImportedSDField.java
// @@ -0,0 +1,218 @@
// +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
// +package com.yahoo.schema.document;
// +
// +import com.yahoo.document.DataType;
// +import com.yahoo.document.Field;
// +import com.yahoo.schema.Index;
// +import com.yahoo.schema.Schema;
// +import com.yahoo.vespa.documentmodel.SummaryField;
// +import com.yahoo.vespa.indexinglanguage.expressions.Expression;
// +import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression;
// +
// +import java.util.Collection;
// +import java.util.Collections;
// +import java.util.List;
// +import java.util.Map;
// +
// +/**
// + * Wraps {@link ImportedField} as {@link ImmutableSDField}.
// + * Methods that are not meaningful or relevant for imported fields will throw {@link UnsupportedOperationException}.
/**
 * Wraps {@link ImportedField} as {@link ImmutableSDField}.
 * Methods that are not meaningful or relevant for imported fields will throw {@link UnsupportedOperationException}.
 *
 * Most other methods delegate to the target field of the imported field;
 * the name is the exception, as it is that of the imported field itself.
 *
 * @author bjorncs
 */
public class ImmutableImportedSDField implements ImmutableSDField {

    private final ImportedField importedField;

    ImmutableImportedSDField(ImportedField importedField) {
        this.importedField = importedField;
    }

    /** Returns the imported field wrapped by this. */
    public ImportedField getImportedField() {
        return importedField;
    }

    /** Not supported: always throws {@link UnsupportedOperationException}. */
    @Override
    public <T extends Expression> boolean containsExpression(Class<T> searchFor) {
        throw createUnsupportedException(searchFor.getSimpleName());
    }

    @Override
    public boolean doesAttributing() {
        return importedField.targetField().doesAttributing();
    }

    @Override
    public boolean doesIndexing() {
        return importedField.targetField().doesIndexing();
    }

    @Override
    public boolean doesLowerCasing() {
        return importedField.targetField().doesLowerCasing();
    }

    /** Always false for an imported field. */
    @Override
    public boolean isExtraField() {
        return false;
    }

    /** Always true. */
    @Override
    public boolean isImportedField() {
        return true;
    }

    @Override
    public boolean isIndexStructureField() {
        return importedField.targetField().isIndexStructureField();
    }

    @Override
    public boolean hasIndex() {
        return importedField.targetField().hasIndex();
    }

    @Override
    public boolean usesStructOrMap() {
        return importedField.targetField().usesStructOrMap();
    }

    @Override
    public boolean wasConfiguredToDoAttributing() {
        return importedField.targetField().wasConfiguredToDoAttributing();
    }

    @Override
    public DataType getDataType() {
        return importedField.targetField().getDataType();
    }

    @Override
    public SummaryField getSummaryField(String name) {
        return importedField.targetField().getSummaryField(name);
    }

    /**
     * Returns the index of the target field which has the same name as the target field.
     *
     * @param name must equal the name of the imported field
     * @throws IllegalArgumentException if the given name differs from the imported field name
     */
    @Override
    public Index getIndex(String name) {
        if ( ! importedField.fieldName().equals(name)) {
            throw new IllegalArgumentException("Getting an index (" + name + ") with different name than the imported field ("
                                               + importedField.fieldName() + ") is not supported");
        }
        // The lookup in the target field uses the target's own name, not the imported (alias) name
        String targetIndexName = importedField.targetField().getName();
        return importedField.targetField().getIndex(targetIndexName);
    }

    @Override
    public List<String> getQueryCommands() {
        return importedField.targetField().getQueryCommands();
    }

    @Override
    public Map<String, Attribute> getAttributes() {
        return importedField.targetField().getAttributes();
    }

    @Override
    public Attribute getAttribute() { return importedField.targetField().getAttribute(); }

    /** Always returns an empty map. */
    @Override
    public Map<String, String> getAliasToName() {
        return Collections.emptyMap();
    }

    /** Not supported: always throws {@link UnsupportedOperationException}. */
    @Override
    public ScriptExpression getIndexingScript() {
        throw createUnsupportedException("indexing");
    }

    @Override
    public Matching getMatching() {
        return importedField.targetField().getMatching();
    }

    @Override
    public NormalizeLevel getNormalizing() {
        return importedField.targetField().getNormalizing();
    }

    /** Not supported by this class: always throws {@link UnsupportedOperationException}. */
    @Override
    public ImmutableSDField getStructField(String name) {
        throw createUnsupportedException("struct");
    }

    /** Not supported by this class: always throws {@link UnsupportedOperationException}. */
    @Override
    public Collection<? extends ImmutableSDField> getStructFields() {
        throw createUnsupportedException("struct");
    }

    @Override
    public Stemming getStemming() {
        return importedField.targetField().getStemming();
    }

    /** Not supported: always throws {@link UnsupportedOperationException}. */
    @Override
    public Stemming getStemming(Schema schema) {
        throw createUnsupportedException("stemming");
    }

    /** Not supported: always throws {@link UnsupportedOperationException}. */
    @Override
    public Ranking getRanking() {
        throw createUnsupportedException("ranking");
    }

    /** Not supported: always throws {@link UnsupportedOperationException}. */
    @Override
    public Map<String, SummaryField> getSummaryFields() {
        throw createUnsupportedException("summary fields");
    }

    @Override
    public String getName() {
        return importedField.fieldName(); // Name of the imported field, not the target field
    }

    @Override
    public int getWeight() {
        return importedField.targetField().getWeight();
    }

    @Override
    public int getLiteralBoost() {
        return importedField.targetField().getLiteralBoost();
    }

    @Override
    public RankType getRankType() {
        return importedField.targetField().getRankType();
    }

    @Override
    public Map<String, Index> getIndices() {
        return importedField.targetField().getIndices();
    }

    @Override
    public boolean existsIndex(String name) {
        return importedField.targetField().existsIndex(name);
    }

    /**
     * Returns a field representation of the imported field.
     * Changes to the returned instance are not propagated back to the underlying imported field!
     */
    @Override
    public Field asField() {
        return new Field(
                importedField.fieldName(),
                importedField.targetField().getDataType());
    }

    /** Creates the exception thrown by the methods which are not supported for imported fields. */
    private static UnsupportedOperationException createUnsupportedException(String aspect) {
        return new UnsupportedOperationException("'" + aspect + "' is not meaningful or relevant for an imported field.");
    }

    @Override
    public boolean hasFullIndexingDocprocRights() {
        return importedField.targetField().hasFullIndexingDocprocRights();
    }

}

// ---- Next file in this diff ----
// diff --git a/config-model/src/main/java/com/yahoo/schema/document/ImmutableSDField.java b/config-model/src/main/java/com/yahoo/schema/document/ImmutableSDField.java
// new file mode 100644
// index 00000000000..44e442811ba
// --- /dev/null
// +++ b/config-model/src/main/java/com/yahoo/schema/document/ImmutableSDField.java
// @@ -0,0 +1,91 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.document;

import com.yahoo.document.DataType;
import com.yahoo.document.Field;
import com.yahoo.schema.Index;
import com.yahoo.schema.Schema;
import com.yahoo.vespa.documentmodel.SummaryField;
import com.yahoo.vespa.indexinglanguage.expressions.Expression;
import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression;

import java.util.Collection;
import java.util.List;
import java.util.Map;

/**
 * An interface containing the non-mutating methods of {@link SDField}.
 * For description of the methods see {@link SDField}.
 *
 * @author bjorncs
 */
public interface ImmutableSDField {

    <T extends Expression> boolean containsExpression(Class<T> searchFor);

    boolean doesAttributing();

    boolean doesIndexing();

    boolean doesLowerCasing();

    boolean isExtraField();

    boolean isImportedField();

    boolean isIndexStructureField();

    boolean usesStructOrMap();

    /**
     * Whether this field at some time was configured to do attributing.
     *
     * This function can typically return a different value than doesAttributing(),
     * which uses the final state of the underlying indexing script instead.
     */
    boolean wasConfiguredToDoAttributing();

    DataType getDataType();

    Index getIndex(String name);

    List<String> getQueryCommands();

    Map<String, Attribute> getAttributes();

    Attribute getAttribute();

    Map<String, String> getAliasToName();

    ScriptExpression getIndexingScript();

    Matching getMatching();

    NormalizeLevel getNormalizing();

    ImmutableSDField getStructField(String name);

    Collection<? extends ImmutableSDField> getStructFields();

    Stemming getStemming();

    Stemming getStemming(Schema schema);

    Ranking getRanking();

    String getName();

    Map<String, SummaryField> getSummaryFields();

    /** Returns a {@link Field} representation (which is sadly not immutable) */
    Field asField();

    boolean hasFullIndexingDocprocRights();
    int getWeight();
    int getLiteralBoost();
    RankType getRankType();
    Map<String, Index> getIndices();
    boolean existsIndex(String name);
    SummaryField getSummaryField(String name);
    boolean hasIndex();
}

// ---- Patch residue: header of the next file in this diff, kept verbatim (it continues on the following line) ----
// diff --git a/config-model/src/main/java/com/yahoo/schema/document/ImportedComplexField.java b/config-model/src/main/java/com/yahoo/schema/document/ImportedComplexField.java
// new file mode 100644
// index 00000000000..10c400bc4d6
// --- /dev/null
// +++ b/config-model/src/main/java/com/yahoo/schema/document/ImportedComplexField.java
// @@ -0,0 +1,49 @@
// +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
// +package com.yahoo.schema.document;
// +
// +import com.yahoo.schema.DocumentReference;
// +
// +import java.util.Collection;
// +import java.util.Map;
// +
// +/**
// + * A complex field that is imported from a concrete field in a referenced document type and given an alias name.
+ */ +public class ImportedComplexField extends ImportedField { + + private Map<String, ImportedField> nestedFields; + + public ImportedComplexField(String fieldName, DocumentReference reference, ImmutableSDField targetField) { + super(fieldName, reference, targetField); + nestedFields = new java.util.LinkedHashMap<>(0); + } + + @Override + public ImmutableSDField asImmutableSDField() { + return new ImmutableImportedComplexSDField(this); + } + + public void addNestedField(ImportedField importedField) { + String prefix = fieldName() + "."; + assert(importedField.fieldName().substring(0, prefix.length()).equals(prefix)); + String suffix = importedField.fieldName().substring(prefix.length()); + nestedFields.put(suffix, importedField); + } + + public Collection<ImportedField> getNestedFields() { + return nestedFields.values(); + } + + public ImportedField getNestedField(String name) { + if (name.contains(".")) { + String superFieldName = name.substring(0,name.indexOf(".")); + String subFieldName = name.substring(name.indexOf(".")+1); + ImportedField superField = nestedFields.get(superFieldName); + if (superField != null && superField instanceof ImportedComplexField) { + return ((ImportedComplexField)superField).getNestedField(subFieldName); + } + return null; + } + return nestedFields.get(name); + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/document/ImportedField.java b/config-model/src/main/java/com/yahoo/schema/document/ImportedField.java new file mode 100644 index 00000000000..50f8591bbce --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/document/ImportedField.java @@ -0,0 +1,38 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.document; + +import com.yahoo.schema.DocumentReference; + +/** + * A field that is imported from a concrete field in a referenced document type and given an alias name. 
/**
 * A field that is imported from a concrete field in a referenced document type and given an alias name.
 *
 * @author geirst
 */
public abstract class ImportedField {

    private final String fieldName;
    private final DocumentReference reference;
    private final ImmutableSDField targetField;

    /**
     * @param fieldName   the name this field is given where it is imported
     * @param reference   the document reference this field is imported through
     * @param targetField the concrete field being imported
     */
    public ImportedField(String fieldName, DocumentReference reference, ImmutableSDField targetField) {
        this.fieldName = fieldName;
        this.reference = reference;
        this.targetField = targetField;
    }

    /** Returns the name given to this imported field. */
    public String fieldName() { return fieldName; }

    /** Returns the document reference given at construction. */
    public DocumentReference reference() { return reference; }

    /** Returns the field this imports. */
    public ImmutableSDField targetField() { return targetField; }

    /** Returns a view of this as an {@link ImmutableSDField}. */
    public abstract ImmutableSDField asImmutableSDField();

}

// ---- Next file in this diff ----
// diff --git a/config-model/src/main/java/com/yahoo/schema/document/ImportedFields.java b/config-model/src/main/java/com/yahoo/schema/document/ImportedFields.java
// new file mode 100644
// index 00000000000..f6654896fae
// --- /dev/null
// +++ b/config-model/src/main/java/com/yahoo/schema/document/ImportedFields.java
// @@ -0,0 +1,23 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.document;

import java.util.Collections;
import java.util.Map;

/**
 * A set of fields that are imported from concrete fields in referenced document types.
 *
 * @author geirst
 */
public class ImportedFields {

    private final Map<String, ImportedField> fields;

    /** Creates this from the given map, which is kept by reference (not copied). */
    public ImportedFields(Map<String, ImportedField> fields) {
        this.fields = fields;
    }

    /** Returns an unmodifiable view of the imported fields. */
    public Map<String, ImportedField> fields() { return Collections.unmodifiableMap(fields); }

}

// ---- Patch residue: header of the next file in this diff, kept verbatim (it continues on the following line) ----
// diff --git a/config-model/src/main/java/com/yahoo/schema/document/ImportedSimpleField.java b/config-model/src/main/java/com/yahoo/schema/document/ImportedSimpleField.java
// new file mode 100644
// index 00000000000..244135ecc10
// --- /dev/null
// +++ b/config-model/src/main/java/com/yahoo/schema/document/ImportedSimpleField.java
// @@ -0,0 +1,18 @@
// +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license.
See LICENSE in the project root. +package com.yahoo.schema.document; + +import com.yahoo.schema.DocumentReference; + +/** + * A simple field that is imported from a concrete field in a referenced document type and given an alias name. + */ +public class ImportedSimpleField extends ImportedField { + public ImportedSimpleField(String fieldName, DocumentReference reference, ImmutableSDField targetField) { + super(fieldName, reference, targetField); + } + + @Override + public ImmutableSDField asImmutableSDField() { + return new ImmutableImportedSDField(this); + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/document/MatchAlgorithm.java b/config-model/src/main/java/com/yahoo/schema/document/MatchAlgorithm.java new file mode 100644 index 00000000000..8556fe491d0 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/document/MatchAlgorithm.java @@ -0,0 +1,16 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.document; + +/** Which match algorithm is used by this matching setup */ + +public enum MatchAlgorithm { + NORMAL("normal"), + PREFIX("prefix"), + SUBSTRING("substring"), + SUFFIX("suffix"); + + private String name; + MatchAlgorithm(String name) { this.name = name; } + + public String getName() { return name; } +} diff --git a/config-model/src/main/java/com/yahoo/schema/document/MatchType.java b/config-model/src/main/java/com/yahoo/schema/document/MatchType.java new file mode 100644 index 00000000000..d2088e71282 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/document/MatchType.java @@ -0,0 +1,14 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.schema.document;

/** The kinds of matching a field can be set up with. */
public enum MatchType {

    TEXT("text"),
    WORD("word"),
    EXACT("exact"),
    GRAM("gram");

    /** The lowercase name of this constant; assigned once in the constructor. */
    private final String name;

    MatchType(String name) { this.name = name; }

    /** Returns the lowercase name given to this constant. */
    public String getName() { return name; }

}

// ---- Next file in this diff ----
// diff --git a/config-model/src/main/java/com/yahoo/schema/document/Matching.java b/config-model/src/main/java/com/yahoo/schema/document/Matching.java
// new file mode 100644
// index 00000000000..f70f31be0bd
// --- /dev/null
// +++ b/config-model/src/main/java/com/yahoo/schema/document/Matching.java
// @@ -0,0 +1,141 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.document;

import java.io.Serializable;

/**
 * Defines how a field should be matched.
 * Matching objects can be compared based on their content, but they are <i>not</i> immutable.
 *
 * @author bratseth
 */
public class Matching implements Cloneable, Serializable {

    public static final MatchType defaultType = MatchType.TEXT;

    private MatchType type = MatchType.TEXT;
    private Case casing = Case.UNCASED;

    /** The basic match algorithm */
    private MatchAlgorithm algorithm = MatchAlgorithm.NORMAL;

    /** Whether the type was set through {@link #setType}, as opposed to being the default or given at construction */
    private boolean typeUserSet = false;

    /** Whether the algorithm was set through {@link #setAlgorithm} */
    private boolean algorithmUserSet = false;

    /** The gram size is the n in n-gram, or -1 if not set. Should only be set with gram matching. */
    private int gramSize = -1;

    /** Maximum number of characters to consider when searching in this field. Used for limiting resources, especially in streaming search. */
    private Integer maxLength;

    private String exactMatchTerminator = null;

    /** Creates a matching of type "text" */
    public Matching() {}

    /** Creates a matching of the given type, without marking the type as user set. */
    public Matching(MatchType type) {
        this.type = type;
    }

    public MatchType getType() { return type; }
    public Case getCase() { return casing; }

    /** Sets the type and marks it as user set. */
    public void setType(MatchType type) {
        this.type = type;
        typeUserSet = true;
    }

    public void setCase(Case casing) { this.casing = casing; }

    /** Returns the max length, or null if not set. */
    public Integer maxLength() { return maxLength; }
    public Matching maxLength(int maxLength) { this.maxLength = maxLength; return this; }
    public boolean isTypeUserSet() { return typeUserSet; }

    public MatchAlgorithm getAlgorithm() { return algorithm; }

    /** Sets the algorithm and marks it as user set. */
    public void setAlgorithm(MatchAlgorithm algorithm) {
        this.algorithm = algorithm;
        algorithmUserSet = true;
    }

    public boolean isAlgorithmUserSet() { return algorithmUserSet; }

    public boolean isPrefix() { return algorithm == MatchAlgorithm.PREFIX; }

    public boolean isSubstring() { return algorithm == MatchAlgorithm.SUBSTRING; }

    public boolean isSuffix() { return algorithm == MatchAlgorithm.SUFFIX; }

    /** Returns the gram size, or -1 if not set. Should only be set with gram matching. */
    public int getGramSize() { return gramSize; }

    public void setGramSize(int gramSize) { this.gramSize = gramSize; }

    /**
     * Merge data from another matching object: The algorithm and type are taken over if they were
     * user set in the other (with the gram size if the type is gram), and the exact match
     * terminator is taken over if the other has one. Does nothing if the argument is null.
     */
    public void merge(Matching m) {
        if (m == null) return;
        if (m.isAlgorithmUserSet()) {
            this.setAlgorithm(m.getAlgorithm());
        }
        if (m.isTypeUserSet()) {
            this.setType(m.getType());
            if (m.getType() == MatchType.GRAM)
                gramSize = m.gramSize;
        }
        if (m.getExactMatchTerminator() != null) {
            this.setExactMatchTerminator(m.getExactMatchTerminator());
        }
    }

    /**
     * If exact matching is used, this returns the terminator string
     * which terminates an exact matched sequence in queries. If exact
     * matching is not used, or no terminator is set, this is null
     */
    public String getExactMatchTerminator() { return exactMatchTerminator; }

    /**
     * Sets the terminator string which terminates an exact matched
     * sequence in queries (used if type is EXACT).
     */
    public void setExactMatchTerminator(String exactMatchTerminator) {
        this.exactMatchTerminator = exactMatchTerminator;
    }

    @Override
    public String toString() {
        return type + " matching [" + (type==MatchType.GRAM ? "gram size " + gramSize : "supports " + algorithm) +
               "], [exact-terminator "+exactMatchTerminator+"]";
    }

    /** Returns a shallow copy of this. */
    @Override
    public Matching clone() {
        try {
            return (Matching)super.clone();
        }
        catch (CloneNotSupportedException e) {
            // Cannot happen as this implements Cloneable; keep the cause in case it ever does
            throw new RuntimeException("Programming error", e);
        }
    }

    /**
     * Compares type, algorithm, exact match terminator and gram size, null-safely,
     * matching the fields used by {@link #hashCode()}.
     */
    // NOTE(review): casing and maxLength take no part in equals/hashCode, as before - confirm this is intended
    @Override
    public boolean equals(Object o) {
        if (o == this) return true;
        if (! (o instanceof Matching)) return false;

        Matching other = (Matching)o;
        return java.util.Objects.equals(this.type, other.type)
               && java.util.Objects.equals(this.algorithm, other.algorithm)
               && java.util.Objects.equals(this.exactMatchTerminator, other.exactMatchTerminator)
               && this.gramSize == other.gramSize;
    }

    @Override public int hashCode() {
        return java.util.Objects.hash(type, algorithm, exactMatchTerminator, gramSize);
    }

}

// ---- Patch residue: header of the next file in this diff, kept verbatim (it continues on the following line) ----
// diff --git a/config-model/src/main/java/com/yahoo/schema/document/NormalizeLevel.java b/config-model/src/main/java/com/yahoo/schema/document/NormalizeLevel.java
// new file mode 100644
// index 00000000000..12880d0e1c8
// --- /dev/null
// +++ b/config-model/src/main/java/com/yahoo/schema/document/NormalizeLevel.java
// @@ -0,0 +1,87 @@
// +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.document;

/**
 * class representing the character normalization
 * we want to do on query and indexed text.
 * Levels are strict subsets, so doing accent
 * removal means doing codepoint normalizing
 * and case normalizing also.
 */
// TODO: Missing author
public class NormalizeLevel {

    /**
     * The current levels are as follows:
     * NONE: no changes to input text
     * CODEPOINT: convert text into Unicode
     *            Normalization Form Compatibility Composition
     * LOWERCASE: also convert text into lowercase letters
     * ACCENT: do both above and remove accents on characters
     */
    public enum Level {
        NONE, CODEPOINT, LOWERCASE, ACCENT
    }

    /** Whether the level was given by the user; if so the infer methods leave it unchanged */
    private boolean userSpecified = false;
    private Level level = Level.ACCENT;

    /**
     * Returns whether accents should be removed from text
     */
    public boolean doRemoveAccents() { return level == Level.ACCENT; }

    /**
     * Construct a default (full) normalizelevel,
     */
    public NormalizeLevel() {}

    /**
     * Construct for a specific level, possibly user specified
     *
     * @param level which level to use
     * @param fromUser whether this was specified by the user
     */
    public NormalizeLevel(Level level, boolean fromUser) {
        this.level = level;
        this.userSpecified = fromUser;
    }

    /**
     * Change the current level to CODEPOINT as inferred
     * by other features' needs. If the current level
     * was user specified it will not change; also this
     * will not increase the level.
     */
    public void inferCodepoint() {
        if (userSpecified) {
            // ignore inferred changes if user specified something
            return;
        }
        // do not increase level
        if (level != Level.NONE) level = Level.CODEPOINT;
    }

    /**
     * Change the current level to LOWERCASE as inferred
     * by other features' needs. If the current level
     * was user specified it will not change; also this
     * will not increase the level.
     */
    public void inferLowercase() {
        if (userSpecified) {
            // ignore inferred changes if user specified something
            return;
        }
        // do not increase level
        if (level == Level.NONE) return;
        if (level == Level.CODEPOINT) return;

        level = Level.LOWERCASE;
    }

    public Level getLevel() {
        return level;
    }

}
// diff --git a/config-model/src/main/java/com/yahoo/schema/document/RankType.java b/config-model/src/main/java/com/yahoo/schema/document/RankType.java
// new file mode 100644
// index 00000000000..067c1e7f266
// --- /dev/null
// +++ b/config-model/src/main/java/com/yahoo/schema/document/RankType.java
// @@ -0,0 +1,40 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.document;

import java.util.Locale;

/**
 * The rank type of a field. For now this is just a container of a string name.
 * This class is immutable.
 *
 * @author bratseth
 */
public enum RankType {

    /** *implicit* default: No type has been set. */
    DEFAULT,

    // Rank types which can be set explicitly. These are defined for Vespa in NativeRankTypeDefinitionSet
    IDENTITY, ABOUT, TAGS, EMPTY;

    /** Returns "rank type " followed by the lower-cased constant name. */
    @Override
    public String toString() {
        // Locale.ROOT: the result must not depend on the JVM default locale
        return "rank type " + name().toLowerCase(Locale.ROOT);
    }

    /**
     * Returns the rank type from a string, regardless of its case.
     *
     * @param rankTypeName a rank type name in any casing
     * @return the rank type found
     * @throws IllegalArgumentException if not found
     */
    public static RankType fromString(String rankTypeName) {
        try {
            // Upper-case with Locale.ROOT: with e.g. a Turkish default locale "identity"
            // would become "İDENTİTY" and never match the constant name
            return RankType.valueOf(rankTypeName.toUpperCase(Locale.ROOT));
        }
        catch (IllegalArgumentException e) {
            throw new IllegalArgumentException("Unknown rank type '" + rankTypeName + "'. Supported rank types are " +
                                               "'identity', 'about', 'tags' and 'empty'.", e);
        }
    }

}
// diff --git a/config-model/src/main/java/com/yahoo/schema/document/Ranking.java b/config-model/src/main/java/com/yahoo/schema/document/Ranking.java
// new file mode 100644
// index 00000000000..31fd9747e2d
// --- /dev/null
// +++ b/config-model/src/main/java/com/yahoo/schema/document/Ranking.java
// @@ -0,0 +1,76 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.document;

import java.io.Serializable;

/**
 * The rank settings given in a rank clause in the search definition.
 *
 * @author Vegard Havdal
 */
public class Ranking implements Cloneable, Serializable {

    private boolean literal = false;
    private boolean filter = false;
    private boolean normal = false;

    /**
     * <p>Returns whether literal (non-stemmed, non-normalized) forms of the words should
     * be indexed in a separate index which is searched by an automatically added rank term
     * during searches.</p>
     *
     * <p>Default is false.</p>
     */
    public boolean isLiteral() { return literal; }

    public void setLiteral(boolean literal) { this.literal = literal; }

    /**
     * <p>Returns whether this is a filter. Filters will only tell if they are matched or not,
     * no detailed relevance information will be available about the match.</p>
     *
     * <p>Matching a filter is much cheaper for the search engine than matching a regular field.</p>
     *
     * <p>This returns false whenever {@link #isNormal()} is true, even if filter has been set.</p>
     *
     * <p>Default is false.</p>
     */
    public boolean isFilter() { return filter && !normal; }

    public void setFilter(boolean filter) { this.filter = filter; }

    /** Whether user has explicitly requested normal (non-filter) behavior */
    public boolean isNormal() { return normal; }
    public void setNormal(boolean n) { this.normal = n; }

    /** Returns true if the given rank settings are the same */
    @Override
    public boolean equals(Object o) {
        if ( ! (o instanceof Ranking)) return false;

        // Compares the raw flags, not the derived isFilter() value
        Ranking other = (Ranking)o;
        if (this.filter != other.filter) return false;
        if (this.literal != other.literal) return false;
        if (this.normal != other.normal) return false;
        return true;
    }

    @Override
    public int hashCode() {
        return java.util.Objects.hash(filter, literal, normal);
    }

    @Override
    public String toString() {
        return "rank settings [filter: " + filter + ", literal: " + literal + ", normal: "+normal+"]";
    }

    @Override
    public Ranking clone() {
        try {
            return (Ranking)super.clone();
        }
        catch (CloneNotSupportedException e) {
            throw new RuntimeException("Programming error", e);
        }
    }

}
// diff --git a/config-model/src/main/java/com/yahoo/schema/document/SDDocumentType.java b/config-model/src/main/java/com/yahoo/schema/document/SDDocumentType.java
// new file mode 100644
// index 00000000000..d300bd08bfd
// --- /dev/null
// +++ b/config-model/src/main/java/com/yahoo/schema/document/SDDocumentType.java
// @@ -0,0 +1,347 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.document;

import com.yahoo.document.DataType;
import com.yahoo.document.DataTypeName;
import com.yahoo.document.DocumentType;
import com.yahoo.document.Field;
import com.yahoo.document.PositionDataType;
import com.yahoo.document.StructDataType;
import com.yahoo.document.annotation.AnnotationType;
import com.yahoo.document.annotation.AnnotationTypeRegistry;
import com.yahoo.documentmodel.NewDocumentType;
import com.yahoo.documentmodel.VespaDocumentType;
import com.yahoo.schema.DocumentReferences;
import com.yahoo.schema.FieldSets;
import com.yahoo.schema.Schema;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Optional;
import java.util.Set;

/**
 * A document definition is a list of fields. Documents may inherit other documents,
 * implicitly acquiring their fields as its own. If a document is not set to inherit
 * any document, it will always inherit the document "document.0".
 *
 * @author Thomas Gundersen
 * @author bratseth
 */
public class SDDocumentType implements Cloneable, Serializable {

    public static final SDDocumentType VESPA_DOCUMENT;
    private final Map<DataTypeName, SDDocumentType> inheritedTypes = new LinkedHashMap<>();
    private final Map<NewDocumentType.Name, SDDocumentType> ownedTypes = new LinkedHashMap<>();
    private final AnnotationTypeRegistry annotationTypes = new AnnotationTypeRegistry();
    private DocumentType docType;
    private DataType structType;
    // The field sets here are set from the processing step in SD,
    // to ensure that the full Search and this SDDocumentType is built first.
    private FieldSets fieldSets;
    // Document references
    private Optional<DocumentReferences> documentReferences = Optional.empty();
    private TemporaryImportedFields temporaryImportedFields;

    static {
        // While this first instance is constructed VESPA_DOCUMENT is still null, so the
        // inherit(VESPA_DOCUMENT) call in the constructor is a no-op for it (see inherit).
        VESPA_DOCUMENT = new SDDocumentType(VespaDocumentType.INSTANCE.getFullName().getName());
        VESPA_DOCUMENT.addType(createSDDocumentType(PositionDataType.INSTANCE));
    }

    /**
     * Returns a copy of this with a cloned document type.
     * NOTE: super.clone() is shallow and the map fields are final, so the clone refers to the
     * same inheritedTypes/ownedTypes/annotationTypes instances as this; the putAll below
     * therefore copies the map onto itself.
     */
    @Override
    public SDDocumentType clone() throws CloneNotSupportedException {
        SDDocumentType type = (SDDocumentType) super.clone();
        type.docType = docType.clone();
        type.inheritedTypes.putAll(inheritedTypes);
        type.structType = structType;
        // TODO this isn't complete; should it be..?!
        return type;
    }

    /**
     * For adding structs defined in document scope
     *
     * @param dt the struct to add
     * @return self, for chaining
     * @throws IllegalArgumentException if a type with this name is already owned or inherited,
     *                                  or if the name equals the name of this document type
     */
    public SDDocumentType addType(SDDocumentType dt) {
        NewDocumentType.Name name = new NewDocumentType.Name(dt.getName());
        if (getType(name) != null)
            throw new IllegalArgumentException("Data type '" + name + "' has already been used.");
        // Compare string content: '==' only compares references and would let equal names through
        if (name.getName().equals(docType.getName()))
            throw new IllegalArgumentException("Data type '" + name + "' can not have same name as its defining document.");
        ownedTypes.put(name, dt);
        return this;
    }

    public final SDDocumentType getOwnedType(String name) {
        return getOwnedType(new NewDocumentType.Name(name));
    }

    public SDDocumentType getOwnedType(DataTypeName name) {
        return getOwnedType(name.getName());
    }

    /** Returns the type with this name owned by this (not inherited), or null if none */
    public SDDocumentType getOwnedType(NewDocumentType.Name name) {
        return ownedTypes.get(name);
    }

    public final SDDocumentType getType(String name) {
        return getType(new NewDocumentType.Name(name));
    }

    /** Returns the owned type with this name, else the first one found in the inherited types, else null */
    public SDDocumentType getType(NewDocumentType.Name name) {
        SDDocumentType type = ownedTypes.get(name);
        if (type == null) {
            for (SDDocumentType inherited : inheritedTypes.values()) {
                type = inherited.getType(name);
                if (type != null) {
                    return type;
                }
            }
        }
        return type;
    }

    public SDDocumentType addAnnotation(AnnotationType annotation) {
        annotationTypes.register(annotation);
        return this;
    }

    /** Returns all owned datatypes. */
    public Collection<SDDocumentType> getTypes() { return ownedTypes.values(); }

    // TODO: Include inherited
    public Map<String, AnnotationType> getAnnotations() { return annotationTypes.getTypes(); }
    public AnnotationType findAnnotation(String name) { return annotationTypes.getType(name); }

    /** Returns the owned types followed by all types of the inherited types, recursively. May contain duplicates. */
    public Collection<SDDocumentType> getAllTypes() {
        Collection<SDDocumentType> list = new ArrayList<>();
        list.addAll(getTypes());
        for (SDDocumentType inherited : inheritedTypes.values()) {
            list.addAll(inherited.getAllTypes());
        }
        return list;
    }

    /** Returns inherited and owned types by name; owned types are added last and so replace inherited ones of the same name */
    public Map<NewDocumentType.Name, SDDocumentType> allTypes() {
        Map<NewDocumentType.Name, SDDocumentType> map = new LinkedHashMap<>();
        for (SDDocumentType inherited : inheritedTypes.values())
            map.putAll(inherited.allTypes());
        map.putAll(ownedTypes);
        return map;
    }

    /**
     * Creates a new document type.
     * The document type id will be generated as a hash from the document type name.
     *
     * @param name The name of the new document type
     */
    public SDDocumentType(String name) {
        this(name,null);
    }

    public SDDocumentType(DataTypeName name) {
        this(name.getName());
    }

    /**
     * Creates a new document type.
     * The document type id will be generated as a hash from the document type name.
     *
     * @param name the name of the new document type
     * @param schema check for type ID collisions in this search definition
     */
    public SDDocumentType(String name, Schema schema) {
        docType = new DocumentType(name);
        validateId(schema);
        inherit(VESPA_DOCUMENT);
    }

    public boolean isStruct() { return getStruct() != null; }
    public DataType getStruct() { return structType; }

    /**
     * Sets the struct type of this. With a non-null argument the inheritance of VESPA_DOCUMENT
     * is removed; with null the content struct of the document type is used instead and all
     * inheritance is cleared.
     *
     * @throws IllegalArgumentException if the argument is null and the document type has no content struct
     */
    public SDDocumentType setStruct(DataType structType) {
        if (structType != null) {
            this.structType = structType;
            inheritedTypes.remove(VESPA_DOCUMENT.getDocumentName());
        } else {
            if (docType.contentStruct() != null) {
                this.structType = docType.contentStruct();
                inheritedTypes.clear();
            } else {
                throw new IllegalArgumentException("You can not set a null struct");
            }
        }
        return this;
    }

    public String getName() { return docType.getName(); }
    public DataTypeName getDocumentName() { return docType.getDataTypeName(); }
    public DocumentType getDocumentType() { return docType; }

    /** Inherits a temporary placeholder for the type with the given name */
    public void inherit(DataTypeName name) {
        inherit(new TemporarySDDocumentType(name));
    }

    /**
     * Adds the given type as inherited, unless a non-temporary type with the same document name
     * is already inherited. Null is ignored.
     *
     * @throws IllegalArgumentException if the type has the same name as this
     */
    public void inherit(SDDocumentType type) {
        if (type == null) return;
        if (type.getName().equals(this.getName()))
            throw new IllegalArgumentException("Document type '" + getName() + "' cannot inherit itself");
        if ( ! inheritedTypes.containsKey(type.getDocumentName()) ||
             (inheritedTypes.get(type.getDocumentName()) instanceof TemporarySDDocumentType)) {
            inheritedTypes.put(type.getDocumentName(), type);
        }
    }

    public Collection<SDDocumentType> getInheritedTypes() { return inheritedTypes.values(); }

    /** Returns the live (modifiable) map of inherited types */
    public Map<DataTypeName, SDDocumentType> inheritedTypes() { return inheritedTypes; }

    /** Throws IllegalArgumentException if the schema is non-null and returns a document for this name */
    protected void validateId(Schema schema) {
        if (schema == null) return;
        if (schema.getDocument(getName()) == null) return;
        SDDocumentType doc = schema.getDocument();
        throw new IllegalArgumentException("Failed creating document type '" + getName() + "', " +
                                           "document type '" + doc.getName() + "' already uses ID '" + doc.getName() + "'");
    }

    public void setFieldId(SDField field, int id) {
        field.setId(id, docType);
    }

    /** Override getField, as it may need to ask inherited types that isn't registered in document type. */
    public Field getField(String name) {
        if (name.contains(".")) {
            // "a.b.c": look up "a" in this type and resolve "b.c" as its struct field
            String superFieldName = name.substring(0,name.indexOf("."));
            String subFieldName = name.substring(name.indexOf(".")+1);
            Field f = docType.getField(superFieldName);
            if (f != null) {
                if (f instanceof SDField) {
                    SDField superField = (SDField)f;
                    return superField.getStructField(subFieldName);
                } else {
                    throw new IllegalArgumentException("Field " + f.getName() + " is not an SDField");
                }
            }
        }
        Field f = docType.getField(name);
        if (f == null) {
            for(SDDocumentType parent : inheritedTypes.values()) {
                f = parent.getField(name);
                if (f != null) return f;
            }
        }
        return f;
    }

    /**
     * Adds a field to this type.
     *
     * @throws IllegalArgumentException if a field of this type (not inherited) has the same name ignoring case,
     *                                  or if an inherited field with the same name has a different data type
     */
    public void addField(Field field) {
        verifyInheritance(field);
        for (Iterator<Field> i = docType.fieldIteratorThisTypeOnly(); i.hasNext(); ) {
            if (field.getName().equalsIgnoreCase((i.next()).getName())) {
                throw new IllegalArgumentException("Duplicate (case insensitively) " + field + " in " + this);
            }
        }
        docType.addField(field);
    }

    /** Parse-time inheritance check. */
    private void verifyInheritance(Field field) {
        for (SDDocumentType parent : inheritedTypes.values()) {
            for (Field pField : parent.fieldSet()) {
                if (pField.getName().equals(field.getName())) {
                    if (!pField.getDataType().equals(field.getDataType())) {
                        throw new IllegalArgumentException("For " + this + ", field '" + field.getName() +
                                                           "': Datatype can not be different from that of same field " +
                                                           "in the supertype '" + parent.getName() + "'");
                    }
                }
            }
        }
    }

    public SDField addField(String string, DataType dataType) {
        SDField field = new SDField(this, string, dataType);
        addField(field);
        return field;
    }

    /** Adds a field with an explicit id (code). The header argument is not used. */
    public Field addField(String fName, DataType dataType, boolean header, int code) {
        SDField field = new SDField(this, fName, code, dataType);
        addField(field);
        return field;
    }

    private Map<String, Field> fieldsInherited() {
        Map<String, Field> map = new LinkedHashMap<>();
        for (SDDocumentType parent : inheritedTypes.values()) {
            for (Field field : parent.fieldSet()) {
                map.put(field.getName(), field);
            }
        }
        return map;
    }

    /** Returns inherited fields and the fields of this; a field of this replaces an inherited field of the same name */
    public Set<Field> fieldSet() {
        Map<String, Field> map = fieldsInherited();
        Iterator<Field> it = docType.fieldIteratorThisTypeOnly();
        while (it.hasNext()) {
            Field field = it.next();
            map.put(field.getName(), field);
        }
        return new LinkedHashSet<>(map.values());
    }

    public Iterator<Field> fieldIterator() {
        return fieldSet().iterator();
    }

    /** Returns the number of fields in this only, not including inherited fields */
    // TODO: Remove
    public int getFieldCount() {
        return docType.getFieldCount();
    }

    @Override
    public String toString() {
        return "document type '" + docType.getName() + "'";
    }

    /** Creates a type named after the given struct, holding an SDField per struct field, with the struct set */
    private static SDDocumentType createSDDocumentType(StructDataType structType) {
        SDDocumentType docType = new SDDocumentType(structType.getName());
        for (Field field : structType.getFields()) {
            docType.addField(new SDField(docType, field.getName(), field.getDataType()));
        }
        docType.setStruct(structType);
        return docType;
    }

    /** The field sets defined for this type and its {@link Schema} */
    public FieldSets getFieldSets() {
        return fieldSets;
    }

    /** Sets the field sets for this */
    public void setFieldSets(FieldSets fieldSets) {
        this.fieldSets = fieldSets;
    }

    public Optional<DocumentReferences> getDocumentReferences() {
        return documentReferences;
    }

    /** Sets the document references; the argument must be non-null as it is wrapped with Optional.of */
    public void setDocumentReferences(DocumentReferences documentReferences) {
        this.documentReferences = Optional.of(documentReferences);
    }

    public TemporaryImportedFields getTemporaryImportedFields() {
        return temporaryImportedFields;
    }

    public void setTemporaryImportedFields(TemporaryImportedFields temporaryImportedFields) {
        this.temporaryImportedFields = temporaryImportedFields;
    }

}
// diff --git a/config-model/src/main/java/com/yahoo/schema/document/SDField.java b/config-model/src/main/java/com/yahoo/schema/document/SDField.java
// new file mode 100644
// index 00000000000..668b6388620
// --- /dev/null
// +++ b/config-model/src/main/java/com/yahoo/schema/document/SDField.java
// @@ -0,0 +1,802 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.document; + +import com.yahoo.document.CollectionDataType; +import com.yahoo.document.DataType; +import com.yahoo.document.DocumentType; +import com.yahoo.document.Field; +import com.yahoo.document.MapDataType; +import com.yahoo.document.StructDataType; +import com.yahoo.document.TensorDataType; +import com.yahoo.document.WeightedSetDataType; +import com.yahoo.documentmodel.OwnedTemporaryType; +import com.yahoo.documentmodel.TemporaryUnknownType; +import com.yahoo.language.Linguistics; +import com.yahoo.language.process.Embedder; +import com.yahoo.language.simple.SimpleLinguistics; +import com.yahoo.schema.Index; +import com.yahoo.schema.Schema; +import com.yahoo.schema.fieldoperation.FieldOperation; +import com.yahoo.schema.fieldoperation.FieldOperationContainer; +import com.yahoo.tensor.TensorType; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.indexinglanguage.ExpressionSearcher; +import com.yahoo.vespa.indexinglanguage.ExpressionVisitor; +import com.yahoo.vespa.indexinglanguage.ScriptParserContext; +import com.yahoo.vespa.indexinglanguage.expressions.AttributeExpression; +import com.yahoo.vespa.indexinglanguage.expressions.Expression; +import com.yahoo.vespa.indexinglanguage.expressions.IndexExpression; +import com.yahoo.vespa.indexinglanguage.expressions.LowerCaseExpression; +import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; +import com.yahoo.vespa.indexinglanguage.expressions.SummaryExpression; +import com.yahoo.vespa.indexinglanguage.parser.IndexingInput; +import com.yahoo.vespa.indexinglanguage.parser.ParseException; + +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.ListIterator; +import java.util.Map; +import java.util.TreeMap; + + +/** + * The field class represents a document field. It is used in + * the Document class to get and set fields. 
Each SDField has a name, a numeric ID, + * a data type. The numeric ID is used when the fields are stored + * in serialized form. + * + * @author bratseth + */ +public class SDField extends Field implements TypedKey, FieldOperationContainer, ImmutableSDField { + + /** Use this field for modifying index-structure, even if it doesn't have any indexing code */ + private boolean indexStructureField = false; + + /** The indexing statements to be applied to this value during indexing */ + private ScriptExpression indexingScript = new ScriptExpression(); + + /** The default rank type for indices of this field */ + private RankType rankType = RankType.DEFAULT; + + /** Rank settings in a "rank" block for the field. */ + private final Ranking ranking = new Ranking(); + + /** + * The literal boost of this field. This boost is added to a rank score + * when a query term matched as query term exactly (unnormalized and unstemmed). + * Non-positive boosts causes no boosting, 0 allows boosts + * to be specified in other rank profiles, while negative values + * turns the capability off. + */ + private int literalBoost = -1; + + /** + * The weight of this field. This is a percentage, + * so 100 is default to provide the identity transform. + */ + private int weight = 100; + + /** + * Indicates what kind of matching should be done on this field + */ + private Matching matching = new Matching(); + + private Dictionary dictionary = null; + + /** Attribute settings, or null if there are none */ + private final Map<String, Attribute> attributes = new TreeMap<>(); + + /** + * The stemming setting of this field, or null to use the default. + * Default is determined by the owning search definition. + */ + private Stemming stemming = null; + + /** How content of this field should be accent normalized etc. 
*/ + private NormalizeLevel normalizing = new NormalizeLevel(); + + /** Extra query commands of this field */ + private final List<String> queryCommands = new java.util.ArrayList<>(0); + + /** Summary fields defined in this field */ + private final Map<String, SummaryField> summaryFields = new java.util.LinkedHashMap<>(0); + + /** The explicitly index settings on this field */ + private final Map<String, Index> indices = new java.util.LinkedHashMap<>(); + + private boolean idOverride = false; + + /** Struct fields defined in this field */ + private final Map<String,SDField> structFields = new java.util.LinkedHashMap<>(0); + + /** The document that this field was declared in, or null */ + private SDDocumentType repoDocType = null; + + /** The aliases declared for this field. May pertain to indexes or attributes */ + private final Map<String, String> aliasToName = new HashMap<>(); + + /** Pending operations that must be applied after parsing, due to use of not-yet-defined structs. */ + private final List<FieldOperation> pendingOperations = new LinkedList<>(); + + private boolean isExtraField = false; + + private boolean wasConfiguredToDoAttributing = false; + + /** + * Creates a new field. This method is only used to create reserved fields. 
+ * + * @param name the name of the field + * @param dataType the datatype of the field + */ + public SDField(SDDocumentType repo, String name, int id, DataType dataType) { + super(name, id, dataType); + this.repoDocType = repo; + populate(name, dataType); + } + + public SDField(String name, DataType dataType) { + this(null, name, dataType); + } + + /** Creates a new field */ + public SDField(SDDocumentType repo, String name, DataType dataType) { + this(repo, name, dataType, null); + } + + /** Creates a new field */ + protected SDField(SDDocumentType repo, String name, DataType dataType, SDDocumentType owner) { + this(repo, name, dataType, owner, null, 0); + } + + /** + * Creates a new field + * + * @param name the name of the field + * @param dataType the datatype of the field + * @param owner the owning document (used to check for id collisions) + * @param fieldMatching the matching object to set for the field + */ + protected SDField(SDDocumentType repo, + String name, + DataType dataType, + SDDocumentType owner, + Matching fieldMatching, + int recursion) + { + super(name, dataType, owner == null ? 
null : owner.getDocumentType()); + this.repoDocType = repo; + this.structFieldDepth = recursion; + if (fieldMatching != null) + this.setMatching(fieldMatching); + populate(name, dataType); + } + + private int structFieldDepth = 0; + + private void populate(String name, DataType dataType) { + if (dataType instanceof TensorDataType) { + TensorType type = ((TensorDataType)dataType).getTensorType(); + if (type.dimensions().stream().anyMatch(d -> d.isIndexed() && d.size().isEmpty())) + throw new IllegalArgumentException("Illegal type in field " + name + " type " + type + + ": Dense tensor dimensions must have a size"); + addQueryCommand("type " + type); + } + else if (dataType instanceof WeightedSetDataType) { + var nested = ((WeightedSetDataType) dataType).getNestedType().getName(); + addQueryCommand("type WeightedSet<" + nested + ">"); + } + else { + addQueryCommand("type " + dataType.getName()); + } + } + + public void setIsExtraField(boolean isExtra) { + isExtraField = isExtra; + } + + @Override + public boolean isExtraField() { + return isExtraField; + } + + @Override + public boolean isImportedField() { + return false; + } + + @Override + public boolean doesAttributing() { + return containsExpression(AttributeExpression.class); + } + + @Override + public boolean doesIndexing() { + return containsExpression(IndexExpression.class); + } + + public boolean doesSummarying() { + if (usesStruct()) { + for (SDField structField : getStructFields()) { + if (structField.doesSummarying()) { + return true; + } + } + } + return containsExpression(SummaryExpression.class); + } + + @Override + public boolean doesLowerCasing() { + return containsExpression(LowerCaseExpression.class); + } + + @Override + public <T extends Expression> boolean containsExpression(Class<T> searchFor) { + return findExpression(searchFor) != null; + } + + private <T extends Expression> T findExpression(Class<T> searchFor) { + return new ExpressionSearcher<>(searchFor).searchIn(indexingScript); + } + + 
public void addSummaryFieldSources(SummaryField summaryField) { + if (usesStruct()) { + /* + * How this works for structs: When at least one sub-field in a struct is to + * be used for summary, that whole struct field is included in summary.cfg. Then, + * vsmsummary.cfg specifies the sub-fields used for each struct field. + * So we recurse into each struct, adding the destination classes set for each sub-field + * to the main summary-field for the struct field. + */ + for (SDField structField : getStructFields()) { + for (SummaryField sumF : structField.getSummaryFields().values()) { + for (String dest : sumF.getDestinations()) { + summaryField.addDestination(dest); + } + } + structField.addSummaryFieldSources(summaryField); + } + } else { + if (doesSummarying()) { + summaryField.addSource(getName()); + } + } + } + + private boolean doneStructFields = false; + + @SuppressWarnings("deprecation") + private void actuallyMakeStructFields() { + if (doneStructFields) return; + if (getFirstStructOrMapRecursive() == null) { + doneStructFields = true; + return; + } + var sdoc = repoDocType; + var dataType = getDataType(); + + java.util.function.BiConsumer<String, DataType> supplyStructField = (fieldName, fieldType) -> { + if (structFields.containsKey(fieldName)) return; + Matching subFieldMatching = new Matching(); + subFieldMatching.merge(this.matching); + String subName = getName().concat(".").concat(fieldName); + var subField = new SDField(sdoc, subName, fieldType, null, + subFieldMatching, structFieldDepth + 1); + structFields.put(fieldName, subField); + }; + + if (dataType instanceof MapDataType) { + MapDataType mdt = (MapDataType) dataType; + supplyStructField.accept("key", mdt.getKeyType()); + supplyStructField.accept("value", mdt.getValueType()); + } else { + if (structFieldDepth >= 10) { + // too risky, infinite recursion + doneStructFields = true; + return; + } + if (dataType instanceof CollectionDataType) { + dataType = 
((CollectionDataType)dataType).getNestedType(); + } + if ((dataType instanceof MapDataType) || (dataType instanceof CollectionDataType)) { + // "array of map" or "array of array" will not have any struct fields + // TODO: consider what this would mean + doneStructFields = true; + return; + } + SDDocumentType subType = sdoc != null ? sdoc.getType(dataType.getName()) : null; + if (dataType instanceof TemporaryUnknownType && subType != null) { + for (Field field : subType.fieldSet()) { + supplyStructField.accept(field.getName(), field.getDataType()); + } + } else if (dataType instanceof OwnedTemporaryType && subType != null) { + for (Field field : subType.fieldSet()) { + supplyStructField.accept(field.getName(), field.getDataType()); + } + } else if (dataType instanceof StructDataType) { + var sdt = (StructDataType) dataType; + for (Field field : sdt.getFields()) { + supplyStructField.accept(field.getName(), field.getDataType()); + } + } + if ((subType == null) && (structFields.size() > 0)) { + throw new IllegalArgumentException("Cannot find matching (repo=" + sdoc + ") for subfields in " + + this + " [" + getDataType() + getDataType().getClass() + + "] with " + structFields.size() + " struct fields"); + } + // populate struct fields with matching + if (subType != null) { + for (Field f : subType.fieldSet()) { + if (f instanceof SDField) { + SDField field = (SDField) f; + SDField subField = structFields.get(field.getName()); + if (subField != null) { + // we just made this with a copy of our matching (see above) + Matching subFieldMatching = subField.getMatching(); + subFieldMatching.merge(field.getMatching()); + subField.setMatching(subFieldMatching); + } + } else { + throw new IllegalArgumentException("Field in struct is not SDField " + f.getName()); + } + } + } + // else ("missing subtype for struct fields in: " + this + " type " + getDataType() + " [" + getDataType().getClass().getSimpleName() + "]"); + } + doneStructFields = true; + } + + private Matching 
matchingForStructFields = null; + + public void addOperation(FieldOperation op) { + pendingOperations.add(op); + } + + @Override + public void applyOperations(SDField field) { + if (pendingOperations.isEmpty()) return; + + Collections.sort(pendingOperations); + ListIterator<FieldOperation> ops = pendingOperations.listIterator(); + while (ops.hasNext()) { + FieldOperation op = ops.next(); + ops.remove(); + op.apply(field); + } + } + + public void applyOperations() { + applyOperations(this); + } + + public void setId(int fieldId, DocumentType owner) { + super.setId(fieldId, owner); + idOverride = true; + } + + public StructDataType getFirstStructRecursive() { + DataType dataType = getDataType(); + while (true) { // Currently no nesting of collections + if (dataType instanceof CollectionDataType) { + dataType = ((CollectionDataType)dataType).getNestedType(); + } else if (dataType instanceof MapDataType) { + dataType = ((MapDataType)dataType).getValueType(); + } else { + break; + } + } + return (dataType instanceof StructDataType) ? (StructDataType)dataType : null; + } + + private DataType getFirstStructOrMapRecursive() { + DataType dataType = getDataType(); + while (dataType instanceof CollectionDataType) { // Currently no nesting of collections + dataType = ((CollectionDataType)dataType).getNestedType(); + } + return (dataType instanceof StructDataType || dataType instanceof MapDataType) ? 
dataType : null; + } + + private boolean usesStruct() { + DataType dt = getFirstStructRecursive(); + return (dt != null); + } + + @Override + public boolean usesStructOrMap() { + DataType dt = getFirstStructOrMapRecursive(); + return (dt != null); + } + + @Override + public boolean wasConfiguredToDoAttributing() { + return wasConfiguredToDoAttributing; + } + + /** Parse an indexing expression which will use the simple linguistics implementation suitable for testing */ + public void parseIndexingScript(String script) { + parseIndexingScript(script, new SimpleLinguistics(), Embedder.throwsOnUse.asMap()); + } + + public void parseIndexingScript(String script, Linguistics linguistics, Map<String, Embedder> embedders) { + try { + ScriptParserContext config = new ScriptParserContext(linguistics, embedders); + config.setInputStream(new IndexingInput(script)); + setIndexingScript(ScriptExpression.newInstance(config)); + } catch (ParseException e) { + throw new IllegalArgumentException("Failed to parse script '" + script + "'", e); + } + } + + /** Sets the indexing script of this, or null to not use a script */ + public void setIndexingScript(ScriptExpression exp) { + if (exp == null) { + exp = new ScriptExpression(); + } + indexingScript = exp; + if (indexingScript.isEmpty()) { + return; // TODO: This causes empty expressions not to be propagate to struct fields!! BAD BAD BAD!! 
+ } + if (!wasConfiguredToDoAttributing()) { + wasConfiguredToDoAttributing = doesAttributing(); + } + if (!usesStructOrMap()) { + new ExpressionVisitor() { + + @Override + protected void doVisit(Expression exp) { + if (!(exp instanceof AttributeExpression)) { + return; + } + String fieldName = ((AttributeExpression)exp).getFieldName(); + if (fieldName == null) { + fieldName = getName(); + } + Attribute attribute = attributes.get(fieldName); + if (attribute == null) { + addAttribute(new Attribute(fieldName, getDataType())); + } + } + }.visit(indexingScript); + } + for (SDField structField : getStructFields()) { + structField.setIndexingScript(exp); + } + } + + @Override + public ScriptExpression getIndexingScript() { return indexingScript; } + + @SuppressWarnings("deprecation") + @Override + public void setDataType(DataType type) { + if (type.equals(DataType.URI)) { // Different defaults, naturally + normalizing.inferLowercase(); + stemming = Stemming.NONE; + } + this.dataType = type; + if ( ! idOverride) { + this.fieldId = calculateIdV7(null); + } + } + + @Override + public boolean isIndexStructureField() { + return indexStructureField; + } + + public void setIndexStructureField(boolean indexStructureField) { + this.indexStructureField = indexStructureField; + } + + @Override + public boolean hasIndex() { + return (getIndexingScript() != null) && doesIndexing(); + } + + /** Sets the literal boost of this field */ + public void setLiteralBoost(int literalBoost) { this.literalBoost=literalBoost; } + + /** + * Returns the literal boost of this field. This boost is added to a literal score + * when a query term matched as query term exactly (unnormalized and unstemmed). + * Default is non-positive. 
+ */ + @Override + public int getLiteralBoost() { return literalBoost; } + + /** Sets the weight of this field */ + public void setWeight(int weight) { this.weight=weight; } + + /** Returns the weight of this field, or 0 if nothing is set */ + @Override + public int getWeight() { return weight; } + + /** + * Returns what kind of matching type should be applied. + */ + @Override + public Matching getMatching() { return matching; } + + /** + * Sets what kind of matching type should be applied. + * (Token matching is default, PREFIX, SUBSTRING, SUFFIX are alternatives) + */ + public void setMatching(Matching matching) { this.matching=matching; } + + /** + * Returns Dictionary settings. + */ + public Dictionary getDictionary() { return dictionary; } + public Dictionary getOrSetDictionary() { + if (dictionary == null) { + dictionary = new Dictionary(); + } + return dictionary; + } + + /** + * Set the matching type for this field and all subfields. + */ + // TODO: When this is not the same as getMatching().setthis we have a potential for inconsistency. Find the right + // Matching object for struct fields at lookup time instead. + public void setMatchingType(MatchType type) { + this.getMatching().setType(type); + for (SDField structField : getStructFields()) { + structField.setMatchingType(type); + } + } + + /** + * Set the matching type for this field and all subfields. + */ + // TODO: When this is not the same as getMatching().setthis we have a potential for inconsistency. Find the right + // Matching object for struct fields at lookup time instead. + public void setMatchingCase(Case casing) { + this.getMatching().setCase(casing); + for (SDField structField : getStructFields()) { + structField.setMatchingCase(casing); + } + } + /** + * Set matching algorithm for this field and all subfields. + */ + // TODO: When this is not the same as getMatching().setthis we have a potential for inconsistency. 
Find the right + // Matching object for struct fields at lookup time instead. + public void setMatchingAlgorithm(MatchAlgorithm algorithm) { + this.getMatching().setAlgorithm(algorithm); + for (SDField structField : getStructFields()) { + structField.setMatchingAlgorithm(algorithm); // recurse like setMatchingType/setMatchingCase so nested struct fields are covered too + } + } + + /** Adds an explicit index defined in this field */ + public void addIndex(Index index) { + indices.put(index.getName(),index); + } + + /** + * Returns an index, or null if no index with this name has had + * some <b>explicit settings</b> applied in this field (even if this returns null, + * the index may be implicitly defined by an indexing statement) + */ + @Override + public Index getIndex(String name) { + return indices.get(name); + } + + /** + * Returns whether this field has an index (implicitly or + * explicitly) targeting the given name. + */ + @Override + public boolean existsIndex(String name) { + if (indices.get(name) != null) return true; + return name.equals(getName()) && doesIndexing(); + } + + /** + * Defined indices on this field + * @return defined indices on this + */ + @Override + public Map<String, Index> getIndices() { + return indices; + } + + /** + * Sets the default rank type of this field's indices, and sets this rank type + * to all indices explicitly defined here which have no rank type set. + * (This complex behavior is due to the fact that we would prefer to have rank types + * per field, not per index) + */ + public void setRankType(RankType rankType) { + this.rankType=rankType; + for (Index index : getIndices().values()) { + if (index.getRankType()==null) + index.setRankType(rankType); + } + + } + + /** Returns the rank settings set in a "rank" block for this field. This is never null. 
*/ + @Override + public Ranking getRanking() { return ranking; } + + /** Returns the default rank type of indices of this field, or null if nothing is set */ + @Override + public RankType getRankType() { return this.rankType; } + + /** + * Returns the search-time attribute settings of this field or null if none is set. + * + * <p>TODO: Make unmodifiable.</p> + */ + @Override + public Map<String, Attribute> getAttributes() { return attributes; } + + public Attribute getAttribute() { + return attributes.get(getName()); + } + + public void addAttribute(Attribute attribute) { + String name = attribute.getName(); + if (name == null || "".equals(name)) { + name = getName(); + attribute.setName(name); + } + attributes.put(attribute.getName(),attribute); + } + + /** + * Returns the stemming setting of this field. + * Default is determined by the owning search definition. + * + * @return the stemming setting of this, or null, to use the default + */ + @Override + public Stemming getStemming() { return stemming; } + + /** + * Whether this field should be stemmed in this search definition + */ + @Override + public Stemming getStemming(Schema schema) { + if (stemming != null) + return stemming; + else + return schema.getStemming(); + } + + @Override + public Field asField() { + return this; + } + + /** + * Sets how this field should be stemmed, or set to null to use the default. + */ + public void setStemming(Stemming stemming) { + this.stemming = stemming; + } + + /** Returns an unmodifiable map of the summary fields defined in this */ + @Override + public Map<String, SummaryField> getSummaryFields() { + return Collections.unmodifiableMap(summaryFields); + } + + public void removeSummaryFields() { + summaryFields.clear(); + } + + /** Adds a summary field */ + public void addSummaryField(SummaryField summaryField) { + summaryFields.put(summaryField.getName(),summaryField); + } + + /** + * Returns a summary field defined (implicitly or explicitly) by this field. 
+ * Returns null if there is no such summary field defined. + */ + @Override + public SummaryField getSummaryField(String name) { + return summaryFields.get(name); + } + + /** + * Returns a summary field defined (implicitly or explicitly) by this field. + * + * @param create true to create the summary field and add it to this field before returning if it is missing + * @return the summary field, or null if not present and create is false + */ + public SummaryField getSummaryField(String name, boolean create) { + SummaryField summaryField=summaryFields.get(name); + if (summaryField==null && create) { + summaryField=new SummaryField(name, getDataType()); + addSummaryField(summaryField); + } + return summaryFields.get(name); + } + + /** Returns list of static struct fields */ + @Override + public Collection<SDField> getStructFields() { + actuallyMakeStructFields(); + return structFields.values(); + } + + /** + * Returns a struct field defined in this field, + * potentially traversing into nested structs. + * Returns null if there is no such struct field defined. 
+ */ + @Override + public SDField getStructField(String name) { + actuallyMakeStructFields(); + if (name.contains(".")) { + String superFieldName = name.substring(0,name.indexOf(".")); + String subFieldName = name.substring(name.indexOf(".")+1); + SDField superField = structFields.get(superFieldName); + if (superField != null) { + return superField.getStructField(subFieldName); + } + return null; + } + return structFields.get(name); + } + + /** + * Returns how the content of this field should be accent normalized etc + */ + @Override + public NormalizeLevel getNormalizing() { return normalizing; } + + /** + * Change how the content of this field should be accent normalized etc + */ + public void setNormalizing(NormalizeLevel level) { normalizing = level; } + + public void addQueryCommand(String name) { + queryCommands.add(name); + } + + public boolean hasQueryCommand(String name) { + return queryCommands.contains(name); + } + + /** Returns a list of query commands */ + @Override + public List<String> getQueryCommands() { return queryCommands; } + + @Override + public boolean equals(Object other) { + if ( ! 
(other instanceof SDField)) return false; + return super.equals(other); + } + + @Override + public int hashCode() { + return getName().hashCode(); + } + + @Override + public String toString() { + return "field '" + getName() + "'"; + } + + /** The aliases declared for this field */ + @Override + public Map<String, String> getAliasToName() { + return aliasToName; + } + + @Override + public boolean hasFullIndexingDocprocRights() { + Attribute self = getAttributes().get(getName()); + return (!isExtraField() || ((self != null) && self.isMutable())); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/document/Sorting.java b/config-model/src/main/java/com/yahoo/schema/document/Sorting.java new file mode 100644 index 00000000000..2d0c9a5d27b --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/document/Sorting.java @@ -0,0 +1,65 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.document; + +import java.io.Serializable; + +/** + * A search-time document attribute sort specification (per-document in-memory value). + * This belongs to the attribute or field (implicit attribute). + * + * @author baldersheim + */ +public final class Sorting implements Cloneable, Serializable { + + // Remember to change hashCode and equals when you add new fields + public enum Function {UCA, RAW, LOWERCASE} + public enum Strength {PRIMARY, SECONDARY, TERTIARY, QUATERNARY, IDENTICAL} + private boolean ascending = true; + private Function function = Function.UCA; + private String locale = ""; + private Strength strength = Strength.PRIMARY; + + public boolean isAscending() { return ascending; } + public boolean isDescending() { return ! 
ascending; } + public String getLocale() { return locale; } + public Function getFunction() { return function; } + public Strength getStrength() { return strength; } + + public void setAscending() { ascending = true; } + public void setDescending() { ascending = false; } + public void setFunction(Function function) { this.function = function; } + public void setLocale(String locale) { this.locale = locale; } + public void setStrength(Strength strength) { this.strength = strength; } + + public int hashCode() { + return locale.hashCode() + + strength.hashCode() + + function.hashCode() + + (isDescending() ? 13 : 0); + } + + public boolean equals(Object object) { + if (! (object instanceof Sorting)) return false; + + Sorting other=(Sorting)object; + return this.locale.equals(other.locale) && + (ascending == other.ascending) && + (function == other.function) && + (strength == other.strength); + } + + @Override + public Sorting clone() { + try { + return (Sorting)super.clone(); + } + catch (CloneNotSupportedException e) { + throw new RuntimeException("Programming error", e); // not expected since this class implements Cloneable; keep the cause anyway + } + } + + public String toString() { + return "sorting '" + (isAscending() ? '+' : '-') + function.toString() + "(" + strength.toString() + ", " + locale + ")"; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/document/Stemming.java b/config-model/src/main/java/com/yahoo/schema/document/Stemming.java new file mode 100644 index 00000000000..5ec844e2540 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/document/Stemming.java @@ -0,0 +1,64 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.document; + +import com.yahoo.language.process.StemMode; + +/** + * The stemming setting of a field. 
This describes how the search engine + * should transform content of this field into base forms (stems) to increase + * recall (find "car" when you search for "cars" etc.). 
+ * + * @author bratseth + */ +public enum Stemming { + + /** No stemming */ + NONE("none"), + + /** select shortest possible stem */ + SHORTEST("shortest"), + + /** select the "best" stem alternative */ + BEST("best"), + + /** index multiple stems */ + MULTIPLE("multiple"); + + private final String name; + + /** + * Returns the stemming object for the given string. + * The legal stemming names are the stemming constants in any capitalization (upper-cased with Locale.ROOT, so the result does not depend on the default locale). + * + * @throws IllegalArgumentException if there is no stemming type with the given name + */ + public static Stemming get(String stemmingName) { + try { + return Stemming.valueOf(stemmingName.toUpperCase(java.util.Locale.ROOT)); + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException("'" + stemmingName + "' is not a valid stemming setting", e); + } + } + + Stemming(String name) { + this.name = name; + } + + public String getName() { return name; } + + @Override + public String toString() { + return "stemming " + name; + } + + public StemMode toStemMode() { + switch(this) { + case SHORTEST: return StemMode.SHORTEST; + case MULTIPLE: return StemMode.ALL; + case BEST : return StemMode.BEST; + case NONE: return StemMode.NONE; + default: throw new IllegalStateException("Inconvertible stem mode " + this); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/document/TemporaryImportedField.java b/config-model/src/main/java/com/yahoo/schema/document/TemporaryImportedField.java new file mode 100644 index 00000000000..efc0674586d --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/document/TemporaryImportedField.java @@ -0,0 +1,37 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.document; + +/** + * A field that is imported from a field in a referenced document type and given an alias name. + * + * This is temporary AST structure that only refers to the imported field by name. 
+ * + * @author geirst + */ +public class TemporaryImportedField { + + private final String fieldName; + private final String referenceFieldName; + private final String targetFieldName; + + public TemporaryImportedField(String fieldName, + String referenceFieldName, + String targetFieldName) { + this.fieldName = fieldName; + this.referenceFieldName = referenceFieldName; + this.targetFieldName = targetFieldName; + } + + public String fieldName() { + return fieldName; + } + + public String referenceFieldName() { + return referenceFieldName; + } + + public String targetFieldName() { + return targetFieldName; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/document/TemporaryImportedFields.java b/config-model/src/main/java/com/yahoo/schema/document/TemporaryImportedFields.java new file mode 100644 index 00000000000..7ad4feb6d32 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/document/TemporaryImportedFields.java @@ -0,0 +1,44 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.document; + +import com.yahoo.schema.Schema; + +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * A set of fields that are imported from referenced document types. + * + * This is temporary AST structure that only refers to the imported fields by name. 
+ * + * @author geirst + */ +public class TemporaryImportedFields { + + private final Schema owner; + private final Map<String, TemporaryImportedField> fields = new LinkedHashMap<>(); + + public TemporaryImportedFields(Schema owner) { + this.owner = owner; + } + + public void add(TemporaryImportedField importedField) { + fields.put(importedField.fieldName(), importedField); + } + + public boolean hasField(String fieldName) { + return fields.get(fieldName) != null; + } + + public Map<String, TemporaryImportedField> fields() { + if (owner.inherited().isEmpty()) return Collections.unmodifiableMap(fields); + if (owner.inherited().get().temporaryImportedFields().isEmpty()) return Collections.unmodifiableMap(fields); + + var allFields = new LinkedHashMap<>(owner.inherited().get().temporaryImportedFields().get().fields()); // inherited entries first, in a stable order + allFields.putAll(fields); // entries declared here override inherited ones with the same name + return Collections.unmodifiableMap(allFields); // read-only, like the two branches above + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/document/TemporarySDDocumentType.java b/config-model/src/main/java/com/yahoo/schema/document/TemporarySDDocumentType.java new file mode 100644 index 00000000000..3194a14a143 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/document/TemporarySDDocumentType.java @@ -0,0 +1,13 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.document; + +import com.yahoo.document.DataTypeName; + +/** + * @author baldersheim + */ +public class TemporarySDDocumentType extends SDDocumentType { + public TemporarySDDocumentType(DataTypeName name) { + super(name); + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/document/TemporarySDField.java b/config-model/src/main/java/com/yahoo/schema/document/TemporarySDField.java new file mode 100644 index 00000000000..e455fa78455 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/document/TemporarySDField.java @@ -0,0 +1,19 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.schema.document; + +import com.yahoo.document.DataType; + +/** + * @author Einar M R Rosenvinge + */ +public class TemporarySDField extends SDField { + + public TemporarySDField(SDDocumentType repo, String name, DataType dataType, SDDocumentType owner) { + super(repo, name, dataType, owner); + } + + public TemporarySDField(SDDocumentType repo, String name, DataType dataType) { + super(repo, name, dataType); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/document/TypedKey.java b/config-model/src/main/java/com/yahoo/schema/document/TypedKey.java new file mode 100644 index 00000000000..8de8c7b64fd --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/document/TypedKey.java @@ -0,0 +1,20 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.document; + +import com.yahoo.document.DataType; + +/** + * Common interface for various typed key (or field definitions). + * Used by code which wants to use common algorithms for dealing with typed keys, like the logical mapping + * + * @author bratseth + */ +public interface TypedKey { + + String getName(); + + void setDataType(DataType type); + + DataType getDataType(); + +} diff --git a/config-model/src/main/java/com/yahoo/schema/document/annotation/SDAnnotationType.java b/config-model/src/main/java/com/yahoo/schema/document/annotation/SDAnnotationType.java new file mode 100644 index 00000000000..3dc46a91c1b --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/document/annotation/SDAnnotationType.java @@ -0,0 +1,42 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.document.annotation; + +import com.yahoo.schema.document.SDDocumentType; +import com.yahoo.document.annotation.AnnotationType; + +/** + * @author Einar M R Rosenvinge + */ +public class SDAnnotationType extends AnnotationType { + + private SDDocumentType sdDocType; + private String inherits; + + public SDAnnotationType(String name) { + super(name); + } + + public SDAnnotationType(String name, SDDocumentType dataType, String inherits) { + super(name); + this.sdDocType = dataType; + this.inherits = inherits; + } + + public SDDocumentType getSdDocType() { + return sdDocType; + } + + public void setSdDocType(SDDocumentType value) { + assert(sdDocType == null); + sdDocType = value; + } + + public String getInherits() { + return inherits; + } + + public void inherit(String inherits) { + this.inherits = inherits; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/document/annotation/TemporaryAnnotationReferenceDataType.java b/config-model/src/main/java/com/yahoo/schema/document/annotation/TemporaryAnnotationReferenceDataType.java new file mode 100644 index 00000000000..de9bd977823 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/document/annotation/TemporaryAnnotationReferenceDataType.java @@ -0,0 +1,28 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.document.annotation; + +import com.yahoo.document.annotation.AnnotationReferenceDataType; +import com.yahoo.document.annotation.AnnotationType; + +/** + * @author Einar M R Rosenvinge + */ +public class TemporaryAnnotationReferenceDataType extends AnnotationReferenceDataType { + + private final String target; + + public TemporaryAnnotationReferenceDataType(String target) { + this.target = target; + } + + public String getTarget() { + return target; + } + + @Override + public void setAnnotationType(AnnotationType type) { + super.setName("annotationreference<" + type.getName() + ">"); + super.setAnnotationType(type); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/expressiontransforms/ConstantTensorTransformer.java b/config-model/src/main/java/com/yahoo/schema/expressiontransforms/ConstantTensorTransformer.java new file mode 100644 index 00000000000..4e320594918 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/expressiontransforms/ConstantTensorTransformer.java @@ -0,0 +1,71 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.expressiontransforms; + +import com.yahoo.schema.FeatureNames; +import com.yahoo.searchlib.rankingexpression.Reference; +import com.yahoo.searchlib.rankingexpression.evaluation.TensorValue; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; +import com.yahoo.searchlib.rankingexpression.rule.CompositeNode; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; +import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode; +import com.yahoo.searchlib.rankingexpression.transform.ExpressionTransformer; + +import java.util.ArrayList; +import java.util.List; + +/** + * Transforms named references to constant tensors with the rank feature 'constant'. 
+ * + * @author geirst + */ +public class ConstantTensorTransformer extends ExpressionTransformer<RankProfileTransformContext> { + + @Override + public ExpressionNode transform(ExpressionNode node, RankProfileTransformContext context) { + if (node instanceof ReferenceNode) { + return transformFeature((ReferenceNode) node, context); + } else if (node instanceof CompositeNode) { + return transformChildren((CompositeNode) node, context); + } else { + return node; + } + } + + private ExpressionNode transformFeature(ReferenceNode node, RankProfileTransformContext context) { + if ( ! node.getArguments().isEmpty() && ! FeatureNames.isSimpleFeature(node.reference())) { + return transformArguments(node, context); + } else { + return transformConstantReference(node, context); + } + } + + private ExpressionNode transformArguments(ReferenceNode node, RankProfileTransformContext context) { + List<ExpressionNode> arguments = node.getArguments().expressions(); + List<ExpressionNode> transformedArguments = new ArrayList<>(arguments.size()); + for (ExpressionNode argument : arguments) { + transformedArguments.add(transform(argument, context)); + } + return node.setArguments(transformedArguments); + } + + private ExpressionNode transformConstantReference(ReferenceNode node, RankProfileTransformContext context) { + String constantName = node.getName(); + Reference constantReference = node.reference(); + if (FeatureNames.isConstantFeature(constantReference)) { + constantName = constantReference.simpleArgument().orElse(null); + } else if (constantReference.isIdentifier()) { + constantReference = FeatureNames.asConstantFeature(constantName); + } else { + return node; + } + Value value = context.constants().get(constantName); + if (value == null || value.type().rank() == 0) return node; + + TensorValue tensorValue = (TensorValue)value; + String tensorType = tensorValue.asTensor().type().toString(); + context.rankProperties().put(constantReference + ".value", tensorValue.toString()); + 
context.rankProperties().put(constantReference + ".type", tensorType); + return new ReferenceNode(constantReference); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/expressiontransforms/ExpressionTransforms.java b/config-model/src/main/java/com/yahoo/schema/expressiontransforms/ExpressionTransforms.java new file mode 100644 index 00000000000..86aedd4332a --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/expressiontransforms/ExpressionTransforms.java @@ -0,0 +1,47 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.expressiontransforms; + +import com.google.common.collect.ImmutableList; +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.transform.ConstantDereferencer; +import com.yahoo.searchlib.rankingexpression.transform.ExpressionTransformer; +import com.yahoo.searchlib.rankingexpression.transform.Simplifier; +import com.yahoo.searchlib.rankingexpression.transform.TensorMaxMinTransformer; + +import java.util.List; + +/** + * The transformations done on ranking expressions done at config time before passing them on to the Vespa + * engine for execution. + * + * An instance of this class has scope of a compilation of a single rank profile. 
+ * + * @author bratseth + */ +public class ExpressionTransforms { + + private final List<ExpressionTransformer> transforms; + + public ExpressionTransforms() { + transforms = + ImmutableList.of(new TensorFlowFeatureConverter(), + new OnnxFeatureConverter(), + new OnnxModelTransformer(), + new XgboostFeatureConverter(), + new LightGBMFeatureConverter(), + new TokenTransformer(), + new ConstantDereferencer(), + new ConstantTensorTransformer(), + new FunctionInliner(), + new FunctionShadower(), + new TensorMaxMinTransformer(), + new Simplifier()); + } + + public RankingExpression transform(RankingExpression expression, RankProfileTransformContext context) { + for (ExpressionTransformer transformer : transforms) + expression = transformer.transform(expression, context); + return expression; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/expressiontransforms/FunctionInliner.java b/config-model/src/main/java/com/yahoo/schema/expressiontransforms/FunctionInliner.java new file mode 100644 index 00000000000..382d51747bb --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/expressiontransforms/FunctionInliner.java @@ -0,0 +1,33 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.expressiontransforms; + +import com.yahoo.schema.RankProfile; +import com.yahoo.searchlib.rankingexpression.rule.CompositeNode; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; +import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode; +import com.yahoo.searchlib.rankingexpression.transform.ExpressionTransformer; + +/** + * Inlines functions in ranking expressions + * + * @author bratseth + */ +public class FunctionInliner extends ExpressionTransformer<RankProfileTransformContext> { + + @Override + public ExpressionNode transform(ExpressionNode node, RankProfileTransformContext context) { + if (node instanceof ReferenceNode) + return transformFeatureNode((ReferenceNode)node, context); + if (node instanceof CompositeNode) + return transformChildren((CompositeNode)node, context); + return node; + } + + private ExpressionNode transformFeatureNode(ReferenceNode feature, RankProfileTransformContext context) { + if (feature.getArguments().size() > 0) return feature; // From RankProfile: only inline no-arg functions + RankProfile.RankingExpressionFunction rankingExpressionFunction = context.inlineFunctions().get(feature.getName()); + if (rankingExpressionFunction == null) return feature; + return transform(rankingExpressionFunction.function().getBody().getRoot(), context); // inline recursively and return + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/expressiontransforms/FunctionShadower.java b/config-model/src/main/java/com/yahoo/schema/expressiontransforms/FunctionShadower.java new file mode 100644 index 00000000000..702e4ea220e --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/expressiontransforms/FunctionShadower.java @@ -0,0 +1,59 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.expressiontransforms; + +import com.yahoo.schema.RankProfile; +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.rule.CompositeNode; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; +import com.yahoo.searchlib.rankingexpression.rule.FunctionNode; +import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode; +import com.yahoo.searchlib.rankingexpression.transform.ExpressionTransformer; + +/** + * Transforms function nodes to reference nodes if a rank profile function shadows a built-in function. + * This has the effect of allowing rank profile functions to redefine built-in functions. + * Another effect is that we can add built-in functions over time + * without fear of breaking existing users' functions with the same name. + * + * However, there is a (largish) caveat. If a user has a function with a certain number + * of arguments, and we add in a built-in function with a different arity, + * this will cause parse errors as the Java parser gives precedence to + * built-in functions. + * + * @author lesters + */ +public class FunctionShadower extends ExpressionTransformer<RankProfileTransformContext> { + + @Override + public RankingExpression transform(RankingExpression expression, RankProfileTransformContext context) { + ExpressionNode node = expression.getRoot(); + ExpressionNode result = transform(node, context); + return (result == node) + ? 
expression + : new RankingExpression(expression.getName(), result); + } + + @Override + public ExpressionNode transform(ExpressionNode node, RankProfileTransformContext context) { + if (node instanceof FunctionNode) + return transformFunctionNode((FunctionNode) node, context); + if (node instanceof CompositeNode) + return transformChildren((CompositeNode)node, context); + return node; + } + + private ExpressionNode transformFunctionNode(FunctionNode function, RankProfileTransformContext context) { + String name = function.getFunction().toString(); + RankProfile.RankingExpressionFunction rankingExpressionFunction = context.rankProfile().findFunction(name); + if (rankingExpressionFunction == null) + return transformChildren(function, context); + + int functionArity = function.getFunction().arity(); + if (functionArity != rankingExpressionFunction.function().arguments().size()) + return transformChildren(function, context); + + ReferenceNode node = new ReferenceNode(name, function.children(), null); + return transformChildren(node, context); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/expressiontransforms/LightGBMFeatureConverter.java b/config-model/src/main/java/com/yahoo/schema/expressiontransforms/LightGBMFeatureConverter.java new file mode 100644 index 00000000000..af5fa5ebeab --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/expressiontransforms/LightGBMFeatureConverter.java @@ -0,0 +1,59 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.expressiontransforms; + +import com.yahoo.path.Path; +import com.yahoo.searchlib.rankingexpression.rule.Arguments; +import com.yahoo.searchlib.rankingexpression.rule.CompositeNode; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; +import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode; +import com.yahoo.searchlib.rankingexpression.transform.ExpressionTransformer; +import com.yahoo.vespa.model.ml.ConvertedModel; +import com.yahoo.vespa.model.ml.FeatureArguments; + +import java.io.UncheckedIOException; +import java.util.HashMap; +import java.util.Map; + +/** + * Replaces instances of the lightgbm(model-path) pseudofeature with the + * native Vespa ranking expression implementing the same computation. + * + * @author lesters + */ +public class LightGBMFeatureConverter extends ExpressionTransformer<RankProfileTransformContext> { + + /** A cache of imported models indexed by model path. This avoids importing the same model multiple times. */ + private final Map<Path, ConvertedModel> convertedLightGBMModels = new HashMap<>(); + + @Override + public ExpressionNode transform(ExpressionNode node, RankProfileTransformContext context) { + if (node instanceof ReferenceNode) + return transformFeature((ReferenceNode) node, context); + else if (node instanceof CompositeNode) + return super.transformChildren((CompositeNode) node, context); + else + return node; + } + + private ExpressionNode transformFeature(ReferenceNode feature, RankProfileTransformContext context) { + if ( ! 
feature.getName().equals("lightgbm")) return feature; + + try { + FeatureArguments arguments = asFeatureArguments(feature.getArguments()); + ConvertedModel convertedModel = + convertedLightGBMModels.computeIfAbsent(arguments.path(), + path -> ConvertedModel.fromSourceOrStore(path, true, context)); + return convertedModel.expression(arguments, context); + } catch (IllegalArgumentException | UncheckedIOException e) { + throw new IllegalArgumentException("Could not use LightGBM model from " + feature, e); + } + } + + private FeatureArguments asFeatureArguments(Arguments arguments) { + if (arguments.size() != 1) + throw new IllegalArgumentException("A lightgbm node must take a single argument pointing to " + + "the LightGBM model file under [application]/models"); + return new FeatureArguments(arguments); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/expressiontransforms/OnnxFeatureConverter.java b/config-model/src/main/java/com/yahoo/schema/expressiontransforms/OnnxFeatureConverter.java new file mode 100644 index 00000000000..2277491cd47 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/expressiontransforms/OnnxFeatureConverter.java @@ -0,0 +1,64 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.expressiontransforms; + +import com.yahoo.path.Path; +import com.yahoo.searchlib.rankingexpression.rule.Arguments; +import com.yahoo.searchlib.rankingexpression.rule.CompositeNode; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; +import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode; +import com.yahoo.searchlib.rankingexpression.transform.ExpressionTransformer; +import com.yahoo.vespa.model.ml.ConvertedModel; +import com.yahoo.vespa.model.ml.FeatureArguments; + +import java.io.UncheckedIOException; +import java.util.HashMap; +import java.util.Map; + +/** + * Replaces instances of the onnx(model-path, output) + * pseudofeature with the native Vespa ranking expression implementing + * the same computation. + * + * @author bratseth + * @author lesters + */ +public class OnnxFeatureConverter extends ExpressionTransformer<RankProfileTransformContext> { + + /** A cache of imported models indexed by model path. This avoids importing the same model multiple times. */ + private final Map<Path, ConvertedModel> convertedOnnxModels = new HashMap<>(); + + @Override + public ExpressionNode transform(ExpressionNode node, RankProfileTransformContext context) { + if (node instanceof ReferenceNode) + return transformFeature((ReferenceNode) node, context); + else if (node instanceof CompositeNode) + return super.transformChildren((CompositeNode) node, context); + else + return node; + } + + private ExpressionNode transformFeature(ReferenceNode feature, RankProfileTransformContext context) { + if ( ! 
feature.getName().equals("onnx_vespa")) return feature; + try { + FeatureArguments arguments = asFeatureArguments(feature.getArguments()); + ConvertedModel convertedModel = + convertedOnnxModels.computeIfAbsent(arguments.path(), + path -> ConvertedModel.fromSourceOrStore(path, true, context)); + return convertedModel.expression(arguments, context); + } + catch (IllegalArgumentException | UncheckedIOException e) { + throw new IllegalArgumentException("Could not use Onnx model from " + feature, e); + } + } + + private FeatureArguments asFeatureArguments(Arguments arguments) { + if (arguments.isEmpty()) + throw new IllegalArgumentException("An ONNX node must take an argument pointing to " + + "the ONNX model file under [application]/models"); + if (arguments.expressions().size() > 3) + throw new IllegalArgumentException("An onnx feature can have at most 3 arguments"); + + return new FeatureArguments(arguments); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/expressiontransforms/OnnxModelTransformer.java b/config-model/src/main/java/com/yahoo/schema/expressiontransforms/OnnxModelTransformer.java new file mode 100644 index 00000000000..4c38c257602 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/expressiontransforms/OnnxModelTransformer.java @@ -0,0 +1,139 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.expressiontransforms; + +import com.yahoo.path.Path; +import com.yahoo.schema.OnnxModel; +import com.yahoo.schema.RankProfile; +import com.yahoo.searchlib.rankingexpression.Reference; +import com.yahoo.searchlib.rankingexpression.rule.Arguments; +import com.yahoo.searchlib.rankingexpression.rule.CompositeNode; +import com.yahoo.searchlib.rankingexpression.rule.ConstantNode; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; +import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode; +import com.yahoo.searchlib.rankingexpression.transform.ExpressionTransformer; +import com.yahoo.vespa.model.ml.ConvertedModel; +import com.yahoo.vespa.model.ml.FeatureArguments; +import com.yahoo.vespa.model.ml.ModelName; + +import java.util.List; + +/** + * Transforms ONNX model features of the forms: + * + * onnxModel(config_name) + * onnxModel(config_name).output + * onnxModel("path/to/model") + * onnxModel("path/to/model").output + * onnxModel("path/to/model", "path/to/output") + * onnxModel("path/to/model", "unused", "path/to/output") // signature is unused + * onnx(...) 
// same as with onnxModel, onnx is an alias of onnxModel + * + * To the format expected by the backend: + * + * onnxModel(config_name).output + * + * @author lesters + */ +public class OnnxModelTransformer extends ExpressionTransformer<RankProfileTransformContext> { + + @Override + public ExpressionNode transform(ExpressionNode node, RankProfileTransformContext context) { + if (node instanceof ReferenceNode) + return transformFeature((ReferenceNode) node, context); + else if (node instanceof CompositeNode) + return super.transformChildren((CompositeNode) node, context); + else + return node; + } + + private ExpressionNode transformFeature(ReferenceNode feature, RankProfileTransformContext context) { + if (context.rankProfile() == null) return feature; + if (context.rankProfile().schema() == null) return feature; + return transformFeature(feature, context.rankProfile()); + } + + public static ExpressionNode transformFeature(ReferenceNode feature, RankProfile profile) { + String featureName = feature.getName(); + if ( ! featureName.equals("onnxModel") && ! featureName.equals("onnx")) return feature; + + Arguments arguments = feature.getArguments(); + if (arguments.isEmpty()) + throw new IllegalArgumentException("An " + featureName + " feature must take an argument referring to a " + + "onnx-model config or an ONNX file."); + if (arguments.expressions().size() > 3) + throw new IllegalArgumentException("An " + featureName + " feature can have at most 3 arguments."); + + // Check that the model configuration "onnx-model" exists. If not defined, it should have been added + // by the "OnnxModelConfigGenerator" processor. If it still doesn't exist, it is because we can't find + // the actual ONNX file, which can happen if we are restarting or upgrading an application using an + // ONNX file that was transformed to Vespa ranking expressions. We then assume it is in the model store. 
+ + String modelConfigName = getModelConfigName(feature.reference()); + OnnxModel onnxModel = profile.onnxModels().get(modelConfigName); + if (onnxModel == null) { + String path = asString(arguments.expressions().get(0)); + ModelName modelName = new ModelName(null, Path.fromString(path), true); + ConvertedModel convertedModel = ConvertedModel.fromStore(profile.schema().applicationPackage(), modelName, path, profile); + FeatureArguments featureArguments = new FeatureArguments(arguments); + return convertedModel.expression(featureArguments, null); + } + + String defaultOutput = onnxModel.getOutputMap().get(onnxModel.getDefaultOutput()); + String output = getModelOutput(feature.reference(), defaultOutput); + if (! onnxModel.getOutputMap().containsValue(output)) { + throw new IllegalArgumentException(featureName + " argument '" + output + + "' output not found in model '" + onnxModel.getFileName() + "'"); + } + return new ReferenceNode("onnxModel", List.of(new ReferenceNode(modelConfigName)), output); + } + + public static String getModelConfigName(Reference reference) { + if (reference.arguments().size() > 0) { + ExpressionNode expr = reference.arguments().expressions().get(0); + if (expr instanceof ReferenceNode) { // refers to onnx-model config + return expr.toString(); + } + if (expr instanceof ConstantNode) { // refers to a file path + return asValidIdentifier(expr); + } + } + return null; + } + + public static String getModelOutput(Reference reference, String defaultOutput) { + if (reference.output() != null) { + return reference.output(); + } else if (reference.arguments().expressions().size() == 2) { + return asValidIdentifier(reference.arguments().expressions().get(1)); + } else if (reference.arguments().expressions().size() > 2) { + return asValidIdentifier(reference.arguments().expressions().get(2)); + } + return defaultOutput; + } + + public static String stripQuotes(String s) { + if (isNotQuoteSign(s.codePointAt(0))) return s; + if 
(isNotQuoteSign(s.codePointAt(s.length() - 1))) + throw new IllegalArgumentException("argument [" + s + "] is missing end quote"); + return s.substring(1, s.length()-1); + } + + public static String asValidIdentifier(String str) { + return str.replaceAll("[^\\w\\d\\$@_]", "_"); + } + + private static String asValidIdentifier(ExpressionNode node) { + return asValidIdentifier(asString(node)); + } + + private static boolean isNotQuoteSign(int c) { + return c != '\'' && c != '"'; + } + + public static String asString(ExpressionNode node) { + if ( ! (node instanceof ConstantNode)) + throw new IllegalArgumentException("Expected a constant string as argument, but got '" + node); + return stripQuotes(node.toString()); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/expressiontransforms/RankProfileTransformContext.java b/config-model/src/main/java/com/yahoo/schema/expressiontransforms/RankProfileTransformContext.java new file mode 100644 index 00000000000..cfc859345ad --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/expressiontransforms/RankProfileTransformContext.java @@ -0,0 +1,65 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.expressiontransforms; + +import ai.vespa.rankingexpression.importer.configmodelview.ImportedMlModels; +import com.yahoo.search.query.profile.QueryProfileRegistry; +import com.yahoo.schema.RankProfile; +import com.yahoo.searchlib.rankingexpression.Reference; +import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue; +import com.yahoo.searchlib.rankingexpression.evaluation.TensorValue; +import com.yahoo.searchlib.rankingexpression.evaluation.Value; +import com.yahoo.searchlib.rankingexpression.transform.TransformContext; +import com.yahoo.tensor.Tensor; +import com.yahoo.tensor.TensorType; + +import java.util.HashMap; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * Extends the transform context with rank profile information + * + * @author bratseth + */ +public class RankProfileTransformContext extends TransformContext { + + private final RankProfile rankProfile; + private final QueryProfileRegistry queryProfiles; + private final ImportedMlModels importedModels; + private final Map<String, RankProfile.RankingExpressionFunction> inlineFunctions; + private final Map<String, String> rankProperties = new HashMap<>(); + + public RankProfileTransformContext(RankProfile rankProfile, + QueryProfileRegistry queryProfiles, + Map<Reference, TensorType> featureTypes, + ImportedMlModels importedModels, + Map<Reference, RankProfile.Constant> constants, + Map<String, RankProfile.RankingExpressionFunction> inlineFunctions) { + super(valuesOf(constants), rankProfile.typeContext(queryProfiles, featureTypes)); + this.rankProfile = rankProfile; + this.queryProfiles = queryProfiles; + this.importedModels = importedModels; + this.inlineFunctions = inlineFunctions; + } + + public RankProfile rankProfile() { return rankProfile; } + public QueryProfileRegistry queryProfiles() { return queryProfiles; } + public ImportedMlModels importedModels() { return importedModels; } + public Map<String, RankProfile.RankingExpressionFunction> 
inlineFunctions() { return inlineFunctions; } + public Map<String, String> rankProperties() { return rankProperties; } + + private static Map<String, Value> valuesOf(Map<Reference, RankProfile.Constant> constants) { + return constants.values().stream() + .filter(constant -> constant.value().isPresent()) + .collect(Collectors.toMap(constant -> constant.name().simpleArgument().get(), + constant -> asValue(constant.value().get()))); + } + + private static Value asValue(Tensor tensor) { + if (tensor.type().rank() == 0) + return DoubleValue.of(tensor.asDouble()); + else + return TensorValue.of(tensor); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/expressiontransforms/TensorFlowFeatureConverter.java b/config-model/src/main/java/com/yahoo/schema/expressiontransforms/TensorFlowFeatureConverter.java new file mode 100644 index 00000000000..fd19c97036e --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/expressiontransforms/TensorFlowFeatureConverter.java @@ -0,0 +1,66 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.expressiontransforms; + +import com.yahoo.path.Path; +import com.yahoo.searchlib.rankingexpression.rule.Arguments; +import com.yahoo.searchlib.rankingexpression.rule.CompositeNode; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; +import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode; +import com.yahoo.searchlib.rankingexpression.transform.ExpressionTransformer; +import com.yahoo.vespa.model.ml.ConvertedModel; +import com.yahoo.vespa.model.ml.FeatureArguments; + +import java.io.UncheckedIOException; +import java.util.HashMap; +import java.util.Map; + +/** + * Replaces instances of the tensorflow(model-path, signature, output) + * pseudofeature with the native Vespa ranking expression implementing + * the same computation. 
+ * + * @author bratseth + */ +public class TensorFlowFeatureConverter extends ExpressionTransformer<RankProfileTransformContext> { + + /** A cache of imported models indexed by model path. This avoids importing the same model multiple times. */ + private final Map<Path, ConvertedModel> convertedTensorFlowModels = new HashMap<>(); + + public TensorFlowFeatureConverter() {} + + @Override + public ExpressionNode transform(ExpressionNode node, RankProfileTransformContext context) { + if (node instanceof ReferenceNode) + return transformFeature((ReferenceNode) node, context); + else if (node instanceof CompositeNode) + return super.transformChildren((CompositeNode) node, context); + else + return node; + } + + private ExpressionNode transformFeature(ReferenceNode feature, RankProfileTransformContext context) { + if ( ! feature.getName().equals("tensorflow")) return feature; + + try { + FeatureArguments arguments = asFeatureArguments(feature.getArguments()); + ConvertedModel convertedModel = + convertedTensorFlowModels.computeIfAbsent(arguments.path(), + path -> ConvertedModel.fromSourceOrStore(path, false, context)); + return convertedModel.expression(arguments, context); + } + catch (IllegalArgumentException | UncheckedIOException e) { + throw new IllegalArgumentException("Could not use tensorflow model from " + feature, e); + } + } + + private FeatureArguments asFeatureArguments(Arguments arguments) { + if (arguments.isEmpty()) + throw new IllegalArgumentException("A tensorflow node must take an argument pointing to " + + "the tensorflow model directory under [application]/models"); + if (arguments.expressions().size() > 3) + throw new IllegalArgumentException("A tensorflow feature can have at most 3 arguments"); + + return new FeatureArguments(arguments); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/expressiontransforms/TokenTransformer.java b/config-model/src/main/java/com/yahoo/schema/expressiontransforms/TokenTransformer.java new file mode 
100644 index 00000000000..30d9a3766b3 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/expressiontransforms/TokenTransformer.java @@ -0,0 +1,313 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.expressiontransforms; + +import com.yahoo.searchlib.rankingexpression.Reference; +import com.yahoo.searchlib.rankingexpression.evaluation.DoubleValue; +import com.yahoo.searchlib.rankingexpression.rule.ArithmeticNode; +import com.yahoo.searchlib.rankingexpression.rule.ArithmeticOperator; +import com.yahoo.searchlib.rankingexpression.rule.ComparisonNode; +import com.yahoo.searchlib.rankingexpression.rule.CompositeNode; +import com.yahoo.searchlib.rankingexpression.rule.ConstantNode; +import com.yahoo.searchlib.rankingexpression.rule.EmbracedNode; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; +import com.yahoo.searchlib.rankingexpression.rule.IfNode; +import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode; +import com.yahoo.searchlib.rankingexpression.rule.TensorFunctionNode; +import com.yahoo.searchlib.rankingexpression.rule.TruthOperator; +import com.yahoo.searchlib.rankingexpression.transform.ExpressionTransformer; +import com.yahoo.tensor.TensorType; +import com.yahoo.tensor.functions.Generate; +import com.yahoo.tensor.functions.Slice; +import com.yahoo.tensor.functions.TensorFunction; + +import java.util.ArrayList; +import java.util.List; + +import static com.yahoo.searchlib.rankingexpression.rule.TensorFunctionNode.wrapScalar; + +/** + * Convenience feature transforms for inputs to Transformer type models. + * + * Replaces features of the form + * + * tokenInputIds + * tokenTypeIds + * tokenAttentionMask + * + * to tensor generation expressions that generate the required input. 
+ * In general, these models expect input of the form: + * + * CLS + arg1 + SEP + arg2 + SEP + 0's + * + * @author lesters + */ +public class TokenTransformer extends ExpressionTransformer<RankProfileTransformContext> { + + static private final ConstantNode ZERO = new ConstantNode(new DoubleValue(0.0)); + static private final ConstantNode ONE = new ConstantNode(new DoubleValue(1.0)); + static private final ConstantNode TWO = new ConstantNode(new DoubleValue(2.0)); + static private final ConstantNode CLS = new ConstantNode(new DoubleValue(101)); + static private final ConstantNode SEP = new ConstantNode(new DoubleValue(102)); + + @Override + public ExpressionNode transform(ExpressionNode node, RankProfileTransformContext context) { + if (node instanceof ReferenceNode) + return transformFeature((ReferenceNode) node, context); + else if (node instanceof CompositeNode) + return super.transformChildren((CompositeNode) node, context); + else + return node; + } + + private ExpressionNode transformFeature(ReferenceNode feature, RankProfileTransformContext context) { + if (feature.getName().equals("tokenInputIds") && shouldTransform(feature, context)) + return transformTokenInputIds(feature, context); + if (feature.getName().equals("tokenTypeIds") && shouldTransform(feature, context)) + return transformTokenTypeIds(feature, context); + if (feature.getName().equals("tokenAttentionMask") && shouldTransform(feature, context)) + return transformTokenAttentionMask(feature, context); + return feature; + } + + /** + * Transforms a feature of the form + * + * tokenInputIds(128, a, b, ...) + * + * to an expression that concatenates the arguments a, b, ... 
using the + * special Transformers sequences of CLS and SEP, up to length 128, so + * that the sequence becomes + * + * CLS + a + SEP + b + SEP + 0's + * + * Concretely, transforms to a tensor generation expression: + * + * tensor(d0[1],d1[128])( + * if (d1 < 1, + * 101, + * if (d1 < 1 + length_a, + * a{d0:(d1 - (1)}, + * if (d1 < 1 + length_a + 1, + * 102, + * if (d1 < 1 + length_a + 1 + length_b, + * b{d0:(d1 - (1 + length_a + 1))}, + * if (d1 < 1 + length_a + 1 + length_b + 1, + * 102, + * 0.0 + * )))))) + * + * Functions calculating lengths of arguments are added to the rank profile. + */ + private ExpressionNode transformTokenInputIds(ReferenceNode feature, RankProfileTransformContext context) { + checkArguments(feature); + + TensorType type = createTensorType(feature.getName(), feature.getArguments().expressions().get(0)); + + // we need to add functions calculating the token lengths of the arguments + createTokenLengthFunctions(feature, context); + + // create token sequence: CLS + arg1 + SEP + arg2 + SEP + .... + ExpressionNode tokenSequenceExpr = createTokenSequenceExpr(0, createTokenSequence(feature)); + return new TensorFunctionNode(Generate.bound(type, wrapScalar(tokenSequenceExpr))); + } + + /** + * Transforms a feature of the form + * + * tokenTypeIds(128, a, b, ...) + * + * to an expression that generates a tensor that has values 0 for "a" + * (including CLS and SEP tokens) and 1 for the rest of the sequence. 
+ * + * Concretely, transforms to a tensor generation expression: + * + * tensor(d0[1],d1[128])( + * if (d1 < 1 + length_a + 1, + * 0, + * if (d1 < 1 + length_a + 1 + length_b + 1 + ..., + * 1, + * 0 + * ))) + */ + private ExpressionNode transformTokenTypeIds(ReferenceNode feature, RankProfileTransformContext context) { + checkArguments(feature); + + TensorType type = createTensorType(feature.getName(), feature.getArguments().expressions().get(0)); + + // we need to add functions calculating the token lengths of the arguments + createTokenLengthFunctions(feature, context); + + List<ExpressionNode> tokenSequence = createTokenSequence(feature); + ExpressionNode queryLengthExpr = createLengthExpr(2, tokenSequence); + ExpressionNode restLengthExpr = createLengthExpr(tokenSequence.size() - 1, tokenSequence); + ExpressionNode expr = new IfNode( + new ComparisonNode(new ReferenceNode("d1"), TruthOperator.SMALLER, queryLengthExpr), + ZERO, + new IfNode( + new ComparisonNode(new ReferenceNode("d1"), TruthOperator.SMALLER, restLengthExpr), + ONE, + ZERO + ) + ); + return new TensorFunctionNode(Generate.bound(type, wrapScalar(expr))); + } + + /** + * Transforms a feature of the form + * + * tokenAttentionMask(128, a, b, ...) + * + * to an expression that generates a tensor that has values 1 for all + * arguments (including CLS and SEP tokens) and 0 for the rest of the + * sequence. 
+ * + * Concretely, transforms to a tensor generation expression: + * + * tensor(d0[1],d1[128])(if(d1 < 1 + length_a + 1 + length_b + 1 + ..., 1, 0)) + * + */ + private ExpressionNode transformTokenAttentionMask(ReferenceNode feature, RankProfileTransformContext context) { + checkArguments(feature); + + TensorType type = createTensorType(feature.getName(), feature.getArguments().expressions().get(0)); + + // we need to add functions calculating the token lengths of the arguments + createTokenLengthFunctions(feature, context); + + List<ExpressionNode> tokenSequence = createTokenSequence(feature); + ExpressionNode lengthExpr = createLengthExpr(tokenSequence.size() - 1, tokenSequence); + ComparisonNode comparison = new ComparisonNode(new ReferenceNode("d1"), TruthOperator.SMALLER, lengthExpr); + ExpressionNode expr = new IfNode(comparison, ONE, ZERO); + return new TensorFunctionNode(Generate.bound(type, wrapScalar(expr))); + } + + private boolean shouldTransform(ReferenceNode feature, RankProfileTransformContext context) { + if (context.rankProfile().getFunctions().containsKey(feature.getName())) + return false; + if (feature.getArguments().size() < 2) + return false; + return true; + } + + private void checkArguments(ReferenceNode feature) { + for (int i = 1; i < feature.getArguments().size(); ++i) { + ExpressionNode arg = feature.getArguments().expressions().get(i); + if ( ! (arg instanceof ReferenceNode)) { + throw new IllegalArgumentException("Invalid argument " + i + " to " + feature.getName() + ": " + + "the argument must be a reference. 
Got " + arg.toString()); + } + } + } + + public static TensorType createTensorType(String featureName, ExpressionNode argument) { + try { + int length = Integer.parseInt(argument.toString()); + return new TensorType.Builder(TensorType.Value.FLOAT).indexed("d0", 1).indexed("d1", length).build(); + } catch (NumberFormatException ex) { + throw new IllegalArgumentException("Invalid argument to " + featureName + ": the first argument must be " + + "the length to the token sequence to generate. Got " + argument); + } + } + + private String lengthFunctionName(ReferenceNode arg) { + return "__token_length@" + arg.hashCode(); + } + + private List<ExpressionNode> createTokenSequence(ReferenceNode feature) { + List<ExpressionNode> sequence = new ArrayList<>(); + sequence.add(CLS); + for (int i = 1; i < feature.getArguments().size(); ++i) { + sequence.add(feature.getArguments().expressions().get(i)); + sequence.add(SEP); + } + return sequence; + } + + /** + * Adds functions for calculating the token length input. Assumes that + * token sequences are 0-padded, so this returns the number of non-0 + * tokens using a map and reduce-sum. + */ + private void createTokenLengthFunctions(ReferenceNode feature, RankProfileTransformContext context) { + for (int i = 1; i < feature.getArguments().size(); ++i) { + ExpressionNode arg = feature.getArguments().expressions().get(i); + if ( ! (arg instanceof ReferenceNode)) { + throw new IllegalArgumentException("Invalid argument " + i + " to " + feature.getName() + ": " + + "the argument must be a reference. Got " + arg.toString()); + } + ReferenceNode ref = (ReferenceNode) arg; + String functionName = lengthFunctionName(ref); + if ( ! context.rankProfile().getFunctions().containsKey(functionName)) { + context.rankProfile().addFunction(functionName, List.of(), "sum(map(" + ref + ", f(x)(x > 0)))", false); + } + } + } + + /** + * Recursively creates partial expressions of the form + * + * if (d1 < 1 + length_a, + * a{d0:(d1 - 1}, + * ... 
+ * + * for each part of the token sequence. CLS and SEP are added directly, + * and we create a slice expression for each argument to extract the + * actual tokens. + */ + private ExpressionNode createTokenSequenceExpr(int iter, List<ExpressionNode> sequence) { + ExpressionNode lengthExpr = createLengthExpr(iter, sequence); + ComparisonNode comparison = new ComparisonNode(new ReferenceNode("d1"), TruthOperator.SMALLER, lengthExpr); + + ExpressionNode trueExpr = sequence.get(iter); + if (sequence.get(iter) instanceof ReferenceNode) { + trueExpr = createTokenExtractExpr(iter, sequence); + } + + ExpressionNode falseExpr; + if (iter < sequence.size() - 1) { + falseExpr = createTokenSequenceExpr(iter + 1, sequence); + } else { + falseExpr = ZERO; // 0-padding for rest of sequence + } + + return new IfNode(comparison, trueExpr, falseExpr); + } + + /** + * Creates an expression for the length of the token sequence so far, where + * the lengths of CLS and SEP are 1, and the length of the arguments are + * calculated using auxiliary functions. 
+ */ + private ExpressionNode createLengthExpr(int iter, List<ExpressionNode> sequence) { + List<ExpressionNode> factors = new ArrayList<>(); + List<ArithmeticOperator> operators = new ArrayList<>(); + for (int i = 0; i < iter + 1; ++i) { + if (sequence.get(i) instanceof ConstantNode) { + factors.add(ONE); + } else if (sequence.get(i) instanceof ReferenceNode) { + factors.add(new ReferenceNode(lengthFunctionName((ReferenceNode) sequence.get(i)))); + } + if (i >= 1) { + operators.add(ArithmeticOperator.PLUS); + } + } + return new ArithmeticNode(factors, operators); + } + + /** + * Create the slice expression to extract the tokens from arguments + */ + private ExpressionNode createTokenExtractExpr(int iter, List<ExpressionNode> sequence) { + ExpressionNode expr; + if (iter >= 1) { + ExpressionNode lengthExpr = new EmbracedNode(createLengthExpr(iter - 1, sequence)); + expr = new EmbracedNode(new ArithmeticNode(new ReferenceNode("d1"), ArithmeticOperator.MINUS, lengthExpr)); + } else { + expr = new ReferenceNode("d1"); + } + List<Slice.DimensionValue<Reference>> slices = List.of(new Slice.DimensionValue<>("d0", wrapScalar(expr)) ); + TensorFunction<Reference> argument = new TensorFunctionNode.ExpressionTensorFunction(sequence.get(iter)); + return new TensorFunctionNode(new Slice<>(argument, slices)); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/expressiontransforms/XgboostFeatureConverter.java b/config-model/src/main/java/com/yahoo/schema/expressiontransforms/XgboostFeatureConverter.java new file mode 100644 index 00000000000..b05f9ba9166 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/expressiontransforms/XgboostFeatureConverter.java @@ -0,0 +1,61 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.expressiontransforms; + +import com.yahoo.path.Path; +import com.yahoo.searchlib.rankingexpression.rule.Arguments; +import com.yahoo.searchlib.rankingexpression.rule.CompositeNode; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; +import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode; +import com.yahoo.searchlib.rankingexpression.transform.ExpressionTransformer; +import com.yahoo.vespa.model.ml.ConvertedModel; +import com.yahoo.vespa.model.ml.FeatureArguments; + +import java.io.UncheckedIOException; +import java.util.HashMap; +import java.util.Map; + +/** + * Replaces instances of the xgboost(model-path) + * pseudofeature with the native Vespa ranking expression implementing + * the same computation. + * + * @author grace-lam + * @author bratseth + */ +public class XgboostFeatureConverter extends ExpressionTransformer<RankProfileTransformContext> { + + /** A cache of imported models indexed by model path. This avoids importing the same model multiple times. */ + private final Map<Path, ConvertedModel> convertedXGBoostModels = new HashMap<>(); + + @Override + public ExpressionNode transform(ExpressionNode node, RankProfileTransformContext context) { + if (node instanceof ReferenceNode) + return transformFeature((ReferenceNode) node, context); + else if (node instanceof CompositeNode) + return super.transformChildren((CompositeNode) node, context); + else + return node; + } + + private ExpressionNode transformFeature(ReferenceNode feature, RankProfileTransformContext context) { + if ( ! 
feature.getName().equals("xgboost")) return feature; + + try { + FeatureArguments arguments = asFeatureArguments(feature.getArguments()); + ConvertedModel convertedModel = + convertedXGBoostModels.computeIfAbsent(arguments.path(), + path -> ConvertedModel.fromSourceOrStore(path, true, context)); + return convertedModel.expression(arguments, context); + } catch (IllegalArgumentException | UncheckedIOException e) { + throw new IllegalArgumentException("Could not use XGBoost model from " + feature, e); + } + } + + private FeatureArguments asFeatureArguments(Arguments arguments) { + if (arguments.size() != 1) + throw new IllegalArgumentException("An xgboost node must take a single argument pointing to " + + "the xgboost model directory under [application]/models"); + return new FeatureArguments(arguments); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/fieldoperation/AliasOperation.java b/config-model/src/main/java/com/yahoo/schema/fieldoperation/AliasOperation.java new file mode 100644 index 00000000000..b5648dde4fc --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/fieldoperation/AliasOperation.java @@ -0,0 +1,42 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
// File: config-model/src/main/java/com/yahoo/schema/fieldoperation/AliasOperation.java
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.fieldoperation;

import com.yahoo.schema.document.SDField;

/**
 * Registers an alias in the alias-to-name map of a field.
 *
 * @author Einar M R Rosenvinge
 */
public class AliasOperation implements FieldOperation {

    private String aliasedName;
    private String alias;

    public AliasOperation(String aliasedName, String alias) {
        this.aliasedName = aliasedName;
        this.alias = alias;
    }

    public String getAliasedName() { return aliasedName; }

    public void setAliasedName(String aliasedName) { this.aliasedName = aliasedName; }

    public String getAlias() { return alias; }

    public void setAlias(String alias) { this.alias = alias; }

    /**
     * Maps the alias to the aliased name on the given field.
     * When no aliased name is set, the name of the field is stored in this operation and used.
     */
    public void apply(SDField field) {
        // the fallback is remembered in this operation, not just used for this call
        aliasedName = (aliasedName != null) ? aliasedName : field.getName();
        field.getAliasToName().put(alias, aliasedName);
    }

}

// File: config-model/src/main/java/com/yahoo/schema/fieldoperation/AttributeOperation.java
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.fieldoperation;

import com.yahoo.schema.document.Attribute;
import com.yahoo.schema.document.SDField;
import com.yahoo.tensor.TensorType;

import java.util.Locale;
import java.util.Optional;

/**
 * Applies attribute settings to an attribute of a field, creating the attribute if needed.
 * Settings which are not set (null or empty) are left untouched on the attribute.
 *
 * @author Einar M R Rosenvinge
 */
public class AttributeOperation implements FieldOperation, FieldOperationContainer {

    private final String name;
    private Boolean huge;
    private Boolean fastSearch;
    private Boolean fastAccess;
    private Boolean mutable;
    private Boolean paged;
    private Boolean enableBitVectors;
    private Boolean enableOnlyBitVector;
    //TODO: Remember sorting!!
    private boolean doAlias = false;
    private String alias;
    private String aliasedName;
    private Optional<TensorType> tensorType = Optional.empty();
    private Optional<String> distanceMetric = Optional.empty();

    public AttributeOperation(String name) {
        this.name = name;
    }

    @Override
    public void addOperation(FieldOperation op) {
        //TODO: Implement this method:

    }

    @Override
    public void applyOperations(SDField field) {
        //TODO: Implement this method:
    }

    @Override
    public String getName() { return name; }

    public Boolean getHuge() { return huge; }

    public void setHuge(Boolean huge) { this.huge = huge; }

    public Boolean getFastSearch() { return fastSearch; }

    public void setFastSearch(Boolean fastSearch) { this.fastSearch = fastSearch; }

    public Boolean getFastAccess() { return fastAccess; }

    public void setFastAccess(Boolean fastAccess) { this.fastAccess = fastAccess; }

    public void setMutable(Boolean mutable) { this.mutable = mutable; }

    public void setPaged(Boolean paged) { this.paged = paged; }

    public Boolean getEnableBitVectors() { return enableBitVectors; }

    public void setEnableBitVectors(Boolean enableBitVectors) { this.enableBitVectors = enableBitVectors; }

    public Boolean getEnableOnlyBitVector() { return enableOnlyBitVector; }

    public void setEnableOnlyBitVector(Boolean enableOnlyBitVector) { this.enableOnlyBitVector = enableOnlyBitVector; }

    public void setDoAlias(boolean doAlias) { this.doAlias = doAlias; }

    public String getAlias() { return alias; }

    public void setAlias(String alias) { this.alias = alias; }

    public void setAliasedName(String aliasedName) { this.aliasedName = aliasedName; }

    public void setTensorType(TensorType tensorType) { this.tensorType = Optional.of(tensorType); }

    public void setDistanceMetric(String value) { this.distanceMetric = Optional.of(value); }

    /**
     * Transfers all settings which are set in this operation to the attribute of the given field,
     * and registers the alias on the field if aliasing is enabled.
     */
    public void apply(SDField field) {
        Attribute target = resolveAttribute(field);

        if (huge != null) target.setHuge(huge);
        if (paged != null) target.setPaged(paged);
        if (fastSearch != null) target.setFastSearch(fastSearch);
        if (fastAccess != null) target.setFastAccess(fastAccess);
        if (mutable != null) target.setMutable(mutable);
        if (enableBitVectors != null) target.setEnableBitVectors(enableBitVectors);
        if (enableOnlyBitVector != null) target.setEnableOnlyBitVector(enableOnlyBitVector);
        if (doAlias) field.getAliasToName().put(alias, aliasedName);

        tensorType.ifPresent(type -> target.setTensorType(type));
        distanceMetric.ifPresent(metric -> {
            String enumName = metric.toUpperCase(Locale.ENGLISH);
            target.setDistanceMetric(Attribute.DistanceMetric.valueOf(enumName));
        });
    }

    /**
     * Finds the attribute to modify: the attribute named as the field when the field is a struct field
     * ending with this name, otherwise the attribute with this name, which is created and added if absent.
     */
    private Attribute resolveAttribute(SDField field) {
        if (attributeIsSuffixOfStructField(field.getName())) {
            Attribute structFieldAttribute = field.getAttributes().get(field.getName());
            if (structFieldAttribute != null) return structFieldAttribute;
        }

        Attribute existing = field.getAttributes().get(name);
        if (existing != null) return existing;

        Attribute created = new Attribute(name, field.getDataType());
        field.addAttribute(created);
        return created;
    }

    /** Returns whether the field name contains a dot and ends with the name of this. */
    private boolean attributeIsSuffixOfStructField(String fieldName) {
        return fieldName.contains(".") && fieldName.endsWith(name);
    }

}
+package com.yahoo.schema.fieldoperation; + +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.documentmodel.SummaryField; + +/** + * @author Einar M R Rosenvinge + */ +public class BoldingOperation implements FieldOperation { + + private final boolean bold; + + public BoldingOperation(boolean bold) { + this.bold = bold; + } + + public void apply(SDField field) { + SummaryField summaryField = field.getSummaryField(field.getName(), true); + summaryField.addSource(field.getName()); + summaryField.addDestination("default"); + summaryField.setTransform(bold ? summaryField.getTransform().bold() : summaryField.getTransform().unbold()); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/fieldoperation/DictionaryOperation.java b/config-model/src/main/java/com/yahoo/schema/fieldoperation/DictionaryOperation.java new file mode 100644 index 00000000000..a9a2ce7cbb1 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/fieldoperation/DictionaryOperation.java @@ -0,0 +1,41 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
// File: config-model/src/main/java/com/yahoo/schema/fieldoperation/DictionaryOperation.java
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

package com.yahoo.schema.fieldoperation;

import com.yahoo.schema.document.Case;
import com.yahoo.schema.document.Dictionary;
import com.yahoo.schema.document.SDField;

/**
 * Represents operations controlling setup of dictionary used for queries
 *
 * @author baldersheim
 */
public class DictionaryOperation implements FieldOperation {

    public enum Operation { HASH, BTREE, CASED, UNCASED }

    private final Operation operation;

    public DictionaryOperation(Operation type) {
        this.operation = type;
    }

    /** Updates the type or the casing of the dictionary of the field, depending on the operation of this. */
    @Override
    public void apply(SDField field) {
        Dictionary target = field.getOrSetDictionary();
        switch (operation) {
            case HASH:
                target.updateType(Dictionary.Type.HASH);
                return;
            case BTREE:
                target.updateType(Dictionary.Type.BTREE);
                return;
            case CASED:
                target.updateMatch(Case.CASED);
                return;
            case UNCASED:
                target.updateMatch(Case.UNCASED);
                return;
            default:
                throw new IllegalArgumentException("Unhandled operation " + operation);
        }
    }

}

// File: config-model/src/main/java/com/yahoo/schema/fieldoperation/FieldOperation.java
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.fieldoperation;

import com.yahoo.schema.document.SDField;

/**
 * An operation on a field.
 * Operations have a natural order of execution.
 *
 * @author Einar M R Rosenvinge
 */
public interface FieldOperation extends Comparable<FieldOperation> {

    /** Apply this operation on the given field */
    void apply(SDField field);

    /** Returns 0: operations are unordered unless an implementation overrides this. */
    @Override
    default int compareTo(FieldOperation other) {
        return 0; // no order by default
    }

}

// File: config-model/src/main/java/com/yahoo/schema/fieldoperation/FieldOperationContainer.java
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.fieldoperation;

import com.yahoo.schema.document.SDField;

/**
 * Something which collects field operations and applies them to a field.
 *
 * @author Einar M R Rosenvinge
 */
public interface FieldOperationContainer {

    /** Adds an operation */
    void addOperation(FieldOperation op);

    /** Apply all operations. Operations must be sorted in their natural order before applying each operation. */
    void applyOperations(SDField field);

    /** Returns the name of this container */
    String getName();

}
+package com.yahoo.schema.fieldoperation; + +import com.yahoo.schema.document.SDDocumentType; +import com.yahoo.schema.document.SDField; + +/** + * @author Einar M R Rosenvinge + */ +public class IdOperation implements FieldOperation { + + private SDDocumentType document; + private int fieldId; + + public SDDocumentType getDocument() { + return document; + } + + public void setDocument(SDDocumentType document) { + this.document = document; + } + + public int getFieldId() { + return fieldId; + } + + public void setFieldId(int fieldId) { + this.fieldId = fieldId; + } + + public void apply(SDField field) { + document.setFieldId(field, fieldId); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/fieldoperation/IndexOperation.java b/config-model/src/main/java/com/yahoo/schema/fieldoperation/IndexOperation.java new file mode 100644 index 00000000000..ab5ffa25f33 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/fieldoperation/IndexOperation.java @@ -0,0 +1,134 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.fieldoperation; + +import com.yahoo.schema.Index; +import com.yahoo.schema.Index.Type; +import com.yahoo.schema.document.BooleanIndexDefinition; +import com.yahoo.schema.document.HnswIndexParams; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.document.Stemming; + +import java.util.LinkedList; +import java.util.List; +import java.util.Optional; +import java.util.OptionalDouble; +import java.util.OptionalInt; +import java.util.OptionalLong; + +/** + * @author Einar M R Rosenvinge + */ +public class IndexOperation implements FieldOperation { + + private String indexName; + private Optional<Boolean> prefix = Optional.empty(); + private List<String> aliases = new LinkedList<>(); + private Optional<String> stemming = Optional.empty(); + private Optional<Type> type = Optional.empty(); + + private OptionalInt arity = OptionalInt.empty(); // For predicate data type + private OptionalLong lowerBound = OptionalLong.empty(); + private OptionalLong upperBound = OptionalLong.empty(); + private OptionalDouble densePostingListThreshold = OptionalDouble.empty(); + private Optional<Boolean> enableBm25 = Optional.empty(); + + private Optional<HnswIndexParams.Builder> hnswIndexParams = Optional.empty(); + + public String getIndexName() { + return indexName; + } + + public void setIndexName(String indexName) { + this.indexName = indexName; + } + + public boolean getPrefix() { + return prefix.get(); + } + + public void setPrefix(Boolean prefix) { + this.prefix = Optional.of(prefix); + } + + public void addAlias(String alias) { + aliases.add(alias); + } + + public String getStemming() { + return stemming.get(); + } + + public void setStemming(String stemming) { + this.stemming = Optional.of(stemming); + } + + public void apply(SDField field) { + Index index = field.getIndex(indexName); + + if (index == null) { + index = new Index(indexName); + field.addIndex(index); + } + + applyToIndex(index); + } + + public void applyToIndex(Index index) { 
+ if (prefix.isPresent()) { + index.setPrefix(prefix.get()); + } + for (String alias : aliases) { + index.addAlias(alias); + } + if (stemming.isPresent()) { + index.setStemming(Stemming.get(stemming.get())); + } + if (type.isPresent()) { + index.setType(type.get()); + } + if (arity.isPresent() || lowerBound.isPresent() || + upperBound.isPresent() || densePostingListThreshold.isPresent()) { + index.setBooleanIndexDefiniton( + new BooleanIndexDefinition(arity, lowerBound, upperBound, densePostingListThreshold)); + } + if (enableBm25.isPresent()) { + index.setInterleavedFeatures(enableBm25.get()); + } + if (hnswIndexParams.isPresent()) { + index.setHnswIndexParams(hnswIndexParams.get().build()); + } + } + + public Type getType() { + return type.get(); + } + + public void setType(Type type) { + this.type = Optional.of(type); + } + + public void setArity(int arity) { + this.arity = OptionalInt.of(arity); + } + + public void setLowerBound(long value) { + this.lowerBound = OptionalLong.of(value); + } + + public void setUpperBound(long value) { + this.upperBound = OptionalLong.of(value); + } + + public void setDensePostingListThreshold(double densePostingListThreshold) { + this.densePostingListThreshold = OptionalDouble.of(densePostingListThreshold); + } + + public void setEnableBm25(boolean value) { + enableBm25 = Optional.of(value); + } + + public void setHnswIndexParams(HnswIndexParams.Builder params) { + this.hnswIndexParams = Optional.of(params); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/fieldoperation/IndexingOperation.java b/config-model/src/main/java/com/yahoo/schema/fieldoperation/IndexingOperation.java new file mode 100644 index 00000000000..bb79a45831e --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/fieldoperation/IndexingOperation.java @@ -0,0 +1,61 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
// File: config-model/src/main/java/com/yahoo/schema/fieldoperation/IndexingOperation.java
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.fieldoperation;

import com.yahoo.language.Linguistics;
import com.yahoo.language.process.Embedder;
import com.yahoo.language.simple.SimpleLinguistics;
import com.yahoo.schema.document.SDField;
import com.yahoo.schema.parser.ParseException;
import com.yahoo.schema.parser.SimpleCharStream;
import com.yahoo.vespa.indexinglanguage.ScriptParserContext;
import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression;
import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression;
import com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig;
import com.yahoo.yolean.Exceptions;

import java.util.Map;

/**
 * Sets the indexing script of a field.
 *
 * @author Einar M R Rosenvinge
 */
public class IndexingOperation implements FieldOperation {

    private final ScriptExpression script;

    public IndexingOperation(ScriptExpression script) {
        this.script = script;
    }

    public ScriptExpression getScript() { return script; }

    /** Sets the script of this as the indexing script of the given field. */
    public void apply(SDField field) {
        field.setIndexingScript(script);
    }

    /** Creates an indexing operation which will use the simple linguistics implementation suitable for testing */
    public static IndexingOperation fromStream(SimpleCharStream input, boolean multiLine) throws ParseException {
        return fromStream(input, multiLine, new SimpleLinguistics(), Embedder.throwsOnUse.asMap());
    }

    /**
     * Parses an indexing script from the given stream.
     *
     * @param multiLine whether to parse a whole script, or a single statement which is wrapped in a script
     * @throws ParseException if the indexing language parser fails; the parser exception is set as the cause
     */
    public static IndexingOperation fromStream(SimpleCharStream input, boolean multiLine,
                                               Linguistics linguistics, Map<String, Embedder> embedders)
            throws ParseException {
        ScriptParserContext parserContext = new ScriptParserContext(linguistics, embedders);
        parserContext.setAnnotatorConfig(new AnnotatorConfig());
        parserContext.setInputStream(input);
        try {
            ScriptExpression parsed = multiLine
                                      ? ScriptExpression.newInstance(parserContext)
                                      : new ScriptExpression(StatementExpression.newInstance(parserContext));
            return new IndexingOperation(parsed);
        } catch (com.yahoo.vespa.indexinglanguage.parser.ParseException e) {
            ParseException wrapped = new ParseException("Could not parse indexing statement: " + Exceptions.toMessageString(e));
            wrapped.initCause(e);
            throw wrapped;
        }
    }

}

// File: config-model/src/main/java/com/yahoo/schema/fieldoperation/MatchOperation.java
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.fieldoperation;

import com.yahoo.schema.document.Case;
import com.yahoo.schema.document.MatchAlgorithm;
import com.yahoo.schema.document.MatchType;
import com.yahoo.schema.document.SDField;

/**
 * Applies match settings to a field. Settings which are not set (null) are left untouched.
 *
 * @author Einar M R Rosenvinge
 */
public class MatchOperation implements FieldOperation {

    private MatchType matchingType;
    private Case casing;
    private Integer gramSize;
    private Integer maxLength;
    private MatchAlgorithm matchingAlgorithm;
    private String exactMatchTerminator;

    public void setMatchingType(MatchType matchingType) { this.matchingType = matchingType; }

    public void setGramSize(Integer gramSize) { this.gramSize = gramSize; }

    public void setMaxLength(Integer maxLength) { this.maxLength = maxLength; }

    public void setMatchingAlgorithm(MatchAlgorithm matchingAlgorithm) { this.matchingAlgorithm = matchingAlgorithm; }

    public void setExactMatchTerminator(String exactMatchTerminator) { this.exactMatchTerminator = exactMatchTerminator; }

    public void setCase(Case casing) { this.casing = casing; }

    /** Transfers the settings which are set in this to the given field and its matching settings. */
    public void apply(SDField field) {
        if (matchingType != null) field.setMatchingType(matchingType);
        if (casing != null) field.setMatchingCase(casing);
        if (gramSize != null) field.getMatching().setGramSize(gramSize);
        if (maxLength != null) field.getMatching().maxLength(maxLength);
        if (matchingAlgorithm != null) field.setMatchingAlgorithm(matchingAlgorithm);
        if (exactMatchTerminator != null) field.getMatching().setExactMatchTerminator(exactMatchTerminator);
    }

}

// File: config-model/src/main/java/com/yahoo/schema/fieldoperation/NormalizingOperation.java
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.fieldoperation;

import com.yahoo.schema.document.NormalizeLevel;
import com.yahoo.schema.document.SDField;

/**
 * Sets the normalizing level of a field from a textual setting.
 *
 * @author Einar M R Rosenvinge
 */
public class NormalizingOperation implements FieldOperation {

    private final NormalizeLevel.Level level;

    /**
     * @param setting one of "none", "codepoint", "lowercase", "accent" or "all"
     * @throws IllegalArgumentException if the setting is anything else
     */
    public NormalizingOperation(String setting) {
        this.level = toLevel(setting);
    }

    /** Maps a setting to a level. Both "accent" and "all" map to the ACCENT level. */
    private static NormalizeLevel.Level toLevel(String setting) {
        if ("none".equals(setting)) return NormalizeLevel.Level.NONE;
        if ("codepoint".equals(setting)) return NormalizeLevel.Level.CODEPOINT;
        if ("lowercase".equals(setting)) return NormalizeLevel.Level.LOWERCASE;
        if ("accent".equals(setting) || "all".equals(setting)) return NormalizeLevel.Level.ACCENT;
        throw new IllegalArgumentException("invalid normalizing setting: " + setting);
    }

    /** Sets a new normalize level, created from the level of this and the flag true, on the given field. */
    public void apply(SDField field) {
        field.setNormalizing(new NormalizeLevel(level, true));
    }

}
b/config-model/src/main/java/com/yahoo/schema/fieldoperation/QueryCommandOperation.java @@ -0,0 +1,25 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.fieldoperation; + +import com.yahoo.schema.document.SDField; + +import java.util.List; + +/** + * @author Einar M R Rosenvinge + */ +public class QueryCommandOperation implements FieldOperation { + + private final List<String> queryCommands = new java.util.ArrayList<>(0); + + public void addQueryCommand(String name) { + queryCommands.add(name); + } + + public void apply(SDField field) { + for (String command : queryCommands) { + field.addQueryCommand(command); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/fieldoperation/RankOperation.java b/config-model/src/main/java/com/yahoo/schema/fieldoperation/RankOperation.java new file mode 100644 index 00000000000..bbc6208ba1b --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/fieldoperation/RankOperation.java @@ -0,0 +1,36 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.fieldoperation; + +import com.yahoo.schema.document.SDField; + +/** + * @author Einar M R Rosenvinge + */ +public class RankOperation implements FieldOperation { + + private Boolean literal = null; + private Boolean filter = null; + private Boolean normal = null; + + public Boolean getLiteral() { return literal; } + public void setLiteral(Boolean literal) { this.literal = literal; } + + public Boolean getFilter() { return filter; } + public void setFilter(Boolean filter) { this.filter = filter; } + + public Boolean getNormal() { return normal; } + public void setNormal(Boolean n) { this.normal = n; } + + public void apply(SDField field) { + if (literal != null) { + field.getRanking().setLiteral(literal); + } + if (filter != null) { + field.getRanking().setFilter(filter); + } + if (normal != null) { + field.getRanking().setNormal(normal); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/fieldoperation/RankTypeOperation.java b/config-model/src/main/java/com/yahoo/schema/fieldoperation/RankTypeOperation.java new file mode 100644 index 00000000000..4a43a907549 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/fieldoperation/RankTypeOperation.java @@ -0,0 +1,43 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.fieldoperation; + +import com.yahoo.schema.document.RankType; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.Index; + +/** + * @author Einar M R Rosenvinge + */ +public class RankTypeOperation implements FieldOperation { + + private String indexName; + private RankType type; + + public String getIndexName() { + return indexName; + } + public void setIndexName(String indexName) { + this.indexName = indexName; + } + + public RankType getType() { + return type; + } + public void setType(RankType type) { + this.type = type; + } + + public void apply(SDField field) { + if (indexName == null) { + field.setRankType(type); // Set default if the index is not specified. + } else { + Index index = field.getIndex(indexName); + if (index == null) { + index = new Index(indexName); + field.addIndex(index); + } + index.setRankType(type); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/fieldoperation/SortingOperation.java b/config-model/src/main/java/com/yahoo/schema/fieldoperation/SortingOperation.java new file mode 100644 index 00000000000..2e981a893ce --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/fieldoperation/SortingOperation.java @@ -0,0 +1,93 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.fieldoperation; + +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.document.Sorting; + +/** + * @author Einar M R Rosenvinge + */ +public class SortingOperation implements FieldOperation { + + private final String attributeName; + private Boolean ascending; + private Boolean descending; + private Sorting.Function function; + private Sorting.Strength strength; + private String locale; + + public SortingOperation(String attributeName) { + this.attributeName = attributeName; + } + + public String getAttributeName() { + return attributeName; + } + + public Boolean getAscending() { + return ascending; + } + + public void setAscending() { + this.ascending = true; + } + + public Boolean getDescending() { + return descending; + } + + public void setDescending() { + this.descending = true; + } + + public Sorting.Function getFunction() { + return function; + } + + public void setFunction(Sorting.Function function) { + this.function = function; + } + + public Sorting.Strength getStrength() { + return strength; + } + + public void setStrength(Sorting.Strength strength) { + this.strength = strength; + } + + public String getLocale() { + return locale; + } + + public void setLocale(String locale) { + this.locale = locale; + } + + public void apply(SDField field) { + Attribute attribute = field.getAttributes().get(attributeName); + if (attribute == null) { + attribute = new Attribute(attributeName, field.getDataType()); + field.addAttribute(attribute); + } + Sorting sorting = attribute.getSorting(); + + if (ascending != null) { + sorting.setAscending(); + } + if (descending != null) { + sorting.setDescending(); + } + if (function != null) { + sorting.setFunction(function); + } + if (strength != null) { + sorting.setStrength(strength); + } + if (locale != null) { + sorting.setLocale(locale); + } + } + +} diff --git 
a/config-model/src/main/java/com/yahoo/schema/fieldoperation/StemmingOperation.java b/config-model/src/main/java/com/yahoo/schema/fieldoperation/StemmingOperation.java new file mode 100644 index 00000000000..a4bb00b0d07 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/fieldoperation/StemmingOperation.java @@ -0,0 +1,26 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.fieldoperation; + +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.document.Stemming; + +/** + * @author Einar M R Rosenvinge + */ +public class StemmingOperation implements FieldOperation { + + private String setting; + + public String getSetting() { + return setting; + } + + public void setSetting(String setting) { + this.setting = setting; + } + + public void apply(SDField field) { + field.setStemming(Stemming.get(setting)); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/fieldoperation/StructFieldOperation.java b/config-model/src/main/java/com/yahoo/schema/fieldoperation/StructFieldOperation.java new file mode 100644 index 00000000000..ac80f5023fc --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/fieldoperation/StructFieldOperation.java @@ -0,0 +1,56 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.fieldoperation; + +import com.yahoo.schema.document.SDField; + +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; +import java.util.ListIterator; + +/** + * @author Einar M R Rosenvinge + */ +public class StructFieldOperation implements FieldOperation, FieldOperationContainer { + + private final String structFieldName; + private final List<FieldOperation> pendingOperations = new LinkedList<>(); + + public StructFieldOperation(String structFieldName) { + this.structFieldName = structFieldName; + } + + public void apply(SDField field) { + SDField structField = field.getStructField(structFieldName); + if (structField == null ) { + throw new IllegalArgumentException("Struct field '" + structFieldName + "' has not been defined in struct " + + "for field '" + field.getName() + "'."); + } + + applyOperations(structField); + } + + @Override + public void addOperation(FieldOperation op) { + pendingOperations.add(op); + } + + @Override + public void applyOperations(SDField field) { + if (pendingOperations.isEmpty()) return; + + Collections.sort(pendingOperations); + ListIterator<FieldOperation> ops = pendingOperations.listIterator(); + while (ops.hasNext()) { + FieldOperation op = ops.next(); + ops.remove(); + op.apply(field); + } + } + + @Override + public String getName() { + return structFieldName; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/fieldoperation/SummaryInFieldLongOperation.java b/config-model/src/main/java/com/yahoo/schema/fieldoperation/SummaryInFieldLongOperation.java new file mode 100644 index 00000000000..4576b7a34fe --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/fieldoperation/SummaryInFieldLongOperation.java @@ -0,0 +1,70 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.fieldoperation; + +import com.yahoo.document.DataType; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.documentmodel.SummaryField; + +import java.util.Iterator; +import java.util.Set; + +/** + * @author Einar M R Rosenvinge + */ +public class SummaryInFieldLongOperation extends SummaryInFieldOperation { + + private DataType type; + private Boolean bold; + private Set<String> destinations = new java.util.LinkedHashSet<>(); + + public SummaryInFieldLongOperation(String name) { + super(name); + } + + public SummaryInFieldLongOperation() { + super(null); + } + + public void setType(DataType type) { + this.type = type; + } + + public void setBold(Boolean bold) { + this.bold = bold; + } + + public void addDestination(String destination) { + destinations.add(destination); + } + + public Iterator<String> destinationIterator() { + return destinations.iterator(); + } + + public void apply(SDField field) { + if (type == null) { + type = field.getDataType(); + } + SummaryField summary = new SummaryField(name, type); + applyToSummary(summary); + field.addSummaryField(summary); + } + + public void applyToSummary(SummaryField summary) { + if (transform != null) { + summary.setTransform(transform); + } + + if (bold != null) { + summary.setTransform(bold ? summary.getTransform().bold() : summary.getTransform().unbold()); + } + + for (SummaryField.Source source : sources) { + summary.addSource(source); + } + + for (String destination : destinations) { + summary.addDestination(destination); + } + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/fieldoperation/SummaryInFieldOperation.java b/config-model/src/main/java/com/yahoo/schema/fieldoperation/SummaryInFieldOperation.java new file mode 100644 index 00000000000..dd06d920aac --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/fieldoperation/SummaryInFieldOperation.java @@ -0,0 +1,46 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.schema.fieldoperation; + +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; + +import java.util.Set; + +/** + * @author Einar M R Rosenvinge + */ +public abstract class SummaryInFieldOperation implements FieldOperation { + + protected String name; + protected SummaryTransform transform; + protected Set<SummaryField.Source> sources = new java.util.LinkedHashSet<>(); + + public SummaryInFieldOperation(String name) { + this.name = name; + } + + public void setName(String name) { + this.name = name; + } + + public String getName() { + return name; + } + + public void setTransform(SummaryTransform transform) { + this.transform = transform; + } + + public SummaryTransform getTransform() { + return transform; + } + + public void addSource(String name) { + sources.add(new SummaryField.Source(name)); + } + + public void addSource(SummaryField.Source source) { + sources.add(source); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/fieldoperation/SummaryInFieldShortOperation.java b/config-model/src/main/java/com/yahoo/schema/fieldoperation/SummaryInFieldShortOperation.java new file mode 100644 index 00000000000..ccc22719f25 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/fieldoperation/SummaryInFieldShortOperation.java @@ -0,0 +1,32 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.fieldoperation; + +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.documentmodel.SummaryField; + +/** + * @author Einar M R Rosenvinge + */ +public class SummaryInFieldShortOperation extends SummaryInFieldOperation { + + public SummaryInFieldShortOperation(String name) { + super(name); + } + + public void apply(SDField field) { + SummaryField ret = field.getSummaryField(name); + if (ret == null) { + ret = new SummaryField(name, field.getDataType()); + ret.addSource(field.getName()); + ret.addDestination("default"); + } + ret.setImplicit(false); + + ret.setTransform(transform); + for (SummaryField.Source source : sources) { + ret.addSource(source); + } + field.addSummaryField(ret); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/fieldoperation/SummaryToOperation.java b/config-model/src/main/java/com/yahoo/schema/fieldoperation/SummaryToOperation.java new file mode 100644 index 00000000000..2d9cf3acf4e --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/fieldoperation/SummaryToOperation.java @@ -0,0 +1,41 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.fieldoperation; + +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.documentmodel.SummaryField; + +import java.util.Set; + +/** + * @author Einar M R Rosenvinge + */ +public class SummaryToOperation implements FieldOperation { + + private Set<String> destinations = new java.util.LinkedHashSet<>(); + private String name; + + public void setName(String name) { + this.name = name; + } + + public void addDestination(String destination) { + destinations.add(destination); + } + + public void apply(SDField field) { + SummaryField summary; + summary = field.getSummaryField(name); + if (summary == null) { + summary = new SummaryField(field); + summary.addSource(field.getName()); + summary.addDestination("default"); + field.addSummaryField(summary); + } + summary.setImplicit(false); + + for (String destination : destinations) { + summary.addDestination(destination); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/fieldoperation/WeightOperation.java b/config-model/src/main/java/com/yahoo/schema/fieldoperation/WeightOperation.java new file mode 100644 index 00000000000..57c28d9bdb5 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/fieldoperation/WeightOperation.java @@ -0,0 +1,25 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.fieldoperation; + +import com.yahoo.schema.document.SDField; + +/** + * @author Einar M R Rosenvinge + */ +public class WeightOperation implements FieldOperation { + + private int weight; + + public int getWeight() { + return weight; + } + + public void setWeight(int weight) { + this.weight = weight; + } + + public void apply(SDField field) { + field.setWeight(weight); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/fieldoperation/WeightedSetOperation.java b/config-model/src/main/java/com/yahoo/schema/fieldoperation/WeightedSetOperation.java new file mode 100644 index 00000000000..8fb0cc9fcdb --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/fieldoperation/WeightedSetOperation.java @@ -0,0 +1,70 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.fieldoperation; + +import com.yahoo.schema.document.Attribute; +import com.yahoo.document.DataType; +import com.yahoo.schema.document.SDField; +import com.yahoo.document.WeightedSetDataType; + +/** + * @author Einar M R Rosenvinge + */ +public class WeightedSetOperation implements FieldOperation { + + private Boolean createIfNonExistent; + private Boolean removeIfZero; + + public Boolean getCreateIfNonExistent() { + return createIfNonExistent; + } + + public void setCreateIfNonExistent(Boolean createIfNonExistent) { + this.createIfNonExistent = createIfNonExistent; + } + + public Boolean getRemoveIfZero() { + return removeIfZero; + } + + public void setRemoveIfZero(Boolean removeIfZero) { + this.removeIfZero = removeIfZero; + } + + public void apply(SDField field) { + WeightedSetDataType ctype = (WeightedSetDataType) field.getDataType(); + + if (createIfNonExistent != null) { + field.setDataType(DataType.getWeightedSet(ctype.getNestedType(), createIfNonExistent, + ctype.removeIfZero())); + } + + ctype = (WeightedSetDataType) field.getDataType(); + if (removeIfZero != null) { + 
field.setDataType(DataType.getWeightedSet(ctype.getNestedType(), + ctype.createIfNonExistent(), removeIfZero)); + } + + ctype = (WeightedSetDataType) field.getDataType(); + for (Object o : field.getAttributes().values()) { + Attribute attribute = (Attribute) o; + attribute.setRemoveIfZero(ctype.removeIfZero()); + attribute.setCreateIfNonExistent(ctype.createIfNonExistent()); + } + } + + @Override + public int compareTo(FieldOperation other) { + // this operation should be executed first because it modifies the type of weighted sets, and other + // operation depends on the type of the weighted set + return -1; + } + + @Override + public String toString() { + return "WeightedSetOperation{" + + "createIfNonExistent=" + createIfNonExistent + + ", removeIfZero=" + removeIfZero + + '}'; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java new file mode 100644 index 00000000000..fa656b72530 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java @@ -0,0 +1,331 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.parser; + +import com.yahoo.document.DataType; +import com.yahoo.document.DataTypeName; +import com.yahoo.schema.parser.ConvertParsedTypes.TypeResolver; +import com.yahoo.schema.Index; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.BooleanIndexDefinition; +import com.yahoo.schema.document.Case; +import com.yahoo.schema.document.Dictionary; +import com.yahoo.schema.document.NormalizeLevel; +import com.yahoo.schema.document.RankType; +import com.yahoo.schema.document.SDDocumentType; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.document.Sorting; +import com.yahoo.schema.document.annotation.SDAnnotationType; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; + +import java.util.Locale; + +/** + * Helper for converting ParsedField etc to SDField with settings + * + * @author arnej27959 + **/ +public class ConvertParsedFields { + + private final TypeResolver context; + + ConvertParsedFields(TypeResolver context) { + this.context = context; + } + + static void convertMatchSettings(SDField field, ParsedMatchSettings parsed) { + parsed.getMatchType().ifPresent(matchingType -> field.setMatchingType(matchingType)); + parsed.getMatchCase().ifPresent(casing -> field.setMatchingCase(casing)); + parsed.getGramSize().ifPresent(gramSize -> field.getMatching().setGramSize(gramSize)); + parsed.getMaxLength().ifPresent(maxLength -> field.getMatching().maxLength(maxLength)); + parsed.getMatchAlgorithm().ifPresent + (matchingAlgorithm -> field.setMatchingAlgorithm(matchingAlgorithm)); + parsed.getExactTerminator().ifPresent + (exactMatchTerminator -> field.getMatching().setExactMatchTerminator(exactMatchTerminator)); + } + + void convertSorting(SDField field, ParsedSorting parsed, String name) { + Attribute attribute = field.getAttributes().get(name); + if (attribute == null) { + attribute = new Attribute(name, 
field.getDataType()); + field.addAttribute(attribute); + } + Sorting sorting = attribute.getSorting(); + if (parsed.getAscending()) { + sorting.setAscending(); + } else { + sorting.setDescending(); + } + parsed.getFunction().ifPresent(function -> sorting.setFunction(function)); + parsed.getStrength().ifPresent(strength -> sorting.setStrength(strength)); + parsed.getLocale().ifPresent(locale -> sorting.setLocale(locale)); + } + + void convertAttribute(SDField field, ParsedAttribute parsed) { + String name = parsed.name(); + String fieldName = field.getName(); + Attribute attribute = null; + if (fieldName.endsWith("." + name)) { + attribute = field.getAttributes().get(field.getName()); + } + if (attribute == null) { + attribute = field.getAttributes().get(name); + if (attribute == null) { + attribute = new Attribute(name, field.getDataType()); + field.addAttribute(attribute); + } + } + attribute.setHuge(parsed.getHuge()); + attribute.setPaged(parsed.getPaged()); + attribute.setFastSearch(parsed.getFastSearch()); + if (parsed.getFastRank()) { + attribute.setFastRank(parsed.getFastRank()); + } + attribute.setFastAccess(parsed.getFastAccess()); + attribute.setMutable(parsed.getMutable()); + attribute.setEnableBitVectors(parsed.getEnableBitVectors()); + attribute.setEnableOnlyBitVector(parsed.getEnableOnlyBitVector()); + + // attribute.setTensorType(?) 
+ + for (String alias : parsed.getAliases()) { + field.getAliasToName().put(alias, parsed.lookupAliasedFrom(alias)); + } + var distanceMetric = parsed.getDistanceMetric(); + if (distanceMetric.isPresent()) { + String upper = distanceMetric.get().toUpperCase(Locale.ENGLISH); + attribute.setDistanceMetric(Attribute.DistanceMetric.valueOf(upper)); + } + var sorting = parsed.getSorting(); + if (sorting.isPresent()) { + convertSorting(field, sorting.get(), name); + } + } + + private void convertRankType(SDField field, String indexName, String rankType) { + RankType type = RankType.fromString(rankType); + if (indexName == null || indexName.equals("")) { + field.setRankType(type); // Set default if the index is not specified. + } else { + Index index = field.getIndex(indexName); + if (index == null) { + index = new Index(indexName); + field.addIndex(index); + } + index.setRankType(type); + } + } + + private void convertNormalizing(SDField field, String setting) { + NormalizeLevel.Level level; + if ("none".equals(setting)) { + level = NormalizeLevel.Level.NONE; + } else if ("codepoint".equals(setting)) { + level = NormalizeLevel.Level.CODEPOINT; + } else if ("lowercase".equals(setting)) { + level = NormalizeLevel.Level.LOWERCASE; + } else if ("accent".equals(setting)) { + level = NormalizeLevel.Level.ACCENT; + } else if ("all".equals(setting)) { + level = NormalizeLevel.Level.ACCENT; + } else { + throw new IllegalArgumentException("invalid normalizing setting: " + setting); + } + field.setNormalizing(new NormalizeLevel(level, true)); + } + + // from grammar, things that can be inside struct-field block + private void convertCommonFieldSettings(SDField field, ParsedField parsed) { + convertMatchSettings(field, parsed.matchSettings()); + var indexing = parsed.getIndexing(); + if (indexing.isPresent()) { + field.setIndexingScript(indexing.get().script()); + } + parsed.getWeight().ifPresent(value -> field.setWeight(value)); + parsed.getStemming().ifPresent(value -> 
field.setStemming(value)); + parsed.getNormalizing().ifPresent(value -> convertNormalizing(field, value)); + for (var attribute : parsed.getAttributes()) { + convertAttribute(field, attribute); + } + for (var summaryField : parsed.getSummaryFields()) { + var dataType = field.getDataType(); + var otherType = summaryField.getType(); + if (otherType != null) { + dataType = context.resolveType(otherType); + } + convertSummaryField(field, summaryField, dataType); + } + for (String command : parsed.getQueryCommands()) { + field.addQueryCommand(command); + } + for (var structField : parsed.getStructFields()) { + convertStructField(field, structField); + } + if (parsed.hasLiteral()) { + field.getRanking().setLiteral(true); + } + if (parsed.hasFilter()) { + field.getRanking().setFilter(true); + } + if (parsed.hasNormal()) { + field.getRanking().setNormal(true); + } + } + + private void convertStructField(SDField field, ParsedField parsed) { + SDField structField = field.getStructField(parsed.name()); + if (structField == null ) { + throw new IllegalArgumentException("Struct field '" + parsed.name() + "' has not been defined in struct " + + "for field '" + field.getName() + "'."); + } + convertCommonFieldSettings(structField, parsed); + } + + private void convertExtraFieldSettings(SDField field, ParsedField parsed) { + String name = parsed.name(); + for (var dictOp : parsed.getDictionaryOptions()) { + var dictionary = field.getOrSetDictionary(); + switch (dictOp) { + case HASH: dictionary.updateType(Dictionary.Type.HASH); break; + case BTREE: dictionary.updateType(Dictionary.Type.BTREE); break; + case CASED: dictionary.updateMatch(Case.CASED); break; + case UNCASED: dictionary.updateMatch(Case.UNCASED); break; + } + } + for (var index : parsed.getIndexes()) { + convertIndex(field, index); + } + for (var alias : parsed.getAliases()) { + field.getAliasToName().put(alias, parsed.lookupAliasedFrom(alias)); + } + parsed.getRankTypes().forEach((indexName, rankType) -> 
convertRankType(field, indexName, rankType)); + parsed.getSorting().ifPresent(sortInfo -> convertSorting(field, sortInfo, name)); + if (parsed.hasBolding()) { + // TODO must it be so ugly: + SummaryField summaryField = field.getSummaryField(name, true); + summaryField.addSource(name); + summaryField.addDestination("default"); + summaryField.setTransform(summaryField.getTransform().bold()); + } + } + + static void convertSummaryFieldSettings(SummaryField summary, ParsedSummaryField parsed) { + var transform = SummaryTransform.NONE; + if (parsed.getMatchedElementsOnly()) { + transform = SummaryTransform.MATCHED_ELEMENTS_FILTER; + } else if (parsed.getDynamic()) { + transform = SummaryTransform.DYNAMICTEASER; + } + if (parsed.getBolded()) { + transform = transform.bold(); + } + summary.setTransform(transform); + for (String source : parsed.getSources()) { + summary.addSource(source); + } + for (String destination : parsed.getDestinations()) { + summary.addDestination(destination); + } + summary.setImplicit(false); + } + + private void convertSummaryField(SDField field, ParsedSummaryField parsed, DataType type) { + var summary = new SummaryField(parsed.name(), type); + convertSummaryFieldSettings(summary, parsed); + summary.addDestination("default"); + if (parsed.getSources().isEmpty()) { + summary.addSource(field.getName()); + } + field.addSummaryField(summary); + } + + private void convertIndex(SDField field, ParsedIndex parsed) { + String indexName = parsed.name(); + Index index = field.getIndex(indexName); + if (index == null) { + index = new Index(indexName); + field.addIndex(index); + } + convertIndexSettings(index, parsed); + } + + private void convertIndexSettings(Index index, ParsedIndex parsed) { + parsed.getPrefix().ifPresent(prefix -> index.setPrefix(prefix)); + for (String alias : parsed.getAliases()) { + index.addAlias(alias); + } + parsed.getStemming().ifPresent(stemming -> index.setStemming(stemming)); + var arity = parsed.getArity(); + var lowerBound = 
parsed.getLowerBound(); + var upperBound = parsed.getUpperBound(); + var densePostingListThreshold = parsed.getDensePostingListThreshold(); + if (arity.isPresent() || + lowerBound.isPresent() || + upperBound.isPresent() || + densePostingListThreshold.isPresent()) + { + var bid = new BooleanIndexDefinition(arity, lowerBound, upperBound, densePostingListThreshold); + index.setBooleanIndexDefiniton(bid); + } + parsed.getEnableBm25().ifPresent(enableBm25 -> index.setInterleavedFeatures(enableBm25)); + parsed.getHnswIndexParams().ifPresent + (hnswIndexParams -> index.setHnswIndexParams(hnswIndexParams)); + } + + SDField convertDocumentField(Schema schema, SDDocumentType document, ParsedField parsed) { + String name = parsed.name(); + DataType dataType = context.resolveType(parsed.getType()); + var field = new SDField(document, name, dataType); + convertCommonFieldSettings(field, parsed); + convertExtraFieldSettings(field, parsed); + document.addField(field); + return field; + } + + void convertExtraField(Schema schema, ParsedField parsed) { + String name = parsed.name(); + DataType dataType = context.resolveType(parsed.getType()); + var field = new SDField(schema.getDocument(), name, dataType); + convertCommonFieldSettings(field, parsed); + convertExtraFieldSettings(field, parsed); + schema.addExtraField(field); + } + + void convertExtraIndex(Schema schema, ParsedIndex parsed) { + Index index = new Index(parsed.name()); + convertIndexSettings(index, parsed); + schema.addIndex(index); + } + + SDDocumentType convertStructDeclaration(Schema schema, SDDocumentType document, ParsedStruct parsed) { + // TODO - can we cleanup this mess + var structProxy = new SDDocumentType(parsed.name(), schema); + for (var parsedField : parsed.getFields()) { + var fieldType = context.resolveType(parsedField.getType()); + var field = new SDField(document, parsedField.name(), fieldType); + convertCommonFieldSettings(field, parsedField); + structProxy.addField(field); + if 
(parsedField.hasIdOverride()) { + structProxy.setFieldId(field, parsedField.idOverride()); + } + } + for (String inherit : parsed.getInherited()) { + structProxy.inherit(new DataTypeName(inherit)); + } + structProxy.setStruct(context.resolveStruct(parsed)); + return structProxy; + } + + void convertAnnotation(Schema schema, SDDocumentType document, ParsedAnnotation parsed) { + SDAnnotationType annType = context.resolveAnnotation(parsed.name()); + var withStruct = parsed.getStruct(); + if (withStruct.isPresent()) { + ParsedStruct parsedStruct = withStruct.get(); + SDDocumentType structProxy = convertStructDeclaration(schema, document, parsedStruct); + structProxy.setStruct(context.resolveStruct(parsedStruct)); + annType.setSdDocType(structProxy); + } + document.addAnnotation(annType); + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedRanking.java b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedRanking.java new file mode 100644 index 00000000000..bd628779b24 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedRanking.java @@ -0,0 +1,124 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.schema.parser;

import com.yahoo.schema.RankProfile;
import com.yahoo.schema.RankProfileRegistry;
import com.yahoo.schema.Schema;
import com.yahoo.schema.document.RankType;

import java.util.List;

/**
 * Copies the settings collected in a ParsedRankProfile onto a RankProfile
 * and adds the resulting profile to the rank profile registry.
 *
 * @author arnej27959
 */
public class ConvertParsedRanking {

    private final RankProfileRegistry rankProfileRegistry;

    // for unit test
    ConvertParsedRanking() {
        this(new RankProfileRegistry());
    }

    public ConvertParsedRanking(RankProfileRegistry rankProfileRegistry) {
        this.rankProfileRegistry = rankProfileRegistry;
    }

    /**
     * Picks the profile object to fill in: the profile named "default" is
     * looked up in the registry, any other name gets a new RankProfile.
     */
    private RankProfile makeRankProfile(Schema schema, String name) {
        return name.equals("default")
                ? rankProfileRegistry.get(schema, "default")
                : new RankProfile(name, schema, rankProfileRegistry);
    }

    /**
     * Transfers everything declared in the parsed profile to a rank profile
     * for the given schema, then adds that profile to the registry.
     * The helpers are invoked in the same order the settings were originally applied.
     */
    void convertRankProfile(Schema schema, ParsedRankProfile parsed) {
        RankProfile target = makeRankProfile(schema, parsed.name());
        copyDeclarations(parsed, target);
        copyLimits(parsed, target);
        copyPhaseSettings(parsed, target);
        copyFeatures(parsed, target);
        copyFieldSettings(parsed, target);

        // always?
        rankProfileRegistry.add(target);
    }

    /** Inheritance, strictness, constants, onnx models, inputs and functions. */
    private void copyDeclarations(ParsedRankProfile parsed, RankProfile target) {
        for (String parentName : parsed.getInherited()) {
            target.inherit(parentName);
        }

        parsed.isStrict().ifPresent(strict -> target.setStrict(strict));

        for (var constant : parsed.getConstants().values()) {
            target.add(constant);
        }
        for (var onnxModel : parsed.getOnnxModels()) {
            target.add(onnxModel);
        }
        for (var input : parsed.getInputs().entrySet()) {
            target.addInput(input.getKey(), input.getValue());
        }
        for (var function : parsed.getFunctions()) {
            String functionName = function.name();
            List<String> arguments = function.getParameters();
            String body = function.getExpression();
            boolean inlined = function.getInline();
            target.addFunction(functionName, arguments, body, inlined);
        }
    }

    /** Optional numeric limits and counts; each is only set when present in the parsed profile. */
    private void copyLimits(ParsedRankProfile parsed, RankProfile target) {
        parsed.getRankScoreDropLimit().ifPresent(limit -> target.setRankScoreDropLimit(limit));
        parsed.getTermwiseLimit().ifPresent(limit -> target.setTermwiseLimit(limit));
        parsed.getPostFilterThreshold().ifPresent(threshold -> target.setPostFilterThreshold(threshold));
        parsed.getApproximateThreshold().ifPresent(threshold -> target.setApproximateThreshold(threshold));
        parsed.getKeepRankCount().ifPresent(count -> target.setKeepRankCount(count));
        parsed.getMinHitsPerThread().ifPresent(count -> target.setMinHitsPerThread(count));
        parsed.getNumSearchPartitions().ifPresent(count -> target.setNumSearchPartitions(count));
        parsed.getNumThreadsPerSearch().ifPresent(count -> target.setNumThreadsPerSearch(count));
        parsed.getReRankCount().ifPresent(count -> target.setRerankCount(count));
    }

    /** Match-phase settings and the first/second phase expressions, when present. */
    private void copyPhaseSettings(ParsedRankProfile parsed, RankProfile target) {
        parsed.getMatchPhaseSettings().ifPresent(settings -> target.setMatchPhaseSettings(settings));

        parsed.getFirstPhaseExpression().ifPresent(expression -> target.setFirstPhaseRanking(expression));
        parsed.getSecondPhaseExpression().ifPresent(expression -> target.setSecondPhaseRanking(expression));
    }

    /** Match, rank and summary features, plus the inherited-feature settings. */
    private void copyFeatures(ParsedRankProfile parsed, RankProfile target) {
        for (var features : parsed.getMatchFeatures()) {
            target.addMatchFeatures(features);
        }
        for (var features : parsed.getRankFeatures()) {
            target.addRankFeatures(features);
        }
        for (var features : parsed.getSummaryFeatures()) {
            target.addSummaryFeatures(features);
        }

        parsed.getInheritedMatchFeatures().ifPresent(inherited -> target.setInheritedMatchFeatures(inherited));
        parsed.getInheritedSummaryFeatures().ifPresent(inherited -> target.setInheritedSummaryFeatures(inherited));
        if (parsed.getIgnoreDefaultRankFeatures()) {
            target.setIgnoreDefaultRankFeatures(true);
        }
    }

    /** Mutate operations, per-field rank settings and rank properties. */
    private void copyFieldSettings(ParsedRankProfile parsed, RankProfile target) {
        for (var operation : parsed.getMutateOperations()) {
            target.addMutateOperation(operation);
        }

        parsed.getFieldsWithRankFilter().forEach((field, isFilter) ->
                target.addRankSetting(field, RankProfile.RankSetting.Type.PREFERBITVECTOR, isFilter));

        parsed.getFieldsWithRankWeight().forEach((field, weight) ->
                target.addRankSetting(field, RankProfile.RankSetting.Type.WEIGHT, weight));

        parsed.getFieldsWithRankType().forEach((field, rankType) ->
                target.addRankSetting(field, RankProfile.RankSetting.Type.RANKTYPE, RankType.fromString(rankType)));

        parsed.getRankProperties().forEach((key, values) -> {
            for (String propertyValue : values) {
                target.addRankProperty(key, propertyValue);
            }
        });
    }

}
+ * + * @author arnej27959 + **/ +public class ConvertParsedSchemas { + + private final List<ParsedSchema> orderedInput; + private final DocumentTypeManager docMan; + private final ApplicationPackage applicationPackage; + private final FileRegistry fileRegistry; + private final DeployLogger deployLogger; + private final ModelContext.Properties properties; + private final RankProfileRegistry rankProfileRegistry; + private final boolean documentsOnly; + private final ConvertParsedTypes typeConverter; + + // for unit test + ConvertParsedSchemas(List<ParsedSchema> orderedInput, + DocumentTypeManager documentTypeManager) + { + this(orderedInput, documentTypeManager, + MockApplicationPackage.createEmpty(), + new MockFileRegistry(), + new BaseDeployLogger(), + new TestProperties(), + new RankProfileRegistry(), + true); + } + + public ConvertParsedSchemas(List<ParsedSchema> orderedInput, + DocumentTypeManager documentTypeManager, + ApplicationPackage applicationPackage, + FileRegistry fileRegistry, + DeployLogger deployLogger, + ModelContext.Properties properties, + RankProfileRegistry rankProfileRegistry, + boolean documentsOnly) + { + this.orderedInput = orderedInput; + this.docMan = documentTypeManager; + this.applicationPackage = applicationPackage; + this.fileRegistry = fileRegistry; + this.deployLogger = deployLogger; + this.properties = properties; + this.rankProfileRegistry = rankProfileRegistry; + this.documentsOnly = documentsOnly; + this.typeConverter = new ConvertParsedTypes(orderedInput, docMan); + } + + private final Map<String, SDDocumentType> convertedDocuments = new LinkedHashMap<>(); + + public List<Schema> convertToSchemas() { + typeConverter.convert(false); + var resultList = new ArrayList<Schema>(); + for (var parsed : orderedInput) { + Optional<String> inherited; + var inheritList = parsed.getInherited(); + if (inheritList.size() == 0) { + inherited = Optional.empty(); + } else if (inheritList.size() == 1) { + inherited = 
Optional.of(inheritList.get(0)); + } else { + throw new IllegalArgumentException("schema " + parsed.name() + "cannot inherit more than once"); + } + Schema schema = parsed.getDocumentWithoutSchema() + ? new DocumentOnlySchema(applicationPackage, fileRegistry, deployLogger, properties) + : new Schema(parsed.name(), applicationPackage, inherited, fileRegistry, deployLogger, properties); + convertSchema(schema, parsed); + resultList.add(schema); + } + return resultList; + } + + private void convertDocument(Schema schema, ParsedDocument parsed, + ConvertParsedFields fieldConverter) + { + SDDocumentType document = new SDDocumentType(parsed.name()); + for (var struct : parsed.getStructs()) { + var structProxy = fieldConverter.convertStructDeclaration(schema, document, struct); + document.addType(structProxy); + } + for (String inherit : parsed.getInherited()) { + var parent = convertedDocuments.get(inherit); + assert(parent != null); + document.inherit(parent); + } + for (var annotation : parsed.getAnnotations()) { + fieldConverter.convertAnnotation(schema, document, annotation); + } + for (var field : parsed.getFields()) { + var sdf = fieldConverter.convertDocumentField(schema, document, field); + if (field.hasIdOverride()) { + document.setFieldId(sdf, field.idOverride()); + } + } + convertedDocuments.put(parsed.name(), document); + schema.addDocument(document); + } + + private void convertDocumentSummary(Schema schema, ParsedDocumentSummary parsed, TypeResolver typeContext) { + var docsum = new DocumentSummary(parsed.name(), schema); + var inheritList = parsed.getInherited(); + if (inheritList.size() == 1) { + docsum.setInherited(inheritList.get(0)); + } else if (inheritList.size() != 0) { + throw new IllegalArgumentException("document-summary "+parsed.name()+" cannot inherit more than once"); + } + if (parsed.getFromDisk()) { + docsum.setFromDisk(true); + } + if (parsed.getOmitSummaryFeatures()) { + docsum.setOmitSummaryFeatures(true); + } + for (var parsedField : 
parsed.getSummaryFields()) { + DataType dataType = typeContext.resolveType(parsedField.getType()); + var summaryField = new SummaryField(parsedField.name(), dataType); + // XXX does not belong here: + summaryField.setVsmCommand(SummaryField.VsmCommand.FLATTENSPACE); + ConvertParsedFields.convertSummaryFieldSettings(summaryField, parsedField); + docsum.add(summaryField); + } + schema.addSummary(docsum); + } + + private void convertImportField(Schema schema, ParsedSchema.ImportedField f) { + // needs rethinking + var importedFields = schema.temporaryImportedFields().get(); + if (importedFields.hasField(f.asFieldName)) { + throw new IllegalArgumentException("For schema '" + schema.getName() + + "', import field as '" + f.asFieldName + + "': Field already imported"); + } + importedFields.add(new TemporaryImportedField(f.asFieldName, f.refFieldName, f.foreignFieldName)); + } + + private void convertFieldSet(Schema schema, ParsedFieldSet parsed) { + String setName = parsed.name(); + for (String field : parsed.getFieldNames()) { + schema.fieldSets().addUserFieldSetItem(setName, field); + } + for (String command : parsed.getQueryCommands()) { + schema.fieldSets().userFieldSets().get(setName).queryCommands().add(command); + } + if (parsed.getMatchSettings().isPresent()) { + // same ugliness as SDParser.jj used to have: + var tmp = new SDField(setName, DataType.STRING); + ConvertParsedFields.convertMatchSettings(tmp, parsed.matchSettings()); + schema.fieldSets().userFieldSets().get(setName).setMatching(tmp.getMatching()); + } + } + + private void convertSchema(Schema schema, ParsedSchema parsed) { + if (parsed.hasStemming()) { + schema.setStemming(parsed.getStemming()); + } + parsed.getRawAsBase64().ifPresent(value -> schema.enableRawAsBase64(value)); + var typeContext = typeConverter.makeContext(parsed.getDocument()); + var fieldConverter = new ConvertParsedFields(typeContext); + convertDocument(schema, parsed.getDocument(), fieldConverter); + for (var field : 
parsed.getFields()) { + fieldConverter.convertExtraField(schema, field); + } + for (var index : parsed.getIndexes()) { + fieldConverter.convertExtraIndex(schema, index); + } + for (var docsum : parsed.getDocumentSummaries()) { + convertDocumentSummary(schema, docsum, typeContext); + } + for (var importedField : parsed.getImportedFields()) { + convertImportField(schema, importedField); + } + for (var fieldSet : parsed.getFieldSets()) { + convertFieldSet(schema, fieldSet); + } + if (documentsOnly) { + return; // skip ranking-only content, not used for document type generation + } + for (var constant : parsed.getConstants()) + schema.add(constant); + for (var onnxModel : parsed.getOnnxModels()) + schema.add(onnxModel); + rankProfileRegistry.add(new DefaultRankProfile(schema, rankProfileRegistry)); + rankProfileRegistry.add(new UnrankedRankProfile(schema, rankProfileRegistry)); + var rankConverter = new ConvertParsedRanking(rankProfileRegistry); + for (var rankProfile : parsed.getRankProfiles()) { + rankConverter.convertRankProfile(schema, rankProfile); + } + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedTypes.java b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedTypes.java new file mode 100644 index 00000000000..9f1203ffc9f --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedTypes.java @@ -0,0 +1,337 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.schema.parser;

import com.yahoo.document.DataType;
import com.yahoo.document.DocumentType;
import com.yahoo.document.DocumentTypeManager;
import com.yahoo.document.PositionDataType;
import com.yahoo.document.StructDataType;
import com.yahoo.document.WeightedSetDataType;
import com.yahoo.document.annotation.AnnotationReferenceDataType;
import com.yahoo.documentmodel.NewDocumentReferenceDataType;
import com.yahoo.documentmodel.OwnedStructDataType;
import com.yahoo.schema.document.annotation.SDAnnotationType;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Helper class for converting ParsedType instances to DataType.
 * Conversion runs in two passes: first an empty type object is created for
 * every document, struct and annotation, then the objects are filled in, so
 * that types can refer to each other regardless of declaration order.
 *
 * @author arnej27959
 **/
public class ConvertParsedTypes {

    private final List<ParsedSchema> orderedInput;
    private final DocumentTypeManager docMan;

    /** Document types by document name. */
    private final Map<String, DocumentType> documentsFromSchemas = new HashMap<>();
    /** Struct types keyed by "ownerDocument->structName". */
    private final Map<String, StructDataType> structsFromSchemas = new HashMap<>();
    /** Annotation types keyed by "ownerDocument->annotationName". */
    private final Map<String, SDAnnotationType> annotationsFromSchemas = new HashMap<>();

    ConvertParsedTypes(List<ParsedSchema> input) {
        this.orderedInput = input;
        this.docMan = new DocumentTypeManager();
    }

    public ConvertParsedTypes(List<ParsedSchema> input, DocumentTypeManager docMan) {
        this.orderedInput = input;
        this.docMan = docMan;
    }

    /**
     * Creates and fills in data types for all input schemas.
     *
     * @param andRegister if true, the resulting types are also registered in the document type manager
     */
    public void convert(boolean andRegister) {
        startDataTypes();
        fillDataTypes();
        if (andRegister) {
            registerDataTypes();
        }
    }

    /** First pass: creates an empty type object for every document, struct and annotation. */
    private void startDataTypes() {
        for (var schema : orderedInput) {
            String name = schema.getDocument().name();
            documentsFromSchemas.put(name, new DocumentType(name));
        }
        for (var schema : orderedInput) {
            var doc = schema.getDocument();
            for (var struct : doc.getStructs()) {
                String structId = doc.name() + "->" + struct.name();
                var dt = new OwnedStructDataType(struct.name(), doc.name());
                structsFromSchemas.put(structId, dt);
            }
            for (var annotation : doc.getAnnotations()) {
                String annId = doc.name() + "->" + annotation.name();
                var at = new SDAnnotationType(annotation.name());
                annotationsFromSchemas.put(annId, at);
                for (String inherit : annotation.getInherited()) {
                    at.inherit(inherit);
                }
                var withStruct = annotation.getStruct();
                if (withStruct.isPresent()) {
                    ParsedStruct struct = withStruct.get();
                    String structId = doc.name() + "->" + struct.name();
                    var old = structsFromSchemas.put(structId, new OwnedStructDataType(struct.name(), doc.name()));
                    assert(old == null);
                }
            }
        }
    }

    /**
     * Fills in the struct attached to an annotation (fields and inherited
     * structs) and sets it as the data type of the annotation type.
     * Does nothing when the annotation has no struct.
     */
    void fillAnnotationStruct(ParsedAnnotation annotation) {
        var withStruct = annotation.getStruct();
        if (withStruct.isPresent()) {
            var doc = annotation.getOwnerDoc();
            var toFill = findStructFromParsed(withStruct.get());
            for (ParsedField field : withStruct.get().getFields()) {
                var t = resolveFromContext(field.getType(), doc);
                var f = field.hasIdOverride()
                    ? new com.yahoo.document.Field(field.name(), field.idOverride(), t)
                    : new com.yahoo.document.Field(field.name(), t);
                toFill.addField(f);
            }
            for (var parent : annotation.getResolvedInherits()) {
                parent.getStruct().ifPresent
                    (ps -> toFill.inherit(findStructFromParsed(ps)));
            }
            var at = findAnnotationFromParsed(annotation);
            at.setDataType(toFill);
        }
    }

    /** Second pass: adds fields, field sets and inheritance to the type objects created by the first pass. */
    private void fillDataTypes() {
        for (var schema : orderedInput) {
            var doc = schema.getDocument();
            for (var annotation : doc.getAnnotations()) {
                var at = findAnnotationFromParsed(annotation);
                for (var parent : annotation.getResolvedInherits()) {
                    at.inherit(findAnnotationFromParsed(parent));
                }
                fillAnnotationStruct(annotation);
            }
            for (var struct : doc.getStructs()) {
                var toFill = findStructFromParsed(struct);
                // evil ugliness
                // fields without an id override are added first, fields with an override afterwards
                for (ParsedField field : struct.getFields()) {
                    if (! field.hasIdOverride()) {
                        var t = resolveFromContext(field.getType(), doc);
                        var f = new com.yahoo.document.Field(field.name(), t);
                        toFill.addField(f);
                    }
                }
                for (ParsedField field : struct.getFields()) {
                    if (field.hasIdOverride()) {
                        var t = resolveFromContext(field.getType(), doc);
                        var f = new com.yahoo.document.Field(field.name(), field.idOverride(), t);
                        toFill.addField(f);
                    }
                }
                for (var inherit : struct.getResolvedInherits()) {
                    var parent = findStructFromParsed(inherit);
                    // ensure a nice, compatible exception message
                    for (var field : toFill.getFields()) {
                        if (parent.hasField(field)) {
                            // name a base type of the parent that also has the field, if there is one
                            for (var base : parent.getInheritedTypes()) {
                                if (base.hasField(field)) {
                                    parent = base;
                                }
                            }
                            throw new IllegalArgumentException
                                ("In document " + doc.name() + ": struct " + struct.name() +
                                 " cannot inherit from " + parent.getName() + " and redeclare field " + field.getName());
                        }
                    }
                    toFill.inherit(parent);
                }
            }
            var docToFill = documentsFromSchemas.get(doc.name());
            Map<String, Collection<String>> fieldSets = new HashMap<>();
            List<String> inDocFields = new ArrayList<>();
            for (var docField : doc.getFields()) {
                String name = docField.name();
                var t = resolveFromContext(docField.getType(), doc);
                var f = new com.yahoo.document.Field(docField.name(), t);
                docToFill.addField(f);
                if (docField.hasIdOverride()) {
                    f.setId(docField.idOverride(), docToFill);
                }
                inDocFields.add(name);
            }
            fieldSets.put("[document]", inDocFields);
            for (var extraField : schema.getFields()) {
                String name = extraField.name();
                if (docToFill.hasField(name)) continue;
                var t = resolveFromContext(extraField.getType(), doc);
                var f = new com.yahoo.document.Field(name, t);
                docToFill.addField(f);
            }
            for (var fieldset : schema.getFieldSets()) {
                fieldSets.put(fieldset.name(), fieldset.getFieldNames());
            }
            docToFill.addFieldSets(fieldSets);
            for (String inherit : doc.getInherited()) {
                docToFill.inherit(findDocFromSchemas(inherit));
            }
        }
    }

    /** Looks up the struct type created for an already resolved parsed struct. */
    private StructDataType findStructFromParsed(ParsedStruct resolved) {
        String structId = resolved.getOwnerName() + "->" + resolved.name();
        var struct = structsFromSchemas.get(structId);
        assert(struct != null);
        return struct;
    }

    /**
     * Resolves a struct name as seen from the given document.
     *
     * @throws IllegalArgumentException if the document context knows no struct by that name
     */
    private StructDataType findStructFromSchemas(String name, ParsedDocument context) {
        var resolved = context.findParsedStruct(name);
        if (resolved == null) {
            throw new IllegalArgumentException("no struct named " + name + " in context " + context);
        }
        return findStructFromParsed(resolved);
    }

    /**
     * Resolves an annotation name as seen from the given document.
     *
     * @throws IllegalArgumentException if the document context knows no annotation by that name,
     *                                  or no annotation type was created for it
     */
    private SDAnnotationType findAnnotationFromSchemas(String name, ParsedDocument context) {
        var resolved = context.findParsedAnnotation(name);
        // Guard against a failed lookup, as the struct lookup above does; without this
        // an unknown annotation name surfaced as a NullPointerException instead of the intended message.
        if (resolved == null) {
            throw new IllegalArgumentException("no annotation named " + name + " in context " + context);
        }
        String annotationId = resolved.getOwnerName() + "->" + resolved.name();
        var annotation = annotationsFromSchemas.get(annotationId);
        if (annotation == null) {
            throw new IllegalArgumentException("no annotation named " + name + " in context " + context);
        }
        return annotation;
    }

    /**
     * Looks up the annotation type created for an already resolved parsed annotation.
     *
     * @throws IllegalArgumentException if no annotation type was created for it
     */
    private SDAnnotationType findAnnotationFromParsed(ParsedAnnotation resolved) {
        String annotationId = resolved.getOwnerName() + "->" + resolved.name();
        var annotation = annotationsFromSchemas.get(annotationId);
        if (annotation == null) {
            throw new IllegalArgumentException("no annotation " + resolved.name() + " in " + resolved.getOwnerName());
        }
        return annotation;
    }

    private DataType createArray(ParsedType pType, ParsedDocument context) {
        DataType nested = resolveFromContext(pType.nestedType(), context);
        return DataType.getArray(nested);
    }

    private DataType createWset(ParsedType pType, ParsedDocument context) {
        DataType nested = resolveFromContext(pType.nestedType(), context);
        boolean cine = pType.getCreateIfNonExistent();
        boolean riz = pType.getRemoveIfZero();
        return new WeightedSetDataType(nested, cine, riz);
    }

    private DataType createMap(ParsedType pType, ParsedDocument context) {
        DataType kt = resolveFromContext(pType.mapKeyType(), context);
        DataType vt = resolveFromContext(pType.mapValueType(), context);
        return DataType.getMap(kt, vt);
    }

    /**
     * Looks up the document type created for a document name.
     *
     * @throws IllegalArgumentException if there is no such document among the input schemas
     */
    private DocumentType findDocFromSchemas(String name) {
        var dt = documentsFromSchemas.get(name);
        if (dt == null) {
            throw new IllegalArgumentException("missing document type for: " + name);
        }
        return dt;
    }

    private DataType createAnnRef(ParsedType pType, ParsedDocument context) {
        SDAnnotationType annotation = findAnnotationFromSchemas(pType.getNameOfReferencedAnnotation(), context);
        return new AnnotationReferenceDataType(annotation);
    }

    private DataType createDocRef(ParsedType pType) {
        var ref = pType.getReferencedDocumentType();
        assert(ref.getVariant() == ParsedType.Variant.DOCUMENT);
        return new NewDocumentReferenceDataType(findDocFromSchemas(ref.name()));
    }

    /**
     * Maps the name of a builtin type to its DataType constant.
     *
     * @throws IllegalArgumentException if the name is not one of the builtin names handled here
     */
    private DataType getBuiltinType(String name) {
        switch (name) {
        case "bool":      return DataType.BOOL;
        case "byte":      return DataType.BYTE;
        case "int":       return DataType.INT;
        case "long":      return DataType.LONG;
        case "string":    return DataType.STRING;
        case "float":     return DataType.FLOAT;
        case "double":    return DataType.DOUBLE;
        case "uri":       return DataType.URI;
        case "predicate": return DataType.PREDICATE;
        case "raw":       return DataType.RAW;
        case "tag":       return DataType.TAG;
        case "float16":   return DataType.FLOAT16;
        default:
            throw new IllegalArgumentException("Unknown builtin type: "+name);
        }
    }

    /**
     * Resolves a parsed type to a DataType, looking up struct and annotation
     * names from the point of view of the given document. A type whose variant
     * is UNKNOWN is tried first as a struct, then as a document; on success the
     * variant of the parsed type is updated accordingly.
     *
     * @throws IllegalArgumentException if the type cannot be resolved
     */
    private DataType resolveFromContext(ParsedType pType, ParsedDocument context) {
        String name = pType.name();
        switch (pType.getVariant()) {
        case NONE:          return DataType.NONE;
        case BUILTIN:       return getBuiltinType(name);
        case POSITION:      return PositionDataType.INSTANCE;
        case ARRAY:         return createArray(pType, context);
        case WSET:          return createWset(pType, context);
        case MAP:           return createMap(pType, context);
        case TENSOR:        return DataType.getTensor(pType.getTensorType());
        case DOC_REFERENCE: return createDocRef(pType);
        case ANN_REFERENCE: return createAnnRef(pType, context);
        case DOCUMENT:      return findDocFromSchemas(name);
        case STRUCT:        return findStructFromSchemas(name, context);
        case UNKNOWN:
            // fallthrough
        }
        // unknown is probably struct
        var found = context.findParsedStruct(name);
        if (found != null) {
            pType.setVariant(ParsedType.Variant.STRUCT);
            return findStructFromSchemas(name, context);
        }
        if (documentsFromSchemas.containsKey(name)) {
            pType.setVariant(ParsedType.Variant.DOCUMENT);
            return findDocFromSchemas(name);
        }
        throw new IllegalArgumentException("unknown type named '" + name + "' in context "+context);
    }

    /** Registers all struct, document and annotation types in the document type manager. */
    @SuppressWarnings("deprecation")
    private void registerDataTypes() {
        for (DataType t : structsFromSchemas.values()) {
            docMan.register(t);
        }
        for (DocumentType t : documentsFromSchemas.values()) {
            docMan.registerDocumentType(t);
        }
        for (SDAnnotationType t : annotationsFromSchemas.values()) {
            docMan.getAnnotationTypeRegistry().register(t);
        }
    }

    /** Resolves types, structs and annotations from the point of view of one document. */
    public class TypeResolver {
        private final ParsedDocument context;
        public DataType resolveType(ParsedType parsed) {
            return resolveFromContext(parsed, context);
        }
        /**
         * @throws IllegalArgumentException if no struct type exists for the struct under this document's name
         */
        public DataType resolveStruct(ParsedStruct parsed) {
            String structId = context.name() + "->" + parsed.name();
            var r = structsFromSchemas.get(structId);
            if (r == null) {
                throw new IllegalArgumentException("no datatype found for struct: " + structId);
            }
            return r;
        }
        public SDAnnotationType resolveAnnotation(String name) {
            return findAnnotationFromSchemas(name, context);
        }
        TypeResolver(ParsedDocument context) {
            this.context = context;
        }
    }

    /** Creates a resolver that looks up names from the point of view of the given document. */
    public TypeResolver makeContext(ParsedDocument doc) {
        return new TypeResolver(doc);
    }
}
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.parser;

import com.yahoo.config.application.api.ApplicationPackage;
import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.config.application.api.FileRegistry;
import com.yahoo.config.model.api.ModelContext;
import com.yahoo.config.model.application.provider.BaseDeployLogger;
import com.yahoo.config.model.application.provider.MockFileRegistry;
import com.yahoo.config.model.deploy.TestProperties;
import com.yahoo.config.model.test.MockApplicationPackage;
import com.yahoo.document.DocumentTypeManager;
import com.yahoo.schema.RankProfileRegistry;
import com.yahoo.schema.Schema;

import java.util.ArrayList;
import java.util.List;

/**
 * Class converting a collection of schemas from the intermediate format.
 * On construction the parsed schemas are ordered so that a schema comes after
 * everything it inherits, and schema-level structs and annotations are moved
 * into the schema's document.
 *
 * @author arnej27959
 **/
public class ConvertSchemaCollection {

    private final IntermediateCollection input;
    private final List<ParsedSchema> orderedInput = new ArrayList<>();
    private final DocumentTypeManager docMan;
    private final ApplicationPackage applicationPackage;
    private final FileRegistry fileRegistry;
    private final DeployLogger deployLogger;
    private final ModelContext.Properties properties;
    private final RankProfileRegistry rankProfileRegistry;
    private final boolean documentsOnly;

    // for unit test
    ConvertSchemaCollection(IntermediateCollection input,
                            DocumentTypeManager documentTypeManager)
    {
        this(input, documentTypeManager,
             MockApplicationPackage.createEmpty(),
             new MockFileRegistry(),
             new BaseDeployLogger(),
             new TestProperties(),
             new RankProfileRegistry(),
             true);
    }

    public ConvertSchemaCollection(IntermediateCollection input,
                                   DocumentTypeManager documentTypeManager,
                                   ApplicationPackage applicationPackage,
                                   FileRegistry fileRegistry,
                                   DeployLogger deployLogger,
                                   ModelContext.Properties properties,
                                   RankProfileRegistry rankProfileRegistry,
                                   boolean documentsOnly)
    {
        this.input = input;
        this.docMan = documentTypeManager;
        this.applicationPackage = applicationPackage;
        this.fileRegistry = fileRegistry;
        this.deployLogger = deployLogger;
        this.properties = properties;
        this.rankProfileRegistry = rankProfileRegistry;
        this.documentsOnly = documentsOnly;

        input.resolveInternalConnections();
        order();
        pushTypesToDocuments();
    }

    /** Fills orderedInput with all parsed schemas, parents before children. */
    void order() {
        var map = input.getParsedSchemas();
        for (var schema : map.values()) {
            findOrdering(schema);
        }
    }

    /** Appends the schema to orderedInput after first appending everything it inherits; already placed schemas are left alone. */
    void findOrdering(ParsedSchema schema) {
        if (orderedInput.contains(schema)) return;
        for (var parent : schema.getAllResolvedInherits()) {
            findOrdering(parent);
        }
        orderedInput.add(schema);
    }

    /** Adds the structs and annotations declared on each schema to that schema's document. */
    void pushTypesToDocuments() {
        for (var schema : orderedInput) {
            for (var struct : schema.getStructs()) {
                schema.getDocument().addStruct(struct);
            }
            for (var annotation : schema.getAnnotations()) {
                schema.getDocument().addAnnotation(annotation);
            }
        }
    }

    private ConvertParsedTypes typeConverter;

    /** Converts the data types of the ordered schemas and registers them in the document type manager. */
    public void convertTypes() {
        typeConverter = new ConvertParsedTypes(orderedInput, docMan);
        typeConverter.convert(true);
    }

    /**
     * Resolves struct and annotation inheritance, then converts the ordered
     * parsed schemas to Schema objects.
     *
     * @return the converted schemas
     * @throws IllegalArgumentException if an inherited struct or annotation cannot be found,
     *                                  or an inheritance cycle is detected
     */
    public List<Schema> convertToSchemas() {
        resolveStructInheritance();
        resolveAnnotationInheritance();
        addMissingAnnotationStructs();
        var converter = new ConvertParsedSchemas(orderedInput,
                                                 docMan,
                                                 applicationPackage,
                                                 fileRegistry,
                                                 deployLogger,
                                                 properties,
                                                 rankProfileRegistry,
                                                 documentsOnly);
        return converter.convertToSchemas();
    }

    /** Links every struct to the parsed structs it inherits, then checks all structs for cycles. */
    private void resolveStructInheritance() {
        List<ParsedStruct> all = new ArrayList<>();
        for (var schema : orderedInput) {
            var doc = schema.getDocument();
            for (var struct : doc.getStructs()) {
                for (String inherit : struct.getInherited()) {
                    var parent = doc.findParsedStruct(inherit);
                    if (parent == null) {
                        throw new IllegalArgumentException("Can not find parent for "+struct+" in "+doc);
                    }
                    struct.resolveInherit(inherit, parent);
                }
                all.add(struct);
            }
        }
        List<String> seen = new ArrayList<>();
        for (ParsedStruct struct : all) {
            inheritanceCycleCheck(struct, seen);
        }
    }

    /** Links every annotation to the parsed annotations it inherits, then checks all annotations for cycles. */
    private void resolveAnnotationInheritance() {
        // was a raw "new ArrayList()"; the diamond keeps the list properly typed
        List<ParsedAnnotation> all = new ArrayList<>();
        for (var schema : orderedInput) {
            var doc = schema.getDocument();
            for (var annotation : doc.getAnnotations()) {
                for (String inherit : annotation.getInherited()) {
                    var parent = doc.findParsedAnnotation(inherit);
                    if (parent == null) {
                        throw new IllegalArgumentException("Can not find parent for "+annotation+" in "+doc);
                    }
                    annotation.resolveInherit(inherit, parent);
                }
                all.add(annotation);
            }
        }
        List<String> seen = new ArrayList<>();
        for (ParsedAnnotation annotation : all) {
            inheritanceCycleCheck(annotation, seen);
        }
    }

    /**
     * Makes the struct of an annotation inherit the struct of each parent
     * annotation that has one, handling the parents themselves first.
     */
    private void fixupAnnotationStruct(ParsedAnnotation parsed) {
        for (var parent : parsed.getResolvedInherits()) {
            fixupAnnotationStruct(parent);
            parent.getStruct().ifPresent(ps -> {
                    var myStruct = parsed.ensureStruct();
                    if (! myStruct.getInherited().contains(ps.name())) {
                        myStruct.inherit(ps.name());
                        myStruct.resolveInherit(ps.name(), ps);
                    }
                });
        }
    }

    private void addMissingAnnotationStructs() {
        for (var schema : orderedInput) {
            var doc = schema.getDocument();
            for (var annotation : doc.getAnnotations()) {
                fixupAnnotationStruct(annotation);
            }
        }
    }

    /**
     * Depth-first walk over the inherited structs; "seen" holds the names on the
     * current path, so meeting a name that is already there means a cycle.
     *
     * @throws IllegalArgumentException listing the path when a cycle is found
     */
    private void inheritanceCycleCheck(ParsedStruct struct, List<String> seen) {
        String name = struct.name();
        if (seen.contains(name)) {
            seen.add(name);
            throw new IllegalArgumentException("Inheritance/reference cycle for structs: " +
                                               String.join(" -> ", seen));
        }
        seen.add(name);
        for (ParsedStruct parent : struct.getResolvedInherits()) {
            inheritanceCycleCheck(parent, seen);
        }
        seen.remove(name);
    }

    /**
     * Depth-first walk over the inherited annotations; "seen" holds the names on the
     * current path, so meeting a name that is already there means a cycle.
     *
     * @throws IllegalArgumentException listing the path when a cycle is found
     */
    private void inheritanceCycleCheck(ParsedAnnotation annotation, List<String> seen) {
        String name = annotation.name();
        if (seen.contains(name)) {
            seen.add(name);
            throw new IllegalArgumentException("Inheritance/reference cycle for annotations: " +
                                               String.join(" -> ", seen));
        }
        seen.add(name);
        for (ParsedAnnotation parent : annotation.getResolvedInherits()) {
            inheritanceCycleCheck(parent, seen);
        }
        seen.remove(name);
    }

}
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.parser;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Connects parsed schemas and their documents to the schemas and documents
 * they name as inherited or referenced, and rejects cyclic relationships.
 *
 * @author arnej27959
 **/
public class InheritanceResolver {

    private final Map<String, ParsedSchema> parsedSchemas;
    private final Map<String, ParsedDocument> parsedDocs = new HashMap<>();
    private final Map<String, ParsedSchema> schemaForDocs = new HashMap<>();

    public InheritanceResolver(Map<String, ParsedSchema> parsedSchemas) {
        this.parsedSchemas = parsedSchemas;
    }

    /**
     * Resolves schema inheritance, then document inheritance and references,
     * and finally checks documents and schemas for cycles.
     *
     * @throws IllegalArgumentException if a named parent or reference is missing, a schema
     *                                  lacks a document, a document is declared twice,
     *                                  or a cycle is found
     */
    public void resolveInheritance() {
        resolveSchemaInheritance();
        resolveDocumentInheritance();
        checkDocumentCycles();
        checkSchemaCycles();
    }

    /** Links each schema to the schemas it inherits by name. */
    private void resolveSchemaInheritance() {
        for (ParsedSchema child : parsedSchemas.values()) {
            for (String parentName : child.getInherited()) {
                ParsedSchema parentSchema = parsedSchemas.get(parentName);
                if (parentSchema == null) {
                    throw new IllegalArgumentException("schema '" + child.name() + "' inherits '" + parentName + "', but this schema does not exist");
                }
                child.resolveInherit(parentName, parentSchema);
            }
        }
    }

    /** Runs the three document passes in order: index, link documents, link schemas. */
    private void resolveDocumentInheritance() {
        indexDocuments();
        linkDocuments();
        linkSchemasByDocument();
    }

    /**
     * Records every schema's document by name and notes, on the schema, the
     * names of the documents its own document inherits or references.
     */
    private void indexDocuments() {
        for (ParsedSchema owner : parsedSchemas.values()) {
            if (! owner.hasDocument()) {
                throw new IllegalArgumentException("For schema '" + owner.name() +
                                                   "': A search specification must have an equally named document inside of it.");
            }
            ParsedDocument document = owner.getDocument();
            String documentName = document.name();
            if (parsedDocs.put(documentName, document) != null) {
                throw new IllegalArgumentException("duplicate document declaration for " + documentName);
            }
            schemaForDocs.put(documentName, owner);
            for (String inheritedName : document.getInherited()) {
                owner.inheritByDocument(inheritedName);
            }
            for (String referencedName : document.getReferencedDocuments()) {
                owner.inheritByDocument(referencedName);
            }
        }
    }

    /** Links each document to the documents it inherits and references. */
    private void linkDocuments() {
        for (ParsedDocument document : parsedDocs.values()) {
            for (String inheritedName : document.getInherited()) {
                ParsedDocument inheritedDoc = parsedDocs.get(inheritedName);
                if (inheritedDoc == null) {
                    throw new IllegalArgumentException("document " + document.name() + " inherits from unavailable document " + inheritedName);
                }
                document.resolveInherit(inheritedName, inheritedDoc);
            }
            for (String referencedName : document.getReferencedDocuments()) {
                ParsedDocument referencedDoc = parsedDocs.get(referencedName);
                if (referencedDoc == null) {
                    throw new IllegalArgumentException("document " + document.name() + " references unavailable document " + referencedName);
                }
                document.resolveReferenced(referencedDoc);
            }
        }
    }

    /** Links each schema to the schemas owning the documents noted by the indexing pass. */
    private void linkSchemasByDocument() {
        for (ParsedSchema dependent : parsedSchemas.values()) {
            for (String documentName : dependent.getInheritedByDocument()) {
                ParsedSchema owner = schemaForDocs.get(documentName);
                assert(owner.hasDocument());
                assert(owner.getDocument().name().equals(documentName));
                dependent.resolveInheritByDocument(documentName, owner);
            }
        }
    }

    private void checkDocumentCycles() {
        List<String> path = new ArrayList<>();
        for (ParsedDocument document : parsedDocs.values()) {
            inheritanceCycleCheck(document, path);
        }
    }

    private void checkSchemaCycles() {
        List<String> path = new ArrayList<>();
        for (ParsedSchema schema : parsedSchemas.values()) {
            inheritanceCycleCheck(schema, path);
        }
    }

    /**
     * Depth-first walk over inherited schemas. The list holds the names on the
     * current path; meeting a name already on it is reported as a cycle.
     */
    private void inheritanceCycleCheck(ParsedSchema schema, List<String> seen) {
        String current = schema.name();
        boolean alreadyOnPath = seen.contains(current);
        seen.add(current);
        if (alreadyOnPath) {
            throw new IllegalArgumentException("Inheritance/reference cycle for schemas: " +
                                               String.join(" -> ", seen));
        }
        for (ParsedSchema ancestor : schema.getAllResolvedInherits()) {
            inheritanceCycleCheck(ancestor, seen);
        }
        seen.remove(current);
    }

    /**
     * Depth-first walk over a document's resolved parents. The list holds the
     * names on the current path; meeting a name already on it is reported as a cycle.
     */
    private void inheritanceCycleCheck(ParsedDocument document, List<String> seen) {
        String current = document.name();
        boolean alreadyOnPath = seen.contains(current);
        seen.add(current);
        if (alreadyOnPath) {
            throw new IllegalArgumentException("Inheritance/reference cycle for documents: " +
                                               String.join(" -> ", seen));
        }
        for (ParsedDocument ancestor : document.getAllResolvedParents()) {
            inheritanceCycleCheck(ancestor, seen);
        }
        seen.remove(current);
    }

}
+ * + * @author arnej27959 + **/ +public class IntermediateCollection { + + private final DeployLogger deployLogger; + private final ModelContext.Properties modelProperties; + + private Map<String, ParsedSchema> parsedSchemas = new LinkedHashMap<>(); + + IntermediateCollection() { + this.deployLogger = new BaseDeployLogger(); + this.modelProperties = new TestProperties(); + } + + public IntermediateCollection(DeployLogger logger, ModelContext.Properties properties) { + this.deployLogger = logger; + this.modelProperties = properties; + } + + public Map<String, ParsedSchema> getParsedSchemas() { return Collections.unmodifiableMap(parsedSchemas); } + + public ParsedSchema getParsedSchema(String name) { return parsedSchemas.get(name); } + + public ParsedSchema addSchemaFromString(String input) throws ParseException { + var stream = new SimpleCharStream(input); + var parser = new SchemaParser(stream, deployLogger, modelProperties); + try { + var schema = parser.schema(); + if (parsedSchemas.containsKey(schema.name())) { + throw new IllegalArgumentException("Duplicate schemas named: " + schema.name()); + } + parsedSchemas.put(schema.name(), schema); + return schema; + } catch (TokenMgrException e) { + throw new ParseException("Unknown symbol: " + e.getMessage()); + } catch (ParseException pe) { + throw new ParseException(stream.formatException(Exceptions.toMessageString(pe))); + } + } + + private String addSchemaFromStringWithFileName(String input, String fileName) throws ParseException { + var parsed = addSchemaFromString(input); + String nameFromFile = baseName(fileName); + if (! 
parsed.name().equals(nameFromFile)) { + throw new IllegalArgumentException("The file containing schema '" + + parsed.name() + "' must be named '" + + parsed.name() + ApplicationPackage.SD_NAME_SUFFIX + + "', was '" + stripDirs(fileName) + "'"); + } + return parsed.name(); + } + + private String baseName(String filename) { + int pos = filename.lastIndexOf('/'); + if (pos != -1) { + filename = filename.substring(pos + 1); + } + pos = filename.lastIndexOf('.'); + if (pos != -1) { + filename = filename.substring(0, pos); + } + return filename; + } + + private String stripDirs(String filename) { + int pos = filename.lastIndexOf('/'); + if (pos != -1) { + return filename.substring(pos + 1); + } + return filename; + } + + /** + * parse a schema from the given reader and add result to collection + **/ + public String addSchemaFromReader(NamedReader reader) throws ParseException { + try { + var nameParsed = addSchemaFromStringWithFileName(IOUtils.readAll(reader.getReader()), reader.getName()); + reader.close(); + return nameParsed; + } catch (ParseException ex) { + throw new ParseException("Failed parsing schema from " + reader.getName() + ": " + ex.getMessage()); + } catch (java.io.IOException ex) { + throw new IllegalArgumentException("Failed reading from " + reader.getName() + ": " + ex.getMessage()); + } + } + + /** for unit tests */ + public String addSchemaFromFile(String fileName) throws ParseException { + try { + // return addSchemaFromStringWithFileName(IOUtils.readFile(new File(fileName)), fileName); + var parsed = addSchemaFromString(IOUtils.readFile(new File(fileName))); + return parsed.name(); + } catch (ParseException ex) { + throw new ParseException("Failed parsing schema from " + fileName + ": " + ex.getMessage()); + } catch (java.io.IOException ex) { + throw new IllegalArgumentException("Could not read file " + fileName + ": " + ex.getMessage()); + } + } + + /** + * parse a rank profile from the given reader and add to the schema identified by name. 
+ * note: the named schema must have been parsed already. + **/ + public void addRankProfileFile(String schemaName, NamedReader reader) throws ParseException { + try { + ParsedSchema schema = parsedSchemas.get(schemaName); + if (schema == null) { + throw new IllegalArgumentException("No schema named: " + schemaName); + } + var stream = new SimpleCharStream(IOUtils.readAll(reader.getReader())); + var parser = new SchemaParser(stream, deployLogger, modelProperties); + try { + parser.rankProfile(schema); + } catch (ParseException pe) { + throw new ParseException("Failed parsing rank-profile from " + reader.getName() + ": " + + stream.formatException(Exceptions.toMessageString(pe))); + } + } catch (java.io.IOException ex) { + throw new IllegalArgumentException("Failed reading from " + reader.getName() + ": " + ex.getMessage()); + } + } + + // for unit test + void addRankProfileFile(String schemaName, String fileName) throws ParseException { + try { + var reader = IOUtils.createReader(fileName, "UTF-8"); + addRankProfileFile(schemaName, new NamedReader(fileName, reader)); + } catch (java.io.IOException ex) { + throw new IllegalArgumentException("Could not read file " + fileName + ": " + ex.getMessage()); + } + } + + void resolveInternalConnections() { + var resolver = new InheritanceResolver(parsedSchemas); + resolver.resolveInheritance(); + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedAnnotation.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedAnnotation.java new file mode 100644 index 00000000000..c36656838f7 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedAnnotation.java @@ -0,0 +1,57 @@ +package com.yahoo.schema.parser; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +/** + * This class holds the extracted information after parsing a + * "annotation" block, using simple data structures as far as + * possible. Do not put advanced logic here! 
+ * @author arnej27959 + **/ +class ParsedAnnotation extends ParsedBlock { + + private ParsedStruct wrappedStruct = null; + private final List<String> inherited = new ArrayList<>(); + private final List<ParsedAnnotation> resolvedInherits = new ArrayList<>(); + private ParsedDocument ownedBy = null; + + ParsedAnnotation(String name) { + super(name, "annotation"); + } + + public List<String> getInherited() { return List.copyOf(inherited); } + public List<ParsedAnnotation> getResolvedInherits() { + assert(inherited.size() == resolvedInherits.size()); + return List.copyOf(resolvedInherits); + } + + + public Optional<ParsedStruct> getStruct() { return Optional.ofNullable(wrappedStruct); } + public ParsedDocument getOwnerDoc() { return ownedBy; } + public String getOwnerName() { return ownedBy.name(); } + + public ParsedStruct ensureStruct() { + if (wrappedStruct == null) { + wrappedStruct = new ParsedStruct("annotation." + name()); + wrappedStruct.tagOwner(ownedBy); + } + return wrappedStruct; + } + void setStruct(ParsedStruct struct) { this.wrappedStruct = struct; } + + void inherit(String other) { inherited.add(other); } + + void tagOwner(ParsedDocument owner) { + verifyThat(ownedBy == null, "already owned by", ownedBy); + this.ownedBy = owner; + getStruct().ifPresent(s -> s.tagOwner(owner)); + } + + void resolveInherit(String name, ParsedAnnotation parsed) { + verifyThat(inherited.contains(name), "resolveInherit for non-inherited name", name); + verifyThat(name.equals(parsed.name()), "resolveInherit name mismatch for", name); + resolvedInherits.add(parsed); + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedAttribute.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedAttribute.java new file mode 100644 index 00000000000..be8d20fbe93 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedAttribute.java @@ -0,0 +1,69 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.schema.parser; + +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +/** + * This class holds the extracted information after parsing a + * "attribute" block, using simple data structures as far as + * possible. Do not put advanced logic here! + * @author arnej27959 + **/ +class ParsedAttribute extends ParsedBlock { + + private boolean enableBitVectors = false; + private boolean enableOnlyBitVector = false; + private boolean enableFastAccess = false; + private boolean enableFastRank = false; + private boolean enableFastSearch = false; + private boolean enableHuge = false; + private boolean enableMutable = false; + private boolean enablePaged = false; + private final Map<String, String> aliases = new LinkedHashMap<>(); + private ParsedSorting sortSettings = null; + private String distanceMetric = null; + + ParsedAttribute(String name) { + super(name, "attribute"); + } + + List<String> getAliases() { return List.copyOf(aliases.keySet()); } + String lookupAliasedFrom(String alias) { return aliases.get(alias); } + Optional<String> getDistanceMetric() { return Optional.ofNullable(distanceMetric); } + boolean getEnableBitVectors() { return this.enableBitVectors; } + boolean getEnableOnlyBitVector() { return this.enableOnlyBitVector; } + boolean getFastAccess() { return this.enableFastAccess; } + boolean getFastRank() { return this.enableFastRank; } + boolean getFastSearch() { return this.enableFastSearch; } + boolean getHuge() { return this.enableHuge; } + boolean getMutable() { return this.enableMutable; } + boolean getPaged() { return this.enablePaged; } + Optional<ParsedSorting> getSorting() { return Optional.ofNullable(sortSettings); } + + void addAlias(String from, String to) { + verifyThat(! 
aliases.containsKey(to), "already has alias", to); + aliases.put(to, from); + } + + void setDistanceMetric(String value) { + verifyThat(distanceMetric == null, "already has distance-metric", distanceMetric); + this.distanceMetric = value; + } + + ParsedSorting sortInfo() { + if (sortSettings == null) sortSettings = new ParsedSorting(name(), "attribute.sorting"); + return this.sortSettings; + } + + void setEnableBitVectors(boolean value) { this.enableBitVectors = value; } + void setEnableOnlyBitVector(boolean value) { this.enableOnlyBitVector = value; } + void setFastAccess(boolean value) { this.enableFastAccess = value; } // every setter stores the given flag, so passing false clears it + void setFastRank(boolean value) { this.enableFastRank = value; } + void setFastSearch(boolean value) { this.enableFastSearch = value; } + void setHuge(boolean value) { this.enableHuge = value; } + void setMutable(boolean value) { this.enableMutable = value; } + void setPaged(boolean value) { this.enablePaged = value; } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedBlock.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedBlock.java new file mode 100644 index 00000000000..c20abf52bf3 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedBlock.java @@ -0,0 +1,36 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +/** + * Common methods for various Parsed* classes. + * @author arnej27959 + **/ +public class ParsedBlock { + private final String name; + private final String blockType; + + public ParsedBlock(String name, String blockType) { + this.name = name; + this.blockType = blockType; + } + + public final String name() { return name; } + public final String blockType() { return blockType; } + + protected void verifyThat(boolean check, String msg, Object ...
msgDetails) { + if (check) return; + var buf = new StringBuilder(); + buf.append(blockType).append(" '").append(name).append("' error: "); + buf.append(msg); + for (Object detail : msgDetails) { + buf.append(" "); + buf.append(detail); // null-safe: a null detail is rendered as "null" rather than raising NullPointerException + } + throw new IllegalArgumentException(buf.toString()); + } + + @Override public String toString() { + return blockType + " '" + name + "'"; + } +} + diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedDocument.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedDocument.java new file mode 100644 index 00000000000..281e7989885 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedDocument.java @@ -0,0 +1,127 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * This class holds the extracted information after parsing a + * "document" block in a schema (.sd) file, using simple data + * structures as far as possible. Do not put advanced logic here!
+ * @author arnej27959 + **/ +public class ParsedDocument extends ParsedBlock { + private final List<String> inherited = new ArrayList<>(); + private final Map<String, ParsedDocument> resolvedInherits = new LinkedHashMap<>(); + private final Map<String, ParsedDocument> resolvedReferences = new LinkedHashMap<>(); + private final Map<String, ParsedField> docFields = new LinkedHashMap<>(); + private final Map<String, ParsedStruct> docStructs = new LinkedHashMap<>(); + private final Map<String, ParsedAnnotation> docAnnotations = new LinkedHashMap<>(); + + public ParsedDocument(String name) { + super(name, "document"); + } + + List<String> getInherited() { return List.copyOf(inherited); } + List<ParsedAnnotation> getAnnotations() { return List.copyOf(docAnnotations.values()); } + List<ParsedDocument> getResolvedInherits() { + assert(inherited.size() == resolvedInherits.size()); + return List.copyOf(resolvedInherits.values()); + } + List<ParsedDocument> getResolvedReferences() { + return List.copyOf(resolvedReferences.values()); + } + List<ParsedDocument> getAllResolvedParents() { + List<ParsedDocument> all = new ArrayList<>(); + all.addAll(getResolvedInherits()); + all.addAll(getResolvedReferences()); + return all; + } + List<ParsedField> getFields() { return List.copyOf(docFields.values()); } + List<ParsedStruct> getStructs() { return List.copyOf(docStructs.values()); } + ParsedStruct getStruct(String name) { return docStructs.get(name); } + ParsedAnnotation getAnnotation(String name) { return docAnnotations.get(name); } + + List<String> getReferencedDocuments() { + var result = new ArrayList<String>(); + for (var field : docFields.values()) { + var type = field.getType(); + if (type.getVariant() == ParsedType.Variant.DOC_REFERENCE) { + var docType = type.getReferencedDocumentType(); + assert(docType.getVariant() == ParsedType.Variant.DOCUMENT); + result.add(docType.name()); + } + } + return result; + } + + void inherit(String other) { inherited.add(other); } + + void
addField(ParsedField field) { + String fieldName = field.name().toLowerCase(); + verifyThat(! docFields.containsKey(fieldName), + "Duplicate (case insensitively) " + field + " in document type '" + this.name() + "'"); + docFields.put(fieldName, field); + } + + void addStruct(ParsedStruct struct) { + String sName = struct.name(); + verifyThat(! docStructs.containsKey(sName), "already has struct", sName); + docStructs.put(sName, struct); + struct.tagOwner(this); + } + + void addAnnotation(ParsedAnnotation annotation) { + String annName = annotation.name(); + verifyThat(! docAnnotations.containsKey(annName), "already has annotation", annName); + docAnnotations.put(annName, annotation); + annotation.tagOwner(this); + } + + void resolveInherit(String name, ParsedDocument parsed) { + verifyThat(inherited.contains(name), "resolveInherit for non-inherited name", name); + verifyThat(name.equals(parsed.name()), "resolveInherit name mismatch for", name); + verifyThat(! resolvedInherits.containsKey(name), "double resolveInherit for", name); + resolvedInherits.put(name, parsed); + } + + void resolveReferenced(ParsedDocument parsed) { + var old = resolvedReferences.put(parsed.name(), parsed); + assert(old == null || old == parsed); + } + + ParsedStruct findParsedStruct(String name) { + ParsedStruct found = getStruct(name); + if (found != null) return found; + for (var parent : getAllResolvedParents()) { + var fromParent = parent.findParsedStruct(name); + if (fromParent == null) continue; + if (fromParent == found) continue; + if (found == null) { + found = fromParent; + } else { + throw new IllegalArgumentException("conflicting values for struct " + name + " in " +this); + } + } + return found; + } + + ParsedAnnotation findParsedAnnotation(String name) { + ParsedAnnotation found = docAnnotations.get(name); + if (found != null) return found; + for (var parent : getResolvedInherits()) { + var fromParent = parent.findParsedAnnotation(name); + if (fromParent == null) continue; + if 
(fromParent == found) continue; + if (found == null) { + found = fromParent; + } else { + throw new IllegalArgumentException("conflicting values for annotation " + name + " in " +this); + } + } + return found; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedDocumentSummary.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedDocumentSummary.java new file mode 100644 index 00000000000..93469a86fe3 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedDocumentSummary.java @@ -0,0 +1,49 @@ + +package com.yahoo.schema.parser; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * This class holds the extracted information after parsing a + * "document-summary" block, using simple data structures as far as + * possible. Do not put advanced logic here! + * @author arnej27959 + **/ +class ParsedDocumentSummary extends ParsedBlock { + + private boolean omitSummaryFeatures; + private boolean fromDisk; + private final List<String> inherited = new ArrayList<>(); + private final Map<String, ParsedSummaryField> fields = new LinkedHashMap<>(); + + ParsedDocumentSummary(String name) { + super(name, "document-summary"); + } + + boolean getOmitSummaryFeatures() { return omitSummaryFeatures; } + boolean getFromDisk() { return fromDisk; } + List<ParsedSummaryField> getSummaryFields() { return List.copyOf(fields.values()); } + List<String> getInherited() { return List.copyOf(inherited); } + + ParsedSummaryField addField(ParsedSummaryField field) { + String fieldName = field.name(); + // TODO disallow this on Vespa 8 + // verifyThat(! 
fields.containsKey(fieldName), "already has field", fieldName); + return fields.put(fieldName, field); + } + + void setFromDisk(boolean value) { + this.fromDisk = value; + } + + void setOmitSummaryFeatures(boolean value) { + this.omitSummaryFeatures = value; + } + + void inherit(String other) { + inherited.add(other); + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedField.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedField.java new file mode 100644 index 00000000000..a4df2ac6dc2 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedField.java @@ -0,0 +1,159 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import com.yahoo.schema.document.Stemming; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +/** + * This class holds the extracted information after parsing a "field" + * block, using simple data structures as far as possible. Do not put + * advanced logic here! 
+ * @author arnej27959 + **/ +class ParsedField extends ParsedBlock { + + private ParsedType type; + private boolean hasBolding = false; + private boolean isFilter = false; + private int overrideId = 0; + private boolean isLiteral = false; + private boolean isNormal = false; + private Integer weight; + private String normalizing = null; + private final ParsedMatchSettings matchInfo = new ParsedMatchSettings(); + private Stemming stemming = null; + private ParsedIndexingOp indexingOp = null; + private ParsedSorting sortSettings = null; + private final Map<String, ParsedAttribute> attributes = new LinkedHashMap<>(); + private final Map<String, ParsedIndex> fieldIndexes = new LinkedHashMap<>(); + private final Map<String, String> aliases = new LinkedHashMap<>(); + private final Map<String, String> rankTypes = new LinkedHashMap<>(); + private final Map<String, ParsedField> structFields = new LinkedHashMap<>(); + private final Map<String, ParsedSummaryField> summaryFields = new LinkedHashMap<>(); + private final List<DictionaryOption> dictionaryOptions = new ArrayList<>(); + private final List<String> queryCommands = new ArrayList<>(); + + ParsedField(String name, ParsedType type) { + super(name, "field"); + this.type = type; + } + + ParsedType getType() { return this.type; } + boolean hasBolding() { return this.hasBolding; } + boolean hasFilter() { return this.isFilter; } + boolean hasLiteral() { return this.isLiteral; } + boolean hasNormal() { return this.isNormal; } + boolean hasIdOverride() { return overrideId != 0; } + int idOverride() { return overrideId; } + List<DictionaryOption> getDictionaryOptions() { return List.copyOf(dictionaryOptions); } + List<ParsedAttribute> getAttributes() { return List.copyOf(attributes.values()); } + List<ParsedIndex> getIndexes() { return List.copyOf(fieldIndexes.values()); } + List<ParsedSummaryField> getSummaryFields() { return List.copyOf(summaryFields.values()); } + List<ParsedField> getStructFields() { return 
List.copyOf(structFields.values()); } + List<String> getAliases() { return List.copyOf(aliases.keySet()); } + List<String> getQueryCommands() { return List.copyOf(queryCommands); } + String lookupAliasedFrom(String alias) { return aliases.get(alias); } + ParsedMatchSettings matchSettings() { return this.matchInfo; } + Optional<Integer> getWeight() { return Optional.ofNullable(weight); } + Optional<Stemming> getStemming() { return Optional.ofNullable(stemming); } + Optional<String> getNormalizing() { return Optional.ofNullable(normalizing); } + Optional<ParsedIndexingOp> getIndexing() { return Optional.ofNullable(indexingOp); } + Optional<ParsedSorting> getSorting() { return Optional.ofNullable(sortSettings); } + Map<String, String> getRankTypes() { return Collections.unmodifiableMap(rankTypes); } + + /** get an existing summary field for modification, or create it */ + ParsedSummaryField summaryFieldFor(String name) { + if (summaryFields.containsKey(name)) { + return summaryFields.get(name); + } + var sf = new ParsedSummaryField(name, getType()); + summaryFields.put(name, sf); + return sf; + } + + /** get an existing summary field for modification, or create it */ + ParsedSummaryField summaryFieldFor(String name, ParsedType type) { + if (summaryFields.containsKey(name)) { + var sf = summaryFields.get(name); + if (sf.getType() == null) { + sf.setType(type); + } else { + // TODO check that types are properly equal here + String oldName = sf.getType().name(); + String newName = type.name(); + verifyThat(newName.equals(oldName), "type mismatch for summary field", name, ":", oldName, "/", newName); + } + return sf; + } + var sf = new ParsedSummaryField(name, type); + summaryFields.put(name, sf); + return sf; + } + + void addAlias(String from, String to) { + verifyThat(! aliases.containsKey(to), "already has alias", to); + aliases.put(to, from); + } + + void addIndex(ParsedIndex index) { + String idxName = index.name(); + verifyThat(! 
fieldIndexes.containsKey(idxName), "already has index", idxName); + fieldIndexes.put(idxName, index); + } + + void addRankType(String index, String rankType) { + rankTypes.put(index, rankType); + } + + void dictionary(DictionaryOption option) { + dictionaryOptions.add(option); + } + + void setBolding(boolean value) { this.hasBolding = value; } + void setFilter(boolean value) { this.isFilter = value; } + void setId(int id) { this.overrideId = id; } + void setLiteral(boolean value) { this.isLiteral = value; } + void setNormal(boolean value) { this.isNormal = value; } + void setNormalizing(String value) { this.normalizing = value; } + void setStemming(Stemming stemming) { this.stemming = stemming; } + void setWeight(int weight) { this.weight = weight; } + + ParsedAttribute attributeFor(String attrName) { + return attributes.computeIfAbsent(attrName, n -> new ParsedAttribute(n)); + } + + void setIndexingOperation(ParsedIndexingOp idxOp) { + verifyThat(indexingOp == null, "already has indexing"); + indexingOp = idxOp; + } + + ParsedSorting sortInfo() { + if (sortSettings == null) sortSettings = new ParsedSorting(name(), "field.sorting"); + return this.sortSettings; + } + + void addQueryCommand(String command) { + queryCommands.add(command); + } + + void addStructField(ParsedField structField) { + String fieldName = structField.name(); + verifyThat(! structFields.containsKey(fieldName), "already has struct-field", fieldName); + structFields.put(fieldName, structField); + } + + void addSummaryField(ParsedSummaryField summaryField) { + String fieldName = summaryField.name(); + verifyThat(! 
summaryFields.containsKey(fieldName), "already has summary field", fieldName); + if (summaryField.getType() == null) { + summaryField.setType(getType()); + } + summaryFields.put(fieldName, summaryField); + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedFieldSet.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedFieldSet.java new file mode 100644 index 00000000000..9e8906a41a4 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedFieldSet.java @@ -0,0 +1,36 @@ +package com.yahoo.schema.parser; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +/** + * This class holds the extracted information after parsing a "fieldset" + * block, using simple data structures as far as possible. Do not put + * advanced logic here! + * @author arnej27959 + **/ +class ParsedFieldSet extends ParsedBlock { + + private final List<String> fields = new ArrayList<>(); + private final List<String> queryCommands = new ArrayList<>(); + private ParsedMatchSettings matchInfo = null; + + ParsedFieldSet(String name) { + super(name, "fieldset"); + } + + ParsedMatchSettings matchSettings() { + if (matchInfo == null) matchInfo = new ParsedMatchSettings(); + return this.matchInfo; + } + + List<String> getQueryCommands() { return List.copyOf(queryCommands); } + List<String> getFieldNames() { return List.copyOf(fields); } + Optional<ParsedMatchSettings> getMatchSettings() { + return Optional.ofNullable(this.matchInfo); + } + + void addField(String field) { fields.add(field); } + void addQueryCommand(String command) { queryCommands.add(command); } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedIndex.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedIndex.java new file mode 100644 index 00000000000..cf70168e8d2 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedIndex.java @@ -0,0 +1,79 @@ +// Copyright Yahoo. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import com.yahoo.schema.document.HnswIndexParams; +import com.yahoo.schema.document.Stemming; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +/** + * This class holds the extracted information after parsing an "index" + * block, using simple data structures as far as possible. Do not put + * advanced logic here! + * @author arnej27959 + **/ +class ParsedIndex extends ParsedBlock { + + private Boolean enableBm25 = null; + private Boolean isPrefix = null; + private HnswIndexParams hnswParams = null; + private final List<String> aliases = new ArrayList<>(); + private Stemming stemming = null; + private Integer arity = null; + private Long lowerBound = null; + private Long upperBound = null; + private Double densePLT = null; + + ParsedIndex(String name) { + super(name, "index"); + } + + Optional<Boolean> getEnableBm25() { return Optional.ofNullable(this.enableBm25); } + Optional<Boolean> getPrefix() { return Optional.ofNullable(this.isPrefix); } + Optional<HnswIndexParams> getHnswIndexParams() { return Optional.ofNullable(this.hnswParams); } + List<String> getAliases() { return List.copyOf(aliases); } + boolean hasStemming() { return stemming != null; } + Optional<Stemming> getStemming() { return Optional.ofNullable(stemming); } + Optional<Integer> getArity() { return Optional.ofNullable(this.arity); } + Optional<Long> getLowerBound() { return Optional.ofNullable(this.lowerBound); } + Optional<Long> getUpperBound() { return Optional.ofNullable(this.upperBound); } + Optional<Double> getDensePostingListThreshold() { return Optional.ofNullable(this.densePLT); } + + void addAlias(String alias) { + aliases.add(alias); + } + + void setArity(int arity) { + this.arity = arity; + } + + void setDensePostingListThreshold(double threshold) { + this.densePLT = threshold; + } + + void setEnableBm25(boolean value) { + 
this.enableBm25 = value; + } + + void setHnswIndexParams(HnswIndexParams params) { + this.hnswParams = params; + } + + void setLowerBound(long value) { + this.lowerBound = value; + } + + void setPrefix(boolean value) { + this.isPrefix = value; + } + + void setStemming(Stemming stemming) { + this.stemming = stemming; + } + + void setUpperBound(long value) { + this.upperBound = value; + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedIndexingOp.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedIndexingOp.java new file mode 100644 index 00000000000..3a2df2aac4c --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedIndexingOp.java @@ -0,0 +1,37 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import com.yahoo.vespa.indexinglanguage.ExpressionSearcher; +import com.yahoo.vespa.indexinglanguage.expressions.AttributeExpression; +import com.yahoo.vespa.indexinglanguage.expressions.Expression; +import com.yahoo.vespa.indexinglanguage.expressions.IndexExpression; +import com.yahoo.vespa.indexinglanguage.expressions.LowerCaseExpression; +import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; +import com.yahoo.vespa.indexinglanguage.expressions.SummaryExpression; + +/** + * This class wraps an indexing script expression, with some helper + * methods for extracting information from it + * @author arnej27959 + **/ +class ParsedIndexingOp { + + private final ScriptExpression script; + + ParsedIndexingOp(ScriptExpression script) { + this.script = script; + } + + ScriptExpression script() { return this.script; } + + public boolean doesAttributing() { return containsExpression(AttributeExpression.class); } + public boolean doesIndexing() { return containsExpression(IndexExpression.class); } + public boolean doesLowerCasing() { return containsExpression(LowerCaseExpression.class); } + public boolean 
doesSummarying() { return containsExpression(SummaryExpression.class); } + + private <T extends Expression> boolean containsExpression(Class<T> searchFor) { + var searcher = new ExpressionSearcher<>(searchFor); + var expr = searcher.searchIn(script); + return (expr != null); + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedMatchSettings.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedMatchSettings.java new file mode 100644 index 00000000000..4d3c45ad67f --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedMatchSettings.java @@ -0,0 +1,38 @@ +package com.yahoo.schema.parser; + +import com.yahoo.schema.document.Case; +import com.yahoo.schema.document.MatchType; +import com.yahoo.schema.document.MatchAlgorithm; + +import java.util.Optional; + +/** + * This class holds the extracted information after parsing a "match" + * block, using simple data structures as far as possible. Do not put + * advanced logic here! + * @author arnej27959 + **/ +public class ParsedMatchSettings { + + private MatchType matchType = null; + private Case matchCase = null; + private MatchAlgorithm matchAlgorithm = null; + private String exactTerminator = null; + private Integer gramSize = null; + private Integer maxLength = null; + + Optional<MatchType> getMatchType() { return Optional.ofNullable(matchType); } + Optional<Case> getMatchCase() { return Optional.ofNullable(matchCase); } + Optional<MatchAlgorithm> getMatchAlgorithm() { return Optional.ofNullable(matchAlgorithm); } + Optional<String> getExactTerminator() { return Optional.ofNullable(exactTerminator); } + Optional<Integer> getGramSize() { return Optional.ofNullable(gramSize); } + Optional<Integer> getMaxLength() { return Optional.ofNullable(maxLength); } + + // TODO - consider allowing each set only once: + void setType(MatchType value) { this.matchType = value; } + void setCase(Case value) { this.matchCase = value; } + void setAlgorithm(MatchAlgorithm value) { 
this.matchAlgorithm = value; } + void setExactTerminator(String value) { this.exactTerminator = value; } + void setGramSize(int value) { this.gramSize = value; } + void setMaxLength(int value) { this.maxLength = value; } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedRankFunction.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedRankFunction.java new file mode 100644 index 00000000000..73f1316d468 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedRankFunction.java @@ -0,0 +1,39 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import java.util.ArrayList; +import java.util.List; + +/** + * This class holds the extracted information after parsing a + * "function" block in a rank-profile, using simple data structures as + * far as possible. Do not put advanced logic here! + * @author arnej27959 + **/ +class ParsedRankFunction extends ParsedBlock { + + private boolean inline; + private String expression; + private final List<String> parameters = new ArrayList<>(); + + ParsedRankFunction(String name) { + super(name, "function"); + } + + boolean getInline() { return this.inline; } + String getExpression() { return this.expression; } + List<String> getParameters() { return List.copyOf(parameters); } + + void addParameter(String param) { + verifyThat(! 
parameters.contains(param), "cannot have parameter", param, "twice"); + parameters.add(param); + } + + void setInline(boolean value) { + this.inline = value; + } + + void setExpression(String value) { + this.expression = value; + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedRankProfile.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedRankProfile.java new file mode 100644 index 00000000000..64dd8dd0ad4 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedRankProfile.java @@ -0,0 +1,220 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import com.yahoo.schema.OnnxModel; +import com.yahoo.schema.RankProfile; +import com.yahoo.schema.RankProfile.MatchPhaseSettings; +import com.yahoo.schema.RankProfile.MutateOperation; +import com.yahoo.searchlib.rankingexpression.FeatureList; +import com.yahoo.searchlib.rankingexpression.Reference; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +/** + * This class holds the extracted information after parsing a + * rank-profile block in a schema (.sd) file, using simple data + * structures as far as possible. Do not put advanced logic here! 
+ * + * @author arnej27959 + */ +class ParsedRankProfile extends ParsedBlock { + + private boolean ignoreDefaultRankFeatures = false; + private Double rankScoreDropLimit = null; + private Double termwiseLimit = null; + private Double postFilterThreshold = null; + private Double approximateThreshold = null; + private final List<FeatureList> matchFeatures = new ArrayList<>(); + private final List<FeatureList> rankFeatures = new ArrayList<>(); + private final List<FeatureList> summaryFeatures = new ArrayList<>(); + private Integer keepRankCount = null; + private Integer minHitsPerThread = null; + private Integer numSearchPartitions = null; + private Integer numThreadsPerSearch = null; + private Integer reRankCount = null; + private MatchPhaseSettings matchPhaseSettings = null; + private String firstPhaseExpression = null; + private String inheritedSummaryFeatures = null; + private String inheritedMatchFeatures = null; + private String secondPhaseExpression = null; + private Boolean strict = null; + private final List<MutateOperation> mutateOperations = new ArrayList<>(); + private final List<String> inherited = new ArrayList<>(); + private final Map<String, Boolean> fieldsRankFilter = new LinkedHashMap<>(); + private final Map<String, Integer> fieldsRankWeight = new LinkedHashMap<>(); + private final Map<String, ParsedRankFunction> functions = new LinkedHashMap<>(); + private final Map<String, String> fieldsRankType = new LinkedHashMap<>(); + private final Map<String, List<String>> rankProperties = new LinkedHashMap<>(); + private final Map<Reference, RankProfile.Constant> constants = new LinkedHashMap<>(); + private final Map<Reference, RankProfile.Input> inputs = new LinkedHashMap<>(); + private final List<OnnxModel> onnxModels = new ArrayList<>(); + + ParsedRankProfile(String name) { + super(name, "rank-profile"); + } + + boolean getIgnoreDefaultRankFeatures() { return this.ignoreDefaultRankFeatures; } + Optional<Double> getRankScoreDropLimit() { return 
Optional.ofNullable(this.rankScoreDropLimit); } + Optional<Double> getTermwiseLimit() { return Optional.ofNullable(this.termwiseLimit); } + Optional<Double> getPostFilterThreshold() { return Optional.ofNullable(this.postFilterThreshold); } + Optional<Double> getApproximateThreshold() { return Optional.ofNullable(this.approximateThreshold); } + List<FeatureList> getMatchFeatures() { return List.copyOf(this.matchFeatures); } + List<FeatureList> getRankFeatures() { return List.copyOf(this.rankFeatures); } + List<FeatureList> getSummaryFeatures() { return List.copyOf(this.summaryFeatures); } + Optional<Integer> getKeepRankCount() { return Optional.ofNullable(this.keepRankCount); } + Optional<Integer> getMinHitsPerThread() { return Optional.ofNullable(this.minHitsPerThread); } + Optional<Integer> getNumSearchPartitions() { return Optional.ofNullable(this.numSearchPartitions); } + Optional<Integer> getNumThreadsPerSearch() { return Optional.ofNullable(this.numThreadsPerSearch); } + Optional<Integer> getReRankCount() { return Optional.ofNullable(this.reRankCount); } + Optional<MatchPhaseSettings> getMatchPhaseSettings() { return Optional.ofNullable(this.matchPhaseSettings); } + Optional<String> getFirstPhaseExpression() { return Optional.ofNullable(this.firstPhaseExpression); } + Optional<String> getInheritedMatchFeatures() { return Optional.ofNullable(this.inheritedMatchFeatures); } + List<ParsedRankFunction> getFunctions() { return List.copyOf(functions.values()); } + List<MutateOperation> getMutateOperations() { return List.copyOf(mutateOperations); } + List<String> getInherited() { return List.copyOf(inherited); } + + Map<String, Boolean> getFieldsWithRankFilter() { return Collections.unmodifiableMap(fieldsRankFilter); } + Map<String, Integer> getFieldsWithRankWeight() { return Collections.unmodifiableMap(fieldsRankWeight); } + Map<String, String> getFieldsWithRankType() { return Collections.unmodifiableMap(fieldsRankType); } + Map<String, List<String>> 
getRankProperties() { return Collections.unmodifiableMap(rankProperties); } + Map<Reference, RankProfile.Constant> getConstants() { return Collections.unmodifiableMap(constants); } + Map<Reference, RankProfile.Input> getInputs() { return Collections.unmodifiableMap(inputs); } + List<OnnxModel> getOnnxModels() { return List.copyOf(onnxModels); } + + Optional<String> getInheritedSummaryFeatures() { return Optional.ofNullable(this.inheritedSummaryFeatures); } + Optional<String> getSecondPhaseExpression() { return Optional.ofNullable(this.secondPhaseExpression); } + Optional<Boolean> isStrict() { return Optional.ofNullable(this.strict); } + + void addSummaryFeatures(FeatureList features) { this.summaryFeatures.add(features); } + void addMatchFeatures(FeatureList features) { this.matchFeatures.add(features); } + void addRankFeatures(FeatureList features) { this.rankFeatures.add(features); } + + void inherit(String other) { inherited.add(other); } + + void setInheritedSummaryFeatures(String other) { + verifyThat(inheritedSummaryFeatures == null, "already inherits summary-features"); + this.inheritedSummaryFeatures = other; + } + + void add(RankProfile.Constant constant) { + verifyThat(! constants.containsKey(constant.name()), "already has constant", constant.name()); + constants.put(constant.name(), constant); + } + + void addInput(Reference name, RankProfile.Input input) { + verifyThat(! inputs.containsKey(name), "already has input", name); + inputs.put(name, input); + } + + void add(OnnxModel model) { + onnxModels.add(model); + } + + void addFieldRankFilter(String field, boolean filter) { + fieldsRankFilter.put(field, filter); + } + + void addFieldRankType(String field, String type) { + verifyThat(! fieldsRankType.containsKey(field), "already has rank type for field", field); + fieldsRankType.put(field, type); + } + + void addFieldRankWeight(String field, int weight) { + verifyThat(! 
fieldsRankWeight.containsKey(field), "already has weight for field", field); + fieldsRankWeight.put(field, weight); + } + + ParsedRankFunction addOrReplaceFunction(ParsedRankFunction func) { + // allowed with warning + // verifyThat(! functions.containsKey(func.name()), "already has function", func.name()); + return functions.put(func.name(), func); + } + + void addMutateOperation(MutateOperation.Phase phase, String attrName, String operation) { + mutateOperations.add(new MutateOperation(phase, attrName, operation)); + } + + void addRankProperty(String key, String value) { + List<String> values = rankProperties.computeIfAbsent(key, k -> new ArrayList<String>()); + values.add(value); + } + + void setFirstPhaseRanking(String expression) { + verifyThat(firstPhaseExpression == null, "already has first-phase expression"); + this.firstPhaseExpression = expression; + } + + void setIgnoreDefaultRankFeatures(boolean value) { + this.ignoreDefaultRankFeatures = value; + } + + void setInheritedMatchFeatures(String other) { + this.inheritedMatchFeatures = other; + } + + // keepRankCount and reRankCount are separate settings, each with its own guard message + void setKeepRankCount(int count) { + verifyThat(keepRankCount == null, "already has keep-rank-count"); + this.keepRankCount = count; + } + + void setMatchPhaseSettings(MatchPhaseSettings settings) { + verifyThat(matchPhaseSettings == null, "already has match-phase"); + this.matchPhaseSettings = settings; + } + + void setMinHitsPerThread(int minHits) { + verifyThat(minHitsPerThread == null, "already has min-hits-per-thread"); + this.minHitsPerThread = minHits; + } + + void setNumSearchPartitions(int numParts) { + verifyThat(numSearchPartitions == null, "already has num-search-partitions"); + this.numSearchPartitions = numParts; + } + + void setNumThreadsPerSearch(int threads) { + verifyThat(numThreadsPerSearch == null, "already has num-threads-per-search"); + this.numThreadsPerSearch = threads; + } + + void setRankScoreDropLimit(double limit) { + verifyThat(rankScoreDropLimit == null, "already has 
rank-score-drop-limit"); + this.rankScoreDropLimit = limit; + } + + void setRerankCount(int count) { + verifyThat(reRankCount == null, "already has rerank-count"); + this.reRankCount = count; + } + + void setSecondPhaseRanking(String expression) { + verifyThat(secondPhaseExpression == null, "already has second-phase expression"); + this.secondPhaseExpression = expression; + } + + void setStrict(boolean strict) { + verifyThat(this.strict == null, "already has strict"); + this.strict = strict; + } + + void setTermwiseLimit(double limit) { + verifyThat(termwiseLimit == null, "already has termwise-limit"); + this.termwiseLimit = limit; + } + + void setPostFilterThreshold(double threshold) { + verifyThat(postFilterThreshold == null, "already has post-filter-threshold"); + this.postFilterThreshold = threshold; + } + + void setApproximateThreshold(double threshold) { + verifyThat(approximateThreshold == null, "already has approximate-threshold"); + this.approximateThreshold = threshold; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedSchema.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedSchema.java new file mode 100644 index 00000000000..5ee483db044 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedSchema.java @@ -0,0 +1,176 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import com.yahoo.schema.OnnxModel; +import com.yahoo.schema.RankProfile; +import com.yahoo.schema.document.Stemming; +import com.yahoo.searchlib.rankingexpression.Reference; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +/** + * This class holds the extracted information after parsing + * one schema (.sd) file, using simple data structures + * as far as possible. + * + * Do not put complicated logic here! 
+ * + * @author arnej27959 + */ +public class ParsedSchema extends ParsedBlock { + + public static class ImportedField { + public final String asFieldName; + public final String refFieldName; + public final String foreignFieldName; + public ImportedField(String asField, String refField, String foreignField) { + this.asFieldName = asField; + this.refFieldName = refField; + this.foreignFieldName = foreignField; + } + } + + private boolean documentWithoutSchema = false; + private Boolean rawAsBase64 = null; + private ParsedDocument myDocument = null; + private Stemming defaultStemming = null; + private final List<ImportedField> importedFields = new ArrayList<>(); + private final List<OnnxModel> onnxModels = new ArrayList<>(); + private final Map<Reference, RankProfile.Constant> constants = new LinkedHashMap<>(); + private final List<String> inherited = new ArrayList<>(); + private final List<String> inheritedByDocument = new ArrayList<>(); + private final Map<String, ParsedSchema> resolvedInherits = new LinkedHashMap<>(); + private final Map<String, ParsedSchema> allResolvedInherits = new LinkedHashMap<>(); + private final Map<String, ParsedAnnotation> extraAnnotations = new LinkedHashMap<>(); + private final Map<String, ParsedDocumentSummary> docSums = new LinkedHashMap<>(); + private final Map<String, ParsedField> extraFields = new LinkedHashMap<>(); + private final Map<String, ParsedFieldSet> fieldSets = new LinkedHashMap<>(); + private final Map<String, ParsedIndex> extraIndexes = new LinkedHashMap<>(); + private final Map<String, ParsedRankProfile> rankProfiles = new LinkedHashMap<>(); + private final Map<String, ParsedStruct> extraStructs = new LinkedHashMap<>(); + + public ParsedSchema(String name) { + super(name, "schema"); + } + + boolean getDocumentWithoutSchema() { return documentWithoutSchema; } + Optional<Boolean> getRawAsBase64() { return Optional.ofNullable(rawAsBase64); } + boolean hasDocument() { return myDocument != null; } + ParsedDocument 
getDocument() { return myDocument; } + boolean hasStemming() { return defaultStemming != null; } + Stemming getStemming() { return defaultStemming; } + List<ImportedField> getImportedFields() { return List.copyOf(importedFields); } + List<OnnxModel> getOnnxModels() { return List.copyOf(onnxModels); } + List<ParsedAnnotation> getAnnotations() { return List.copyOf(extraAnnotations.values()); } + List<ParsedDocumentSummary> getDocumentSummaries() { return List.copyOf(docSums.values()); } + List<ParsedField> getFields() { return List.copyOf(extraFields.values()); } + List<ParsedFieldSet> getFieldSets() { return List.copyOf(fieldSets.values()); } + List<ParsedIndex> getIndexes() { return List.copyOf(extraIndexes.values()); } + List<ParsedStruct> getStructs() { return List.copyOf(extraStructs.values()); } + List<String> getInherited() { return List.copyOf(inherited); } + List<String> getInheritedByDocument() { return List.copyOf(inheritedByDocument); } + List<ParsedRankProfile> getRankProfiles() { return List.copyOf(rankProfiles.values()); } + List<ParsedSchema> getResolvedInherits() { return List.copyOf(resolvedInherits.values()); } + List<ParsedSchema> getAllResolvedInherits() { return List.copyOf(allResolvedInherits.values()); } + List<RankProfile.Constant> getConstants() { return List.copyOf(constants.values()); } + + void addAnnotation(ParsedAnnotation annotation) { + String annName = annotation.name(); + verifyThat(! extraAnnotations.containsKey(annName), "already has annotation", annName); + extraAnnotations.put(annName, annotation); + } + + void addDocument(ParsedDocument document) { + verifyThat(myDocument == null, + "already has", myDocument, "so cannot add", document); + // TODO - disallow? 
+ // verifyThat(name().equals(document.name()), + // "schema " + name() + " can only contain document named " + name() + ", was: "+ document.name()); + this.myDocument = document; + } + + void setDocumentWithoutSchema() { this.documentWithoutSchema = true; } + + void addDocumentSummary(ParsedDocumentSummary docsum) { + String dsName = docsum.name(); + verifyThat(! docSums.containsKey(dsName), "already has document-summary", dsName); + docSums.put(dsName, docsum); + } + + void addField(ParsedField field) { + String fieldName = field.name(); + verifyThat(! extraFields.containsKey(fieldName), "already has field", fieldName); + extraFields.put(fieldName, field); + } + + void addFieldSet(ParsedFieldSet fieldSet) { + String fsName = fieldSet.name(); + verifyThat(! fieldSets.containsKey(fsName), "already has fieldset", fsName); + fieldSets.put(fsName, fieldSet); + } + + void addImportedField(String asFieldName, String refFieldName, String foregnFieldName) { + importedFields.add(new ImportedField(asFieldName, refFieldName, foregnFieldName)); + } + + void addIndex(ParsedIndex index) { + String idxName = index.name(); + verifyThat(! extraIndexes.containsKey(idxName), "already has index", idxName); + extraIndexes.put(idxName, index); + } + + void add(OnnxModel model) { + onnxModels.add(model); + } + + void addRankProfile(ParsedRankProfile profile) { + String rpName = profile.name(); + verifyThat(! rankProfiles.containsKey(rpName), "already has rank-profile", rpName); + rankProfiles.put(rpName, profile); + } + + void add(RankProfile.Constant constant) { + constants.put(constant.name(), constant); + } + + void addStruct(ParsedStruct struct) { + String sName = struct.name(); + verifyThat(! 
extraStructs.containsKey(sName), "already has struct", sName); + extraStructs.put(sName, struct); + } + + void enableRawAsBase64(boolean value) { + this.rawAsBase64 = value; + } + + void inherit(String other) { inherited.add(other); } + + void inheritByDocument(String other) { inheritedByDocument.add(other); } + + void setStemming(Stemming value) { + verifyThat((defaultStemming == null) || (defaultStemming == value), + "already has stemming", defaultStemming, "cannot also set", value); + defaultStemming = value; + } + + void resolveInherit(String name, ParsedSchema parsed) { + verifyThat(inherited.contains(name), "resolveInherit for non-inherited name", name); + verifyThat(name.equals(parsed.name()), "resolveInherit name mismatch for", name); + verifyThat(! resolvedInherits.containsKey(name), "double resolveInherit for", name); + resolvedInherits.put(name, parsed); + var old = allResolvedInherits.put("schema " + name, parsed); + verifyThat(old == null || old == parsed, "conflicting resolveInherit for", name); + } + + void resolveInheritByDocument(String name, ParsedSchema parsed) { + verifyThat(inheritedByDocument.contains(name), + "resolveInheritByDocument for non-inherited name", name); + var old = allResolvedInherits.put("document " + name, parsed); + verifyThat(old == null || old == parsed, "conflicting resolveInheritByDocument for", name); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedSorting.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedSorting.java new file mode 100644 index 00000000000..af84bbbb5bd --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedSorting.java @@ -0,0 +1,48 @@ + +package com.yahoo.schema.parser; + +import com.yahoo.schema.document.Sorting.Function; +import com.yahoo.schema.document.Sorting.Strength; + +import java.util.Optional; + +/** + * This class holds the extracted information after parsing a "sorting" + * block, using simple data structures as far as 
possible. Do not put + * advanced logic here! + * @author arnej27959 + **/ +class ParsedSorting extends ParsedBlock { + + private boolean ascending = true; + private Function sortFunction = null; + private Strength sortStrength = null; + private String sortLocale = null; + + ParsedSorting(String blockName, String blockType) { + super(blockName, blockType); + } + + boolean getAscending() { return this.ascending; } + boolean getDescending() { return ! this.ascending; } + Optional<Function> getFunction() { return Optional.ofNullable(sortFunction); } + Optional<Strength> getStrength() { return Optional.ofNullable(sortStrength); } + Optional<String> getLocale() { return Optional.ofNullable(sortLocale); } + + void setAscending() { this.ascending = true; } + + void setDescending() { this.ascending = false; } + + void setLocale(String value) { + verifyThat(sortLocale == null, "sorting already has locale", sortLocale); + this.sortLocale = value; + } + void setFunction(Function value) { + verifyThat(sortFunction == null, "sorting already has function", sortFunction); + this.sortFunction = value; + } + void setStrength(Strength value) { + verifyThat(sortStrength == null, "sorting already has strength", sortStrength); + this.sortStrength = value; + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedStruct.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedStruct.java new file mode 100644 index 00000000000..abe14b3689f --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedStruct.java @@ -0,0 +1,60 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * This class holds the extracted information after parsing a "struct" + * block, using simple data structures as far as possible. Do not put + * advanced logic here! 
+ * @author arnej27959 + **/ +public class ParsedStruct extends ParsedBlock { + private final List<String> inherited = new ArrayList<>(); + private final List<ParsedStruct> resolvedInherits = new ArrayList<>(); + private final Map<String, ParsedField> fields = new LinkedHashMap<>(); + private final ParsedType asParsedType; + private ParsedDocument ownedBy = null; + + public ParsedStruct(String name) { + super(name, "struct"); + this.asParsedType = ParsedType.fromName(name); + asParsedType.setVariant(ParsedType.Variant.STRUCT); + } + + List<ParsedField> getFields() { return List.copyOf(fields.values()); } + List<String> getInherited() { return List.copyOf(inherited); } + ParsedDocument getOwnerDoc() { return ownedBy; } + String getOwnerName() { return ownedBy.name(); } + List<ParsedStruct> getResolvedInherits() { + assert(inherited.size() == resolvedInherits.size()); + return List.copyOf(resolvedInherits); + } + + void addField(ParsedField field) { + String fieldName = field.name(); + verifyThat(! fields.containsKey(fieldName), "already has field", fieldName); + fields.put(fieldName, field); + } + + void inherit(String other) { + verifyThat(! 
name().equals(other), "cannot inherit from itself"); + inherited.add(other); + } + + void tagOwner(ParsedDocument document) { + verifyThat(ownedBy == null, "already owned by document "+ownedBy); + this.ownedBy = document; + } + + void resolveInherit(String name, ParsedStruct parsed) { + verifyThat(inherited.contains(name), "resolveInherit for non-inherited name", name); + verifyThat(name.equals(parsed.name()), "resolveInherit name mismatch for", name); + resolvedInherits.add(parsed); + } + +} + diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedSummaryField.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedSummaryField.java new file mode 100644 index 00000000000..38ee52c9d06 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedSummaryField.java @@ -0,0 +1,51 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import java.util.ArrayList; +import java.util.List; + +/** + * This class holds the extracted information after parsing a summary + * field declaration, either from "field" inside "document-summary" or + * "summary" inside "field". Using simple data structures as far as + * possible. Do not put advanced logic here! 
+ * @author arnej27959 + **/ +class ParsedSummaryField extends ParsedBlock { + + private ParsedType type; + private boolean isDyn = false; + private boolean isMEO = false; + private boolean isFull = false; + private boolean isBold = false; + private final List<String> sources = new ArrayList<>(); + private final List<String> destinations = new ArrayList<>(); + + ParsedSummaryField(String name) { + this(name, null); + } + + ParsedSummaryField(String name, ParsedType type) { + super(name, "summary field"); + this.type = type; + } + + ParsedType getType() { return type; } + List<String> getDestinations() { return List.copyOf(destinations); } + List<String> getSources() { return List.copyOf(sources); } + boolean getBolded() { return isBold; } + boolean getDynamic() { return isDyn; } + boolean getFull() { return isFull; } + boolean getMatchedElementsOnly() { return isMEO; } + + void addDestination(String dst) { destinations.add(dst); } + void addSource(String src) { sources.add(src); } + void setBold(boolean value) { this.isBold = value; } + void setDynamic() { this.isDyn = true; } + void setFull() { this.isFull = true; } + void setMatchedElementsOnly() { this.isMEO = true; } + void setType(ParsedType value) { + verifyThat(type == null, "Cannot change type from ", type, "to", value); + this.type = value; + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedType.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedType.java new file mode 100644 index 00000000000..9c3206a333a --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedType.java @@ -0,0 +1,224 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import com.yahoo.tensor.TensorType; + +/** + * This class holds the extracted information after parsing a type + * declaration (typically for a field). 
Since types can be complex, + * struct names (known or unknown), or even document names, this class + * is somewhat complicated. + * @author arnej27959 + **/ +class ParsedType { + public enum Variant { + NONE, + BUILTIN, + POSITION, + TENSOR, + ARRAY, WSET, MAP, + DOC_REFERENCE, + ANN_REFERENCE, + STRUCT, + DOCUMENT, + UNKNOWN + } + + private final String name; + private final ParsedType keyType; + private final ParsedType valType; + private final TensorType tensorType; + private Variant variant; + private boolean createIfNonExistent = false; + private boolean removeIfZero = false; + + public String toString() { + var buf = new StringBuilder(); + buf.append("[type ").append(variant).append("] {"); + switch (variant) { + case NONE: + break; + case BUILTIN: + buf.append(name); + break; + case POSITION: + buf.append(name); + break; + case TENSOR: + buf.append(tensorType.toString()); + break; + case ARRAY: buf + .append(" array<") + .append(valType.toString()) + .append("> "); + break; + case WSET: buf + .append(" weightedset<") + .append(valType.toString()) + .append(">"); + if (createIfNonExistent) buf.append(",createIfNonExistent"); + if (removeIfZero) buf.append(",removeIfZero"); + buf.append(" "); + break; + case MAP: buf + .append(" map<") + .append(keyType.toString()) + .append(",") + .append(valType.toString()) + .append("> "); + break; + case DOC_REFERENCE: buf + .append(" reference<") + .append(valType.toString()) + .append("> "); + break; + // print the stored name here; appending toString() of this same object would never terminate + case ANN_REFERENCE: buf + .append(" ") + .append(name) + .append(" "); + break; + case STRUCT: + case DOCUMENT: + case UNKNOWN: + buf.append(" ").append(name).append(" "); + break; + } + buf.append("}"); + return buf.toString(); + } + + private static Variant guessVariant(String name) { + switch (name) { + case "bool": return Variant.BUILTIN; + case "byte": return Variant.BUILTIN; + case "int": return Variant.BUILTIN; + case "long": return Variant.BUILTIN; + case "string": return Variant.BUILTIN; + case "float": 
return Variant.BUILTIN; + case "double": return Variant.BUILTIN; + case "uri": return Variant.BUILTIN; + case "predicate": return Variant.BUILTIN; + case "raw": return Variant.BUILTIN; + case "tag": return Variant.BUILTIN; + case "position": return Variant.POSITION; + case "float16": return Variant.BUILTIN; + } + return Variant.UNKNOWN; + } + + public String name() { return name; } + public Variant getVariant() { return variant; } + public ParsedType mapKeyType() { assert(variant == Variant.MAP); return keyType; } + public ParsedType mapValueType() { assert(variant == Variant.MAP); return valType; } + public ParsedType nestedType() { assert(variant == Variant.ARRAY || variant == Variant.WSET); assert(valType != null); return valType; } + public boolean getCreateIfNonExistent() { assert(variant == Variant.WSET); return this.createIfNonExistent; } + public boolean getRemoveIfZero() { assert(variant == Variant.WSET); return this.removeIfZero; } + public ParsedType getReferencedDocumentType() { assert(variant == Variant.DOC_REFERENCE); return valType; } + public TensorType getTensorType() { assert(variant == Variant.TENSOR); return tensorType; } + + public String getNameOfReferencedAnnotation() { + assert(variant == Variant.ANN_REFERENCE); + String prefix = "annotationreference<"; + int fromPos = prefix.length(); + int toPos = name.length() - 1; + return name.substring(fromPos, toPos); + } + + private ParsedType(String name, Variant variant) { + this(name, variant, null, null, null); + } + private ParsedType(String name, Variant variant, ParsedType vt) { + this(name, variant, null, vt, null); + } + private ParsedType(String name, Variant variant, ParsedType kt, ParsedType vt) { + this(name, variant, kt, vt, null); + } + private ParsedType(String name, Variant variant, ParsedType kt, ParsedType vt, TensorType tType) { + this.name = name; + this.variant = variant; + this.keyType = kt; + this.valType = vt; + this.tensorType = tType; + } + + static ParsedType 
mapType(ParsedType kt, ParsedType vt) { + assert(kt != null); + assert(vt != null); + String name = "map<" + kt.name() + "," + vt.name() + ">"; + return new ParsedType(name, Variant.MAP, kt, vt); + } + static ParsedType arrayOf(ParsedType vt) { + assert(vt != null); + return new ParsedType("array<" + vt.name() + ">", Variant.ARRAY, vt); + } + static ParsedType wsetOf(ParsedType vt) { + assert(vt != null); + if (vt.getVariant() != Variant.BUILTIN) { + throw new IllegalArgumentException("weightedset of complex type '" + vt + "' is not supported"); + } + switch (vt.name()) { + // allowed types: + case "bool": + case "byte": + case "int": + case "long": + case "string": + case "uri": + break; + case "predicate": + case "raw": + case "tag": + throw new IllegalArgumentException("weightedset of complex type '" + vt + "' is not supported"); + case "float16": + case "float": + case "double": + /* TODO Vespa 8: + throw new IllegalArgumentException("weightedset of inexact type '" + vt + "' is not supported"); + */ + break; + default: + throw new IllegalArgumentException("weightedset of unknown type '" + vt + "' is not supported"); + } + return new ParsedType("weightedset<" + vt.name() + ">", Variant.WSET, vt); + } + static ParsedType documentRef(ParsedType docType) { + assert(docType != null); + return new ParsedType("reference<" + docType.name + ">", Variant.DOC_REFERENCE, docType); + } + static ParsedType annotationRef(String name) { + return new ParsedType("annotationreference<" + name + ">", Variant.ANN_REFERENCE); + } + static ParsedType tensorType(TensorType tType) { + assert(tType != null); + return new ParsedType(tType.toString(), Variant.TENSOR, null, null, tType); + } + static ParsedType fromName(String name) { + return new ParsedType(name, guessVariant(name)); + } + static ParsedType documentType(String name) { + return new ParsedType(name, Variant.DOCUMENT); + } + + void setCreateIfNonExistent(boolean value) { + if (variant != Variant.WSET) { + throw new 
IllegalArgumentException("CreateIfNonExistent only valid for weightedset, not " + variant); + } + this.createIfNonExistent = value; + } + + void setRemoveIfZero(boolean value) { + if (variant != Variant.WSET) { + throw new IllegalArgumentException("RemoveIfZero only valid for weightedset, not " + variant); + } + this.removeIfZero = value; + } + + void setVariant(Variant value) { + if (variant == value) return; // already OK + if (variant != Variant.UNKNOWN) { + throw new IllegalArgumentException("setVariant(" + value + ") only valid for UNKNOWN, not: " + variant); + } + // maybe even more checking would be useful + this.variant = value; + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/SimpleCharStream.java b/config-model/src/main/java/com/yahoo/schema/parser/SimpleCharStream.java new file mode 100644 index 00000000000..0a53e0477ac --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/SimpleCharStream.java @@ -0,0 +1,16 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import com.yahoo.javacc.FastCharStream; + +/** + * @author Simon Thoresen Hult + */ +public class SimpleCharStream extends FastCharStream implements com.yahoo.schema.parser.CharStream, + com.yahoo.vespa.indexinglanguage.parser.CharStream { + + public SimpleCharStream(String input) { + super(input); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/Utils.java b/config-model/src/main/java/com/yahoo/schema/parser/Utils.java new file mode 100644 index 00000000000..cdb299c92df --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/Utils.java @@ -0,0 +1,15 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
/**
 * Static string helpers.
 *
 * @author bjorncs
 */
class Utils {

    private Utils() {}

    // Separate class since javacc does not accept Java code using lambdas
    /** Returns the number of times ch occurs in str. */
    static int count(String str, char ch) {
        long occurrences = str.chars()
                              .filter(codeUnit -> codeUnit == ch)
                              .count();
        return (int) occurrences;
    }
}
+ * + * @author bjorncs + */ +public class AddAttributeTransformToSummaryOfImportedFields extends Processor { + + public AddAttributeTransformToSummaryOfImportedFields(Schema schema, + DeployLogger deployLogger, + RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + schema.allImportedFields() + .forEach(field -> setTransform(field)); + } + + private Stream<SummaryField> getSummaryFieldsForImportedField(ImmutableSDField importedField) { + return schema.getSummaryFields(importedField).stream(); + } + + private void setTransform(ImmutableSDField field) { + if (field instanceof ImmutableImportedComplexSDField) { + getSummaryFieldsForImportedField(field).forEach(AddAttributeTransformToSummaryOfImportedFields::setAttributeCombinerTransform); + } else { + getSummaryFieldsForImportedField(field).forEach(AddAttributeTransformToSummaryOfImportedFields::setAttributeTransform); + } + } + + private static void setAttributeTransform(SummaryField summaryField) { + if (summaryField.getTransform() == SummaryTransform.NONE) { + summaryField.setTransform(SummaryTransform.ATTRIBUTE); + } + } + + private static void setAttributeCombinerTransform(SummaryField summaryField) { + if (summaryField.getTransform() == SummaryTransform.MATCHED_ELEMENTS_FILTER) { + summaryField.setTransform(SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER); + } else { + summaryField.setTransform(SummaryTransform.ATTRIBUTECOMBINER); + } + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/AddExtraFieldsToDocument.java b/config-model/src/main/java/com/yahoo/schema/processing/AddExtraFieldsToDocument.java new file mode 100644 index 00000000000..ca81301da73 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/AddExtraFieldsToDocument.java @@ -0,0 +1,90 @@ +// Copyright Yahoo. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.Field; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.schema.document.SDDocumentType; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * This processor creates a {@link com.yahoo.schema.document.SDDocumentType} for each {@link Schema} + * object which holds all the data that search + * associates with a document described in a search definition file. This includes all extra fields, summary fields and + * implicit fields. All non-indexed and non-summary fields are discarded. + */ +public class AddExtraFieldsToDocument extends Processor { + + AddExtraFieldsToDocument(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + SDDocumentType document = schema.getDocument(); + if (document != null) { + for (SDField field : schema.extraFieldList()) { + addSdField(schema, document, field, validate); + } + for (var docsum : schema.getSummaries().values()) { + for (var summaryField : docsum.getSummaryFields().values()) { + switch (summaryField.getTransform()) { + case NONE: + case BOLDED: + case DYNAMICBOLDED: + case DYNAMICTEASER: + case TEXTEXTRACTOR: + addSummaryField(schema, document, summaryField, validate); + break; + default: + // skip: generated from attribute or similar, + // so does not need to be included as an extra + // field in the document type + } + } + } + } + } + + private void 
addSdField(Schema schema, SDDocumentType document, SDField field, boolean validate) { + if (! field.hasIndex() && field.getAttributes().isEmpty()) { + return; + } + for (Attribute atr : field.getAttributes().values()) { + if (!atr.getName().equals(field.getName())) { + addField(schema, document, new SDField(document, atr.getName(), atr.getDataType()), validate); + } + } + addField(schema, document, field, validate); + } + + private void addSummaryField(Schema schema, SDDocumentType document, SummaryField field, boolean validate) { + Field docField = document.getField(field.getName()); + if (docField == null) { + ImmutableSDField existingField = schema.getField(field.getName()); + if (existingField == null) { + SDField newField = new SDField(document, field.getName(), field.getDataType()); + newField.setIsExtraField(true); + document.addField(newField); + } else if (!existingField.isImportedField()) { + document.addField(existingField.asField()); + } + } else if (!docField.getDataType().equals(field.getDataType())) { + if (validate) + throw newProcessException(schema, field, "Summary field has conflicting type."); + } + } + + private void addField(Schema schema, SDDocumentType document, Field field, boolean validate) { + if (document.getField(field.getName()) != null && !(document.getField(field.getName()) == field)) { + if (validate) + throw newProcessException(schema, field, "Field shadows another."); + } + document.addField(field); + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/AdjustPositionSummaryFields.java b/config-model/src/main/java/com/yahoo/schema/processing/AdjustPositionSummaryFields.java new file mode 100644 index 00000000000..6c2d62f37cb --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/AdjustPositionSummaryFields.java @@ -0,0 +1,135 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.config.model.api.ModelContext; +import com.yahoo.document.ArrayDataType; +import com.yahoo.document.DataType; +import com.yahoo.document.PositionDataType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.GeoPos; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryField.Source; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/* + * Adjusts position summary fields by adding derived summary fields (.distance and .position) and setting summary + * transform and source. + */ +public class AdjustPositionSummaryFields extends Processor { + + public AdjustPositionSummaryFields(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + private boolean useV8GeoPositions = false; + + @Override + public void process(boolean validate, boolean documentsOnly, ModelContext.Properties properties) { + this.useV8GeoPositions = properties.featureFlags().useV8GeoPositions(); + process(validate, documentsOnly); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (DocumentSummary summary : schema.getSummaries().values()) { + scanSummary(summary); + } + } + + private void scanSummary(DocumentSummary summary) { + for (SummaryField summaryField : summary.getSummaryFields().values()) { + if ( ! 
GeoPos.isAnyPos(summaryField.getDataType())) continue; + + String originalSource = summaryField.getSingleSource(); + if (originalSource.indexOf('.') == -1) { // Eliminate summary fields with pos.x or pos.y as source + ImmutableSDField sourceField = schema.getField(originalSource); + if (sourceField != null) { + String zCurve = null; + if (sourceField.getDataType().equals(summaryField.getDataType())) { + zCurve = PositionDataType.getZCurveFieldName(originalSource); + } else if (sourceField.getDataType().equals(makeZCurveDataType(summaryField.getDataType())) && + hasZCurveSuffix(originalSource)) { + zCurve = originalSource; + } + if (zCurve != null) { + if (hasPositionAttribute(zCurve)) { + Source source = new Source(zCurve); + adjustPositionField(summary, summaryField, source); + } else if (sourceField.isImportedField() || !summaryField.getName().equals(originalSource)) { + fail(summaryField, "No position attribute '" + zCurve + "'"); + } + } + } + } + } + } + + private void adjustPositionField(DocumentSummary summary, SummaryField summaryField, Source source) { + summaryField.setTransform(SummaryTransform.GEOPOS); + summaryField.getSources().clear(); + summaryField.addSource(source); + ensureSummaryField(summary, + PositionDataType.getPositionSummaryFieldName(summaryField.getName()), + DataType.getArray(DataType.STRING), + source, + SummaryTransform.POSITIONS); + ensureSummaryField(summary, + PositionDataType.getDistanceSummaryFieldName(summaryField.getName()), + DataType.INT, + source, + SummaryTransform.DISTANCE); + } + + private void ensureSummaryField(DocumentSummary summary, String fieldName, DataType dataType, Source source, SummaryTransform transform) { + SummaryField oldField = schema.getSummaryField(fieldName); + if (oldField == null) { + if (useV8GeoPositions) return; + SummaryField newField = new SummaryField(fieldName, dataType, transform); + newField.addSource(source); + summary.add(newField); + return; + } + if 
(!oldField.getDataType().equals(dataType)) { + fail(oldField, "exists with type '" + oldField.getDataType().toString() + "', should be of type '" + dataType.toString() + "'"); + } + if (oldField.getTransform() != transform) { + fail(oldField, "has summary transform '" + oldField.getTransform().toString() + "', should have transform '" + transform.toString() + "'"); + } + if (oldField.getSourceCount() != 1 || !oldField.getSingleSource().equals(source.getName())) { + fail(oldField, "has source '" + oldField.getSources().toString() + "', should have source '" + source + "'"); + } + if (useV8GeoPositions) return; + summary.add(oldField); + } + + private boolean hasPositionAttribute(String name) { + Attribute attribute = schema.getAttribute(name); + if (attribute == null) { + ImmutableSDField field = schema.getField(name); + if (field != null && field.isImportedField()) { + attribute = field.getAttribute(); + } + } + return attribute != null && attribute.isPosition(); + } + + private static boolean hasZCurveSuffix(String name) { + String suffix = PositionDataType.getZCurveFieldName(""); + return name.length() > suffix.length() && name.substring(name.length() - suffix.length()).equals(suffix); + } + + private static DataType makeZCurveDataType(DataType dataType) { + return dataType instanceof ArrayDataType ? DataType.getArray(DataType.LONG) : DataType.LONG; + } + + private void fail(SummaryField summaryField, String msg) { + throw newProcessException(schema.getName(), summaryField.getName(), msg); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/AttributeProperties.java b/config-model/src/main/java/com/yahoo/schema/processing/AttributeProperties.java new file mode 100644 index 00000000000..6c7dbaecbfb --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/AttributeProperties.java @@ -0,0 +1,72 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Checks that attribute properties only are set for attributes that have data (are created by an indexing statement). + * + * @author hmusum + */ +public class AttributeProperties extends Processor { + + public AttributeProperties(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (ImmutableSDField field : schema.allConcreteFields()) { + String fieldName = field.getName(); + + // For each attribute, check if the attribute has been created + // by an indexing statement. + for (Attribute attribute : field.getAttributes().values()) { + if (attributeCreated(field, attribute.getName())) { + continue; + } + // Check other fields or statements that may have created this attribute. + boolean created = false; + for (SDField f : schema.allConcreteFields()) { + // Checking against the field we are looking at + if (!f.getName().equals(fieldName)) { + if (attributeCreated(f, attribute.getName())) { + created = true; + break; + } + } + } + if (validate && !created) { + throw new IllegalArgumentException("Attribute '" + attribute.getName() + "' in field '" + + field.getName() + "' is not created by the indexing statement"); + } + } + } + } + + /** + * Checks if the attribute has been created bye an indexing statement in this field. 
+ * + * @param field a searchdefinition field + * @param attributeName name of the attribute + * @return true if the attribute has been created by this field, else false + */ + static boolean attributeCreated(ImmutableSDField field, String attributeName) { + if ( ! field.doesAttributing()) { + return false; + } + for (Attribute attribute : field.getAttributes().values()) { + if (attribute.getName().equals(attributeName)) { + return true; + } + } + return false; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/AttributesImplicitWord.java b/config-model/src/main/java/com/yahoo/schema/processing/AttributesImplicitWord.java new file mode 100644 index 00000000000..415f23f2786 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/AttributesImplicitWord.java @@ -0,0 +1,57 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.DataType; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.schema.document.MatchType; +import com.yahoo.document.NumericDataType; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Fields that derive to attribute(s) and no indices should use the WORD indexing form, + * in a feeble attempt to match the most peoples expectations as closely as possible. 
+ * + * @author Vegard Havdal + */ +public class AttributesImplicitWord extends Processor { + + public AttributesImplicitWord(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (ImmutableSDField field : schema.allConcreteFields()) { + processFieldRecursive(field); + } + } + + private void processFieldRecursive(ImmutableSDField field) { + processField(field); + for (ImmutableSDField structField : field.getStructFields()) { + processFieldRecursive(structField); + } + } + + private void processField(ImmutableSDField field) { + if (fieldImplicitlyWordMatch(field)) { + field.getMatching().setType(MatchType.WORD); + } + } + + private boolean fieldImplicitlyWordMatch(ImmutableSDField field) { + // numeric types should not trigger exact-match query parsing + DataType dt = field.getDataType().getPrimitiveType(); + if (dt != null && dt instanceof NumericDataType) { + return false; + } + return (! field.hasIndex() + && !field.getAttributes().isEmpty() + && field.getIndices().isEmpty() + && !field.getMatching().isTypeUserSet()); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/Bolding.java b/config-model/src/main/java/com/yahoo/schema/processing/Bolding.java new file mode 100644 index 00000000000..53a3d462d54 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/Bolding.java @@ -0,0 +1,47 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.DataType; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Checks that bolding or dynamic summary is turned on only for text fields. Throws exception if it is turned on for any + * other fields (otherwise will cause indexing failure) + * + * @author hmusum + */ +public class Bolding extends Processor { + + public Bolding(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + for (ImmutableSDField field : schema.allConcreteFields()) { + for (SummaryField summary : field.getSummaryFields().values()) { + if (summary.getTransform().isBolded() && + !((summary.getDataType() == DataType.STRING) || (summary.getDataType() == DataType.URI))) + { + throw new IllegalArgumentException("'bolding: on' for non-text field " + + "'" + field.getName() + "'" + + " (" + summary.getDataType() + ")" + + " is not allowed"); + } else if (summary.getTransform().isDynamic() && + !((summary.getDataType() == DataType.STRING) || (summary.getDataType() == DataType.URI))) + { + throw new IllegalArgumentException("'summary: dynamic' for non-text field " + + "'" + field.getName() + "'" + + " (" + summary.getDataType() + ")" + + " is not allowed"); + } + } + } + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/BoolAttributeValidator.java b/config-model/src/main/java/com/yahoo/schema/processing/BoolAttributeValidator.java new file mode 100644 index 00000000000..bdb1eed4b10 --- /dev/null +++ 
b/config-model/src/main/java/com/yahoo/schema/processing/BoolAttributeValidator.java @@ -0,0 +1,36 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Validates attribute fields using bool type, ensuring the collection type is supported. + * + * Currently, only the single value bool type is supported. + * + * @author geirst + */ +public class BoolAttributeValidator extends Processor { + + public BoolAttributeValidator(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (var field : schema.allConcreteFields()) { + var attribute = field.getAttribute(); + if (attribute == null) { + continue; + } + if (attribute.getType().equals(Attribute.Type.BOOL) && + !attribute.getCollectionType().equals(Attribute.CollectionType.SINGLE)) { + fail(schema, field, "Only single value bool attribute fields are supported"); + } + } + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/BuiltInFieldSets.java b/config-model/src/main/java/com/yahoo/schema/processing/BuiltInFieldSets.java new file mode 100644 index 00000000000..514cbf225fd --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/BuiltInFieldSets.java @@ -0,0 +1,52 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.DocumentType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.Field; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Adds field sets for 1) fields defined inside document type 2) fields inside search but outside document + * + * @author Vegard Havdal + */ +public class BuiltInFieldSets extends Processor { + + public static final String SEARCH_FIELDSET_NAME = "[search]"; // Public due to oddities in position handling. + public static final String INTERNAL_FIELDSET_NAME = "[internal]"; // This one populated from misc places + + public BuiltInFieldSets(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + addDocumentFieldSet(); + addSearchFieldSet(); + // "Hook" the field sets on search onto the document types, since we will include them + // on the document configs + schema.getDocument().setFieldSets(schema.fieldSets()); + } + + private void addSearchFieldSet() { + for (SDField searchField : schema.extraFieldList()) { + schema.fieldSets().addBuiltInFieldSetItem(SEARCH_FIELDSET_NAME, searchField.getName()); + } + } + + private void addDocumentFieldSet() { + for (Field docField : schema.getDocument().fieldSet()) { + if (docField instanceof SDField && ((SDField) docField).isExtraField()) { + continue; // skip + } + schema.fieldSets().addBuiltInFieldSetItem(DocumentType.DOCUMENT, docField.getName()); + } + } + + + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/CreatePositionZCurve.java b/config-model/src/main/java/com/yahoo/schema/processing/CreatePositionZCurve.java new file mode 100644 index 
00000000000..5bb5079fab6 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/CreatePositionZCurve.java @@ -0,0 +1,216 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.config.model.api.ModelContext; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.ArrayDataType; +import com.yahoo.document.DataType; +import com.yahoo.document.PositionDataType; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.GeoPos; +import com.yahoo.schema.document.SDDocumentType; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.indexinglanguage.ExpressionConverter; +import com.yahoo.vespa.indexinglanguage.expressions.AttributeExpression; +import com.yahoo.vespa.indexinglanguage.expressions.Expression; +import com.yahoo.vespa.indexinglanguage.expressions.ForEachExpression; +import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; +import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression; +import com.yahoo.vespa.indexinglanguage.expressions.SummaryExpression; +import com.yahoo.vespa.indexinglanguage.expressions.ZCurveExpression; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; +import java.util.logging.Level; + +/** + * Adds a "fieldName_zcurve" long attribute and "fieldName.distance" and "FieldName.position" summary fields to all position type fields. 
/**
 * Adds a "fieldName_zcurve" long attribute and "fieldName.distance" and "fieldName.position" summary fields to all position type fields.
 * The distance and position summary fields are only added when V8 geo positions are not in use.
 *
 * @author bratseth
 */
public class CreatePositionZCurve extends Processor {

    /** The document type of the schema, passed as owner to the zcurve fields created here */
    private final SDDocumentType repo;

    public CreatePositionZCurve(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) {
        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
        this.repo = schema.getDocument();
    }

    /** Set from the feature flags by the three-argument process method; false until then */
    private boolean useV8GeoPositions = false;

    /** Reads the V8 geo positions feature flag, then runs the regular processing. */
    @Override
    public void process(boolean validate, boolean documentsOnly, ModelContext.Properties properties) {
        this.useV8GeoPositions = properties.featureFlags().useV8GeoPositions();
        process(validate, documentsOnly);
    }

    /**
     * For each concrete position field which does attributing: replaces its own attribute by a zcurve
     * extra field, rewires its summary fields to read from the zcurve field and clears its indexing scripts.
     * The statements below mutate the field step by step and depend on running in this order.
     */
    @Override
    public void process(boolean validate, boolean documentsOnly) {
        for (SDField field : schema.allConcreteFields()) {
            DataType fieldType = field.getDataType();
            if ( ! isSupportedPositionType(fieldType)) continue;

            if (validate && field.doesIndexing()) {
                fail(schema, field, "Indexing of data type '" + fieldType.getName() + "' is not supported, " +
                                    "replace 'index' statement with 'attribute'.");
            }

            if ( ! field.doesAttributing()) continue;

            // Must be read before the indexing script is cleared further down
            boolean doesSummary = field.doesSummarying();

            String fieldName = field.getName();
            field.getAttributes().remove(fieldName);

            String zName = PositionDataType.getZCurveFieldName(fieldName);
            SDField zCurveField = createZCurveField(field, zName, validate);
            schema.addExtraField(zCurveField);
            schema.fieldSets().addBuiltInFieldSetItem(BuiltInFieldSets.INTERNAL_FIELDSET_NAME, zCurveField.getName());

            // configure summary
            Collection<String> summaryTo = removeSummaryTo(field);
            if (! useV8GeoPositions) {
                ensureCompatibleSummary(field, zName,
                                        PositionDataType.getPositionSummaryFieldName(fieldName),
                                        DataType.getArray(DataType.STRING), // will become "xmlstring"
                                        SummaryTransform.POSITIONS, summaryTo, validate);
                ensureCompatibleSummary(field, zName,
                                        PositionDataType.getDistanceSummaryFieldName(fieldName),
                                        DataType.INT,
                                        SummaryTransform.DISTANCE, summaryTo, validate);
            }
            // clear indexing script
            field.setIndexingScript(null);
            SDField posX = field.getStructField(PositionDataType.FIELD_X);
            if (posX != null) {
                posX.setIndexingScript(null);
            }
            SDField posY = field.getStructField(PositionDataType.FIELD_Y);
            if (posY != null) {
                posY.setIndexingScript(null);
            }
            if (doesSummary) ensureCompatibleSummary(field, zName,
                                                     field.getName(),
                                                     field.getDataType(),
                                                     SummaryTransform.GEOPOS, summaryTo, validate);
        }
    }

    /**
     * Creates the zcurve field for a position field: a long (or array of long) field with a fast-search
     * position attribute, and an indexing script derived from the script of the input field.
     *
     * @param inputField the position field to create a zcurve field for
     * @param fieldName the name of the zcurve field to create
     * @param validate see the note on the condition below
     * @return the new field, which is not yet added to the schema
     */
    private SDField createZCurveField(SDField inputField, String fieldName, boolean validate) {
        // NOTE(review): && binds tighter than ||, so this is (validate && field exists) || attribute exists,
        // which throws on an existing attribute also when validate is false - confirm that this is intended
        if (validate && schema.getConcreteField(fieldName) != null || schema.getAttribute(fieldName) != null) {
            throw newProcessException(schema, null, "Incompatible position attribute '" + fieldName +
                                                    "' already created.");
        }
        boolean isArray = inputField.getDataType() instanceof ArrayDataType;
        SDField field = new SDField(repo, fieldName, isArray ? DataType.getArray(DataType.LONG) : DataType.LONG);
        Attribute attribute = new Attribute(fieldName, Attribute.Type.LONG, isArray ? Attribute.CollectionType.ARRAY :
                                                                                     Attribute.CollectionType.SINGLE);
        attribute.setPosition(true);
        attribute.setFastSearch(true);
        field.addAttribute(attribute);

        // Derive the script of the new field from the input field's script with the two converters below
        ScriptExpression script = inputField.getIndexingScript();
        script = (ScriptExpression)new RemoveSummary(inputField.getName()).convert(script);
        script = (ScriptExpression)new PerformZCurve(field, fieldName).convert(script);
        field.setIndexingScript(script);
        return field;
    }

    /**
     * Makes sure there is a summary field with the given name which reads from sourceName only.
     * A missing summary field is created on the given field, with "default" and summaryTo as destinations.
     * An existing one without a transform gets the given transform and the same destinations.
     * An existing one with a different transform causes a warning and is left as it is.
     * An existing one with a different type is reported through fail when validating.
     */
    private void ensureCompatibleSummary(SDField field, String sourceName, String summaryName, DataType summaryType,
                                         SummaryTransform summaryTransform, Collection<String> summaryTo, boolean validate) {
        SummaryField summary = schema.getSummaryField(summaryName);
        if (summary == null) {
            summary = new SummaryField(summaryName, summaryType, summaryTransform);
            summary.addDestination("default");
            summary.addDestinations(summaryTo);
            field.addSummaryField(summary);
        } else if (!summary.getDataType().equals(summaryType)) {
            // When not validating, execution continues and the source is still replaced below
            if (validate)
                fail(schema, field, "Incompatible summary field '" + summaryName + "' type " + summary.getDataType() + " already created.");
        } else if (summary.getTransform() == SummaryTransform.NONE) {
            summary.setTransform(summaryTransform);
            summary.addDestination("default");
            summary.addDestinations(summaryTo);
        } else if (summary.getTransform() != summaryTransform) {
            deployLogger.logApplicationPackage(Level.WARNING, "Summary field " + summaryName + " has wrong transform: " + summary.getTransform());
            return;
        }
        SummaryField.Source source = new SummaryField.Source(sourceName);
        summary.getSources().clear();
        summary.addSource(source);
    }

    /**
     * Removes all summary fields from the field.
     *
     * @return the union of the destinations of the removed summary fields
     */
    private Set<String> removeSummaryTo(SDField field) {
        Set<String> summaryTo = new HashSet<>();
        Collection<SummaryField> summaryFields = field.getSummaryFields().values();
        for (SummaryField summary : summaryFields) {
            summaryTo.addAll(summary.getDestinations());
        }
        field.removeSummaryFields();
        return summaryTo;
    }

    private static boolean isSupportedPositionType(DataType dataType) {
        return GeoPos.isAnyPos(dataType);
    }

    /**
     * Converter which selects summary expressions that name no field or name the given field,
     * and converts them to null.
     */
    private static class RemoveSummary extends ExpressionConverter {

        final String find;

        RemoveSummary(String find) {
            this.find = find;
        }

        @Override
        protected boolean shouldConvert(Expression exp) {
            if (!(exp instanceof SummaryExpression)) {
                return false;
            }
            String fieldName = ((SummaryExpression)exp).getFieldName();
            return fieldName == null || fieldName.equals(find);
        }

        @Override
        protected Expression doConvert(Expression exp) {
            return null;
        }
    }

    /**
     * Converter which selects attribute expressions that name no field or name the given field, and replaces
     * each by a zcurve expression (applied per element for array fields) followed by an attribute
     * expression writing to the replacement name.
     */
    private static class PerformZCurve extends ExpressionConverter {

        final String find;
        final String replace;
        final boolean isArray;

        PerformZCurve(SDField find, String replace) {
            this.find = find.getName();
            this.replace = replace;
            this.isArray = find.getDataType() instanceof ArrayDataType;
        }

        @Override
        protected boolean shouldConvert(Expression exp) {
            if (!(exp instanceof AttributeExpression)) {
                return false;
            }
            String fieldName = ((AttributeExpression)exp).getFieldName();
            return fieldName == null || fieldName.equals(find);
        }

        @Override
        protected Expression doConvert(Expression exp) {
            return new StatementExpression(
                    isArray ? new ForEachExpression(new ZCurveExpression()) :
                              new ZCurveExpression(), new AttributeExpression(replace));
        }
    }

}
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.NumericDataType; +import com.yahoo.document.PrimitiveDataType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.Case; +import com.yahoo.schema.document.Dictionary; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Propagates dictionary settings from field level to attribute level. + * Only applies to numeric fields with fast-search enabled. + * + * @author baldersheim + */ +public class DictionaryProcessor extends Processor { + public DictionaryProcessor(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + @Override + public void process(boolean validate, boolean documentsOnly) { + for (SDField field : schema.allConcreteFields()) { + Attribute attribute = field.getAttribute(); + if (attribute == null) continue; + attribute.setCase(field.getMatching().getCase()); + Dictionary dictionary = field.getDictionary(); + if (dictionary == null) continue; + if (attribute.getDataType().getPrimitiveType() instanceof NumericDataType ) { + if (attribute.isFastSearch()) { + attribute.setDictionary(dictionary); + } else { + fail(schema, field, "You must specify 'attribute:fast-search' to allow dictionary control"); + } + } else if (attribute.getDataType().getPrimitiveType() == PrimitiveDataType.STRING) { + attribute.setDictionary(dictionary); + if (dictionary.getType() == Dictionary.Type.HASH) { + if (dictionary.getMatch() != Case.CASED) { + fail(schema, field, "hash dictionary require cased match"); + } + } + if (! 
dictionary.getMatch().equals(attribute.getCase())) { + fail(schema, field, "Dictionary casing '" + dictionary.getMatch() + "' does not match field match casing '" + attribute.getCase() + "'"); + } + } else { + fail(schema, field, "You can only specify 'dictionary:' for numeric or string fields"); + } + } + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/DisallowComplexMapAndWsetKeyTypes.java b/config-model/src/main/java/com/yahoo/schema/processing/DisallowComplexMapAndWsetKeyTypes.java new file mode 100644 index 00000000000..a5b4ca9a71f --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/DisallowComplexMapAndWsetKeyTypes.java @@ -0,0 +1,57 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.ArrayDataType; +import com.yahoo.document.Field; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.DataType; +import com.yahoo.document.MapDataType; +import com.yahoo.document.PrimitiveDataType; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDField; +import com.yahoo.document.WeightedSetDataType; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Non-primitive key types for map and weighted set forbidden (though OK in document model) + * + * @author Vegard Havdal + */ +public class DisallowComplexMapAndWsetKeyTypes extends Processor { + + public DisallowComplexMapAndWsetKeyTypes(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + + // TODO also traverse struct types to search for bad map or wset types. 
+ // Do this after document manager is fixed, do not start using the static stuff on SDDocumentTypes any more. + for (SDField field : schema.allConcreteFields()) { + checkFieldType(field, field.getDataType()); + } + } + + private void checkFieldType(Field field, DataType dataType) { + if (dataType instanceof ArrayDataType) { + DataType nestedType = ((ArrayDataType) dataType).getNestedType(); + checkFieldType(field, nestedType); + } else if (dataType instanceof WeightedSetDataType) { + DataType nestedType = ((WeightedSetDataType) dataType).getNestedType(); + if ( ! (nestedType instanceof PrimitiveDataType)) { + fail(schema, field, "Weighted set must have a primitive key type."); + } + } else if (dataType instanceof MapDataType) { + DataType keyType = ((MapDataType) dataType).getKeyType(); + if ( ! (keyType instanceof PrimitiveDataType)) { + fail(schema, field, "Map key type must be a primitive type."); + } + checkFieldType(field, ((MapDataType) dataType).getValueType()); + } + + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/DiversitySettingsValidator.java b/config-model/src/main/java/com/yahoo/schema/processing/DiversitySettingsValidator.java new file mode 100644 index 00000000000..0400292c7e5 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/DiversitySettingsValidator.java @@ -0,0 +1,65 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfile; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * @author baldersheim + */ +public class DiversitySettingsValidator extends Processor { + + public DiversitySettingsValidator(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + if (documentsOnly) return; + + for (RankProfile rankProfile : rankProfileRegistry.rankProfilesOf(schema)) { + if (rankProfile.getMatchPhaseSettings() != null && rankProfile.getMatchPhaseSettings().getDiversity() != null) { + validate(rankProfile, rankProfile.getMatchPhaseSettings().getDiversity()); + } + } + } + private void validate(RankProfile rankProfile, RankProfile.DiversitySettings settings) { + String attributeName = settings.getAttribute(); + new AttributeValidator(schema.getName(), rankProfile.name(), + schema.getAttribute(attributeName), attributeName).validate(); + } + + private static class AttributeValidator extends MatchPhaseSettingsValidator.AttributeValidator { + + public AttributeValidator(String searchName, String rankProfileName, Attribute attribute, String attributeName) { + super(searchName, rankProfileName, attribute, attributeName); + } + + protected void validateThatAttributeIsSingleAndNotPredicate() { + if ( ! 
attribute.getCollectionType().equals(Attribute.CollectionType.SINGLE) || + attribute.getType().equals(Attribute.Type.PREDICATE)) + { + failValidation("must be single value numeric, or enumerated attribute, but it is '" + + attribute.getDataType().getName() + "'"); + } + } + + @Override + public void validate() { + validateThatAttributeExists(); + validateThatAttributeIsSingleAndNotPredicate(); + } + + @Override + public String getValidationType() { + return "diversity"; + } + + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ExactMatch.java b/config-model/src/main/java/com/yahoo/schema/processing/ExactMatch.java new file mode 100644 index 00000000000..aa2d8293cac --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/ExactMatch.java @@ -0,0 +1,109 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.CollectionDataType; +import com.yahoo.document.DataType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.MatchType; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.document.Stemming; +import com.yahoo.vespa.indexinglanguage.ExpressionSearcher; +import com.yahoo.vespa.indexinglanguage.expressions.ExactExpression; +import com.yahoo.vespa.indexinglanguage.expressions.Expression; +import com.yahoo.vespa.indexinglanguage.expressions.ForEachExpression; +import com.yahoo.vespa.indexinglanguage.expressions.IndexExpression; +import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression; +import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * The implementation of exact matching + * + * @author bratseth + */ +public class ExactMatch extends Processor { + + public static final String 
DEFAULT_EXACT_TERMINATOR = "@@"; + + ExactMatch(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (SDField field : schema.allConcreteFields()) { + processField(field, schema); + } + } + + private void processField(SDField field, Schema schema) { + MatchType matching = field.getMatching().getType(); + if (matching.equals(MatchType.EXACT) || matching.equals(MatchType.WORD)) { + implementExactMatch(field, schema); + } else if (field.getMatching().getExactMatchTerminator() != null) { + warn(schema, field, "exact-terminator requires 'exact' matching to have any effect."); + } + for (var structField : field.getStructFields()) { + processField(structField, schema); + } + } + + private void implementExactMatch(SDField field, Schema schema) { + field.setStemming(Stemming.NONE); + field.getNormalizing().inferLowercase(); + + if (field.getMatching().getType().equals(MatchType.WORD)) { + field.addQueryCommand("word"); + } else { // exact + String exactTerminator = DEFAULT_EXACT_TERMINATOR; + if (field.getMatching().getExactMatchTerminator() != null + && ! 
field.getMatching().getExactMatchTerminator().equals("")) { + exactTerminator = field.getMatching().getExactMatchTerminator(); + } else { + info(schema, field, + "With 'exact' matching, an exact-terminator is needed," + + " using default value '" + exactTerminator +"' as terminator"); + } + field.addQueryCommand("exact " + exactTerminator); + + // The following part illustrates how nice it would have been with canonical representation of indices + if (field.doesIndexing()) { + exactMatchSettingsForField(field); + } + } + ScriptExpression script = field.getIndexingScript(); + if (new ExpressionSearcher<>(IndexExpression.class).containedIn(script)) { + field.setIndexingScript((ScriptExpression)new MyProvider(schema).convert(field.getIndexingScript())); + } + } + + private void exactMatchSettingsForField(SDField field) { + field.getRanking().setFilter(true); + } + + private static class MyProvider extends TypedTransformProvider { + + MyProvider(Schema schema) { + super(ExactExpression.class, schema); + } + + @Override + protected boolean requiresTransform(Expression exp, DataType fieldType) { + return exp instanceof OutputExpression; + } + + @Override + protected Expression newTransform(DataType fieldType) { + Expression exp = new ExactExpression(); + if (fieldType instanceof CollectionDataType) { + exp = new ForEachExpression(exp); + } + return exp; + } + + } + +} + diff --git a/config-model/src/main/java/com/yahoo/schema/processing/FastAccessValidator.java b/config-model/src/main/java/com/yahoo/schema/processing/FastAccessValidator.java new file mode 100644 index 00000000000..224000e6b64 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/FastAccessValidator.java @@ -0,0 +1,54 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.stream.Collectors; + +/** + * Validates the use of the fast-access property. + * + * @author bjorncs + */ +public class FastAccessValidator extends Processor { + + public FastAccessValidator(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + + String invalidAttributes = schema.allFields() + .flatMap(field -> field.getAttributes().values().stream()) + .filter(FastAccessValidator::isIncompatibleAttribute) + .map(Attribute::getName) + .collect(Collectors.joining(", ")); + if ( ! invalidAttributes.isEmpty()) { + throw new IllegalArgumentException( + "For " + schema + ": The following attributes have a type that is incompatible with fast-access: " + + invalidAttributes + ". 
Predicate, tensor and reference attributes are incompatible with fast-access."); + } + } + + private static boolean isIncompatibleAttribute(Attribute attribute) { + return attribute.isFastAccess() && isTypeIncompatibleWithFastAccess(attribute.getType()); + } + + private static boolean isTypeIncompatibleWithFastAccess(Attribute.Type type) { + switch (type) { + case PREDICATE: + case TENSOR: + case REFERENCE: + return true; + default: + return false; + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/FieldSetSettings.java b/config-model/src/main/java/com/yahoo/schema/processing/FieldSetSettings.java new file mode 100644 index 00000000000..f0c59ece1bf --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/FieldSetSettings.java @@ -0,0 +1,107 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.FieldSet; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.schema.document.Matching; +import com.yahoo.schema.document.NormalizeLevel; +import com.yahoo.schema.document.Stemming; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Computes the right "index commands" for each fieldset in a search definition. + * + * @author vegardh + * @author bratseth + */ +// See also IndexInfo.addFieldSetCommands, which does more of this in a complicated way. +// That should be moved here, and done in the way the match setting is done below +// (this requires adding normalizing and stemming settings to FieldSet). 
+public class FieldSetSettings extends Processor { + + public FieldSetSettings(Schema schema, + DeployLogger deployLogger, + RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (FieldSet fieldSet : schema.fieldSets().userFieldSets().values()) { + if (validate) + checkFieldNames(schema, fieldSet); + checkMatching(schema, fieldSet); + checkNormalization(schema, fieldSet); + checkStemming(schema, fieldSet); + } + } + + private void checkFieldNames(Schema schema, FieldSet fieldSet) { + for (String field : fieldSet.getFieldNames()) { + if (schema.getField(field) == null) + throw new IllegalArgumentException("For " + schema + ": Field '" + field + "' in " + + fieldSet + " does not exist."); + } + } + + private void checkMatching(Schema schema, FieldSet fieldSet) { + Matching matching = fieldSet.getMatching(); + for (String fieldName : fieldSet.getFieldNames()) { + ImmutableSDField field = schema.getField(fieldName); + Matching fieldMatching = field.getMatching(); + if (matching == null) { + matching = fieldMatching; + } else { + if ( ! matching.equals(fieldMatching)) { + warn(schema, field.asField(), + "The matching settings for the fields in " + fieldSet + " are inconsistent " + + "(explicitly or because of field type). This may lead to recall and ranking issues."); + return; + } + } + } + fieldSet.setMatching(matching); // Assign the uniquely determined matching to the field set + } + + private void checkNormalization(Schema schema, FieldSet fieldSet) { + NormalizeLevel.Level normalizing = null; + for (String fieldName : fieldSet.getFieldNames()) { + ImmutableSDField field = schema.getField(fieldName); + NormalizeLevel.Level fieldNorm = field.getNormalizing().getLevel(); + if (normalizing == null) { + normalizing = fieldNorm; + } else { + if ( ! 
normalizing.equals(fieldNorm)) { + warn(schema, field.asField(), + "The normalization settings for the fields in " + fieldSet + " are inconsistent " + + "(explicitly or because of field type). This may lead to recall and ranking issues."); + return; + } + } + } + } + + private void checkStemming(Schema schema, FieldSet fieldSet) { + Stemming stemming = null; + for (String fieldName : fieldSet.getFieldNames()) { + ImmutableSDField field = schema.getField(fieldName); + Stemming fieldStemming = field.getStemming(); + if (stemming == null) { + stemming = fieldStemming; + } else { + if ( ! stemming.equals(fieldStemming)) { + warn(schema, field.asField(), + "The stemming settings for the fields in the fieldset '"+fieldSet.getName()+ + "' are inconsistent (explicitly or because of field type). " + + "This may lead to recall and ranking issues."); + return; + } + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/FilterFieldNames.java b/config-model/src/main/java/com/yahoo/schema/processing/FilterFieldNames.java new file mode 100644 index 00000000000..28973c82d42 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/FilterFieldNames.java @@ -0,0 +1,72 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.RankType; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.RankProfile; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.Set; +import java.util.logging.Level; + +/** + * Takes the fields and indexes that are of type rank filter, and stores those names on all rank profiles + * + * @author Vegard Havdal + */ +public class FilterFieldNames extends Processor { + + public FilterFieldNames(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if (documentsOnly) return; + + for (SDField f : schema.allConcreteFields()) { + if (f.getRanking().isFilter()) { + filterField(f.getName()); + } + } + + for (RankProfile profile : rankProfileRegistry.rankProfilesOf(schema)) { + Set<String> filterFields = new LinkedHashSet<>(); + findFilterFields(schema, profile, filterFields); + for (Iterator<String> itr = filterFields.iterator(); itr.hasNext(); ) { + String fieldName = itr.next(); + profile.filterFields().add(fieldName); + profile.addRankSetting(fieldName, RankProfile.RankSetting.Type.RANKTYPE, RankType.EMPTY); + } + } + } + + private void filterField(String f) { + for (RankProfile rp : rankProfileRegistry.rankProfilesOf(schema)) { + rp.filterFields().add(f); + } + } + + private void findFilterFields(Schema schema, RankProfile profile, Set<String> filterFields) { + for (Iterator<RankProfile.RankSetting> itr = profile.declaredRankSettingIterator(); itr.hasNext(); ) { + RankProfile.RankSetting setting = itr.next(); + if 
(setting.getType().equals(RankProfile.RankSetting.Type.PREFERBITVECTOR) && ((Boolean)setting.getValue())) + { + String fieldName = setting.getFieldName(); + if (schema.getConcreteField(fieldName) != null) { + if ( ! profile.filterFields().contains(fieldName)) { + filterFields.add(fieldName); + } + } else { + deployLogger.logApplicationPackage(Level.WARNING, "For rank profile '" + profile.name() + "': Cannot apply rank filter setting to unexisting field '" + fieldName + "'"); + } + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ImplicitSummaries.java b/config-model/src/main/java/com/yahoo/schema/processing/ImplicitSummaries.java new file mode 100644 index 00000000000..4080e37003f --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/ImplicitSummaries.java @@ -0,0 +1,232 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import java.util.logging.Level; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.document.PositionDataType; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isComplexFieldWithOnlyStructFieldAttributes; + +/** + * Makes implicitly defined summaries into explicit summaries + * + * @author bratseth + */ +public class ImplicitSummaries extends Processor { + + public ImplicitSummaries(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + 
@Override + public void process(boolean validate, boolean documentsOnly) { + DocumentSummary defaultSummary = schema.getSummariesInThis().get("default"); + if (defaultSummary == null) { + defaultSummary = new DocumentSummary("default", schema); + defaultSummary.setFromDisk(true); // As we add documentid to this + schema.addSummary(defaultSummary); + } + + for (SDField field : schema.allConcreteFields()) { + collectSummaries(field, schema, validate); + } + for (DocumentSummary documentSummary : schema.getSummaries().values()) { + documentSummary.purgeImplicits(); + } + } + + private void addSummaryFieldSources(SummaryField summaryField, SDField sdField) { + sdField.addSummaryFieldSources(summaryField); + } + + private void collectSummaries(SDField field, Schema schema, boolean validate) { + SummaryField addedSummaryField = null; + + // Implicit + String fieldName = field.getName(); + SummaryField fieldSummaryField = field.getSummaryField(fieldName); + if (fieldSummaryField == null && field.doesSummarying()) { + fieldSummaryField = new SummaryField(fieldName, field.getDataType()); + fieldSummaryField.setImplicit(true); + addSummaryFieldSources(fieldSummaryField, field); + fieldSummaryField.addDestination("default"); + field.addSummaryField(fieldSummaryField); + addedSummaryField = fieldSummaryField; + } + if (fieldSummaryField != null) { + for (String dest : fieldSummaryField.getDestinations()) { + DocumentSummary summary = schema.getSummariesInThis().get(dest); + if (summary != null) { + summary.add(fieldSummaryField); + } + } + } + + // Attribute prefetch + for (Attribute attribute : field.getAttributes().values()) { + if (attribute.getName().equals(fieldName)) { + if (addedSummaryField != null) { + addedSummaryField.setTransform(SummaryTransform.ATTRIBUTE); + } + if (attribute.isPrefetch()) { + addPrefetchAttribute(attribute, field, schema); + } + } + } + + if (addedSummaryField != null && isComplexFieldWithOnlyStructFieldAttributes(field)) { + 
addedSummaryField.setTransform(SummaryTransform.ATTRIBUTECOMBINER); + } + + // Position attributes + if (field.doesSummarying()) { + for (Attribute attribute : field.getAttributes().values()) { + if ( ! attribute.isPosition()) continue; + var distField = field.getSummaryField(PositionDataType.getDistanceSummaryFieldName(fieldName)); + if (distField != null) { + DocumentSummary attributePrefetchSummary = getOrCreateAttributePrefetchSummary(schema); + attributePrefetchSummary.add(distField); + } + var posField = field.getSummaryField(PositionDataType.getPositionSummaryFieldName(fieldName)); + if (posField != null) { + DocumentSummary attributePrefetchSummary = getOrCreateAttributePrefetchSummary(schema); + attributePrefetchSummary.add(posField); + } + } + } + + // Explicits + for (SummaryField summaryField : field.getSummaryFields().values()) { + // Make sure we fetch from attribute here too + Attribute attribute = field.getAttributes().get(fieldName); + if (attribute != null && summaryField.getTransform() == SummaryTransform.NONE) { + summaryField.setTransform(SummaryTransform.ATTRIBUTE); + } + if (isValid(summaryField, schema, validate)) { + addToDestinations(summaryField, schema); + } + } + + } + + private DocumentSummary getOrCreateAttributePrefetchSummary(Schema schema) { + DocumentSummary summary = schema.getSummariesInThis().get("attributeprefetch"); + if (summary == null) { + summary = new DocumentSummary("attributeprefetch", schema); + schema.addSummary(summary); + } + return summary; + } + + + private void addPrefetchAttribute(Attribute attribute, SDField field, Schema schema) { + if (attribute.getPrefetchValue() == null) { // Prefetch by default - unless any summary makes this dynamic + // Check if there is an implicit dynamic definition + SummaryField fieldSummaryField = field.getSummaryField(attribute.getName()); + if (fieldSummaryField != null && fieldSummaryField.getTransform().isDynamic()) return; + + // Check if an explicit class makes it dynamic 
(first is enough, as all must be the same, checked later) + SummaryField explicitSummaryField = schema.getExplicitSummaryField(attribute.getName()); + if (explicitSummaryField != null && explicitSummaryField.getTransform().isDynamic()) return; + } + + DocumentSummary summary = getOrCreateAttributePrefetchSummary(schema); + SummaryField attributeSummaryField = new SummaryField(attribute.getName(), attribute.getDataType()); + attributeSummaryField.addSource(attribute.getName()); + attributeSummaryField.addDestination("attributeprefetch"); + attributeSummaryField.setTransform(SummaryTransform.ATTRIBUTE); + summary.add(attributeSummaryField); + } + + // Returns whether this is valid. Warns if invalid and ignorable. Throws if not ignorable. + private boolean isValid(SummaryField summaryField, Schema schema, boolean validate) { + if (summaryField.getTransform() == SummaryTransform.DISTANCE || + summaryField.getTransform() == SummaryTransform.POSITIONS) { + int sourceCount = summaryField.getSourceCount(); + if (validate && sourceCount != 1) { + throw newProcessException(schema.getName(), summaryField.getName(), + "Expected 1 source field, got " + sourceCount + "."); + } + String sourceName = summaryField.getSingleSource(); + if (validate && schema.getAttribute(sourceName) == null) { + throw newProcessException(schema.getName(), summaryField.getName(), + "Summary source attribute '" + sourceName + "' not found."); + } + return true; + } + + String fieldName = summaryField.getSourceField(); + SDField sourceField = schema.getConcreteField(fieldName); + if (validate && sourceField == null) { + throw newProcessException(schema, summaryField, "Source field '" + fieldName + "' does not exist."); + } + if (! 
sourceField.doesSummarying() && + summaryField.getTransform() != SummaryTransform.ATTRIBUTE && + summaryField.getTransform() != SummaryTransform.GEOPOS) + { + // Summary transform attribute may indicate that the ilscript was rewritten to remove summary + // by another search that uses this same field in inheritance. + deployLogger.logApplicationPackage(Level.WARNING, "Ignoring " + summaryField + ": " + sourceField + + " is not creating a summary value in its indexing statement"); + return false; + } + + if (summaryField.getTransform().isDynamic() + && summaryField.getName().equals(sourceField.getName()) + && sourceField.doesAttributing()) { + Attribute attribute = sourceField.getAttributes().get(sourceField.getName()); + if (attribute != null) { + String destinations = "document summary 'default'"; + if (summaryField.getDestinations().size() >0) { + destinations = "document summaries " + summaryField.getDestinations(); + } + deployLogger.logApplicationPackage(Level.WARNING, + "Will fetch the disk summary value of " + sourceField + " in " + destinations + + " since this summary field uses a dynamic summary value (snippet/bolding): Dynamic summaries and bolding " + + "is not supported with summary values fetched from in-memory attributes yet. 
If you want to see partial updates " + + "to this attribute, remove any bolding and dynamic snippeting from this field"); + // Note: The dynamic setting has already overridden the attribute map setting, + // so we do not need to actually do attribute.setSummary(false) here + // Also, we can not do this, since it makes it impossible to fetch this attribute + // in another summary + } + } + + return true; + } + + private void addToDestinations(SummaryField summaryField, Schema schema) { + if (summaryField.getDestinations().size() == 0) { + addToDestination("default", summaryField, schema); + } + else { + for (String destinationName : summaryField.getDestinations()) { + addToDestination(destinationName, summaryField, schema); + } + } + } + + private void addToDestination(String destinationName, SummaryField summaryField, Schema schema) { + DocumentSummary destination = schema.getSummariesInThis().get(destinationName); + if (destination == null) { + destination = new DocumentSummary(destinationName, schema); + schema.addSummary(destination); + destination.add(summaryField); + } + else { + SummaryField existingField= destination.getSummaryField(summaryField.getName()); + SummaryField merged = summaryField.mergeWith(existingField); + destination.add(merged); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ImplicitSummaryFields.java b/config-model/src/main/java/com/yahoo/schema/processing/ImplicitSummaryFields.java new file mode 100644 index 00000000000..b17efbfe8e8 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/ImplicitSummaryFields.java @@ -0,0 +1,40 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.DataType; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * This processor adds all implicit summary fields to all registered document summaries. If another field has already + * been registered with one of the implicit names, this processor will throw an {@link IllegalStateException}. + */ +public class ImplicitSummaryFields extends Processor { + + public ImplicitSummaryFields(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (DocumentSummary docsum : schema.getSummariesInThis().values()) { + if (docsum.inherited().isPresent()) continue; // Implicit fields are added to inheriting summaries through their parent + addField(docsum, new SummaryField("rankfeatures", DataType.STRING, SummaryTransform.RANKFEATURES), validate); + addField(docsum, new SummaryField("summaryfeatures", DataType.STRING, SummaryTransform.SUMMARYFEATURES), validate); + } + } + + private void addField(DocumentSummary docsum, SummaryField field, boolean validate) { + if (validate && docsum.getSummaryField(field.getName()) != null) { + throw new IllegalArgumentException("Summary class '" + docsum.getName() + "' uses reserved field name '" + + field.getName() + "'."); + } + docsum.add(field); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ImportedFieldsResolver.java b/config-model/src/main/java/com/yahoo/schema/processing/ImportedFieldsResolver.java new file mode 100644 
index 00000000000..ee465be44f2 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/ImportedFieldsResolver.java @@ -0,0 +1,207 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.DataType; +import com.yahoo.document.PositionDataType; +import com.yahoo.schema.DocumentReference; +import com.yahoo.schema.DocumentReferences; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.GeoPos; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.schema.document.ImportedComplexField; +import com.yahoo.schema.document.ImportedField; +import com.yahoo.schema.document.ImportedFields; +import com.yahoo.schema.document.ImportedSimpleField; +import com.yahoo.schema.document.TemporaryImportedField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Optional; + +import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isArrayOfSimpleStruct; +import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isMapOfPrimitiveType; +import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isMapOfSimpleStruct; + +/** + * Iterates all imported fields from schema parsing and validates and resolves them into concrete fields from referenced document types. 
+ * + * @author geirst + */ +public class ImportedFieldsResolver extends Processor { + + private final Map<String, ImportedField> importedFields = new LinkedHashMap<>(); + private final Optional<DocumentReferences> references; + + public ImportedFieldsResolver(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + references = schema.getDocument().getDocumentReferences(); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + schema.temporaryImportedFields().get().fields().forEach((name, field) -> resolveImportedField(field, validate)); + schema.setImportedFields(new ImportedFields(importedFields)); + } + + private void resolveImportedField(TemporaryImportedField importedField, boolean validate) { + DocumentReference reference = validateDocumentReference(importedField); + ImmutableSDField targetField = getTargetField(importedField, reference); + if (GeoPos.isAnyPos(targetField)) { + resolveImportedPositionField(importedField, reference, targetField, validate); + } else if (isArrayOfSimpleStruct(targetField)) { + resolveImportedArrayOfStructField(importedField, reference, targetField, validate); + } else if (isMapOfSimpleStruct(targetField)) { + resolveImportedMapOfStructField(importedField, reference, targetField, validate); + } else if (isMapOfPrimitiveType(targetField)) { + resolveImportedMapOfPrimitiveField(importedField, reference, targetField, validate); + } else { + resolveImportedNormalField(importedField, reference, targetField, validate); + } + } + + private void resolveImportedPositionField(TemporaryImportedField importedField, DocumentReference reference, + ImmutableSDField targetField, boolean validate) { + TemporaryImportedField importedZCurveField = new TemporaryImportedField(PositionDataType.getZCurveFieldName(importedField.fieldName()), + reference.referenceField().getName(), 
PositionDataType.getZCurveFieldName(targetField.getName())); + ImmutableSDField targetZCurveField = getTargetField(importedZCurveField, reference); + resolveImportedNormalField(importedZCurveField, reference, targetZCurveField, validate); + ImportedComplexField importedStructField = new ImportedComplexField(importedField.fieldName(), reference, targetField); + registerImportedField(importedField, null, importedStructField); + } + + private void resolveImportedArrayOfStructField(TemporaryImportedField importedField, DocumentReference reference, + ImmutableSDField targetField, boolean validate) { + ImportedComplexField importedStructField = new ImportedComplexField(importedField.fieldName(), reference, targetField); + resolveImportedNestedStructField(importedField, reference, importedStructField, targetField, validate); + registerImportedField(importedField, null, importedStructField); + } + + private void resolveImportedMapOfStructField(TemporaryImportedField importedField, DocumentReference reference, + ImmutableSDField targetField, boolean validate) { + ImportedComplexField importedMapField = new ImportedComplexField(importedField.fieldName(), reference, targetField); + ImportedComplexField importedStructField = new ImportedComplexField(importedField.fieldName() + ".value", reference, targetField.getStructField("value")); + importedMapField.addNestedField(importedStructField); + resolveImportedNestedField(importedField, reference, importedMapField, targetField.getStructField("key"), validate); + resolveImportedNestedStructField(importedField, reference, importedStructField, importedStructField.targetField(), validate); + registerImportedField(importedField, null, importedMapField); + } + + private void makeImportedNormalField(TemporaryImportedField importedField, ImportedComplexField owner, String name, DocumentReference reference, ImmutableSDField targetField) { + ImportedField importedSimpleField = new ImportedSimpleField(name, reference, targetField); + 
registerImportedField(importedField, owner, importedSimpleField); + } + + private void registerImportedField(TemporaryImportedField temporaryImportedField, ImportedComplexField owner, ImportedField importedField) { + if (owner != null) { + owner.addNestedField(importedField); + } else { + if (importedFields.get(importedField.fieldName()) != null) { + fail(temporaryImportedField, importedField.fieldName(), targetFieldAsString(importedField.targetField().getName(), importedField.reference()) + ": Field already imported"); + } + importedFields.put(importedField.fieldName(), importedField); + } + } + + private static String makeImportedNestedFieldName(TemporaryImportedField importedField, ImmutableSDField targetNestedField) { + return importedField.fieldName() + targetNestedField.getName().substring(importedField.targetFieldName().length()); + } + + private boolean resolveImportedNestedField(TemporaryImportedField importedField, DocumentReference reference, + ImportedComplexField owner, ImmutableSDField targetNestedField, boolean requireAttribute) { + Attribute attribute = targetNestedField.getAttribute(); + String importedNestedFieldName = makeImportedNestedFieldName(importedField, targetNestedField); + if (attribute != null) { + makeImportedNormalField(importedField, owner, importedNestedFieldName, reference, targetNestedField); + } else if (requireAttribute) { + fail(importedField, importedNestedFieldName, targetFieldAsString(targetNestedField.getName(), reference) + + ": Is not an attribute field. 
Only attribute fields supported"); + } + return attribute != null; + } + + private void resolveImportedNestedStructField(TemporaryImportedField importedField, DocumentReference reference, + ImportedComplexField ownerField, ImmutableSDField targetNestedField, boolean validate) { + boolean foundAttribute = false; + for (ImmutableSDField targetStructField : targetNestedField.getStructFields()) { + if (resolveImportedNestedField(importedField, reference, ownerField, targetStructField, false)) { + foundAttribute = true; + }; + } + if (validate && !foundAttribute) { + String importedNestedFieldName = makeImportedNestedFieldName(importedField, targetNestedField); + fail(importedField, importedNestedFieldName, targetFieldAsString(targetNestedField.getName(), reference) + + ": Is not a struct containing an attribute field."); + } + } + + private void resolveImportedMapOfPrimitiveField(TemporaryImportedField importedField, DocumentReference reference, + ImmutableSDField targetField, boolean validate) { + ImportedComplexField importedMapField = new ImportedComplexField(importedField.fieldName(), reference, targetField); + resolveImportedNestedField(importedField, reference, importedMapField, targetField.getStructField("key"), validate); + resolveImportedNestedField(importedField, reference, importedMapField, targetField.getStructField("value"), validate); + registerImportedField(importedField, null, importedMapField); + } + + private void resolveImportedNormalField(TemporaryImportedField importedField, DocumentReference reference, + ImmutableSDField targetField, boolean validate) { + if (validate) { + validateTargetField(importedField, targetField, reference); + } + makeImportedNormalField(importedField, null, importedField.fieldName(), reference, targetField); + } + + private DocumentReference validateDocumentReference(TemporaryImportedField importedField) { + String referenceFieldName = importedField.referenceFieldName(); + DocumentReference reference = 
references.get().referenceMap().get(referenceFieldName); + if (reference == null) { + fail(importedField, "Reference field '" + referenceFieldName + "' not found"); + } + return reference; + } + + private ImmutableSDField getTargetField(TemporaryImportedField importedField, + DocumentReference reference) { + String targetFieldName = importedField.targetFieldName(); + Schema targetSchema = reference.targetSearch(); + ImmutableSDField targetField = targetSchema.getField(targetFieldName); + if (targetField == null) { + fail(importedField, targetFieldAsString(targetFieldName, reference) + ": Not found"); + } + return targetField; + } + + private void validateTargetField(TemporaryImportedField importedField, + ImmutableSDField targetField, DocumentReference reference) { + if (!targetField.doesAttributing()) { + fail(importedField, targetFieldAsString(targetField.getName(), reference) + + ": Is not an attribute field. Only attribute fields supported"); + } else if (targetField.doesIndexing()) { + fail(importedField, targetFieldAsString(targetField.getName(), reference) + + ": Is an index field. Not supported"); + } else if (targetField.getDataType().equals(DataType.PREDICATE)) { + fail(importedField, targetFieldAsString(targetField.getName(), reference) + + ": Is of type 'predicate'. 
Not supported"); + } + } + + private static String targetFieldAsString(String targetFieldName, DocumentReference reference) { + return "Field '" + targetFieldName + "' via reference field '" + reference.referenceField().getName() + "'"; + } + + private void fail(TemporaryImportedField importedField, String msg) { + throw new IllegalArgumentException("For " + schema + ", import field '" + + importedField.fieldName() + "': " + msg); + } + + private void fail(TemporaryImportedField importedField, String importedNestedFieldName, String msg) { + if (importedField.fieldName().equals(importedNestedFieldName)) { + fail(importedField, msg); + } + throw new IllegalArgumentException("For " + schema + ", import field '" + + importedField.fieldName() + "' (nested to '" + importedNestedFieldName + "'): " + msg); + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/IndexFieldNames.java b/config-model/src/main/java/com/yahoo/schema/processing/IndexFieldNames.java new file mode 100644 index 00000000000..27101c47c7a --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/IndexFieldNames.java @@ -0,0 +1,47 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Because of the way the parser works (allowing any token as identifier), + * it is not practical to limit the syntax of field names there, do it here. + * Important to disallow dash, has semantic in IL. 
+ * + * @author Vehard Havdal + */ +public class IndexFieldNames extends Processor { + + private static final String FIELD_NAME_REGEXP = "[a-zA-Z]\\w*"; + + public IndexFieldNames(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + + for (SDField field : schema.allConcreteFields()) { + if ( ! field.getName().matches(FIELD_NAME_REGEXP) && ! legalDottedPositionField(field)) { + fail(schema, field, " Not a legal field name. Legal expression: " + FIELD_NAME_REGEXP); + } + } + } + + /** + * In {@link CreatePositionZCurve} we add some .position and .distance fields for pos fields. Make an exception for those for now. + * TODO Vespa 8: Rename to _position and _distance and delete this method. + * + * @param field an {@link com.yahoo.schema.document.SDField} + * @return true if allowed + */ + private boolean legalDottedPositionField(SDField field) { + return field.getName().endsWith(".position") || field.getName().endsWith(".distance"); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/IndexingInputs.java b/config-model/src/main/java/com/yahoo/schema/processing/IndexingInputs.java new file mode 100644 index 00000000000..88e84d5289f --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/IndexingInputs.java @@ -0,0 +1,106 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.indexinglanguage.ExpressionConverter; +import com.yahoo.vespa.indexinglanguage.ExpressionVisitor; +import com.yahoo.vespa.indexinglanguage.expressions.Expression; +import com.yahoo.vespa.indexinglanguage.expressions.InputExpression; +import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; +import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * This processor modifies all indexing scripts so that they input the value of the owning field by default. It also + * ensures that all fields used as input exist. + * + * @author Simon Thoresen Hult + */ +public class IndexingInputs extends Processor { + + public IndexingInputs(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (SDField field : schema.allConcreteFields()) { + ScriptExpression script = field.getIndexingScript(); + if (script == null) continue; + + String fieldName = field.getName(); + script = (ScriptExpression)new DefaultToCurrentField(fieldName).convert(script); + script = (ScriptExpression)new EnsureInputExpression(fieldName).convert(script); + if (validate) + new VerifyInputExpression(schema, field).visit(script); + + field.setIndexingScript(script); + } + } + + private static class DefaultToCurrentField extends ExpressionConverter { + + final String fieldName; + + DefaultToCurrentField(String fieldName) { + this.fieldName = fieldName; + } + + @Override + protected boolean shouldConvert(Expression exp) { + return exp instanceof InputExpression && 
((InputExpression)exp).getFieldName() == null; + } + + @Override + protected Expression doConvert(Expression exp) { + return new InputExpression(fieldName); + } + } + + private static class EnsureInputExpression extends ExpressionConverter { + + final String fieldName; + + EnsureInputExpression(String fieldName) { + this.fieldName = fieldName; + } + + @Override + protected boolean shouldConvert(Expression exp) { + return exp instanceof StatementExpression; + } + + @Override + protected Expression doConvert(Expression exp) { + if (exp.requiredInputType() != null) { + return new StatementExpression(new InputExpression(fieldName), exp); + } else { + return exp; + } + } + } + + private class VerifyInputExpression extends ExpressionVisitor { + + private final Schema schema; + private final SDField field; + + public VerifyInputExpression(Schema schema, SDField field) { + this.schema = schema; + this.field = field; + } + + @Override + protected void doVisit(Expression exp) { + if ( ! (exp instanceof InputExpression)) return; + String inputField = ((InputExpression)exp).getFieldName(); + if (schema.getField(inputField).hasFullIndexingDocprocRights()) return; + + fail(schema, field, "Indexing script refers to field '" + inputField + "' which does not exist " + + "in document type '" + schema.getDocument().getName() + "', and is not a mutable attribute."); + } + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/IndexingOutputs.java b/config-model/src/main/java/com/yahoo/schema/processing/IndexingOutputs.java new file mode 100644 index 00000000000..ea65a223686 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/IndexingOutputs.java @@ -0,0 +1,144 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.DataType; +import com.yahoo.document.Field; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.indexinglanguage.ExpressionConverter; +import com.yahoo.vespa.indexinglanguage.expressions.*; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.*; + +/** + * This processor modifies all indexing scripts so that they output to the owning field by default. It also prevents + * any output expression from writing to any field except for the owning field. Finally, for <code>SummaryExpression</code>, + * this processor expands to write all appropriate summary fields. + * + * @author Simon Thoresen Hult + */ +public class IndexingOutputs extends Processor { + + public IndexingOutputs(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (SDField field : schema.allConcreteFields()) { + ScriptExpression script = field.getIndexingScript(); + if (script == null) continue; + + Set<String> summaryFields = new TreeSet<>(); + findSummaryTo(schema, field, summaryFields, summaryFields); + MyConverter converter = new MyConverter(schema, field, summaryFields, validate); + field.setIndexingScript((ScriptExpression)converter.convert(script)); + } + } + + public void findSummaryTo(Schema schema, SDField field, Set<String> dynamicSummary, Set<String> staticSummary) { + var summaryFields = schema.getSummaryFields(field); + if (summaryFields.isEmpty()) { + fillSummaryToFromField(field, dynamicSummary, staticSummary); + } else { + 
fillSummaryToFromSearch(schema, field, summaryFields, dynamicSummary, staticSummary); + } + } + + private void fillSummaryToFromSearch(Schema schema, SDField field, List<SummaryField> summaryFields, + Set<String> dynamicSummary, Set<String> staticSummary) { + for (SummaryField summaryField : summaryFields) { + fillSummaryToFromSummaryField(schema, field, summaryField, dynamicSummary, staticSummary); + } + } + + private void fillSummaryToFromSummaryField(Schema schema, SDField field, SummaryField summaryField, + Set<String> dynamicSummary, Set<String> staticSummary) { + SummaryTransform summaryTransform = summaryField.getTransform(); + String summaryName = summaryField.getName(); + if (summaryTransform.isDynamic() && summaryField.getSourceCount() > 2) { + // Avoid writing to summary fields that have more than a single input field, as that is handled by the + // summary rewriter in the search core. + return; + } + if (summaryTransform.isDynamic()) { + DataType fieldType = field.getDataType(); + if (fieldType != DataType.URI && fieldType != DataType.STRING) { + warn(schema, field, "Dynamic summaries are only supported for fields of type " + + "string, ignoring summary field '" + summaryField.getName() + + "' for sd field '" + field.getName() + "' of type " + + fieldType.getName() + "."); + return; + } + dynamicSummary.add(summaryName); + } else if (summaryTransform != SummaryTransform.ATTRIBUTE) { + staticSummary.add(summaryName); + } + } + + private static void fillSummaryToFromField(SDField field, Set<String> dynamicSummary, Set<String> staticSummary) { + for (SummaryField summaryField : field.getSummaryFields().values()) { + String summaryName = summaryField.getName(); + if (summaryField.getTransform().isDynamic()) { + dynamicSummary.add(summaryName); + } else { + staticSummary.add(summaryName); + } + } + } + + private class MyConverter extends ExpressionConverter { + + final Schema schema; + final Field field; + final Set<String> summaryFields; + final boolean 
validate; + + MyConverter(Schema schema, Field field, Set<String> summaryFields, boolean validate) { + this.schema = schema; + this.field = field; + this.summaryFields = summaryFields.isEmpty() ? Collections.singleton(field.getName()) : summaryFields; + this.validate = validate; + } + + @Override + protected boolean shouldConvert(Expression exp) { + if ( ! (exp instanceof OutputExpression)) { + return false; + } + String fieldName = ((OutputExpression)exp).getFieldName(); + if (fieldName == null) { + return true; // inject appropriate field name + } + if ( validate && ! fieldName.equals(field.getName())) { + fail(schema, field, "Indexing expression '" + exp + "' attempts to write to a field other than '" + + field.getName() + "'."); + } + return false; + } + + @Override + protected Expression doConvert(Expression exp) { + List<Expression> ret = new LinkedList<>(); + if (exp instanceof AttributeExpression) { + ret.add(new AttributeExpression(field.getName())); + } else if (exp instanceof IndexExpression) { + ret.add(new IndexExpression(field.getName())); + } else if (exp instanceof SummaryExpression) { + for (String fieldName : summaryFields) { + ret.add(new SummaryExpression(fieldName)); + } + } else { + throw new UnsupportedOperationException(exp.getClass().getName()); + } + return new StatementExpression(ret); + } + + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/IndexingValidation.java b/config-model/src/main/java/com/yahoo/schema/processing/IndexingValidation.java new file mode 100644 index 00000000000..d8c1fb3125f --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/IndexingValidation.java @@ -0,0 +1,164 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.ArrayDataType; +import com.yahoo.document.DataType; +import com.yahoo.document.MapDataType; +import com.yahoo.document.WeightedSetDataType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.GeoPos; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.indexinglanguage.ExpressionConverter; +import com.yahoo.vespa.indexinglanguage.expressions.AttributeExpression; +import com.yahoo.vespa.indexinglanguage.expressions.Expression; +import com.yahoo.vespa.indexinglanguage.expressions.FieldTypeAdapter; +import com.yahoo.vespa.indexinglanguage.expressions.IndexExpression; +import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression; +import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; +import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression; +import com.yahoo.vespa.indexinglanguage.expressions.SummaryExpression; +import com.yahoo.vespa.indexinglanguage.expressions.VerificationContext; +import com.yahoo.vespa.indexinglanguage.expressions.VerificationException; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.HashSet; +import java.util.Set; + +/** + * @author Simon Thoresen Hult + */ +public class IndexingValidation extends Processor { + + IndexingValidation(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! 
validate) return; + + VerificationContext context = new VerificationContext(new MyAdapter(schema)); + for (SDField field : schema.allConcreteFields()) { + ScriptExpression script = field.getIndexingScript(); + try { + script.verify(context); + MyConverter converter = new MyConverter(); + for (StatementExpression exp : script) { + converter.convert(exp); // TODO: stop doing this explicitly when visiting a script does not branch + } + } catch (VerificationException e) { + fail(schema, field, "For expression '" + e.getExpression() + "': " + e.getMessage()); + } + } + } + + private static class MyConverter extends ExpressionConverter { + + final Set<String> outputs = new HashSet<>(); + final Set<String> prevNames = new HashSet<>(); + + @Override + protected ExpressionConverter branch() { + MyConverter ret = new MyConverter(); + ret.outputs.addAll(outputs); + ret.prevNames.addAll(prevNames); + return ret; + } + + @Override + protected boolean shouldConvert(Expression exp) { + if (exp instanceof OutputExpression) { + String fieldName = ((OutputExpression)exp).getFieldName(); + if (outputs.contains(fieldName) && !prevNames.contains(fieldName)) { + throw new VerificationException(exp, "Attempting to assign conflicting values to field '" + + fieldName + "'."); + } + outputs.add(fieldName); + prevNames.add(fieldName); + } + if (exp.createdOutputType() != null) { + prevNames.clear(); + } + return false; + } + + @Override + protected Expression doConvert(Expression exp) { + throw new UnsupportedOperationException(); + } + } + + private static class MyAdapter implements FieldTypeAdapter { + + final Schema schema; + + MyAdapter(Schema schema) { + this.schema = schema; + } + + @Override + public DataType getInputType(Expression exp, String fieldName) { + SDField field = schema.getDocumentField(fieldName); + if (field == null) { + throw new VerificationException(exp, "Input field '" + fieldName + "' not found."); + } + return field.getDataType(); + } + + @Override + public void 
tryOutputType(Expression exp, String fieldName, DataType valueType) { + String fieldDesc; + DataType fieldType; + if (exp instanceof AttributeExpression) { + Attribute attribute = schema.getAttribute(fieldName); + if (attribute == null) { + throw new VerificationException(exp, "Attribute '" + fieldName + "' not found."); + } + fieldDesc = "attribute"; + fieldType = attribute.getDataType(); + } else if (exp instanceof IndexExpression) { + SDField field = schema.getConcreteField(fieldName); + if (field == null) { + throw new VerificationException(exp, "Index field '" + fieldName + "' not found."); + } + fieldDesc = "index field"; + fieldType = field.getDataType(); + } else if (exp instanceof SummaryExpression) { + SummaryField field = schema.getSummaryField(fieldName); + if (field == null) { + throw new VerificationException(exp, "Summary field '" + fieldName + "' not found."); + } + fieldDesc = "summary field"; + fieldType = field.getDataType(); + } else { + throw new UnsupportedOperationException(); + } + if ( ! fieldType.isAssignableFrom(valueType) && + ! 
fieldType.isAssignableFrom(createCompatType(valueType))) { + throw new VerificationException(exp, "Can not assign " + valueType.getName() + " to " + fieldDesc + + " '" + fieldName + "' which is " + fieldType.getName() + "."); + } + } + + private static DataType createCompatType(DataType origType) { + if (origType instanceof ArrayDataType) { + return DataType.getArray(createCompatType(((ArrayDataType)origType).getNestedType())); + } else if (origType instanceof MapDataType) { + MapDataType mapType = (MapDataType)origType; + return DataType.getMap(createCompatType(mapType.getKeyType()), + createCompatType(mapType.getValueType())); + } else if (origType instanceof WeightedSetDataType) { + return DataType.getWeightedSet(createCompatType(((WeightedSetDataType)origType).getNestedType())); + } else if (GeoPos.isPos(origType)) { + return DataType.LONG; + } else { + return origType; + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/IndexingValues.java b/config-model/src/main/java/com/yahoo/schema/processing/IndexingValues.java new file mode 100644 index 00000000000..fa4b7d2bc40 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/IndexingValues.java @@ -0,0 +1,71 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing;
+
+import com.yahoo.config.application.api.DeployLogger;
+import com.yahoo.schema.RankProfileRegistry;
+import com.yahoo.document.Field;
+import com.yahoo.schema.Schema;
+import com.yahoo.schema.document.SDField;
+import com.yahoo.vespa.indexinglanguage.ExpressionConverter;
+import com.yahoo.vespa.indexinglanguage.expressions.Expression;
+import com.yahoo.vespa.indexinglanguage.expressions.InputExpression;
+import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression;
+import com.yahoo.vespa.model.container.search.QueryProfiles;
+
+/**
+ * When validation is enabled, runs the indexing script of every document field that is not
+ * an extra field through a checker which fails if an output expression is reached after some
+ * other expression has produced a new value.
+ *
+ * @author Simon Thoresen Hult
+ */
+public class IndexingValues extends Processor {
+
+    public IndexingValues(Schema schema,
+                          DeployLogger deployLogger,
+                          RankProfileRegistry rankProfileRegistry,
+                          QueryProfiles queryProfiles) {
+        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
+    }
+
+    /** Checks the indexing script of each non-extra document field; does nothing unless validate is true. */
+    @Override
+    public void process(boolean validate, boolean documentsOnly) {
+        if ( ! validate) return;
+
+        for (Field documentField : schema.getDocument().fieldSet()) {
+            SDField sdField = (SDField)documentField;
+            if (sdField.isExtraField()) continue;
+
+            RequireThatDocumentFieldsAreImmutable checker = new RequireThatDocumentFieldsAreImmutable(documentField);
+            checker.convert(sdField.getIndexingScript());
+        }
+    }
+
+    /**
+     * Never converts anything: it only uses the shouldConvert callback to track which
+     * expression last produced a value, and throws when an output expression follows one.
+     */
+    private class RequireThatDocumentFieldsAreImmutable extends ExpressionConverter {
+
+        final Field field;
+
+        /** The last seen expression which creates an output type, or null if none since the field was read */
+        Expression mutatedBy;
+
+        RequireThatDocumentFieldsAreImmutable(Field field) {
+            this.field = field;
+        }
+
+        @Override
+        public ExpressionConverter branch() {
+            return clone();
+        }
+
+        @Override
+        protected boolean shouldConvert(Expression exp) {
+            boolean isOutput = exp instanceof OutputExpression;
+            if (isOutput && mutatedBy != null) {
+                String message = "Indexing expression '" + mutatedBy + "' attempts to modify the value of the " +
+                                 "document field '" + field.getName() + "'. Use a field outside the document " +
+                                 "block instead.";
+                throw newProcessException(schema, field, message);
+            }
+
+            boolean readsThisField = exp instanceof InputExpression
+                                     && ((InputExpression)exp).getFieldName().equals(field.getName());
+            if (readsThisField) {
+                mutatedBy = null; // reading the field itself resets the tracking
+            } else if (exp.createdOutputType() != null) {
+                mutatedBy = exp;
+            }
+            return false; // never ask for doConvert
+        }
+
+        @Override
+        protected Expression doConvert(Expression exp) {
+            throw new UnsupportedOperationException();
+        }
+    }
+}
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/IntegerIndex2Attribute.java b/config-model/src/main/java/com/yahoo/schema/processing/IntegerIndex2Attribute.java
new file mode 100644
index 00000000000..1d8480a8e99
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/processing/IntegerIndex2Attribute.java
@@ -0,0 +1,88 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.processing;
+
+import com.yahoo.config.application.api.DeployLogger;
+import com.yahoo.schema.RankProfileRegistry;
+import com.yahoo.document.NumericDataType;
+import com.yahoo.schema.document.SDField;
+import com.yahoo.schema.Index;
+import com.yahoo.schema.Schema;
+import com.yahoo.vespa.indexinglanguage.ExpressionConverter;
+import com.yahoo.vespa.indexinglanguage.ExpressionVisitor;
+import com.yahoo.vespa.indexinglanguage.expressions.AttributeExpression;
+import com.yahoo.vespa.indexinglanguage.expressions.Expression;
+import com.yahoo.vespa.indexinglanguage.expressions.IndexExpression;
+import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression;
+import com.yahoo.vespa.model.container.search.QueryProfiles;
+
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Replaces the 'index' statement of all numerical fields to 'attribute' because we no longer support numerical indexes.
+ *
+ * @author baldersheim
+ */
+public class IntegerIndex2Attribute extends Processor {
+
+    public IntegerIndex2Attribute(Schema schema,
+                                  DeployLogger deployLogger,
+                                  RankProfileRegistry rankProfileRegistry,
+                                  QueryProfiles queryProfiles) {
+        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
+    }
+
+    /**
+     * Rewrites the indexing script of each indexing field whose primitive type is numeric,
+     * and logs a warning for each field rewritten. Fields having an index named like the field
+     * whose type is not VESPA are left alone.
+     */
+    @Override
+    public void process(boolean validate, boolean documentsOnly) {
+        for (SDField field : schema.allConcreteFields()) {
+            if ( ! field.doesIndexing()) continue;
+            if ( ! (field.getDataType().getPrimitiveType() instanceof NumericDataType)) continue;
+
+            Index ownIndex = field.getIndex(field.getName());
+            if (ownIndex != null && ! ownIndex.getType().equals(Index.Type.VESPA)) continue;
+
+            ScriptExpression original = field.getIndexingScript();
+            Set<String> existingAttributes = new HashSet<>();
+            new MyVisitor(existingAttributes).visit(original);
+            Expression rewritten = new MyConverter(existingAttributes).convert(original);
+            field.setIndexingScript((ScriptExpression)rewritten);
+            warn(schema, field, "Changed to attribute because numerical indexes (field has type " +
+                                field.getDataType().getName() + ") is not currently supported." +
+                                " Index-only settings may fail. Ignore this warning for streaming search.");
+        }
+    }
+
+    /** Collects the field names of all attribute expressions visited into the given set */
+    private static class MyVisitor extends ExpressionVisitor {
+
+        final Set<String> attributeNames;
+
+        public MyVisitor(Set<String> attributeNames) {
+            this.attributeNames = attributeNames;
+        }
+
+        @Override
+        protected void doVisit(Expression exp) {
+            if ( ! (exp instanceof AttributeExpression)) return;
+            attributeNames.add(((AttributeExpression)exp).getFieldName());
+        }
+    }
+
+    /**
+     * Converts each index expression: returns null when an attribute expression with the same
+     * field name was already collected, and an attribute expression with that name otherwise.
+     */
+    private static class MyConverter extends ExpressionConverter {
+
+        final Set<String> attributeNames;
+
+        public MyConverter(Set<String> attributeNames) {
+            this.attributeNames = attributeNames;
+        }
+
+        @Override
+        protected boolean shouldConvert(Expression exp) {
+            return exp instanceof IndexExpression;
+        }
+
+        @Override
+        protected Expression doConvert(Expression exp) {
+            String name = ((IndexExpression)exp).getFieldName();
+            return attributeNames.contains(name) ? null : new AttributeExpression(name);
+        }
+    }
+
+}
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/LiteralBoost.java b/config-model/src/main/java/com/yahoo/schema/processing/LiteralBoost.java
new file mode 100644
index 00000000000..a84f895100a
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/processing/LiteralBoost.java
@@ -0,0 +1,79 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.processing;
+
+import com.yahoo.config.application.api.DeployLogger;
+import com.yahoo.schema.RankProfileRegistry;
+import com.yahoo.schema.Schema;
+import com.yahoo.schema.document.SDField;
+import com.yahoo.schema.RankProfile;
+import com.yahoo.vespa.model.container.search.QueryProfiles;
+
+import java.util.Iterator;
+
+/**
+ * Expresses literal boosts in terms of extra indices with rank boost.
+ * One extra index named <i>fieldname</i>_literal is added for each field having
+ * a literal boost of zero or more (zero to support other
+ * rank profiles setting a literal boost). Literal boost values on fields
+ * are translated to the weight of the implementation field: its weight is
+ * the weight of the original field plus the literal boost.
+ * The extra index is fed by tokenizing the value of the original field.
+ *
+ * @author bratseth
+ */
+public class LiteralBoost extends Processor {
+
+    public LiteralBoost(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) {
+        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
+    }
+
+    /** Adds extra search fields and indices to express literal boosts */
+    @Override
+    public void process(boolean validate, boolean documentsOnly) {
+        checkRankModifierRankType(schema);
+        addLiteralBoostsToFields(schema);
+        reduceFieldLiteralBoosts(schema);
+    }
+
+    /** Checks if literal boost is given using rank: , and sets the actual literal boost (to 100) accordingly. */
+    private void checkRankModifierRankType(Schema schema) {
+        for (SDField field : schema.allConcreteFields()) {
+            if (field.getLiteralBoost() > -1) continue; // Let explicit value take precedence
+            if (field.getRanking().isLiteral())
+                field.setLiteralBoost(100);
+        }
+    }
+
+    /**
+     * Ensures there are field boosts for all literal boosts mentioned in rank profiles.
+     * This is required because boost indices will only be generated by looking
+     * at field boosts. Settings naming a field which is not a concrete field of the
+     * schema are ignored.
+     */
+    private void addLiteralBoostsToFields(Schema schema) {
+        // Iterator<?> rather than a raw Iterator: no unchecked warnings, and the explicit cast below stays valid
+        Iterator<?> settings = matchingRankSettingsIterator(schema, RankProfile.RankSetting.Type.LITERALBOOST);
+        while (settings.hasNext()) {
+            RankProfile.RankSetting setting = (RankProfile.RankSetting)settings.next();
+            SDField field = schema.getConcreteField(setting.getFieldName());
+            if (field == null) continue;
+            if (field.getLiteralBoost() < 0)
+                field.setLiteralBoost(0);
+        }
+    }
+
+    /** Adds a literal field for every concrete field having a non-negative literal boost */
+    private void reduceFieldLiteralBoosts(Schema schema) {
+        for (SDField field : schema.allConcreteFields()) {
+            if (field.getLiteralBoost() < 0) continue;
+            reduceFieldLiteralBoost(field, schema);
+        }
+    }
+
+    /**
+     * Adds the literal field for the given field, fed by a script which tokenizes the
+     * field value and indexes it to <i>fieldname</i>_literal, and sets its weight to
+     * the sum of the weight and literal boost of the given field.
+     */
+    private void reduceFieldLiteralBoost(SDField field, Schema schema) {
+        SDField literalField = addField(schema, field, "literal",
+                                        "{ input " + field.getName() + " | tokenize | index " + field.getName() + "_literal; }",
+                                        "literal-boost");
+        literalField.setWeight(field.getWeight() + field.getLiteralBoost());
+    }
+
+}
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/MakeAliases.java b/config-model/src/main/java/com/yahoo/schema/processing/MakeAliases.java
new file mode 100644
index 00000000000..7093242d0ac
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/processing/MakeAliases.java
@@ -0,0 +1,61 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.processing;
+
+import com.yahoo.config.application.api.DeployLogger;
+import com.yahoo.schema.RankProfileRegistry;
+import com.yahoo.schema.Schema;
+import com.yahoo.schema.document.Attribute;
+import com.yahoo.schema.document.SDField;
+import com.yahoo.schema.Index;
+import com.yahoo.vespa.model.container.search.QueryProfiles;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Takes the aliases set on field by parser and sets them on correct Index or Attribute
+ *
+ * @author vegardh
+ */
+public class MakeAliases extends Processor {
+
+    public MakeAliases(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) {
+        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
+    }
+
+    /**
+     * Registers every alias of every concrete field on an index or attribute of that field.
+     *
+     * @param validate if true, an alias which is the name of an index or attribute of the schema,
+     *                 or which has already been seen, is rejected
+     * @param documentsOnly not used by this processor
+     * @throws IllegalArgumentException if validate is true and an alias is rejected
+     */
+    @Override
+    public void process(boolean validate, boolean documentsOnly) {
+        // A set gives constant time duplicate detection; only membership is ever asked for
+        Set<String> usedAliases = new HashSet<>();
+        for (SDField field : schema.allConcreteFields()) {
+            for (Map.Entry<String, String> e : field.getAliasToName().entrySet()) {
+                String alias = e.getKey();
+                String name = e.getValue();
+                String errMsg = "For " + schema + ": alias '" + alias + "' ";
+                if (validate && schema.existsIndex(alias)) {
+                    throw new IllegalArgumentException(errMsg + "is illegal since it is the name of an index.");
+                }
+                if (validate && schema.getAttribute(alias) != null) {
+                    throw new IllegalArgumentException(errMsg + "is illegal since it is the name of an attribute.");
+                }
+                if (validate && usedAliases.contains(alias)) {
+                    throw new IllegalArgumentException(errMsg + "specified more than once.");
+                }
+                usedAliases.add(alias);
+
+                registerAlias(field, alias, name);
+            }
+        }
+    }
+
+    /**
+     * Adds the alias to the index with the given name if the field has one, otherwise to the attribute
+     * with that name if the field has one and does no indexing, and otherwise to a new index with
+     * that name which is added to the field.
+     */
+    private void registerAlias(SDField field, String alias, String name) {
+        Index index = field.getIndex(name);
+        Attribute attribute = field.getAttributes().get(name);
+        if (index != null) {
+            index.addAlias(alias); // alias will be for index in this case, since it is the one used in a search
+        } else if (attribute != null && ! field.doesIndexing()) {
+            attribute.getAliases().add(alias);
+        } else {
+            index = new Index(name);
+            index.addAlias(alias);
+            field.addIndex(index);
+        }
+    }
+
+}
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/MakeDefaultSummaryTheSuperSet.java b/config-model/src/main/java/com/yahoo/schema/processing/MakeDefaultSummaryTheSuperSet.java
new file mode 100644
index 00000000000..ea24bf0569d
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/processing/MakeDefaultSummaryTheSuperSet.java
@@ -0,0 +1,49 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.processing;
+
+import com.yahoo.config.application.api.DeployLogger;
+import com.yahoo.schema.RankProfileRegistry;
+import com.yahoo.schema.Schema;
+import com.yahoo.vespa.documentmodel.DocumentSummary;
+import com.yahoo.vespa.documentmodel.SummaryField;
+import com.yahoo.vespa.documentmodel.SummaryTransform;
+import com.yahoo.vespa.model.container.search.QueryProfiles;
+
+/**
+ * All summary fields which are not attributes
+ * must currently be present in the default summary class,
+ * since the default summary class also defines the docsum.dat format.
+ * This processor adds any missing summaries to the default summary.
+ * When that is decoupled from the actual summaries returned, this
+ * processor can be removed. Note: the StreamingSummary also takes advantage of
+ * the fact that default is the superset.
+ *
+ * All other summary logic should work unchanged without this processing step
+ * except that IndexStructureValidator.validateSummaryFields must be changed to
+ * consider all summaries, not just the default, i.e change to
+ * if (search.getSummaryField(expr.getFieldName()) == null)
+ *
+ * This must be done after other summary processors.
+ *
+ * @author bratseth
+ */
+public class MakeDefaultSummaryTheSuperSet extends Processor {
+
+    public MakeDefaultSummaryTheSuperSet(Schema schema,
+                                         DeployLogger deployLogger,
+                                         RankProfileRegistry rankProfileRegistry,
+                                         QueryProfiles queryProfiles) {
+        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
+    }
+
+    /**
+     * Adds a clone of each uniquely named summary field of the schema to the summary named "default",
+     * unless the default summary already has a field with that name or the field uses one of the
+     * attribute based transforms.
+     */
+    @Override
+    public void process(boolean validate, boolean documentsOnly) {
+        DocumentSummary defaultSummary = schema.getSummariesInThis().get("default");
+        for (SummaryField candidate : schema.getUniqueNamedSummaryFields().values()) {
+            boolean alreadyPresent = defaultSummary.getSummaryField(candidate.getName()) != null;
+            if (alreadyPresent || usesAttributeTransform(candidate)) continue;
+
+            defaultSummary.add(candidate.clone());
+        }
+    }
+
+    /** Returns whether the transform of the given field is one of those excluded from the default summary */
+    private boolean usesAttributeTransform(SummaryField summaryField) {
+        SummaryTransform transform = summaryField.getTransform();
+        return transform == SummaryTransform.ATTRIBUTE
+               || transform == SummaryTransform.ATTRIBUTECOMBINER
+               || transform == SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER;
+    }
+
+}
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/MatchConsistency.java b/config-model/src/main/java/com/yahoo/schema/processing/MatchConsistency.java
new file mode 100644
index 00000000000..5fb59e53ba9
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/processing/MatchConsistency.java
@@ -0,0 +1,72 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.processing;
+
+import com.yahoo.config.application.api.DeployLogger;
+import com.yahoo.schema.RankProfileRegistry;
+import com.yahoo.schema.Schema;
+import com.yahoo.schema.document.MatchType;
+import com.yahoo.schema.document.SDField;
+import com.yahoo.vespa.indexinglanguage.ExpressionVisitor;
+import com.yahoo.vespa.indexinglanguage.expressions.Expression;
+import com.yahoo.vespa.indexinglanguage.expressions.IndexExpression;
+import com.yahoo.vespa.model.container.search.QueryProfiles;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Warn on inconsistent match settings for any index
+ *
+ * @author vegardh
+ */
+public class MatchConsistency extends Processor {
+
+    public MatchConsistency(Schema schema,
+                            DeployLogger deployLogger,
+                            RankProfileRegistry rankProfileRegistry,
+                            QueryProfiles queryProfiles) {
+        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
+    }
+
+    /**
+     * Visits the indexing script of every concrete field, recording the match type first seen
+     * for each index written to. Does nothing unless validate is true.
+     */
+    @Override
+    public void process(boolean validate, boolean documentsOnly) {
+        if ( ! validate) return;
+
+        Map<String, MatchType> typeByIndex = new HashMap<>();
+        for (SDField field : schema.allConcreteFields()) {
+            MyVisitor visitor = new MyVisitor(schema, field, typeByIndex);
+            visitor.visit(field.getIndexingScript());
+        }
+    }
+
+    /**
+     * Records the match type of the field for the given index if none is recorded yet,
+     * and otherwise warns if it differs from the recorded one.
+     */
+    private void checkMatching(Schema schema, SDField field, Map<String, MatchType> types, String indexTo) {
+        MatchType recorded = types.get(indexTo);
+        if (recorded == null) {
+            types.put(indexTo, field.getMatching().getType());
+            return;
+        }
+        if (field.getMatching().getType().equals(recorded)) return;
+
+        warn(schema, field, "The matching type for index '" + indexTo + "' (got " + field.getMatching().getType() +
+                            ") is inconsistent with that given for the same index in a previous field (had " +
+                            recorded + ").");
+    }
+
+    /** Runs the consistency check for each index expression visited */
+    private class MyVisitor extends ExpressionVisitor {
+
+        final Schema schema;
+        final SDField field;
+        final Map<String, MatchType> types;
+
+        MyVisitor(Schema schema, SDField field, Map<String, MatchType> types) {
+            this.schema = schema;
+            this.field = field;
+            this.types = types;
+        }
+
+        @Override
+        protected void doVisit(Expression exp) {
+            if ( ! (exp instanceof IndexExpression)) return;
+            String indexName = ((IndexExpression)exp).getFieldName();
+            checkMatching(schema, field, types, indexName);
+        }
+    }
+
+}
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/MatchPhaseSettingsValidator.java b/config-model/src/main/java/com/yahoo/schema/processing/MatchPhaseSettingsValidator.java
new file mode 100644
index 00000000000..7c1c255097f
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/processing/MatchPhaseSettingsValidator.java
@@ -0,0 +1,98 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.processing;
+
+import com.yahoo.config.application.api.DeployLogger;
+import com.yahoo.schema.RankProfile;
+import com.yahoo.schema.RankProfileRegistry;
+import com.yahoo.schema.Schema;
+import com.yahoo.schema.document.Attribute;
+import com.yahoo.vespa.model.container.search.QueryProfiles;
+
+/**
+ * Validates the match phase settings for all registered rank profiles.
+ *
+ * @author geirst
+ */
+public class MatchPhaseSettingsValidator extends Processor {
+
+    public MatchPhaseSettingsValidator(Schema schema,
+                                       DeployLogger deployLogger,
+                                       RankProfileRegistry rankProfileRegistry,
+                                       QueryProfiles queryProfiles) {
+        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
+    }
+
+    /**
+     * Validates the match phase settings, where present, of each rank profile of the schema.
+     * Does nothing unless validate is true and documentsOnly is false.
+     */
+    @Override
+    public void process(boolean validate, boolean documentsOnly) {
+        if ( ! validate || documentsOnly) return;
+
+        for (RankProfile profile : rankProfileRegistry.rankProfilesOf(schema)) {
+            RankProfile.MatchPhaseSettings matchPhase = profile.getMatchPhaseSettings();
+            if (matchPhase == null) continue;
+            validateMatchPhaseSettings(profile, matchPhase);
+        }
+    }
+
+    private void validateMatchPhaseSettings(RankProfile rankProfile, RankProfile.MatchPhaseSettings settings) {
+        String attributeName = settings.getAttribute();
+        Attribute attribute = schema.getAttribute(attributeName);
+        AttributeValidator validator = new AttributeValidator(schema.getName(),
+                                                              rankProfile.name(),
+                                                              attribute,
+                                                              attributeName);
+        validator.validate();
+    }
+
+    /**
+     * Checks that an attribute exists, is single value and neither string nor predicate, and is fast-search.
+     * A failed check is reported through {@link #failValidation}, which throws IllegalArgumentException.
+     */
+    public static class AttributeValidator {
+
+        private final String searchName;
+        private final String rankProfileName;
+        protected final Attribute attribute;
+        private final String attributeName;
+
+        public AttributeValidator(String searchName, String rankProfileName, Attribute attribute, String attributeName) {
+            this.searchName = searchName;
+            this.rankProfileName = rankProfileName;
+            this.attribute = attribute;
+            this.attributeName = attributeName;
+        }
+
+        /** Runs the checks in order: existence, single numeric, fast-search */
+        public void validate() {
+            validateThatAttributeExists();
+            validateThatAttributeIsSingleNumeric();
+            validateThatAttributeIsFastSearch();
+        }
+
+        protected void validateThatAttributeExists() {
+            if (attribute != null) return;
+            failValidation("does not exists");
+        }
+
+        protected void validateThatAttributeIsSingleNumeric() {
+            boolean singleValue = attribute.getCollectionType().equals(Attribute.CollectionType.SINGLE);
+            boolean numeric = ! attribute.getType().equals(Attribute.Type.STRING)
+                              && ! attribute.getType().equals(Attribute.Type.PREDICATE);
+            if (singleValue && numeric) return;
+
+            failValidation("must be single value numeric, but it is '" +
+                           attribute.getDataType().getName() + "'");
+        }
+
+        protected void validateThatAttributeIsFastSearch() {
+            if (attribute.isFastSearch()) return;
+            failValidation("must be fast-search, but it is not");
+        }
+
+        /** Throws IllegalArgumentException with the given text appended to the message prefix */
+        protected void failValidation(String what) {
+            throw new IllegalArgumentException(createMessagePrefix() + what);
+        }
+
+        /** Returns the name of what is validated, as used in the error message prefix */
+        public String getValidationType() { return "match-phase"; }
+
+        private String createMessagePrefix() {
+            StringBuilder prefix = new StringBuilder();
+            prefix.append("In search definition '").append(searchName);
+            prefix.append("', rank-profile '").append(rankProfileName);
+            prefix.append("': ").append(getValidationType()).append(" attribute '").append(attributeName).append("' ");
+            return prefix.toString();
+        }
+
+    }
+
+}
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/MatchedElementsOnlyResolver.java b/config-model/src/main/java/com/yahoo/schema/processing/MatchedElementsOnlyResolver.java
new file mode 100644
index 00000000000..ed95f87d7d6
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/processing/MatchedElementsOnlyResolver.java
@@ -0,0 +1,95 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.processing;
+
+import com.yahoo.config.application.api.DeployLogger;
+import com.yahoo.document.ArrayDataType;
+import com.yahoo.document.DataType;
+import com.yahoo.document.WeightedSetDataType;
+import com.yahoo.schema.RankProfileRegistry;
+import com.yahoo.schema.Schema;
+import com.yahoo.schema.document.ComplexAttributeFieldUtils;
+import com.yahoo.schema.document.ImmutableSDField;
+import com.yahoo.vespa.documentmodel.DocumentSummary;
+import com.yahoo.vespa.documentmodel.SummaryField;
+import com.yahoo.vespa.documentmodel.SummaryTransform;
+import com.yahoo.vespa.model.container.search.QueryProfiles;
+
+import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isComplexFieldWithOnlyStructFieldAttributes;
+import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isSupportedComplexField;
+
+/**
+ * Iterates all summary fields with 'matched-elements-only' and adjusts transform (if all struct-fields are attributes)
+ * and validates that the field type is supported.
+ *
+ * @author geirst
+ */
+public class MatchedElementsOnlyResolver extends Processor {
+
+    public MatchedElementsOnlyResolver(Schema schema,
+                                       DeployLogger deployLogger,
+                                       RankProfileRegistry rankProfileRegistry,
+                                       QueryProfiles queryProfiles) {
+        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
+    }
+
+    /** Processes every summary field, in any summary of the schema, whose transform is MATCHED_ELEMENTS_FILTER */
+    @Override
+    public void process(boolean validate, boolean documentsOnly) {
+        for (var summary : schema.getSummaries().values()) {
+            for (var field : summary.getSummaryFields().values()) {
+                if (field.getTransform() != SummaryTransform.MATCHED_ELEMENTS_FILTER) continue;
+                processSummaryField(summary, field, validate);
+            }
+        }
+    }
+
+    /**
+     * Switches the transform to MATCHED_ATTRIBUTE_ELEMENTS_FILTER when the source field is a supported
+     * complex field with only struct field attributes, or a supported multi-value field which does
+     * attributing. Fails, if validate is true, when the source field is of neither supported kind.
+     */
+    private void processSummaryField(DocumentSummary summary, SummaryField field, boolean validate) {
+        var sourceField = schema.getField(field.getSingleSource());
+        if (sourceField == null) return; // this case is handled in SummaryFieldsMustHaveValidSource
+
+        if (isSupportedComplexField(sourceField)) {
+            if (isComplexFieldWithOnlyStructFieldAttributes(sourceField)) {
+                field.setTransform(SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER);
+            }
+            return;
+        }
+        if (isSupportedMultiValueField(sourceField)) {
+            if (sourceField.doesAttributing()) {
+                field.setTransform(SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER);
+            }
+            return;
+        }
+        if (validate) {
+            fail(summary, field, "'matched-elements-only' is not supported for this field type. " +
+                                 "Supported field types are: array of primitive, weighted set of primitive, " +
+                                 "array of simple struct, map of primitive type to simple struct, " +
+                                 "and map of primitive type to primitive type");
+        }
+    }
+
+    private boolean isSupportedMultiValueField(ImmutableSDField sourceField) {
+        DataType fieldType = sourceField.getDataType();
+        return isArrayOfPrimitiveType(fieldType) || isWeightedsetOfPrimitiveType(fieldType);
+    }
+
+    private boolean isArrayOfPrimitiveType(DataType type) {
+        if ( ! (type instanceof ArrayDataType)) return false;
+        DataType elementType = ((ArrayDataType) type).getNestedType();
+        return ComplexAttributeFieldUtils.isPrimitiveType(elementType);
+    }
+
+    private boolean isWeightedsetOfPrimitiveType(DataType type) {
+        if ( ! (type instanceof WeightedSetDataType)) return false;
+        DataType elementType = ((WeightedSetDataType) type).getNestedType();
+        return ComplexAttributeFieldUtils.isPrimitiveType(elementType);
+    }
+
+    /** Throws IllegalArgumentException with a message identifying the schema, summary and summary field */
+    private void fail(DocumentSummary summary, SummaryField field, String msg) {
+        throw new IllegalArgumentException(formatError(schema, summary, field, msg));
+    }
+
+    private String formatError(Schema schema, DocumentSummary summary, SummaryField field, String msg) {
+        String location = "For " + schema + ", document summary '" + summary.getName()
+                          + "', summary field '" + field.getName() + "': ";
+        return location + msg;
+    }
+
+}
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/MultifieldIndexHarmonizer.java b/config-model/src/main/java/com/yahoo/schema/processing/MultifieldIndexHarmonizer.java
new file mode 100644
index 00000000000..3a889085871
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/processing/MultifieldIndexHarmonizer.java
@@ -0,0 +1,76 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.processing;
+
+import com.yahoo.config.application.api.DeployLogger;
+import com.yahoo.schema.RankProfileRegistry;
+import com.yahoo.schema.Schema;
+import com.yahoo.schema.document.SDField;
+import com.yahoo.schema.processing.multifieldresolver.IndexCommandResolver;
+import com.yahoo.schema.processing.multifieldresolver.RankTypeResolver;
+import com.yahoo.schema.processing.multifieldresolver.StemmingResolver;
+import com.yahoo.vespa.model.container.search.QueryProfiles;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Ensures that there are no conflicting types or field settings
+ * in multifield indices, either by changing settings or by splitting
+ * conflicting fields in multiple ones with different settings.
+ *
+ * @author bratseth
+ */
+public class MultifieldIndexHarmonizer extends Processor {
+
+    /** A map from index names to a List of fields going to that index */
+    private final Map<String, List<SDField>> indexToFields = new HashMap<>();
+
+    public MultifieldIndexHarmonizer(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) {
+        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
+    }
+
+    /** Groups the indexing fields by index name, then resolves conflicts in each group of two or more fields */
+    @Override
+    public void process(boolean validate, boolean documentsOnly) {
+        populateIndexToFields(schema);
+        resolveAllConflicts(schema);
+    }
+
+    /** Registers every concrete field which does indexing under the index named as the field */
+    private void populateIndexToFields(Schema schema) {
+        for (SDField field : schema.allConcreteFields()) {
+            if ( ! field.doesIndexing()) continue;
+            addIndexField(field.getName(), field);
+        }
+    }
+
+    /** Appends the field to the list of the given index, creating the list on first use */
+    private void addIndexField(String indexName, SDField field) {
+        indexToFields.computeIfAbsent(indexName, name -> new ArrayList<>()).add(field);
+    }
+
+    private void resolveAllConflicts(Schema schema) {
+        for (Map.Entry<String, List<SDField>> entry : indexToFields.entrySet()) {
+            String indexName = entry.getKey();
+            List<SDField> fields = entry.getValue();
+            if (fields.size() == 1) continue; // It takes two to make a conflict
+            resolveConflicts(indexName, fields, schema);
+        }
+    }
+
+    /**
+     * Resolves all conflicts for one index, by running the stemming, index command
+     * and rank type resolvers, in that order.
+     *
+     * @param indexName the name of the index in question
+     * @param fields all the fields indexed to this index
+     * @param schema the schema having this index
+     */
+    private void resolveConflicts(String indexName, List<SDField> fields, Schema schema) {
+        new StemmingResolver(indexName, fields, schema, deployLogger).resolve();
+        new IndexCommandResolver(indexName, fields, schema, deployLogger).resolve();
+        new RankTypeResolver(indexName, fields, schema, deployLogger).resolve();
+    }
+
+}
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/MutableAttributes.java b/config-model/src/main/java/com/yahoo/schema/processing/MutableAttributes.java
new file mode 100644
index 00000000000..854f6b2dddb
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/processing/MutableAttributes.java
@@ -0,0 +1,29 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.processing;
+
+import com.yahoo.config.application.api.DeployLogger;
+import com.yahoo.schema.RankProfileRegistry;
+import com.yahoo.schema.Schema;
+import com.yahoo.schema.document.SDField;
+import com.yahoo.vespa.model.container.search.QueryProfiles;
+
+/**
+ * Rejects fields which are not extra fields and have an attribute, named as the field,
+ * which is marked mutable.
+ */
+public class MutableAttributes extends Processor {
+
+    public MutableAttributes(Schema schema,
+                             DeployLogger deployLogger,
+                             RankProfileRegistry rankProfileRegistry,
+                             QueryProfiles queryProfiles) {
+        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
+    }
+
+    /**
+     * Checks all concrete fields of the schema. Note that this check runs regardless of the validate argument.
+     *
+     * @throws IllegalArgumentException on the first offending field found
+     */
+    @Override
+    public void process(boolean validate, boolean documentsOnly) {
+        for (SDField field : schema.allConcreteFields()) {
+            if (field.isExtraField()) continue;
+            if ( ! field.getAttributes().containsKey(field.getName())) continue;
+            if ( ! field.getAttributes().get(field.getName()).isMutable()) continue;
+
+            throw new IllegalArgumentException("Field '" + field.getName() + "' in '" + schema.getDocument().getName() +
+                                               "' can not be marked mutable as it is inside the document clause.");
+        }
+    }
+}
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/NGramMatch.java b/config-model/src/main/java/com/yahoo/schema/processing/NGramMatch.java
new file mode 100644
index 00000000000..f1ff910be43
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/processing/NGramMatch.java
@@ -0,0 +1,78 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.schema.processing;
+
+import com.yahoo.config.application.api.DeployLogger;
+import com.yahoo.document.CollectionDataType;
+import com.yahoo.document.DataType;
+import com.yahoo.schema.RankProfileRegistry;
+import com.yahoo.schema.Schema;
+import com.yahoo.schema.document.MatchType;
+import com.yahoo.schema.document.SDField;
+import com.yahoo.schema.document.Stemming;
+import com.yahoo.vespa.indexinglanguage.expressions.*;
+import com.yahoo.vespa.model.container.search.QueryProfiles;
+
+/**
+ * The implementation of "gram" matching - splitting the incoming text and the queries into
+ * n-grams for matching. This will also validate the gram settings.
+ *
+ * @author bratseth
+ */
+public class NGramMatch extends Processor {
+
+    /** The gram size used when a gram matching field sets none (that is, has a negative gram size) */
+    public static final int DEFAULT_GRAM_SIZE = 2;
+
+    public NGramMatch(Schema schema,
+                      DeployLogger deployLogger,
+                      RankProfileRegistry rankProfileRegistry,
+                      QueryProfiles queryProfiles) {
+        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
+    }
+
+    /**
+     * Implements gram matching for every concrete field with match type GRAM. When validating,
+     * any other field having a non-negative gram size is rejected.
+     */
+    @Override
+    public void process(boolean validate, boolean documentsOnly) {
+        for (SDField field : schema.allConcreteFields()) {
+            boolean gramMatching = field.getMatching().getType().equals(MatchType.GRAM);
+            if (gramMatching) {
+                implementGramMatch(schema, field, validate);
+                continue;
+            }
+            if (validate && field.getMatching().getGramSize() >= 0)
+                throw new IllegalArgumentException("gram-size can only be set when the matching mode is 'gram'");
+        }
+    }
+
+    /**
+     * Turns off stemming, infers codepoint normalizing, adds the "ngram" query command and
+     * rewrites the indexing script of the given field.
+     */
+    private void implementGramMatch(Schema schema, SDField field, boolean validate) {
+        boolean attributeOnly = field.doesAttributing() && ! field.doesIndexing();
+        if (validate && attributeOnly)
+            throw new IllegalArgumentException("gram matching is not supported with attributes, use 'index' in indexing");
+
+        int configuredSize = field.getMatching().getGramSize();
+        int gramSize = configuredSize < 0 ? DEFAULT_GRAM_SIZE : configuredSize; // negative means not set
+        if (validate && gramSize == 0)
+            throw new IllegalArgumentException("Illegal gram size in " + field + ": Must be at least 1");
+
+        field.getNormalizing().inferCodepoint();
+        field.setStemming(Stemming.NONE); // not compatible with stemming and normalizing
+        field.addQueryCommand("ngram " + gramSize);
+        Expression converted = new MyProvider(schema, gramSize).convert(field.getIndexingScript());
+        field.setIndexingScript((ScriptExpression)converted);
+    }
+
+    /** Provides an n-gram transform for output expressions, wrapped in a for-each for collection types */
+    private static class MyProvider extends TypedTransformProvider {
+
+        final int ngram;
+
+        MyProvider(Schema schema, int ngram) {
+            super(NGramExpression.class, schema);
+            this.ngram = ngram;
+        }
+
+        @Override
+        protected boolean requiresTransform(Expression exp, DataType fieldType) {
+            return exp instanceof OutputExpression;
+        }
+
+        @Override
+        protected Expression newTransform(DataType fieldType) {
+            Expression gramExpression = new NGramExpression(null, ngram);
+            return fieldType instanceof CollectionDataType ? new ForEachExpression(gramExpression)
+                                                           : gramExpression;
+        }
+
+    }
+
+}
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/OnnxModelConfigGenerator.java b/config-model/src/main/java/com/yahoo/schema/processing/OnnxModelConfigGenerator.java
new file mode 100644
index 00000000000..ce56a4320d3
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/processing/OnnxModelConfigGenerator.java
@@ -0,0 +1,96 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+package com.yahoo.schema.processing;
+
+import com.yahoo.config.application.api.ApplicationPackage;
+import com.yahoo.config.application.api.DeployLogger;
+import com.yahoo.schema.OnnxModel;
+import com.yahoo.schema.RankProfile;
+import com.yahoo.schema.RankProfileRegistry;
+import com.yahoo.schema.Schema;
+import com.yahoo.schema.expressiontransforms.OnnxModelTransformer;
+import com.yahoo.searchlib.rankingexpression.rule.CompositeNode;
+import com.yahoo.searchlib.rankingexpression.rule.ConstantNode;
+import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode;
+import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode;
+import com.yahoo.vespa.model.container.search.QueryProfiles;
+import com.yahoo.vespa.model.ml.OnnxModelInfo;
+
+import java.util.Map;
+
+/**
+ * Processes ONNX ranking features of the form:
+ *
+ *     onnx("files/model.onnx", "path/to/output:1")
+ *
+ * And generates an "onnx-model" configuration as if it was defined in the profile:
+ *
+ *     onnx-model files_model_onnx {
+ *         file: "files/model.onnx"
+ *     }
+ *
+ * Inputs and outputs are resolved in OnnxModelTypeResolver, which must be
+ * processed after this.
+ *
+ * @author lesters
+ */
+public class OnnxModelConfigGenerator extends Processor {
+
+    public OnnxModelConfigGenerator(Schema schema,
+                                    DeployLogger deployLogger,
+                                    RankProfileRegistry rankProfileRegistry,
+                                    QueryProfiles queryProfiles) {
+        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
+    }
+
+    /**
+     * Walks the first and second phase ranking expressions, the function bodies and the summary
+     * features of every rank profile of the schema. Does nothing if documentsOnly is true.
+     */
+    @Override
+    public void process(boolean validate, boolean documentsOnly) {
+        if (documentsOnly) return;
+
+        for (RankProfile profile : rankProfileRegistry.rankProfilesOf(schema)) {
+            if (profile.getFirstPhaseRanking() != null) {
+                process(profile.getFirstPhaseRanking().getRoot(), profile);
+            }
+            if (profile.getSecondPhaseRanking() != null) {
+                process(profile.getSecondPhaseRanking().getRoot(), profile);
+            }
+            for (Map.Entry<String, RankProfile.RankingExpressionFunction> entry : profile.getFunctions().entrySet()) {
+                ExpressionNode body = entry.getValue().function().getBody().getRoot();
+                process(body, profile);
+            }
+            for (ReferenceNode summaryFeature : profile.getSummaryFeatures()) {
+                process(summaryFeature, profile);
+            }
+        }
+    }
+
+    /** Handles reference nodes directly and recurses into the children of composite nodes */
+    private void process(ExpressionNode node, RankProfile profile) {
+        if (node instanceof ReferenceNode) {
+            process((ReferenceNode)node, profile);
+            return;
+        }
+        if (node instanceof CompositeNode) {
+            for (ExpressionNode child : ((CompositeNode) node).children()) {
+                process(child, profile);
+            }
+        }
+    }
+
+    /**
+     * Adds an onnx model to the profile for a feature named "onnxModel" or "onnx" whose first
+     * argument is a constant naming a model which exists, either at the path given or below the
+     * models directory, unless the profile already has a model with the derived config name.
+     */
+    private void process(ReferenceNode feature, RankProfile profile) {
+        boolean isOnnxFeature = feature.getName().equals("onnxModel") || feature.getName().equals("onnx");
+        if ( ! isOnnxFeature) return;
+        if ( ! (feature.getArguments().size() > 0)) return;
+        if ( ! (feature.getArguments().expressions().get(0) instanceof ConstantNode)) return;
+
+        ConstantNode pathArgument = (ConstantNode) feature.getArguments().expressions().get(0);
+        String path = OnnxModelTransformer.stripQuotes(pathArgument.toString());
+        String modelConfigName = OnnxModelTransformer.asValidIdentifier(path);
+
+        // Only add the configuration if the model can actually be found.
+        if ( ! OnnxModelInfo.modelExists(path, schema.applicationPackage())) {
+            path = ApplicationPackage.MODELS_DIR.append(path).toString();
+            if ( ! OnnxModelInfo.modelExists(path, schema.applicationPackage())) {
+                return;
+            }
+        }
+
+        OnnxModel existing = profile.onnxModels().get(modelConfigName);
+        if (existing == null)
+            profile.add(new OnnxModel(modelConfigName, path));
+    }
+
+}
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/OnnxModelTypeResolver.java b/config-model/src/main/java/com/yahoo/schema/processing/OnnxModelTypeResolver.java
new file mode 100644
index 00000000000..32229ea635b
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/schema/processing/OnnxModelTypeResolver.java
@@ -0,0 +1,40 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+package com.yahoo.schema.processing;
+
+import com.yahoo.config.application.api.DeployLogger;
+import com.yahoo.schema.OnnxModel;
+import com.yahoo.schema.RankProfile;
+import com.yahoo.schema.RankProfileRegistry;
+import com.yahoo.schema.Schema;
+import com.yahoo.vespa.model.container.search.QueryProfiles;
+import com.yahoo.vespa.model.ml.OnnxModelInfo;
+
+/**
+ * Processes every "onnx-model" element in the schema. Associates model type
+ * information by retrieving from either the ONNX model file directly or from
+ * preprocessed information in ZK. Adds missing input and output mappings
+ * (assigning default names).
+ *
+ * Must be processed before RankingExpressingTypeResolver.
+ * + * @author lesters + */ +public class OnnxModelTypeResolver extends Processor { + + public OnnxModelTypeResolver(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if (documentsOnly) return; + for (OnnxModel onnxModel : schema.declaredOnnxModels().values()) + onnxModel.setModelInfo(OnnxModelInfo.load(onnxModel.getFileName(), schema.applicationPackage())); + for (RankProfile profile : rankProfileRegistry.rankProfilesOf(schema)) { + for (OnnxModel onnxModel : profile.declaredOnnxModels().values()) + onnxModel.setModelInfo(OnnxModelInfo.load(onnxModel.getFileName(), schema.applicationPackage())); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/OptimizeIlscript.java b/config-model/src/main/java/com/yahoo/schema/processing/OptimizeIlscript.java new file mode 100644 index 00000000000..a3b026fb724 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/OptimizeIlscript.java @@ -0,0 +1,38 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.indexinglanguage.ExpressionOptimizer; +import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Run ExpressionOptimizer on all scripts, to get rid of expressions that have no effect. 
+ */ +public class OptimizeIlscript extends Processor { + + public OptimizeIlscript(Schema schema, + DeployLogger deployLogger, + RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (SDField field : schema.allConcreteFields()) { + ScriptExpression script = field.getIndexingScript(); + if (script == null) continue; + + field.setIndexingScript((ScriptExpression)new ExpressionOptimizer().convert(script)); + if ( ! field.getIndexingScript().toString().equals(script.toString())) { + info(schema, field, "Rewrote ilscript from:\n" + script.toString() + + "\nto\n" + field.getIndexingScript().toString()); + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/PagedAttributeValidator.java b/config-model/src/main/java/com/yahoo/schema/processing/PagedAttributeValidator.java new file mode 100644 index 00000000000..34bb6e1db2e --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/PagedAttributeValidator.java @@ -0,0 +1,66 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.Field; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.tensor.TensorType; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.Optional; + +/** + * Validates the 'paged' attribute setting and throws if specified on unsupported types. 
+ * + * @author geirst + */ +public class PagedAttributeValidator extends Processor { + + public PagedAttributeValidator(Schema schema, + DeployLogger deployLogger, + RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if (!validate) { + return; + } + for (var field : schema.allConcreteFields()) { + for (var attribute : field.getAttributes().values()) { + if (attribute.isPaged()) { + validatePagedSetting(field, attribute); + } + } + } + } + + private void validatePagedSetting(Field field, Attribute attribute) { + if (!isSupportedType(attribute)) { + fail(schema, field, "The 'paged' attribute setting is not supported for non-dense tensor, predicate and reference types"); + } + } + + private boolean isSupportedType(Attribute attribute) { + var type = attribute.getType(); + return (type != Attribute.Type.PREDICATE) && + (type != Attribute.Type.REFERENCE) && + (isSupportedTensorType(attribute.tensorType())); + } + + private boolean isSupportedTensorType(Optional<TensorType> tensorType) { + if (tensorType.isPresent()) { + return isDenseTensorType(tensorType.get()); + } + return true; + } + + private boolean isDenseTensorType(TensorType type) { + return type.dimensions().stream().allMatch(d -> d.isIndexed()); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/PredicateProcessor.java b/config-model/src/main/java/com/yahoo/schema/processing/PredicateProcessor.java new file mode 100644 index 00000000000..280eae3d88b --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/PredicateProcessor.java @@ -0,0 +1,144 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.DataType; +import com.yahoo.document.datatypes.IntegerFieldValue; +import com.yahoo.document.datatypes.LongFieldValue; +import com.yahoo.schema.Index; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.BooleanIndexDefinition; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.indexinglanguage.ExpressionConverter; +import com.yahoo.vespa.indexinglanguage.expressions.Expression; +import com.yahoo.vespa.indexinglanguage.expressions.OptimizePredicateExpression; +import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression; +import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; +import com.yahoo.vespa.indexinglanguage.expressions.SetValueExpression; +import com.yahoo.vespa.indexinglanguage.expressions.SetVarExpression; +import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.ArrayList; +import java.util.List; + +/** + * Validates the predicate fields. + * + * @author Lester Solbakken + */ +public class PredicateProcessor extends Processor { + + public PredicateProcessor(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (SDField field : schema.allConcreteFields()) { + if (field.getDataType() == DataType.PREDICATE) { + if (validate && field.doesIndexing()) { + fail(schema, field, "Use 'attribute' instead of 'index'. 
This will require a refeed if you have upgraded."); + } + if (field.doesAttributing()) { + Attribute attribute = field.getAttributes().get(field.getName()); + for (Index index : field.getIndices().values()) { + BooleanIndexDefinition booleanDefinition = index.getBooleanIndexDefiniton(); + if (validate && (booleanDefinition == null || ! booleanDefinition.hasArity())) { + fail(schema, field, "Missing arity value in predicate field."); + } + if (validate && (booleanDefinition.getArity() < 2)) { + fail(schema, field, "Invalid arity value in predicate field, must be greater than 1."); + } + double threshold = booleanDefinition.getDensePostingListThreshold(); + if (validate && (threshold <= 0 || threshold > 1)) { + fail(schema, field, "Invalid dense-posting-list-threshold value in predicate field. " + + "Value must be in range (0..1]."); + } + + attribute.setArity(booleanDefinition.getArity()); + attribute.setLowerBound(booleanDefinition.getLowerBound()); + attribute.setUpperBound(booleanDefinition.getUpperBound()); + + attribute.setDensePostingListThreshold(threshold); + addPredicateOptimizationIlScript(field, booleanDefinition); + } + DocumentSummary summary = schema.getSummariesInThis().get("attributeprefetch"); + if (summary != null) { + summary.remove(attribute.getName()); + } + for (SummaryField summaryField : schema.getSummaryFields(field)) { + summaryField.setTransform(SummaryTransform.NONE); + } + } + } else if (validate && field.getDataType().getPrimitiveType() == DataType.PREDICATE) { + fail(schema, field, "Collections of predicates are not allowed."); + } else if (validate && field.getDataType() == DataType.RAW && field.doesIndexing()) { + fail(schema, field, "Indexing of RAW fields is not supported."); + } else if (validate) { + // if field is not a predicate, disallow predicate-related index parameters + for (Index index : field.getIndices().values()) { + if (index.getBooleanIndexDefiniton() != null) { + BooleanIndexDefinition def = 
index.getBooleanIndexDefiniton(); + if (def.hasArity()) { + fail(schema, field, "Arity parameter is used only for predicate type fields."); + } else if (def.hasLowerBound() || def.hasUpperBound()) { + fail(schema, field, "Parameters lower-bound and upper-bound are used only for predicate type fields."); + } else if (def.hasDensePostingListThreshold()) { + fail(schema, field, "Parameter dense-posting-list-threshold is used only for predicate type fields."); + } + } + } + } + } + } + + private void addPredicateOptimizationIlScript(SDField field, BooleanIndexDefinition booleanIndexDefiniton) { + Expression script = field.getIndexingScript(); + if (script == null) return; + + script = new StatementExpression(makeSetPredicateVariablesScript(booleanIndexDefiniton), script); + + ExpressionConverter converter = new PredicateOutputTransformer(schema); + field.setIndexingScript(new ScriptExpression((StatementExpression)converter.convert(script))); + } + + private Expression makeSetPredicateVariablesScript(BooleanIndexDefinition options) { + List<Expression> expressions = new ArrayList<>(); + expressions.add(new SetValueExpression(new IntegerFieldValue(options.getArity()))); + expressions.add(new SetVarExpression("arity")); + if (options.hasLowerBound()) { + expressions.add(new SetValueExpression(new LongFieldValue(options.getLowerBound()))); + expressions.add(new SetVarExpression("lower_bound")); + } + if (options.hasUpperBound()) { + expressions.add(new SetValueExpression(new LongFieldValue(options.getUpperBound()))); + expressions.add(new SetVarExpression("upper_bound")); + } + return new StatementExpression(expressions); + } + + private static class PredicateOutputTransformer extends TypedTransformProvider { + + PredicateOutputTransformer(Schema schema) { + super(OptimizePredicateExpression.class, schema); + } + + @Override + protected boolean requiresTransform(Expression exp, DataType fieldType) { + return exp instanceof OutputExpression && fieldType == 
DataType.PREDICATE; + } + + @Override + protected Expression newTransform(DataType fieldType) { + return new OptimizePredicateExpression(); + } + + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/Processing.java b/config-model/src/main/java/com/yahoo/schema/processing/Processing.java new file mode 100644 index 00000000000..63eca2121c1 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/Processing.java @@ -0,0 +1,152 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.processing.multifieldresolver.RankProfileTypeSettingsProcessor; +import com.yahoo.vespa.model.container.search.QueryProfiles; +import com.yahoo.config.model.api.ModelContext; +import com.yahoo.config.model.deploy.TestProperties; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Set; + +/** + * Executor of processors. This defines the right order of processor execution. 
+ * + * @author bratseth + * @author bjorncs + */ +public class Processing { + + private final ModelContext.Properties properties; + + public Processing() { this.properties = new TestProperties(); } + + public Processing(ModelContext.Properties properties) { this.properties = properties; } + + private Collection<ProcessorFactory> processors() { + return Arrays.asList( + SearchMustHaveDocument::new, + UrlFieldValidator::new, + BuiltInFieldSets::new, + ReservedDocumentNames::new, + IndexFieldNames::new, + IntegerIndex2Attribute::new, + MakeAliases::new, + UriHack::new, + LiteralBoost::new, + TagType::new, + ValidateFieldTypesDocumentsOnly::new, + IndexingInputs::new, + OptimizeIlscript::new, + ValidateFieldWithIndexSettingsCreatesIndex::new, + AttributesImplicitWord::new, + MutableAttributes::new, + CreatePositionZCurve::new, + DictionaryProcessor::new, + WordMatch::new, + ImportedFieldsResolver::new, + ImplicitSummaries::new, + ImplicitSummaryFields::new, + AdjustPositionSummaryFields::new, + SummaryConsistency::new, + SummaryNamesFieldCollisions::new, + SummaryFieldsMustHaveValidSource::new, + MatchedElementsOnlyResolver::new, + AddAttributeTransformToSummaryOfImportedFields::new, + MakeDefaultSummaryTheSuperSet::new, + Bolding::new, + AttributeProperties::new, + SetRankTypeEmptyOnFilters::new, + SummaryDynamicStructsArrays::new, + StringSettingsOnNonStringFields::new, + IndexingOutputs::new, + ExactMatch::new, + NGramMatch::new, + TextMatch::new, + MultifieldIndexHarmonizer::new, + FilterFieldNames::new, + MatchConsistency::new, + ValidateStructTypeInheritance::new, + ValidateFieldTypes::new, + SummaryDiskAccessValidator::new, + DisallowComplexMapAndWsetKeyTypes::new, + SortingSettings::new, + FieldSetSettings::new, + AddExtraFieldsToDocument::new, + PredicateProcessor::new, + MatchPhaseSettingsValidator::new, + DiversitySettingsValidator::new, + TensorFieldProcessor::new, + RankProfileTypeSettingsProcessor::new, + ReferenceFieldsProcessor::new, + 
FastAccessValidator::new, + ReservedFunctionNames::new, + OnnxModelConfigGenerator::new, + OnnxModelTypeResolver::new, + RankingExpressionTypeResolver::new, + BoolAttributeValidator::new, + PagedAttributeValidator::new, + // These should be last: + IndexingValidation::new, + IndexingValues::new); + } + + /** Processors of rank profiles only (those who tolerate and do something useful when the search field is null) */ + private Collection<ProcessorFactory> rankProfileProcessors() { + return Arrays.asList( + RankProfileTypeSettingsProcessor::new, + ReservedFunctionNames::new, + RankingExpressionTypeResolver::new); + } + + private void runProcessor(Processor processor, boolean validate, boolean documentsOnly) { + processor.process(validate, documentsOnly, properties); + } + + /** + * Runs all search processors on the given {@link Schema} object. These will modify the search object, <b>possibly + * exchanging it with another</b>, as well as its document types. + * + * @param schema the search to process + * @param deployLogger the log to log messages and warnings for application deployment to + * @param rankProfileRegistry a {@link com.yahoo.schema.RankProfileRegistry} + * @param queryProfiles the query profiles contained in the application this search is part of + * @param processorsToSkip a set of processor classes we should not invoke in this. Useful for testing. + */ + public void process(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles, boolean validate, boolean documentsOnly, + Set<Class<? extends Processor>> processorsToSkip) + { + Collection<ProcessorFactory> factories = processors(); + factories.stream() + .map(factory -> factory.create(schema, deployLogger, rankProfileRegistry, queryProfiles)) + .filter(processor -> ! processorsToSkip.contains(processor.getClass())) + .forEach(processor -> runProcessor(processor, validate, documentsOnly)); + } + + /** + * Runs rank profiles processors only. 
+ * + * @param deployLogger the log to log messages and warnings for application deployment to + * @param rankProfileRegistry a {@link com.yahoo.schema.RankProfileRegistry} + * @param queryProfiles the query profiles contained in the application this search is part of + */ + public void processRankProfiles(DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles, boolean validate, boolean documentsOnly) { + Collection<ProcessorFactory> factories = rankProfileProcessors(); + factories.stream() + .map(factory -> factory.create(null, deployLogger, rankProfileRegistry, queryProfiles)) + .forEach(processor -> runProcessor(processor, validate, documentsOnly)); + } + + @FunctionalInterface + public interface ProcessorFactory { + Processor create(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/Processor.java b/config-model/src/main/java/com/yahoo/schema/processing/Processor.java new file mode 100644 index 00000000000..9768f33c27d --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/Processor.java @@ -0,0 +1,157 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.config.model.api.ModelContext; +import com.yahoo.document.DataType; +import com.yahoo.document.Field; +import com.yahoo.schema.Index; +import com.yahoo.schema.RankProfile; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.RankType; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.document.Stemming; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.Iterator; +import java.util.List; +import java.util.logging.Level; + +/** + * Abstract superclass of all search definition processors. + * + * @author bratseth + */ +public abstract class Processor { + + protected final Schema schema; + protected final DeployLogger deployLogger; + protected final RankProfileRegistry rankProfileRegistry; + protected final QueryProfiles queryProfiles; + + /** + * Base constructor + * + * @param schema the search to process + * @param deployLogger Logger du use when logging deploy output. + * @param rankProfileRegistry Registry with all rank profiles, used for lookup and insertion. + * @param queryProfiles The query profiles contained in the application this search is part of. + */ + public Processor(Schema schema, + DeployLogger deployLogger, + RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles) { + this.schema = schema; + this.deployLogger = deployLogger; + this.rankProfileRegistry = rankProfileRegistry; + this.queryProfiles = queryProfiles; + } + + /** + * Processes the input search definition by <b>modifying</b> the input search and its documents, and returns the + * input search definition. + * + * @param validate true to throw exceptions on validation errors, false to make the best possible effort + * at completing processing without throwing an exception. 
+ * If we are not validating, emitting warnings have no effect and can (but must not) be skipped. + * @param documentsOnly true to skip processing (including validation, regardless of the validate setting) + * of aspects not relating to document definitions (e.g rank profiles) + */ + public abstract void process(boolean validate, boolean documentsOnly); + + /** + * As above, possibly with properties from a context. Override if needed. + **/ + public void process(boolean validate, boolean documentsOnly, ModelContext.Properties properties) { + process(validate, documentsOnly); + } + + /** + * Convenience method for adding a no-strings-attached implementation field for a regular field + * + * @param schema the search definition in question + * @param field the field to add an implementation field for + * @param suffix the suffix of the added implementation field (without the underscore) + * @param indexing the indexing statement of the field + * @param queryCommand the query command of the original field, or null if none + * @return the implementation field which is added to the search + */ + protected SDField addField(Schema schema, SDField field, String suffix, String indexing, String queryCommand) { + SDField implementationField = schema.getConcreteField(field.getName() + "_" + suffix); + if (implementationField != null) { + deployLogger.logApplicationPackage(Level.WARNING, "Implementation field " + implementationField + " added twice"); + } else { + implementationField = new SDField(schema.getDocument(), field.getName() + "_" + suffix, DataType.STRING); + } + implementationField.setRankType(RankType.EMPTY); + implementationField.setStemming(Stemming.NONE); + implementationField.getNormalizing().inferCodepoint(); + implementationField.parseIndexingScript(indexing); + String indexName = field.getName(); + String implementationIndexName = indexName + "_" + suffix; + Index implementationIndex = new Index(implementationIndexName); + 
schema.addIndex(implementationIndex); + if (queryCommand != null) { + field.addQueryCommand(queryCommand); + } + schema.addExtraField(implementationField); + schema.fieldSets().addBuiltInFieldSetItem(BuiltInFieldSets.INTERNAL_FIELDSET_NAME, implementationField.getName()); + return implementationField; + } + + /** + * Returns an iterator of all the rank settings with given type in all the rank profiles in this search + * definition. + */ + protected Iterator<RankProfile.RankSetting> matchingRankSettingsIterator( + Schema schema, RankProfile.RankSetting.Type type) + { + List<RankProfile.RankSetting> someRankSettings = new java.util.ArrayList<>(); + + for (RankProfile profile : rankProfileRegistry.rankProfilesOf(schema)) { + for (Iterator j = profile.declaredRankSettingIterator(); j.hasNext(); ) { + RankProfile.RankSetting setting = (RankProfile.RankSetting)j.next(); + if (setting.getType().equals(type)) { + someRankSettings.add(setting); + } + } + } + return someRankSettings.iterator(); + } + + protected String formatError(String schemaName, String fieldName, String msg) { + return "For schema '" + schemaName + "', field '" + fieldName + "': " + msg; + } + + protected RuntimeException newProcessException(String schemaName, String fieldName, String msg) { + return new IllegalArgumentException(formatError(schemaName, fieldName, msg)); + } + + protected RuntimeException newProcessException(Schema schema, Field field, String msg) { + return newProcessException(schema.getName(), field.getName(), msg); + } + + public void fail(Schema schema, Field field, String msg) { + throw newProcessException(schema, field, msg); + } + + protected void warn(String schemaName, String fieldName, String message) { + String fullMsg = formatError(schemaName, fieldName, message); + deployLogger.logApplicationPackage(Level.WARNING, fullMsg); + } + + protected void warn(Schema schema, Field field, String message) { + warn(schema.getName(), field.getName(), message); + } + + protected void 
info(String schemaName, String fieldName, String message) { + String fullMsg = formatError(schemaName, fieldName, message); + deployLogger.logApplicationPackage(Level.INFO, fullMsg); + } + + protected void info(Schema schema, Field field, String message) { + info(schema.getName(), field.getName(), message); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/RankingExpressionTypeResolver.java b/config-model/src/main/java/com/yahoo/schema/processing/RankingExpressionTypeResolver.java new file mode 100644 index 00000000000..07f79f16334 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/RankingExpressionTypeResolver.java @@ -0,0 +1,135 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.search.query.profile.QueryProfileRegistry; +import com.yahoo.schema.MapEvaluationTypeContext; +import com.yahoo.schema.RankProfile; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.searchlib.rankingexpression.ExpressionFunction; +import com.yahoo.searchlib.rankingexpression.RankingExpression; +import com.yahoo.searchlib.rankingexpression.Reference; +import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode; +import com.yahoo.tensor.TensorType; +import com.yahoo.tensor.evaluation.TypeContext; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.logging.Level; + +/** + * Resolves and assigns types to all functions in a ranking expression, and + * validates the types of all ranking expressions under a search instance: + * Some operators constrain the types of inputs, and first-and second-phase expressions + * must return scalar values. 
+ * + * In addition, the existence of all referred attribute, query and constant + * features is ensured. + * + * @author bratseth + */ +public class RankingExpressionTypeResolver extends Processor { + + private final QueryProfileRegistry queryProfiles; + + public RankingExpressionTypeResolver(Schema schema, + DeployLogger deployLogger, + RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + this.queryProfiles = queryProfiles.getRegistry(); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if (documentsOnly) return; + + Set<Reference> warnedAbout = new HashSet<>(); + for (RankProfile profile : rankProfileRegistry.rankProfilesOf(schema)) { + try { + resolveTypesIn(profile, validate, warnedAbout); + } + catch (IllegalArgumentException e) { + throw new IllegalArgumentException("In " + (schema != null ? schema + ", " : "") + profile, e); + } + } + } + + /** + * Resolves the types of all functions in the given profile + * + * @throws IllegalArgumentException if validate is true and the given rank profile does not produce valid types + */ + private void resolveTypesIn(RankProfile profile, boolean validate, Set<Reference> warnedAbout) { + MapEvaluationTypeContext context = profile.typeContext(queryProfiles); + for (Map.Entry<String, RankProfile.RankingExpressionFunction> function : profile.getFunctions().entrySet()) { + ExpressionFunction expressionFunction = function.getValue().function(); + if (hasUntypedArguments(expressionFunction)) continue; + + // Add any missing inputs for type resolution + for (String argument : expressionFunction.arguments()) { + Reference ref = Reference.fromIdentifier(argument); + if (context.getType(ref).equals(TensorType.empty)) { + context.setType(ref, expressionFunction.argumentTypes().get(argument)); + } + } + context.forgetResolvedTypes(); + + TensorType type = resolveType(expressionFunction.getBody(), "function '" + 
function.getKey() + "'", context); + function.getValue().setReturnType(type); + } + + if (validate) { + profile.getSummaryFeatures().forEach(f -> resolveType(f, "summary feature " + f, context)); + ensureValidDouble(profile.getFirstPhaseRanking(), "first-phase expression", context); + ensureValidDouble(profile.getSecondPhaseRanking(), "second-phase expression", context); + if ( ( context.tensorsAreUsed() || profile.isStrict()) + && ! context.queryFeaturesNotDeclared().isEmpty() + && ! warnedAbout.containsAll(context.queryFeaturesNotDeclared())) { + if (profile.isStrict()) + throw new IllegalArgumentException(profile + " is strict but is missing a query profile type " + + "declaration of features " + context.queryFeaturesNotDeclared()); + else + deployLogger.logApplicationPackage(Level.WARNING, "The following query features used in " + profile + + " are not declared in query profile " + + "types and will be interpreted as scalars, not tensors: " + + context.queryFeaturesNotDeclared()); + warnedAbout.addAll(context.queryFeaturesNotDeclared()); + } + } + } + + private boolean hasUntypedArguments(ExpressionFunction function) { + return function.arguments().size() > function.argumentTypes().size(); + } + + private TensorType resolveType(RankingExpression expression, String expressionDescription, TypeContext<Reference> context) { + if (expression == null) return null; + return resolveType(expression.getRoot(), expressionDescription, context); + } + + private TensorType resolveType(ExpressionNode expression, String expressionDescription, TypeContext<Reference> context) { + TensorType type; + try { + type = expression.type(context); + } + catch (IllegalArgumentException e) { + throw new IllegalArgumentException("The " + expressionDescription + " is invalid", e); + } + if (type == null) // Not expected to happen + throw new IllegalStateException("Could not determine the type produced by " + expressionDescription); + return type; + } + + private void 
ensureValidDouble(RankingExpression expression, String expressionDescription, TypeContext<Reference> context) { + if (expression == null) return; + TensorType type = resolveType(expression, expressionDescription, context); + if ( ! type.equals(TensorType.empty)) + throw new IllegalArgumentException("The " + expressionDescription + " must produce a double " + + "(a tensor with no dimensions), but produces " + type); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ReferenceFieldsProcessor.java b/config-model/src/main/java/com/yahoo/schema/processing/ReferenceFieldsProcessor.java new file mode 100644 index 00000000000..43e39b1e546 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/ReferenceFieldsProcessor.java @@ -0,0 +1,65 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.documentmodel.NewDocumentReferenceDataType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Class that processes reference fields and removes attribute aspect of such fields from summary. + * + * A document summary for a reference field should always be fetched from the document instance in back-end + * as the attribute vector does not store the original document id string. 
+ * + * @author geirst + */ +public class ReferenceFieldsProcessor extends Processor { + + public ReferenceFieldsProcessor(Schema schema, + DeployLogger deployLogger, + RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + clearSummaryAttributeAspectForConcreteFields(); + clearSummaryAttributeAspectForExplicitSummaryFields(); + } + + private void clearSummaryAttributeAspectForExplicitSummaryFields() { + for (DocumentSummary docSum : schema.getSummaries().values()) { + docSum.getSummaryFields().values().stream() + .filter(summaryField -> summaryField.getDataType() instanceof NewDocumentReferenceDataType) + .forEach(summaryField -> summaryField.setTransform(SummaryTransform.NONE)); + } + } + + private void clearSummaryAttributeAspectForConcreteFields() { + for (SDField field : schema.allConcreteFields()) { + if (field.getDataType() instanceof NewDocumentReferenceDataType) { + removeFromAttributePrefetchSummaryClass(field); + clearSummaryTransformOnSummaryFields(field); + } + } + } + + private void removeFromAttributePrefetchSummaryClass(SDField field) { + DocumentSummary summary = schema.getSummariesInThis().get("attributeprefetch"); + if (summary != null) { + summary.remove(field.getName()); + } + } + + private void clearSummaryTransformOnSummaryFields(SDField field) { + schema.getSummaryFields(field).forEach(summaryField -> summaryField.setTransform(SummaryTransform.NONE)); + } + +} + diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ReservedDocumentNames.java b/config-model/src/main/java/com/yahoo/schema/processing/ReservedDocumentNames.java new file mode 100644 index 00000000000..7eaf690d899 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/ReservedDocumentNames.java @@ -0,0 +1,39 @@ +// Copyright Yahoo. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDDocumentType; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.HashSet; +import java.util.Set; + +/** + * @author Simon Thoresen Hult + */ +public class ReservedDocumentNames extends Processor { + + private static final Set<String> RESERVED_NAMES = new HashSet<>(); + + static { + for (SDDocumentType dataType : SDDocumentType.VESPA_DOCUMENT.getTypes()) { + RESERVED_NAMES.add(dataType.getName()); + } + } + + public ReservedDocumentNames(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + + String docName = schema.getDocument().getName(); + if (RESERVED_NAMES.contains(docName)) + throw new IllegalArgumentException("For " + schema + ": Document name '" + docName + "' is reserved."); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ReservedFunctionNames.java b/config-model/src/main/java/com/yahoo/schema/processing/ReservedFunctionNames.java new file mode 100644 index 00000000000..1ec4d5b58f2 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/ReservedFunctionNames.java @@ -0,0 +1,56 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.google.common.collect.ImmutableSet; +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfile; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.searchlib.rankingexpression.parser.RankingExpressionParserConstants; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.Set; +import java.util.logging.Level; + +/** + * Issues a warning if some function has a reserved name. This is not necessarily + * an error, as a rank profile function can shadow a built-in function. + * + * @author lesters + */ +public class ReservedFunctionNames extends Processor { + + private static Set<String> reservedNames = getReservedNames(); + + public ReservedFunctionNames(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + if (documentsOnly) return; + + for (RankProfile rp : rankProfileRegistry.all()) { + for (String functionName : rp.getFunctions().keySet()) { + if (reservedNames.contains(functionName)) { + deployLogger.logApplicationPackage(Level.WARNING, "Function '" + functionName + "' " + + "in rank profile '" + rp.name() + "' " + + "has a reserved name. This might mean that the function shadows " + + "the built-in function with the same name." 
+ ); + } + } + } + } + + private static ImmutableSet<String> getReservedNames() { + ImmutableSet.Builder<String> names = ImmutableSet.builder(); + for (String token : RankingExpressionParserConstants.tokenImage) { + String tokenWithoutQuotes = token.substring(1, token.length()-1); + names.add(tokenWithoutQuotes); + } + return names.build(); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/SearchMustHaveDocument.java b/config-model/src/main/java/com/yahoo/schema/processing/SearchMustHaveDocument.java new file mode 100644 index 00000000000..b90a5fdec98 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/SearchMustHaveDocument.java @@ -0,0 +1,30 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * A search must have a document definition of the same name inside of it, otherwise crashes may occur as late as + * during feeding + * + * @author Vegard Havdal + */ +public class SearchMustHaveDocument extends Processor { + + public SearchMustHaveDocument(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! 
validate) return; + + if (schema.getDocument() == null) + throw new IllegalArgumentException("For " + schema + + ": A search specification must have an equally named document inside of it."); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/SetRankTypeEmptyOnFilters.java b/config-model/src/main/java/com/yahoo/schema/processing/SetRankTypeEmptyOnFilters.java new file mode 100644 index 00000000000..f84d6f19145 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/SetRankTypeEmptyOnFilters.java @@ -0,0 +1,30 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.RankType; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * All rank: filter fields should have rank type empty. + * + * @author bratseth + */ +public class SetRankTypeEmptyOnFilters extends Processor { + + public SetRankTypeEmptyOnFilters(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (SDField field : schema.allConcreteFields()) { + if (field.getRanking().isFilter()) { + field.setRankType(RankType.EMPTY); + } + } + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/SortingSettings.java b/config-model/src/main/java/com/yahoo/schema/processing/SortingSettings.java new file mode 100644 index 00000000000..e0dfbab9780 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/SortingSettings.java @@ -0,0 +1,42 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.document.Sorting; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Validate conflicting settings for sorting + * + * @author Vegard Havdal + */ +public class SortingSettings extends Processor { + + public SortingSettings(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + + for (SDField field : schema.allConcreteFields()) { + for (Attribute attribute : field.getAttributes().values()) { + Sorting sorting = attribute.getSorting(); + if (sorting.getFunction() != Sorting.Function.UCA) { + if (sorting.getStrength()!=null && sorting.getStrength() != Sorting.Strength.PRIMARY) { + warn(schema, field, "Sort strength only works for sort function 'uca'."); + } + if (sorting.getLocale() != null && ! "".equals(sorting.getLocale())) { + warn(schema, field, "Sort locale only works for sort function 'uca'."); + } + } + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/StringSettingsOnNonStringFields.java b/config-model/src/main/java/com/yahoo/schema/processing/StringSettingsOnNonStringFields.java new file mode 100644 index 00000000000..8ca0b595907 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/StringSettingsOnNonStringFields.java @@ -0,0 +1,43 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.CollectionDataType; +import com.yahoo.document.NumericDataType; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +public class StringSettingsOnNonStringFields extends Processor { + + public StringSettingsOnNonStringFields(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + + for (SDField field : schema.allConcreteFields()) { + if ( ! doCheck(field)) continue; + if (field.getMatching().isTypeUserSet()) { + warn(schema, field, "Matching type " + field.getMatching().getType() + + " is only allowed for string fields."); + } + if (field.getRanking().isLiteral()) { + warn(schema, field, "Rank type literal only applies to string fields"); + } + } + } + + private boolean doCheck(SDField field) { + if (field.getDataType() instanceof NumericDataType) return true; + if (field.getDataType() instanceof CollectionDataType) { + if (((CollectionDataType)field.getDataType()).getNestedType() instanceof NumericDataType) { + return true; + } + } + return false; + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/SummaryConsistency.java b/config-model/src/main/java/com/yahoo/schema/processing/SummaryConsistency.java new file mode 100644 index 00000000000..4fb45c3c68f --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/SummaryConsistency.java @@ -0,0 +1,131 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.DataType; +import com.yahoo.document.TensorDataType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.document.WeightedSetDataType; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isComplexFieldWithOnlyStructFieldAttributes; + +/** + * Ensure that summary field transforms for fields having the same name + * are consistent across summary classes + * + * @author bratseth + */ +public class SummaryConsistency extends Processor { + + public SummaryConsistency(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (DocumentSummary summary : schema.getSummaries().values()) { + if (summary.getName().equals("default")) continue; + + for (SummaryField summaryField : summary.getSummaryFields().values()) { + assertConsistency(summaryField, schema, validate); + makeAttributeTransformIfAppropriate(summaryField, schema); + makeAttributeCombinerTransformIfAppropriate(summaryField, schema); + } + } + } + + private void assertConsistency(SummaryField summaryField, Schema schema, boolean validate) { + // Compare to default: + SummaryField existingDefault = schema.getSummariesInThis().get("default").getSummaryField(summaryField.getName()); + if (existingDefault != null) { + if (validate) + assertConsistentTypes(existingDefault, summaryField); + 
makeConsistentWithDefaultOrThrow(existingDefault, summaryField); + } + else { + // If no default, compare to whichever definition of the field + SummaryField existing = schema.getExplicitSummaryField(summaryField.getName()); + if (existing == null) return; + if (validate) + assertConsistentTypes(existing, summaryField); + makeConsistentOrThrow(existing, summaryField, schema); + } + } + + /** If the source is an attribute, make this use the attribute transform */ + private void makeAttributeTransformIfAppropriate(SummaryField summaryField, Schema schema) { + if (summaryField.getTransform() != SummaryTransform.NONE) return; + Attribute attribute = schema.getAttribute(summaryField.getSingleSource()); + if (attribute == null) return; + summaryField.setTransform(SummaryTransform.ATTRIBUTE); + } + + /** If the source is a complex field with only struct field attributes then make this use the attribute combiner transform */ + private void makeAttributeCombinerTransformIfAppropriate(SummaryField summaryField, Schema schema) { + if (summaryField.getTransform() == SummaryTransform.NONE) { + String source_field_name = summaryField.getSingleSource(); + ImmutableSDField source = schema.getField(source_field_name); + if (source != null && isComplexFieldWithOnlyStructFieldAttributes(source)) { + summaryField.setTransform(SummaryTransform.ATTRIBUTECOMBINER); + } + } + } + + private void assertConsistentTypes(SummaryField existing, SummaryField seen) { + if (existing.getDataType() instanceof WeightedSetDataType && seen.getDataType() instanceof WeightedSetDataType && + ((WeightedSetDataType)existing.getDataType()).getNestedType().equals(((WeightedSetDataType)seen.getDataType()).getNestedType())) + return; // Disregard create-if-nonexistent and create-if-zero distinction + if ( ! 
compatibleTypes(seen.getDataType(), existing.getDataType())) + throw new IllegalArgumentException(existing.toLocateString() + " is inconsistent with " + + seen.toLocateString() + ": All declarations of the same summary field must have the same type"); + } + + private boolean compatibleTypes(DataType summaryType, DataType existingType) { + if (summaryType instanceof TensorDataType && existingType instanceof TensorDataType) { + return summaryType.isAssignableFrom(existingType); // TODO: Just do this for all types + } + return summaryType.equals(existingType); + } + + private void makeConsistentOrThrow(SummaryField field1, SummaryField field2, Schema schema) { + if (field2.getTransform() == SummaryTransform.ATTRIBUTE && field1.getTransform() == SummaryTransform.NONE) { + Attribute attribute = schema.getAttribute(field1.getName()); + if (attribute != null) { + field1.setTransform(SummaryTransform.ATTRIBUTE); + } + } + + if (field2.getTransform().equals(SummaryTransform.NONE)) { + field2.setTransform(field1.getTransform()); + } + else { // New field sets an explicit transform - must be the same + assertEqualTransform(field1,field2); + } + } + + private void makeConsistentWithDefaultOrThrow(SummaryField defaultField, SummaryField newField) { + if (newField.getTransform().equals(SummaryTransform.NONE)) { + newField.setTransform(defaultField.getTransform()); + } + else { // New field sets an explicit transform - must be the same + assertEqualTransform(defaultField,newField); + } + } + + private void assertEqualTransform(SummaryField field1, SummaryField field2) { + if ( ! field2.getTransform().equals(field1.getTransform())) { + throw new IllegalArgumentException("Conflicting summary transforms. " + field2 + " is already defined as " + + field1 + ". 
A field with the same name " + + "can not have different transforms in different summary classes"); + } + } + + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/SummaryDiskAccessValidator.java b/config-model/src/main/java/com/yahoo/schema/processing/SummaryDiskAccessValidator.java new file mode 100644 index 00000000000..40c38a350b0 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/SummaryDiskAccessValidator.java @@ -0,0 +1,73 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.derived.SummaryClass; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.Optional; +import java.util.logging.Level; + +import static com.yahoo.schema.document.ComplexAttributeFieldUtils.isComplexFieldWithOnlyStructFieldAttributes; + +/** + * Emits a warning for summaries which accesses disk. + * + * @author bratseth + */ +public class SummaryDiskAccessValidator extends Processor { + + public SummaryDiskAccessValidator(Schema schema, + DeployLogger deployLogger, + RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! 
validate) return; + if (documentsOnly) return; + + for (DocumentSummary summary : schema.getSummaries().values()) { + for (SummaryField summaryField : summary.getSummaryFields().values()) { + for (SummaryField.Source source : summaryField.getSources()) { + ImmutableSDField field = schema.getField(source.getName()); + if (field == null) + field = findFieldProducingSummaryField(source.getName(), schema).orElse(null); + if (field == null && ! source.getName().equals(SummaryClass.DOCUMENT_ID_FIELD)) + throw new IllegalArgumentException(summaryField + " in " + summary + " references " + + source + ", but this field does not exist"); + if ( ! isInMemory(field, summaryField) && ! summary.isFromDisk()) { + deployLogger.logApplicationPackage(Level.WARNING, summaryField + " in " + summary + " references " + + source + ", which is not an attribute: Using this " + + "summary will cause disk accesses. " + + "Set 'from-disk' on this summary class to silence this warning."); + } + } + } + } + } + + private boolean isInMemory(ImmutableSDField field, SummaryField summaryField) { + if (field == null) return false; // For DOCUMENT_ID_FIELD, which may be implicit, but is then not in memory + if (isComplexFieldWithOnlyStructFieldAttributes(field) && + (summaryField.getTransform() == SummaryTransform.ATTRIBUTECOMBINER || + summaryField.getTransform() == SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER)) { + return true; + } + return field.doesAttributing(); + } + + private Optional<ImmutableSDField> findFieldProducingSummaryField(String name, Schema schema) { + return schema.allFields().filter(field -> field.getSummaryFields().get(name) != null).findAny(); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/SummaryDynamicStructsArrays.java b/config-model/src/main/java/com/yahoo/schema/processing/SummaryDynamicStructsArrays.java new file mode 100644 index 00000000000..ed1f47611eb --- /dev/null +++ 
b/config-model/src/main/java/com/yahoo/schema/processing/SummaryDynamicStructsArrays.java @@ -0,0 +1,47 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.*; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Fail if: + * An SD field explicitly says summary:dynamic , but the field is wset, array or struct. + * If there is an explicitly defined summary class, saying dynamic in one of its summary + * fields is always legal. + * + * @author Vegard Havdal + */ +public class SummaryDynamicStructsArrays extends Processor { + + public SummaryDynamicStructsArrays(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + + for (SDField field : schema.allConcreteFields()) { + DataType type = field.getDataType(); + if (type instanceof ArrayDataType || type instanceof WeightedSetDataType || type instanceof StructDataType) { + for (SummaryField sField : field.getSummaryFields().values()) { + if (sField.getTransform().equals(SummaryTransform.DYNAMICTEASER)) { + throw new IllegalArgumentException("For field '"+field.getName()+"': dynamic summary is illegal " + + "for fields of type struct, array or weighted set. 
Use an " + + "explicit summary class with explicit summary fields sourcing" + + " from the array/struct/weighted set."); + } + } + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/SummaryFieldsMustHaveValidSource.java b/config-model/src/main/java/com/yahoo/schema/processing/SummaryFieldsMustHaveValidSource.java new file mode 100644 index 00000000000..c8f201e2915 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/SummaryFieldsMustHaveValidSource.java @@ -0,0 +1,81 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.derived.SummaryClass; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryTransform; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Verifies that the source fields actually refers to a valid field. + * + * @author baldersheim + */ +public class SummaryFieldsMustHaveValidSource extends Processor { + + SummaryFieldsMustHaveValidSource(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! 
validate) return; + + for (DocumentSummary summary : schema.getSummaries().values()) { + for (SummaryField summaryField : summary.getSummaryFields().values()) { + if (summaryField.getSources().isEmpty()) { + if ((summaryField.getTransform() != SummaryTransform.RANKFEATURES) && + (summaryField.getTransform() != SummaryTransform.SUMMARYFEATURES)) + { + verifySource(summaryField.getName(), summaryField, summary); + } + } else if (summaryField.getSourceCount() == 1) { + verifySource(summaryField.getSingleSource(), summaryField, summary); + } else { + for (SummaryField.Source source : summaryField.getSources()) { + if ( ! source.getName().equals(summaryField.getName()) ) { + verifySource(source.getName(), summaryField, summary); + } + } + } + } + } + + } + + private boolean isValid(String source, SummaryField summaryField, DocumentSummary summary) { + return isDocumentField(source) || + (isNotInThisSummaryClass(summary, source) && isSummaryField(source)) || + (isInThisSummaryClass(summary, source) && !source.equals(summaryField.getName())) || + (SummaryClass.DOCUMENT_ID_FIELD.equals(source)); + } + + private void verifySource(String source, SummaryField summaryField, DocumentSummary summary) { + if ( ! 
isValid(source, summaryField, summary) ) { + throw new IllegalArgumentException("For " + schema + ", summary class '" + + summary.getName() + "'," + " summary field '" + summaryField.getName() + + "': there is no valid source '" + source + "'."); + } + } + + private static boolean isNotInThisSummaryClass(DocumentSummary summary, String name) { + return summary.getSummaryField(name) == null; + } + + private static boolean isInThisSummaryClass(DocumentSummary summary, String name) { + return summary.getSummaryField(name) != null; + } + + private boolean isDocumentField(String name) { + return schema.getField(name) != null; + } + + private boolean isSummaryField(String name) { + return schema.getSummaryField(name) != null; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/SummaryNamesFieldCollisions.java b/config-model/src/main/java/com/yahoo/schema/processing/SummaryNamesFieldCollisions.java new file mode 100644 index 00000000000..da5dfeb407b --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/SummaryNamesFieldCollisions.java @@ -0,0 +1,60 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import java.util.HashMap; +import java.util.Map; + +import com.yahoo.collections.Pair; +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.documentmodel.SummaryField.Source; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Verifies that equally named summary fields in different summary classes don't use different fields for source. + * The summarymap config doesn't model this. 
+ * + * @author Vegard Havdal + */ +public class SummaryNamesFieldCollisions extends Processor { + + public SummaryNamesFieldCollisions(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + + Map<String, Pair<String, String>> fieldToClassAndSource = new HashMap<>(); + for (DocumentSummary summary : schema.getSummaries().values()) { + if ("default".equals(summary.getName())) continue; + for (SummaryField summaryField : summary.getSummaryFields().values()) { + if (summaryField.isImplicit()) continue; + Pair<String, String> prevClassAndSource = fieldToClassAndSource.get(summaryField.getName()); + for (Source source : summaryField.getSources()) { + if (prevClassAndSource!=null) { + String prevClass = prevClassAndSource.getFirst(); + String prevSource = prevClassAndSource.getSecond(); + if ( ! prevClass.equals(summary.getName())) { + if ( ! prevSource.equals(source.getName())) { + throw new IllegalArgumentException("For " + schema + + ", summary class '" + summary.getName() + "'," + + " summary field '" + summaryField.getName() + "':" + + " Can not use source '" + source.getName() + + "' for this summary field, an equally named field in summary class '" + + prevClass + "' uses a different source: '" + prevSource + "'."); + } + } + } else { + fieldToClassAndSource.put(summaryField.getName(), new Pair<>(summary.getName(), source.getName())); + } + } + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/TagType.java b/config-model/src/main/java/com/yahoo/schema/processing/TagType.java new file mode 100644 index 00000000000..f511d572bc6 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/TagType.java @@ -0,0 +1,47 @@ +// Copyright Yahoo. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.*; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Matching; +import com.yahoo.schema.document.MatchType; +import com.yahoo.schema.document.RankType; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * The implementation of the tag datatype + * + * @author bratseth + */ +public class TagType extends Processor { + + public TagType(Schema schema, + DeployLogger deployLogger, + RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (SDField field : schema.allConcreteFields()) { + if (field.getDataType() instanceof WeightedSetDataType && ((WeightedSetDataType)field.getDataType()).isTag()) + implementTagType(field); + } + } + + private void implementTagType(SDField field) { + field.setDataType(DataType.getWeightedSet(DataType.STRING, true, true)); + // Don't set matching and ranking if this field is not attribute nor index + if (!field.doesIndexing() && !field.doesAttributing()) return; + Matching m = field.getMatching(); + if ( ! m.isTypeUserSet()) + m.setType(MatchType.WORD); + if (field.getRankType() == null || field.getRankType() == RankType.DEFAULT) + field.setRankType((RankType.TAGS)); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/TensorFieldProcessor.java b/config-model/src/main/java/com/yahoo/schema/processing/TensorFieldProcessor.java new file mode 100644 index 00000000000..e0ce9917179 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/TensorFieldProcessor.java @@ -0,0 +1,118 @@ +// Copyright Yahoo. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.CollectionDataType; +import com.yahoo.document.TensorDataType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.HnswIndexParams; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.schema.document.SDField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Class that processes and validates tensor fields. + * + * @author geirst + */ +public class TensorFieldProcessor extends Processor { + + public TensorFieldProcessor(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (var field : schema.allConcreteFields()) { + if ( field.getDataType() instanceof TensorDataType ) { + if (validate) { + validateIndexingScripsForTensorField(field); + validateAttributeSettingForTensorField(field); + validateHnswIndexParametersRequiresIndexing(field); + } + processIndexSettingsForTensorField(field, validate); + } + else if (field.getDataType() instanceof CollectionDataType){ + if (validate) { + validateDataTypeForCollectionField(field); + } + } + } + } + + private void validateIndexingScripsForTensorField(SDField field) { + if (field.doesIndexing() && !isTensorTypeThatSupportsHnswIndex(field)) { + fail(schema, field, "A tensor of type '" + tensorTypeToString(field) + "' does not support having an 'index'. 
" + + "Currently, only tensors with 1 indexed dimension supports that."); + } + } + + private boolean isTensorTypeThatSupportsHnswIndex(ImmutableSDField field) { + var type = ((TensorDataType)field.getDataType()).getTensorType(); + // Tensors with 1 indexed dimension supports a hnsw index (used for approximate nearest neighbor search). + if ((type.dimensions().size() == 1) && + type.dimensions().get(0).isIndexed()) { + return true; + } + return false; + } + + private boolean isTensorTypeThatSupportsDirectStore(ImmutableSDField field) { + var type = ((TensorDataType)field.getDataType()).getTensorType(); + // Tensors with at least one mapped/sparse dimensions can be "direct" + // (currenty triggered by fast-search flag) + for (var dim : type.dimensions()) { + if (dim.isMapped()) { + return true; + } + } + return false; + } + + private String tensorTypeToString(ImmutableSDField field) { + return ((TensorDataType)field.getDataType()).getTensorType().toString(); + } + + private void validateAttributeSettingForTensorField(SDField field) { + if (field.doesAttributing()) { + var attribute = field.getAttributes().get(field.getName()); + if (attribute != null && attribute.isFastSearch()) { + if (! 
isTensorTypeThatSupportsDirectStore(field)) { + fail(schema, field, "An attribute of type 'tensor' cannot be 'fast-search'."); + } + } + } + } + + private void validateHnswIndexParametersRequiresIndexing(SDField field) { + var index = field.getIndex(field.getName()); + if (index != null && index.getHnswIndexParams().isPresent() && !field.doesIndexing()) { + fail(schema, field, "A tensor that specifies hnsw index parameters must also specify 'index' in 'indexing'"); + } + } + + private void processIndexSettingsForTensorField(SDField field, boolean validate) { + if (!field.doesIndexing()) { + return; + } + if (isTensorTypeThatSupportsHnswIndex(field)) { + if (validate && !field.doesAttributing()) { + fail(schema, field, "A tensor that has an index must also be an attribute."); + } + var index = field.getIndex(field.getName()); + // TODO: Calculate default params based on tensor dimension size + var params = new HnswIndexParams(); + if (index != null) { + params = params.overrideFrom(index.getHnswIndexParams()); + } + field.getAttribute().setHnswIndexParams(params); + } + } + + private void validateDataTypeForCollectionField(SDField field) { + if (((CollectionDataType)field.getDataType()).getNestedType() instanceof TensorDataType) + fail(schema, field, "A field with collection type of tensor is not supported. Use simple type 'tensor' instead."); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java b/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java new file mode 100644 index 00000000000..1783a3c7c63 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java @@ -0,0 +1,127 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.document.CollectionDataType; +import com.yahoo.document.DataType; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.MatchType; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.document.Stemming; +import com.yahoo.vespa.indexinglanguage.ExpressionConverter; +import com.yahoo.vespa.indexinglanguage.ExpressionVisitor; +import com.yahoo.vespa.indexinglanguage.expressions.Expression; +import com.yahoo.vespa.indexinglanguage.expressions.ForEachExpression; +import com.yahoo.vespa.indexinglanguage.expressions.IndexExpression; +import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression; +import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; +import com.yahoo.vespa.indexinglanguage.expressions.SummaryExpression; +import com.yahoo.vespa.indexinglanguage.expressions.TokenizeExpression; +import com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.Set; +import java.util.TreeSet; + +/** + * @author Simon Thoresen Hult + */ +public class TextMatch extends Processor { + + public TextMatch(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (SDField field : schema.allConcreteFields()) { + if (field.getMatching().getType() != MatchType.TEXT) continue; + + ScriptExpression script = field.getIndexingScript(); + if (script == null) continue; + + DataType fieldType = field.getDataType(); + if (fieldType instanceof CollectionDataType) { + fieldType = ((CollectionDataType)fieldType).getNestedType(); + } + if (fieldType != DataType.STRING) continue; + + Set<String> 
dynamicSummary = new TreeSet<>(); + Set<String> staticSummary = new TreeSet<>(); + new IndexingOutputs(schema, deployLogger, rankProfileRegistry, queryProfiles).findSummaryTo(schema, + field, + dynamicSummary, + staticSummary); + MyVisitor visitor = new MyVisitor(dynamicSummary); + visitor.visit(script); + if ( ! visitor.requiresTokenize) continue; + + ExpressionConverter converter = new MyStringTokenizer(schema, findAnnotatorConfig(schema, field)); + field.setIndexingScript((ScriptExpression)converter.convert(script)); + } + } + + private AnnotatorConfig findAnnotatorConfig(Schema schema, SDField field) { + AnnotatorConfig ret = new AnnotatorConfig(); + Stemming activeStemming = field.getStemming(); + if (activeStemming == null) { + activeStemming = schema.getStemming(); + } + ret.setStemMode(activeStemming.toStemMode()); + ret.setRemoveAccents(field.getNormalizing().doRemoveAccents()); + if ((field.getMatching() != null) && (field.getMatching().maxLength() != null)) { + ret.setMaxTokenLength(field.getMatching().maxLength()); + } + return ret; + } + + private static class MyVisitor extends ExpressionVisitor { + + final Set<String> dynamicSummaryFields; + boolean requiresTokenize = false; + + MyVisitor(Set<String> dynamicSummaryFields) { + this.dynamicSummaryFields = dynamicSummaryFields; + } + + @Override + protected void doVisit(Expression exp) { + if (exp instanceof IndexExpression) { + requiresTokenize = true; + } + if (exp instanceof SummaryExpression && + dynamicSummaryFields.contains(((SummaryExpression)exp).getFieldName())) + { + requiresTokenize = true; + } + } + + } + + private static class MyStringTokenizer extends TypedTransformProvider { + + final AnnotatorConfig annotatorCfg; + + MyStringTokenizer(Schema schema, AnnotatorConfig annotatorCfg) { + super(TokenizeExpression.class, schema); + this.annotatorCfg = annotatorCfg; + } + + @Override + protected boolean requiresTransform(Expression exp, DataType fieldType) { + return exp instanceof 
OutputExpression; + } + + @Override + protected Expression newTransform(DataType fieldType) { + Expression exp = new TokenizeExpression(null, annotatorCfg); + if (fieldType instanceof CollectionDataType) { + exp = new ForEachExpression(exp); + } + return exp; + } + + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/TypedTransformProvider.java b/config-model/src/main/java/com/yahoo/schema/processing/TypedTransformProvider.java new file mode 100644 index 00000000000..1836cd631ad --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/TypedTransformProvider.java @@ -0,0 +1,62 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.document.DataType; +import com.yahoo.document.Field; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.vespa.indexinglanguage.ValueTransformProvider; +import com.yahoo.vespa.indexinglanguage.expressions.*; + +/** + * @author Simon Thoresen Hult + */ +public abstract class TypedTransformProvider extends ValueTransformProvider { + + private final Schema schema; + private DataType fieldType; + + TypedTransformProvider(Class<? 
extends Expression> transformClass, Schema schema) { + super(transformClass); + this.schema = schema; + } + + @Override + protected final boolean requiresTransform(Expression exp) { + if (exp instanceof OutputExpression) { + String fieldName = ((OutputExpression)exp).getFieldName(); + if (exp instanceof AttributeExpression) { + Attribute attribute = schema.getAttribute(fieldName); + if (attribute == null) + throw new IllegalArgumentException("Attribute '" + fieldName + "' not found."); + fieldType = attribute.getDataType(); + } + else if (exp instanceof IndexExpression) { + Field field = schema.getConcreteField(fieldName); + if (field == null) + throw new IllegalArgumentException("Index field '" + fieldName + "' not found."); + fieldType = field.getDataType(); + } + else if (exp instanceof SummaryExpression) { + Field field = schema.getSummaryField(fieldName); + if (field == null) + throw new IllegalArgumentException("Summary field '" + fieldName + "' not found."); + fieldType = field.getDataType(); + } + else { + throw new UnsupportedOperationException(); + } + } + return requiresTransform(exp, fieldType); + } + + @Override + protected final Expression newTransform() { + return newTransform(fieldType); + } + + protected abstract boolean requiresTransform(Expression exp, DataType fieldType); + + protected abstract Expression newTransform(DataType fieldType); + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/UriHack.java b/config-model/src/main/java/com/yahoo/schema/processing/UriHack.java new file mode 100644 index 00000000000..a4773a42ed6 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/UriHack.java @@ -0,0 +1,77 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.ArrayDataType; +import com.yahoo.document.CollectionDataType; +import com.yahoo.document.DataType; +import com.yahoo.document.WeightedSetDataType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.document.Stemming; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.Arrays; +import java.util.List; + +/** + * @author baldersheim + */ +public class UriHack extends Processor { + + private static final List<String> URL_SUFFIX = + Arrays.asList("scheme", "host", "port", "path", "query", "fragment", "hostname"); + + UriHack(Schema schema, + DeployLogger deployLogger, + RankProfileRegistry rankProfileRegistry, + QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + for (SDField field : schema.allConcreteFields()) { + if (field.doesIndexing()) { + DataType fieldType = field.getDataType(); + if (fieldType instanceof CollectionDataType) { + fieldType = ((CollectionDataType)fieldType).getNestedType(); + } + if (fieldType == DataType.URI) { + processField(schema, field); + } + } + } + } + + private void processField(Schema schema, SDField uriField) { + String uriName = uriField.getName(); + uriField.setStemming(Stemming.NONE); + DataType generatedType = DataType.STRING; + if (uriField.getDataType() instanceof ArrayDataType) { + generatedType = new ArrayDataType(DataType.STRING); + } + else if (uriField.getDataType() instanceof WeightedSetDataType) { + WeightedSetDataType wdt = (WeightedSetDataType) uriField.getDataType(); + generatedType = new WeightedSetDataType(DataType.STRING, wdt.createIfNonExistent(), wdt.removeIfZero()); + } + + for (String suffix : URL_SUFFIX) { + String partName = uriName + "." 
+ suffix; + // I wonder if this is explicit in qrs or implicit in backend? + // search.addFieldSetItem(uriName, partName); + SDField partField = new SDField(schema.getDocument(), partName, generatedType); + partField.setIndexStructureField(uriField.doesIndexing()); + partField.setRankType(uriField.getRankType()); + partField.setStemming(Stemming.NONE); + partField.getNormalizing().inferLowercase(); + if (uriField.getIndex(suffix) != null) { + partField.addIndex(uriField.getIndex(suffix)); + } + schema.addExtraField(partField); + schema.fieldSets().addBuiltInFieldSetItem(BuiltInFieldSets.INTERNAL_FIELDSET_NAME, partField.getName()); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/UrlFieldValidator.java b/config-model/src/main/java/com/yahoo/schema/processing/UrlFieldValidator.java new file mode 100644 index 00000000000..63d4a342c72 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/UrlFieldValidator.java @@ -0,0 +1,34 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.DataType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * @author bratseth + */ +public class UrlFieldValidator extends Processor { + + public UrlFieldValidator(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + + for (ImmutableSDField field : schema.allConcreteFields()) { + if ( ! 
field.getDataType().equals(DataType.URI)) continue; + + if (field.doesAttributing()) + throw new IllegalArgumentException("Error in " + field + " in " + schema + ": " + + "uri type fields cannot be attributes"); + } + + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ValidateFieldTypes.java b/config-model/src/main/java/com/yahoo/schema/processing/ValidateFieldTypes.java new file mode 100644 index 00000000000..2327cf4d9c9 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/ValidateFieldTypes.java @@ -0,0 +1,83 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.DataType; +import com.yahoo.document.TensorDataType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.HashMap; +import java.util.Map; + +/** + * This Processor makes sure all fields with the same name have the same {@link DataType}. This check + * explicitly disregards whether a field is an index field, an attribute or a summary field. This is a requirement if we + * hope to move to a model where index fields, attributes and summary fields share a common field class. 
+ * + * @author Simon Thoresen Hult + */ +public class ValidateFieldTypes extends Processor { + + public ValidateFieldTypes(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if (!validate) return; + + String searchName = schema.getName(); + Map<String, DataType> seenFields = new HashMap<>(); + verifySearchAndDocFields(searchName, seenFields); + verifySummaryFields(searchName, seenFields); + } + + final protected void verifySearchAndDocFields(String searchName, Map<String, DataType> seenFields) { + schema.allFields().forEach(field -> { + checkFieldType(searchName, "index field", field.getName(), field.getDataType(), seenFields); + for (Map.Entry<String, Attribute> entry : field.getAttributes().entrySet()) { + checkFieldType(searchName, "attribute", entry.getKey(), entry.getValue().getDataType(), seenFields); + } + }); + + } + final protected void verifySummaryFields(String searchName, Map<String, DataType> seenFields) { + for (DocumentSummary summary : schema.getSummaries().values()) { + for (SummaryField field : summary.getSummaryFields().values()) { + checkFieldType(searchName, "summary field", field.getName(), field.getDataType(), seenFields); + } + } + } + + private void checkFieldType(String searchName, String fieldDesc, String fieldName, DataType fieldType, + Map<String, DataType> seenFields) { + DataType seenType = seenFields.get(fieldName); + if (seenType == null) { + seenFields.put(fieldName, fieldType); + } else if ( ! compatibleTypes(seenType, fieldType)) { + throw newProcessException(searchName, fieldName, "Incompatible types. 
Expected " + + seenType.getName() + " for " + fieldDesc + + " '" + fieldName + "', got " + fieldType.getName() + "."); + } + } + + private static boolean compatibleTypes(DataType seenType, DataType fieldType) { + // legacy tag field type compatibility; probably not needed any more (Oct 2016) + if ("tag".equals(seenType.getName())) { + return "tag".equals(fieldType.getName()) || "WeightedSet<string>".equals(fieldType.getName()); + } + if ("tag".equals(fieldType.getName())) { + return "tag".equals(seenType.getName()) || "WeightedSet<string>".equals(seenType.getName()); + } + if (seenType instanceof TensorDataType && fieldType instanceof TensorDataType) { + return fieldType.isAssignableFrom(seenType); // TODO: Just do this for all types + } + return seenType.equals(fieldType); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ValidateFieldTypesDocumentsOnly.java b/config-model/src/main/java/com/yahoo/schema/processing/ValidateFieldTypesDocumentsOnly.java new file mode 100644 index 00000000000..08771b40fe9 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/ValidateFieldTypesDocumentsOnly.java @@ -0,0 +1,26 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.document.DataType; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.HashMap; +import java.util.Map; + +public class ValidateFieldTypesDocumentsOnly extends ValidateFieldTypes { + public ValidateFieldTypesDocumentsOnly(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! 
validate) return; + + String searchName = schema.getName(); + Map<String, DataType> seenFields = new HashMap<>(); + verifySearchAndDocFields(searchName, seenFields); + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ValidateFieldWithIndexSettingsCreatesIndex.java b/config-model/src/main/java/com/yahoo/schema/processing/ValidateFieldWithIndexSettingsCreatesIndex.java new file mode 100644 index 00000000000..5423defa74a --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/ValidateFieldWithIndexSettingsCreatesIndex.java @@ -0,0 +1,42 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.document.Matching; +import com.yahoo.schema.document.Ranking; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * Check that fields with index settings actually creates an index or attribute + * + * @author bratseth + */ +public class ValidateFieldWithIndexSettingsCreatesIndex extends Processor { + + public ValidateFieldWithIndexSettingsCreatesIndex(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if ( ! validate) return; + + Matching defaultMatching = new Matching(); + Ranking defaultRanking = new Ranking(); + for (SDField field : schema.allConcreteFields()) { + if (field.doesIndexing()) continue; + if (field.doesAttributing()) continue; + + if ( ! field.getRanking().equals(defaultRanking)) + fail(schema, field, + "Fields which are not creating an index or attribute can not contain rank settings."); + if ( ! 
field.getMatching().equals(defaultMatching)) + fail(schema, field, + "Fields which are not creating an index or attribute can not contain match settings."); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/ValidateStructTypeInheritance.java b/config-model/src/main/java/com/yahoo/schema/processing/ValidateStructTypeInheritance.java new file mode 100644 index 00000000000..cad555a24b1 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/ValidateStructTypeInheritance.java @@ -0,0 +1,71 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing; + +import com.yahoo.schema.Schema; +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import com.yahoo.document.Field; +import com.yahoo.schema.document.SDDocumentType; + +import java.util.ArrayList; +import java.util.HashSet; + +/** + * @author arnej + */ +public class ValidateStructTypeInheritance extends Processor { + + public ValidateStructTypeInheritance(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if (!validate) return; + verifyNoRedeclarations(schema.getDocument()); + } + + void fail(Field field, String message) { + throw newProcessException(schema, field, message); + } + + void verifyNoRedeclarations(SDDocumentType docType) { + for (SDDocumentType type : docType.allTypes().values()) { + if (type.isStruct()) { + var inheritedTypes = new ArrayList<SDDocumentType>(type.getInheritedTypes()); + for (int i = 0; i < inheritedTypes.size(); i++) { + SDDocumentType inherit = inheritedTypes.get(i); + for (var extra : inherit.getInheritedTypes()) { + if (! 
inheritedTypes.contains(extra)) { + inheritedTypes.add(extra); + } + } + } + if (inheritedTypes.isEmpty()) continue; + var seenFieldNames = new HashSet<>(); + for (var field : type.getDocumentType().contentStruct().getFieldsThisTypeOnly()) { + if (seenFieldNames.contains(field.getName())) { + // cannot happen? + fail(field, "struct "+type.getName()+" has multiple fields with same name: "+field.getName()); + } + seenFieldNames.add(field.getName()); + } + for (SDDocumentType inherit : inheritedTypes) { + if (inherit.isStruct()) { + for (var field : inherit.getDocumentType().contentStruct().getFieldsThisTypeOnly()) { + if (seenFieldNames.contains(field.getName())) { + fail(field, "struct "+type.getName()+" cannot inherit from "+inherit.getName()+" and redeclare field "+field.getName()); + } + seenFieldNames.add(field.getName()); + } + } else { + fail(new Field("no field"), "struct cannot inherit from non-struct "+inherit.getName()+" class "+inherit.getClass()); + } + } + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/WordMatch.java b/config-model/src/main/java/com/yahoo/schema/processing/WordMatch.java new file mode 100644 index 00000000000..1e312b71afd --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/WordMatch.java @@ -0,0 +1,51 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.document.MatchType; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.document.Stemming; +import com.yahoo.schema.Schema; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +/** + * The implementation of word matching - with word matching the field is assumed to contain a single "word" - some + * contiguous sequence of word and number characters - but without changing the data at the indexing side (as with text + * matching) to enforce this. Word matching is thus almost like exact matching on the indexing side (no action taken), + * and like text matching on the query side. This may be suitable for attributes, where people both expect the data to + * be left as in the input document, and trivially written queries to work by default. However, this may easily lead to + * data which cannot be matched at all as the indexing and query side does not agree. 
+ * + * @author bratseth + */ +public class WordMatch extends Processor { + + public WordMatch(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + public void process(boolean validate, boolean documentsOnly) { + for (SDField field : schema.allConcreteFields()) { + processFieldRecursive(field); + } + } + + private void processFieldRecursive(SDField field) { + processField(field); + for (SDField structField : field.getStructFields()) { + processField(structField); + } + } + + private void processField(SDField field) { + if (!field.getMatching().getType().equals(MatchType.WORD)) { + return; + } + field.setStemming(Stemming.NONE); + field.getNormalizing().inferLowercase(); + field.addQueryCommand("word"); + } + + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/IndexCommandResolver.java b/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/IndexCommandResolver.java new file mode 100644 index 00000000000..565a377f2a9 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/IndexCommandResolver.java @@ -0,0 +1,62 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing.multifieldresolver; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.Schema; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.logging.Level; + +/** + * Resolver-class for harmonizing index-commands in multifield indexes + */ +public class IndexCommandResolver extends MultiFieldResolver { + + /** Commands which don't have to be harmonized between fields */ + private static List<String> ignoredCommands = new ArrayList<>(); + + /** Commands which must be harmonized between fields */ + private static List<String> harmonizedCommands = new ArrayList<>(); + + static { + String[] ignore = { "complete-boost", "literal-boost", "highlight" }; + ignoredCommands.addAll(Arrays.asList(ignore)); + String[] harmonize = { "stemming", "normalizing" }; + harmonizedCommands.addAll(Arrays.asList(harmonize)); + } + + public IndexCommandResolver(String indexName, List<SDField> fields, Schema schema, DeployLogger logger) { + super(indexName, fields, schema, logger); + } + + /** + * Check index-commands for each field, report and attempt to fix any + * inconsistencies + */ + public void resolve() { + for (SDField field : fields) { + for (String command : field.getQueryCommands()) { + if (!ignoredCommands.contains(command)) + checkCommand(command); + } + } + } + + private void checkCommand(String command) { + for (SDField field : fields) { + if (!field.hasQueryCommand(command)) { + if (harmonizedCommands.contains(command)) { + deployLogger.logApplicationPackage(Level.WARNING, command + " must be added to all fields going to the same index (" + indexName + ")" + + ", adding to field " + field.getName()); + field.addQueryCommand(command); + } else { + deployLogger.logApplicationPackage(Level.WARNING, "All fields going to the same index should have the same query-commands. 
Field \'" + field.getName() + + "\' doesn't contain command \'" + command+"\'"); + } + } + } + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/MultiFieldResolver.java b/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/MultiFieldResolver.java new file mode 100644 index 00000000000..ed8ad61706b --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/MultiFieldResolver.java @@ -0,0 +1,33 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.processing.multifieldresolver; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.Schema; +import java.util.List; + +/** + * Abstract superclass of all multifield conflict resolvers + */ +public abstract class MultiFieldResolver { + + protected String indexName; + protected List<SDField> fields; + protected Schema schema; + + protected DeployLogger deployLogger; + + public MultiFieldResolver(String indexName, List<SDField> fields, Schema schema, DeployLogger logger) { + this.indexName = indexName; + this.fields = fields; + this.schema = schema; + this.deployLogger = logger; + } + + /** + * Checks the list of fields for specific conflicts, and reports and/or + * attempts to correct them + */ + public abstract void resolve(); + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/RankProfileTypeSettingsProcessor.java b/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/RankProfileTypeSettingsProcessor.java new file mode 100644 index 00000000000..3d79ac7d68a --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/RankProfileTypeSettingsProcessor.java @@ -0,0 +1,105 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing.multifieldresolver; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.search.query.profile.types.FieldDescription; +import com.yahoo.search.query.profile.types.FieldType; +import com.yahoo.search.query.profile.types.QueryProfileType; +import com.yahoo.search.query.profile.types.TensorFieldType; +import com.yahoo.schema.FeatureNames; +import com.yahoo.schema.RankProfile; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.Attribute; +import com.yahoo.schema.document.ImmutableSDField; +import com.yahoo.schema.document.ImportedField; +import com.yahoo.schema.document.ImportedFields; +import com.yahoo.schema.processing.Processor; +import com.yahoo.searchlib.rankingexpression.Reference; +import com.yahoo.tensor.TensorType; +import com.yahoo.vespa.model.container.search.QueryProfiles; + +import java.util.Map; +import java.util.Optional; + +/** + * This processes a schema and adds input type settings on all rank profiles. + * + * Currently, type settings are limited to the type of tensor attribute fields and tensor query features. 
+ * + * @author geirst + */ +public class RankProfileTypeSettingsProcessor extends Processor { + + public RankProfileTypeSettingsProcessor(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) { + super(schema, deployLogger, rankProfileRegistry, queryProfiles); + } + + @Override + public void process(boolean validate, boolean documentsOnly) { + if (documentsOnly) return; + + processAttributeFields(); + processImportedFields(); + processQueryProfileTypes(); + } + + private void processAttributeFields() { + if (schema == null) return; // we're processing global profiles + for (ImmutableSDField field : schema.allConcreteFields()) { + Attribute attribute = field.getAttributes().get(field.getName()); + if (attribute != null && attribute.tensorType().isPresent()) { + addAttributeTypeToRankProfiles(attribute.getName(), attribute.tensorType().get().toString()); + } + } + } + + private void processImportedFields() { + if (schema == null) return; // we're processing global profiles + Optional<ImportedFields> importedFields = schema.importedFields(); + if (importedFields.isPresent()) { + importedFields.get().fields().forEach((fieldName, field) -> processImportedField(field)); + } + } + + private void processImportedField(ImportedField field) { + ImmutableSDField targetField = field.targetField(); + Attribute attribute = targetField.getAttributes().get(targetField.getName()); + if (attribute != null && attribute.tensorType().isPresent()) { + addAttributeTypeToRankProfiles(field.fieldName(), attribute.tensorType().get().toString()); + } + } + + private void addAttributeTypeToRankProfiles(String attributeName, String attributeType) { + for (RankProfile profile : rankProfileRegistry.rankProfilesOf(schema)) { + profile.addAttributeType(attributeName, attributeType); + } + } + + private void processQueryProfileTypes() { + for (QueryProfileType queryProfileType : queryProfiles.getRegistry().getTypeRegistry().allComponents()) 
{ + for (Map.Entry<String, FieldDescription> fieldDescEntry : queryProfileType.fields().entrySet()) { + processFieldDescription(fieldDescEntry.getValue()); + } + } + } + + private void processFieldDescription(FieldDescription fieldDescription) { + FieldType fieldType = fieldDescription.getType(); + if (fieldType instanceof TensorFieldType) { + TensorFieldType tensorFieldType = (TensorFieldType)fieldType; + Optional<Reference> reference = Reference.simple(fieldDescription.getName()); + if (reference.isPresent() && FeatureNames.isQueryFeature(reference.get())) + addQueryFeatureTypeToRankProfiles(reference.get(), tensorFieldType.asTensorType()); + } + } + + private void addQueryFeatureTypeToRankProfiles(Reference queryFeature, TensorType queryFeatureType) { + for (RankProfile profile : rankProfileRegistry.all()) { + if (! profile.inputs().containsKey(queryFeature)) // declared inputs have precedence + profile.addInput(queryFeature, + new RankProfile.Input(queryFeature, queryFeatureType, Optional.empty())); + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/RankTypeResolver.java b/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/RankTypeResolver.java new file mode 100644 index 00000000000..6424fd8ba06 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/RankTypeResolver.java @@ -0,0 +1,46 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing.multifieldresolver; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.document.RankType; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.Schema; + +import java.util.List; +import java.util.logging.Level; + +/** + * Checks if fields have defined different rank types for the same + * index (typically in an index-to statement), and if they have + * output a warning and use the first ranktype. + * + * @author hmusum + */ +public class RankTypeResolver extends MultiFieldResolver { + + public RankTypeResolver(String indexName, List<SDField> fields, Schema schema, DeployLogger logger) { + super(indexName, fields, schema, logger); + } + + public void resolve() { + RankType rankType = null; + if (fields.size() > 0) { + boolean first = true; + for (SDField field : fields) { + if (first) { + rankType = fields.get(0).getRankType(); + first = false; + } else if (!field.getRankType().equals(rankType)) { + deployLogger.logApplicationPackage(Level.WARNING, "In field '" + field.getName() + "' " + + field.getRankType() + " for index '" + indexName + + "' conflicts with " + rankType + + " defined for the same index in field '" + + field.getName() + "'. Using " + + rankType + "."); + field.setRankType(rankType); + } + } + } + } +} + diff --git a/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/StemmingResolver.java b/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/StemmingResolver.java new file mode 100644 index 00000000000..95d9a50a6ab --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/multifieldresolver/StemmingResolver.java @@ -0,0 +1,43 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.processing.multifieldresolver; + +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.document.Stemming; + +import java.util.List; +import java.util.logging.Level; + +/** + * Class resolving conflicts when fields with different stemming-settings are + * combined into the same index + */ +public class StemmingResolver extends MultiFieldResolver { + + public StemmingResolver(String indexName, List<SDField> fields, Schema schema, DeployLogger logger) { + super(indexName, fields, schema, logger); + } + + @Override + public void resolve() { + checkStemmingForIndexFields(indexName, fields); + } + + private void checkStemmingForIndexFields(String indexName, List<SDField> fields) { + Stemming stemming = null; + SDField stemmingField = null; + for (SDField field : fields) { + if (stemming == null && stemmingField==null) { + stemming = field.getStemming(schema); + stemmingField = field; + } else if (stemming != field.getStemming(schema)) { + deployLogger.logApplicationPackage(Level.WARNING, "Field '" + field.getName() + "' has " + field.getStemming(schema) + + ", whereas field '" + stemmingField.getName() + "' has " + stemming + + ". All fields indexing to the index '" + indexName + "' must have the same stemming." + + " This should be corrected as it will make indexing fail in a few cases."); + } + } + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/processing/package-info.java b/config-model/src/main/java/com/yahoo/schema/processing/package-info.java new file mode 100644 index 00000000000..e81d50897ac --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/processing/package-info.java @@ -0,0 +1,14 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+/** + * Classes in this package (processors) implement some search + * definition features by reducing them to simpler features. + * The processors are run after parsing of the search definition, + * before creating the derived model. + * + * For simplicity, features should always be implemented here + * rather than in the derived model if possible. + * + * New processors must be added to the list in Processing. + */ +@com.yahoo.api.annotations.PackageMarker +package com.yahoo.schema.processing; |