diff options
author | Jon Bratseth <bratseth@gmail.com> | 2022-05-19 12:03:06 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2022-05-19 12:03:06 +0200 |
commit | 5c24dc5c9642a8d9ed70aee4c950fd0678a1ebec (patch) | |
tree | bd9b74bf00c832456f0b83c1b2cd7010be387d68 /config-model/src/main/java/com/yahoo/schema/parser | |
parent | f17c4fe7de4c55f5c4ee61897eab8c2f588d8405 (diff) |
Rename the 'searchdefinition' package to 'schema'
Diffstat (limited to 'config-model/src/main/java/com/yahoo/schema/parser')
27 files changed, 3055 insertions, 0 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java new file mode 100644 index 00000000000..fa656b72530 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java @@ -0,0 +1,331 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.parser;

import com.yahoo.document.DataType;
import com.yahoo.document.DataTypeName;
import com.yahoo.schema.parser.ConvertParsedTypes.TypeResolver;
import com.yahoo.schema.Index;
import com.yahoo.schema.Schema;
import com.yahoo.schema.document.Attribute;
import com.yahoo.schema.document.BooleanIndexDefinition;
import com.yahoo.schema.document.Case;
import com.yahoo.schema.document.Dictionary;
import com.yahoo.schema.document.NormalizeLevel;
import com.yahoo.schema.document.RankType;
import com.yahoo.schema.document.SDDocumentType;
import com.yahoo.schema.document.SDField;
import com.yahoo.schema.document.Sorting;
import com.yahoo.schema.document.annotation.SDAnnotationType;
import com.yahoo.vespa.documentmodel.SummaryField;
import com.yahoo.vespa.documentmodel.SummaryTransform;

import java.util.Locale;

/**
 * Turns parsed field declarations (ParsedField, ParsedAttribute, ParsedIndex,
 * ParsedSummaryField, ...) into SDField objects carrying the parsed settings.
 * Types are resolved through the TypeResolver given at construction.
 *
 * @author arnej27959
 **/
public class ConvertParsedFields {

    private final TypeResolver context;

    ConvertParsedFields(TypeResolver context) {
        this.context = context;
    }

    /** Applies each match setting that is present in the parsed settings to the field. */
    static void convertMatchSettings(SDField field, ParsedMatchSettings parsed) {
        parsed.getMatchType().ifPresent(type -> field.setMatchingType(type));
        parsed.getMatchCase().ifPresent(matchCase -> field.setMatchingCase(matchCase));
        parsed.getGramSize().ifPresent(size -> field.getMatching().setGramSize(size));
        parsed.getMaxLength().ifPresent(length -> field.getMatching().maxLength(length));
        parsed.getMatchAlgorithm().ifPresent(algorithm -> field.setMatchingAlgorithm(algorithm));
        parsed.getExactTerminator().ifPresent(terminator -> field.getMatching().setExactMatchTerminator(terminator));
    }

    /** Returns the attribute registered under the given name, adding a new one (of the field's data type) if missing. */
    private static Attribute getOrAddAttribute(SDField field, String name) {
        Attribute existing = field.getAttributes().get(name);
        if (existing != null) {
            return existing;
        }
        Attribute created = new Attribute(name, field.getDataType());
        field.addAttribute(created);
        return created;
    }

    /**
     * Picks the attribute that parsed attribute settings should be applied to:
     * when the field name ends with "." + name, the attribute registered under the
     * full field name is preferred; otherwise (or if that lookup finds nothing)
     * the attribute registered under name is used, being created if needed.
     */
    private static Attribute attributeToConfigure(SDField field, String name) {
        if (field.getName().endsWith("." + name)) {
            Attribute ownAttribute = field.getAttributes().get(field.getName());
            if (ownAttribute != null) {
                return ownAttribute;
            }
        }
        return getOrAddAttribute(field, name);
    }

    /** Returns the index with the given name on the field, adding a new one if missing. */
    private static Index getOrAddIndex(SDField field, String indexName) {
        Index existing = field.getIndex(indexName);
        if (existing != null) {
            return existing;
        }
        Index created = new Index(indexName);
        field.addIndex(created);
        return created;
    }

    /** Applies parsed sorting settings to the sorting of the named attribute, creating the attribute if needed. */
    void convertSorting(SDField field, ParsedSorting parsed, String name) {
        Sorting sorting = getOrAddAttribute(field, name).getSorting();
        if (parsed.getAscending()) {
            sorting.setAscending();
        } else {
            sorting.setDescending();
        }
        parsed.getFunction().ifPresent(sortFunction -> sorting.setFunction(sortFunction));
        parsed.getStrength().ifPresent(sortStrength -> sorting.setStrength(sortStrength));
        parsed.getLocale().ifPresent(sortLocale -> sorting.setLocale(sortLocale));
    }

    /** Applies a parsed attribute declaration (flags, aliases, distance metric, sorting) to the field. */
    void convertAttribute(SDField field, ParsedAttribute parsed) {
        String name = parsed.name();
        Attribute attribute = attributeToConfigure(field, name);

        attribute.setHuge(parsed.getHuge());
        attribute.setPaged(parsed.getPaged());
        attribute.setFastSearch(parsed.getFastSearch());
        // fast-rank is only ever switched on here, never off
        if (parsed.getFastRank()) {
            attribute.setFastRank(parsed.getFastRank());
        }
        attribute.setFastAccess(parsed.getFastAccess());
        attribute.setMutable(parsed.getMutable());
        attribute.setEnableBitVectors(parsed.getEnableBitVectors());
        attribute.setEnableOnlyBitVector(parsed.getEnableOnlyBitVector());

        // attribute.setTensorType(?)

        for (String alias : parsed.getAliases()) {
            field.getAliasToName().put(alias, parsed.lookupAliasedFrom(alias));
        }
        parsed.getDistanceMetric().ifPresent(metric ->
                attribute.setDistanceMetric(Attribute.DistanceMetric.valueOf(metric.toUpperCase(Locale.ENGLISH))));
        parsed.getSorting().ifPresent(sortInfo -> convertSorting(field, sortInfo, name));
    }

    /** Sets the rank type on the field itself when no index name is given, otherwise on the named index. */
    private void convertRankType(SDField field, String indexName, String rankType) {
        RankType type = RankType.fromString(rankType);
        boolean noIndexGiven = (indexName == null) || indexName.equals("");
        if (noIndexGiven) {
            field.setRankType(type); // Set default if the index is not specified.
            return;
        }
        getOrAddIndex(field, indexName).setRankType(type);
    }

    /**
     * Maps a normalizing keyword to its level; "all" maps to the same level as "accent".
     *
     * @throws IllegalArgumentException if the keyword is not recognized
     */
    private static NormalizeLevel.Level normalizeLevelOf(String setting) {
        if (setting != null) {
            switch (setting) {
                case "none":      return NormalizeLevel.Level.NONE;
                case "codepoint": return NormalizeLevel.Level.CODEPOINT;
                case "lowercase": return NormalizeLevel.Level.LOWERCASE;
                case "accent":
                case "all":       return NormalizeLevel.Level.ACCENT;
            }
        }
        throw new IllegalArgumentException("invalid normalizing setting: " + setting);
    }

    private void convertNormalizing(SDField field, String setting) {
        field.setNormalizing(new NormalizeLevel(normalizeLevelOf(setting), true));
    }

    // from grammar, things that can be inside struct-field block
    private void convertCommonFieldSettings(SDField field, ParsedField parsed) {
        convertMatchSettings(field, parsed.matchSettings());
        parsed.getIndexing().ifPresent(indexing -> field.setIndexingScript(indexing.script()));
        parsed.getWeight().ifPresent(weight -> field.setWeight(weight));
        parsed.getStemming().ifPresent(stemming -> field.setStemming(stemming));
        parsed.getNormalizing().ifPresent(normalizing -> convertNormalizing(field, normalizing));
        for (var parsedAttribute : parsed.getAttributes()) {
            convertAttribute(field, parsedAttribute);
        }
        for (var parsedSummary : parsed.getSummaryFields()) {
            // a summary field uses its own declared type if it has one, else the type of the field
            DataType fallbackType = field.getDataType();
            var declaredType = parsedSummary.getType();
            DataType summaryType = (declaredType == null) ? fallbackType : context.resolveType(declaredType);
            convertSummaryField(field, parsedSummary, summaryType);
        }
        for (String queryCommand : parsed.getQueryCommands()) {
            field.addQueryCommand(queryCommand);
        }
        for (var parsedStructField : parsed.getStructFields()) {
            convertStructField(field, parsedStructField);
        }
        if (parsed.hasLiteral()) {
            field.getRanking().setLiteral(true);
        }
        if (parsed.hasFilter()) {
            field.getRanking().setFilter(true);
        }
        if (parsed.hasNormal()) {
            field.getRanking().setNormal(true);
        }
    }

    /**
     * Applies parsed settings to an already existing struct field of the given field.
     *
     * @throws IllegalArgumentException if the field has no struct field with the parsed name
     */
    private void convertStructField(SDField field, ParsedField parsed) {
        SDField target = field.getStructField(parsed.name());
        if (target != null) {
            convertCommonFieldSettings(target, parsed);
            return;
        }
        throw new IllegalArgumentException("Struct field '" + parsed.name() + "' has not been defined in struct " +
                                           "for field '" + field.getName() + "'.");
    }

    /** Applies the settings not covered by convertCommonFieldSettings: dictionary, indexes, aliases, rank types, sorting, bolding. */
    private void convertExtraFieldSettings(SDField field, ParsedField parsed) {
        String name = parsed.name();
        for (var option : parsed.getDictionaryOptions()) {
            var dictionary = field.getOrSetDictionary();
            switch (option) {
                case HASH:
                    dictionary.updateType(Dictionary.Type.HASH);
                    break;
                case BTREE:
                    dictionary.updateType(Dictionary.Type.BTREE);
                    break;
                case CASED:
                    dictionary.updateMatch(Case.CASED);
                    break;
                case UNCASED:
                    dictionary.updateMatch(Case.UNCASED);
                    break;
            }
        }
        for (var parsedIndex : parsed.getIndexes()) {
            convertIndex(field, parsedIndex);
        }
        for (var alias : parsed.getAliases()) {
            field.getAliasToName().put(alias, parsed.lookupAliasedFrom(alias));
        }
        for (var rankTypeEntry : parsed.getRankTypes().entrySet()) {
            convertRankType(field, rankTypeEntry.getKey(), rankTypeEntry.getValue());
        }
        parsed.getSorting().ifPresent(sortInfo -> convertSorting(field, sortInfo, name));
        if (parsed.hasBolding()) {
            // TODO must it be so ugly:
            SummaryField bolded = field.getSummaryField(name, true);
            bolded.addSource(name);
            bolded.addDestination("default");
            bolded.setTransform(bolded.getTransform().bold());
        }
    }

    /**
     * Sets transform, sources and destinations on a summary field from its parsed form,
     * and marks it as not implicit. Matched-elements-only takes precedence over dynamic.
     */
    static void convertSummaryFieldSettings(SummaryField summary, ParsedSummaryField parsed) {
        SummaryTransform base = parsed.getMatchedElementsOnly() ? SummaryTransform.MATCHED_ELEMENTS_FILTER
                              : parsed.getDynamic()             ? SummaryTransform.DYNAMICTEASER
                              :                                   SummaryTransform.NONE;
        summary.setTransform(parsed.getBolded() ? base.bold() : base);
        for (String source : parsed.getSources()) {
            summary.addSource(source);
        }
        for (String destination : parsed.getDestinations()) {
            summary.addDestination(destination);
        }
        summary.setImplicit(false);
    }

    /** Creates a summary field of the given type, sourced from the field itself unless sources were declared, and adds it to the field. */
    private void convertSummaryField(SDField field, ParsedSummaryField parsed, DataType type) {
        SummaryField summary = new SummaryField(parsed.name(), type);
        convertSummaryFieldSettings(summary, parsed);
        summary.addDestination("default");
        if (parsed.getSources().isEmpty()) {
            summary.addSource(field.getName());
        }
        field.addSummaryField(summary);
    }

    private void convertIndex(SDField field, ParsedIndex parsed) {
        convertIndexSettings(getOrAddIndex(field, parsed.name()), parsed);
    }

    /** Applies the parsed index settings that are present to the index. */
    private void convertIndexSettings(Index index, ParsedIndex parsed) {
        parsed.getPrefix().ifPresent(prefix -> index.setPrefix(prefix));
        for (String alias : parsed.getAliases()) {
            index.addAlias(alias);
        }
        parsed.getStemming().ifPresent(stemming -> index.setStemming(stemming));

        // a boolean index definition is only created when at least one of its settings was given
        var arity = parsed.getArity();
        var lowerBound = parsed.getLowerBound();
        var upperBound = parsed.getUpperBound();
        var densePostingListThreshold = parsed.getDensePostingListThreshold();
        boolean anyBooleanIndexSetting = arity.isPresent()
                                         || lowerBound.isPresent()
                                         || upperBound.isPresent()
                                         || densePostingListThreshold.isPresent();
        if (anyBooleanIndexSetting) {
            index.setBooleanIndexDefiniton(
                    new BooleanIndexDefinition(arity, lowerBound, upperBound, densePostingListThreshold));
        }

        parsed.getEnableBm25().ifPresent(enableBm25 -> index.setInterleavedFeatures(enableBm25));
        parsed.getHnswIndexParams().ifPresent(params -> index.setHnswIndexParams(params));
    }

    /** Creates a field owned by the document from its parsed form, adds it to the document and returns it. */
    SDField convertDocumentField(Schema schema, SDDocumentType document, ParsedField parsed) {
        DataType dataType = context.resolveType(parsed.getType());
        SDField converted = new SDField(document, parsed.name(), dataType);
        convertCommonFieldSettings(converted, parsed);
        convertExtraFieldSettings(converted, parsed);
        document.addField(converted);
        return converted;
    }

    /** Creates a field from its parsed form and adds it to the schema as an extra field. */
    void convertExtraField(Schema schema, ParsedField parsed) {
        DataType dataType = context.resolveType(parsed.getType());
        SDField converted = new SDField(schema.getDocument(), parsed.name(), dataType);
        convertCommonFieldSettings(converted, parsed);
        convertExtraFieldSettings(converted, parsed);
        schema.addExtraField(converted);
    }

    /** Creates an index from its parsed form and adds it to the schema. */
    void convertExtraIndex(Schema schema, ParsedIndex parsed) {
        Index converted = new Index(parsed.name());
        convertIndexSettings(converted, parsed);
        schema.addIndex(converted);
    }

    /**
     * Builds an SDDocumentType standing in for a struct: one field per parsed struct field
     * (with id overrides applied), the inherited type names, and the resolved struct type.
     */
    SDDocumentType convertStructDeclaration(Schema schema, SDDocumentType document, ParsedStruct parsed) {
        // TODO - can we cleanup this mess
        SDDocumentType structProxy = new SDDocumentType(parsed.name(), schema);
        for (var parsedField : parsed.getFields()) {
            DataType fieldType = context.resolveType(parsedField.getType());
            SDField structMember = new SDField(document, parsedField.name(), fieldType);
            convertCommonFieldSettings(structMember, parsedField);
            structProxy.addField(structMember);
            if (parsedField.hasIdOverride()) {
                structProxy.setFieldId(structMember, parsedField.idOverride());
            }
        }
        for (String inheritedName : parsed.getInherited()) {
            structProxy.inherit(new DataTypeName(inheritedName));
        }
        structProxy.setStruct(context.resolveStruct(parsed));
        return structProxy;
    }

    /** Resolves the annotation type, attaches its converted struct (if it declares one), and adds it to the document. */
    void convertAnnotation(Schema schema, SDDocumentType document, ParsedAnnotation parsed) {
        SDAnnotationType annotationType = context.resolveAnnotation(parsed.name());
        parsed.getStruct().ifPresent(parsedStruct -> {
            SDDocumentType structProxy = convertStructDeclaration(schema, document, parsedStruct);
            structProxy.setStruct(context.resolveStruct(parsedStruct));
            annotationType.setSdDocType(structProxy);
        });
        document.addAnnotation(annotationType);
    }
}
diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedRanking.java b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedRanking.java new file mode 100644 index 00000000000..bd628779b24 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedRanking.java @@ -0,0 +1,124 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.parser; + +import com.yahoo.schema.RankProfile; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.document.RankType; + +import java.util.List; + +/** + * Helper for converting ParsedRankProfile etc to RankProfile with settings + * + * @author arnej27959 + */ +public class ConvertParsedRanking { + + private final RankProfileRegistry rankProfileRegistry; + + // for unit test + ConvertParsedRanking() { + this(new RankProfileRegistry()); + } + + public ConvertParsedRanking(RankProfileRegistry rankProfileRegistry) { + this.rankProfileRegistry = rankProfileRegistry; + } + + private RankProfile makeRankProfile(Schema schema, String name) { + if (name.equals("default")) { + return rankProfileRegistry.get(schema, "default"); + } + return new RankProfile(name, schema, rankProfileRegistry); + } + + void convertRankProfile(Schema schema, ParsedRankProfile parsed) { + RankProfile profile = makeRankProfile(schema, parsed.name()); + for (String name : parsed.getInherited()) + profile.inherit(name); + + parsed.isStrict().ifPresent(value -> profile.setStrict(value)); + + for (var constant : parsed.getConstants().values()) + profile.add(constant); + + for (var onnxModel : parsed.getOnnxModels()) + profile.add(onnxModel); + + for (var input : parsed.getInputs().entrySet()) + profile.addInput(input.getKey(), input.getValue()); + + for (var func : parsed.getFunctions()) { + String name = func.name(); + List<String> parameters = func.getParameters(); + String expression = func.getExpression(); + boolean inline = func.getInline(); + profile.addFunction(name, parameters, expression, inline); + } + + parsed.getRankScoreDropLimit().ifPresent + (value -> profile.setRankScoreDropLimit(value)); + parsed.getTermwiseLimit().ifPresent + (value -> profile.setTermwiseLimit(value)); + parsed.getPostFilterThreshold().ifPresent + (value -> profile.setPostFilterThreshold(value)); + parsed.getApproximateThreshold().ifPresent 
+ (value -> profile.setApproximateThreshold(value)); + parsed.getKeepRankCount().ifPresent + (value -> profile.setKeepRankCount(value)); + parsed.getMinHitsPerThread().ifPresent + (value -> profile.setMinHitsPerThread(value)); + parsed.getNumSearchPartitions().ifPresent + (value -> profile.setNumSearchPartitions(value)); + parsed.getNumThreadsPerSearch().ifPresent + (value -> profile.setNumThreadsPerSearch(value)); + parsed.getReRankCount().ifPresent + (value -> profile.setRerankCount(value)); + + parsed.getMatchPhaseSettings().ifPresent + (value -> profile.setMatchPhaseSettings(value)); + + parsed.getFirstPhaseExpression().ifPresent + (value -> profile.setFirstPhaseRanking(value)); + parsed.getSecondPhaseExpression().ifPresent + (value -> profile.setSecondPhaseRanking(value)); + + for (var value : parsed.getMatchFeatures()) { + profile.addMatchFeatures(value); + } + for (var value : parsed.getRankFeatures()) { + profile.addRankFeatures(value); + } + for (var value : parsed.getSummaryFeatures()) { + profile.addSummaryFeatures(value); + } + + parsed.getInheritedMatchFeatures().ifPresent + (value -> profile.setInheritedMatchFeatures(value)); + parsed.getInheritedSummaryFeatures().ifPresent + (value -> profile.setInheritedSummaryFeatures(value)); + if (parsed.getIgnoreDefaultRankFeatures()) { + profile.setIgnoreDefaultRankFeatures(true); + } + + for (var mutateOp : parsed.getMutateOperations()) { + profile.addMutateOperation(mutateOp); + } + parsed.getFieldsWithRankFilter().forEach + ((fieldName, isFilter) -> profile.addRankSetting(fieldName, RankProfile.RankSetting.Type.PREFERBITVECTOR, isFilter)); + + parsed.getFieldsWithRankWeight().forEach + ((fieldName, weight) -> profile.addRankSetting(fieldName, RankProfile.RankSetting.Type.WEIGHT, weight)); + + parsed.getFieldsWithRankType().forEach + ((fieldName, rankType) -> profile.addRankSetting(fieldName, RankProfile.RankSetting.Type.RANKTYPE, RankType.fromString(rankType))); + + parsed.getRankProperties().forEach + 
((key, values) -> {for (String value : values) profile.addRankProperty(key, value);}); + + // always? + rankProfileRegistry.add(profile); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedSchemas.java b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedSchemas.java new file mode 100644 index 00000000000..f3289621ce1 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedSchemas.java @@ -0,0 +1,221 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import com.yahoo.config.application.api.ApplicationPackage; +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.config.application.api.FileRegistry; +import com.yahoo.config.model.api.ModelContext; +import com.yahoo.config.model.application.provider.BaseDeployLogger; +import com.yahoo.config.model.application.provider.MockFileRegistry; +import com.yahoo.config.model.deploy.TestProperties; +import com.yahoo.config.model.test.MockApplicationPackage; +import com.yahoo.document.DataType; +import com.yahoo.document.DocumentTypeManager; +import com.yahoo.schema.DefaultRankProfile; +import com.yahoo.schema.DocumentOnlySchema; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; +import com.yahoo.schema.UnrankedRankProfile; +import com.yahoo.schema.document.SDDocumentType; +import com.yahoo.schema.document.SDField; +import com.yahoo.schema.document.TemporaryImportedField; +import com.yahoo.schema.parser.ConvertParsedTypes.TypeResolver; +import com.yahoo.vespa.documentmodel.DocumentSummary; +import com.yahoo.vespa.documentmodel.SummaryField; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +/** + * Class converting a collection of schemas from the intermediate format. 
+ * + * @author arnej27959 + **/ +public class ConvertParsedSchemas { + + private final List<ParsedSchema> orderedInput; + private final DocumentTypeManager docMan; + private final ApplicationPackage applicationPackage; + private final FileRegistry fileRegistry; + private final DeployLogger deployLogger; + private final ModelContext.Properties properties; + private final RankProfileRegistry rankProfileRegistry; + private final boolean documentsOnly; + private final ConvertParsedTypes typeConverter; + + // for unit test + ConvertParsedSchemas(List<ParsedSchema> orderedInput, + DocumentTypeManager documentTypeManager) + { + this(orderedInput, documentTypeManager, + MockApplicationPackage.createEmpty(), + new MockFileRegistry(), + new BaseDeployLogger(), + new TestProperties(), + new RankProfileRegistry(), + true); + } + + public ConvertParsedSchemas(List<ParsedSchema> orderedInput, + DocumentTypeManager documentTypeManager, + ApplicationPackage applicationPackage, + FileRegistry fileRegistry, + DeployLogger deployLogger, + ModelContext.Properties properties, + RankProfileRegistry rankProfileRegistry, + boolean documentsOnly) + { + this.orderedInput = orderedInput; + this.docMan = documentTypeManager; + this.applicationPackage = applicationPackage; + this.fileRegistry = fileRegistry; + this.deployLogger = deployLogger; + this.properties = properties; + this.rankProfileRegistry = rankProfileRegistry; + this.documentsOnly = documentsOnly; + this.typeConverter = new ConvertParsedTypes(orderedInput, docMan); + } + + private final Map<String, SDDocumentType> convertedDocuments = new LinkedHashMap<>(); + + public List<Schema> convertToSchemas() { + typeConverter.convert(false); + var resultList = new ArrayList<Schema>(); + for (var parsed : orderedInput) { + Optional<String> inherited; + var inheritList = parsed.getInherited(); + if (inheritList.size() == 0) { + inherited = Optional.empty(); + } else if (inheritList.size() == 1) { + inherited = 
Optional.of(inheritList.get(0)); + } else { + throw new IllegalArgumentException("schema " + parsed.name() + "cannot inherit more than once"); + } + Schema schema = parsed.getDocumentWithoutSchema() + ? new DocumentOnlySchema(applicationPackage, fileRegistry, deployLogger, properties) + : new Schema(parsed.name(), applicationPackage, inherited, fileRegistry, deployLogger, properties); + convertSchema(schema, parsed); + resultList.add(schema); + } + return resultList; + } + + private void convertDocument(Schema schema, ParsedDocument parsed, + ConvertParsedFields fieldConverter) + { + SDDocumentType document = new SDDocumentType(parsed.name()); + for (var struct : parsed.getStructs()) { + var structProxy = fieldConverter.convertStructDeclaration(schema, document, struct); + document.addType(structProxy); + } + for (String inherit : parsed.getInherited()) { + var parent = convertedDocuments.get(inherit); + assert(parent != null); + document.inherit(parent); + } + for (var annotation : parsed.getAnnotations()) { + fieldConverter.convertAnnotation(schema, document, annotation); + } + for (var field : parsed.getFields()) { + var sdf = fieldConverter.convertDocumentField(schema, document, field); + if (field.hasIdOverride()) { + document.setFieldId(sdf, field.idOverride()); + } + } + convertedDocuments.put(parsed.name(), document); + schema.addDocument(document); + } + + private void convertDocumentSummary(Schema schema, ParsedDocumentSummary parsed, TypeResolver typeContext) { + var docsum = new DocumentSummary(parsed.name(), schema); + var inheritList = parsed.getInherited(); + if (inheritList.size() == 1) { + docsum.setInherited(inheritList.get(0)); + } else if (inheritList.size() != 0) { + throw new IllegalArgumentException("document-summary "+parsed.name()+" cannot inherit more than once"); + } + if (parsed.getFromDisk()) { + docsum.setFromDisk(true); + } + if (parsed.getOmitSummaryFeatures()) { + docsum.setOmitSummaryFeatures(true); + } + for (var parsedField : 
parsed.getSummaryFields()) { + DataType dataType = typeContext.resolveType(parsedField.getType()); + var summaryField = new SummaryField(parsedField.name(), dataType); + // XXX does not belong here: + summaryField.setVsmCommand(SummaryField.VsmCommand.FLATTENSPACE); + ConvertParsedFields.convertSummaryFieldSettings(summaryField, parsedField); + docsum.add(summaryField); + } + schema.addSummary(docsum); + } + + private void convertImportField(Schema schema, ParsedSchema.ImportedField f) { + // needs rethinking + var importedFields = schema.temporaryImportedFields().get(); + if (importedFields.hasField(f.asFieldName)) { + throw new IllegalArgumentException("For schema '" + schema.getName() + + "', import field as '" + f.asFieldName + + "': Field already imported"); + } + importedFields.add(new TemporaryImportedField(f.asFieldName, f.refFieldName, f.foreignFieldName)); + } + + private void convertFieldSet(Schema schema, ParsedFieldSet parsed) { + String setName = parsed.name(); + for (String field : parsed.getFieldNames()) { + schema.fieldSets().addUserFieldSetItem(setName, field); + } + for (String command : parsed.getQueryCommands()) { + schema.fieldSets().userFieldSets().get(setName).queryCommands().add(command); + } + if (parsed.getMatchSettings().isPresent()) { + // same ugliness as SDParser.jj used to have: + var tmp = new SDField(setName, DataType.STRING); + ConvertParsedFields.convertMatchSettings(tmp, parsed.matchSettings()); + schema.fieldSets().userFieldSets().get(setName).setMatching(tmp.getMatching()); + } + } + + private void convertSchema(Schema schema, ParsedSchema parsed) { + if (parsed.hasStemming()) { + schema.setStemming(parsed.getStemming()); + } + parsed.getRawAsBase64().ifPresent(value -> schema.enableRawAsBase64(value)); + var typeContext = typeConverter.makeContext(parsed.getDocument()); + var fieldConverter = new ConvertParsedFields(typeContext); + convertDocument(schema, parsed.getDocument(), fieldConverter); + for (var field : 
parsed.getFields()) { + fieldConverter.convertExtraField(schema, field); + } + for (var index : parsed.getIndexes()) { + fieldConverter.convertExtraIndex(schema, index); + } + for (var docsum : parsed.getDocumentSummaries()) { + convertDocumentSummary(schema, docsum, typeContext); + } + for (var importedField : parsed.getImportedFields()) { + convertImportField(schema, importedField); + } + for (var fieldSet : parsed.getFieldSets()) { + convertFieldSet(schema, fieldSet); + } + if (documentsOnly) { + return; // skip ranking-only content, not used for document type generation + } + for (var constant : parsed.getConstants()) + schema.add(constant); + for (var onnxModel : parsed.getOnnxModels()) + schema.add(onnxModel); + rankProfileRegistry.add(new DefaultRankProfile(schema, rankProfileRegistry)); + rankProfileRegistry.add(new UnrankedRankProfile(schema, rankProfileRegistry)); + var rankConverter = new ConvertParsedRanking(rankProfileRegistry); + for (var rankProfile : parsed.getRankProfiles()) { + rankConverter.convertRankProfile(schema, rankProfile); + } + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedTypes.java b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedTypes.java new file mode 100644 index 00000000000..9f1203ffc9f --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedTypes.java @@ -0,0 +1,337 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.schema.parser; + +import com.yahoo.document.DataType; +import com.yahoo.document.DocumentType; +import com.yahoo.document.DocumentTypeManager; +import com.yahoo.document.PositionDataType; +import com.yahoo.document.StructDataType; +import com.yahoo.document.WeightedSetDataType; +import com.yahoo.document.annotation.AnnotationReferenceDataType; +import com.yahoo.documentmodel.NewDocumentReferenceDataType; +import com.yahoo.documentmodel.OwnedStructDataType; +import com.yahoo.schema.document.annotation.SDAnnotationType; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Helper class for converting ParsedType instances to DataType + * + * @author arnej27959 + **/ +public class ConvertParsedTypes { + + private final List<ParsedSchema> orderedInput; + private final DocumentTypeManager docMan; + + ConvertParsedTypes(List<ParsedSchema> input) { + this.orderedInput = input; + this.docMan = new DocumentTypeManager(); + } + + public ConvertParsedTypes(List<ParsedSchema> input, DocumentTypeManager docMan) { + this.orderedInput = input; + this.docMan = docMan; + } + + public void convert(boolean andRegister) { + startDataTypes(); + fillDataTypes(); + if (andRegister) { + registerDataTypes(); + } + } + + private Map<String, DocumentType> documentsFromSchemas = new HashMap<>(); + private Map<String, StructDataType> structsFromSchemas = new HashMap<>(); + private Map<String, SDAnnotationType> annotationsFromSchemas = new HashMap<>(); + + private void startDataTypes() { + for (var schema : orderedInput) { + String name = schema.getDocument().name(); + documentsFromSchemas.put(name, new DocumentType(name)); + } + for (var schema : orderedInput) { + var doc = schema.getDocument(); + for (var struct : doc.getStructs()) { + String structId = doc.name() + "->" + struct.name(); + var dt = new OwnedStructDataType(struct.name(), doc.name()); + structsFromSchemas.put(structId, 
dt); + } + for (var annotation : doc.getAnnotations()) { + String annId = doc.name() + "->" + annotation.name(); + var at = new SDAnnotationType(annotation.name()); + annotationsFromSchemas.put(annId, at); + for (String inherit : annotation.getInherited()) { + at.inherit(inherit); + } + var withStruct = annotation.getStruct(); + if (withStruct.isPresent()) { + ParsedStruct struct = withStruct.get(); + String structId = doc.name() + "->" + struct.name(); + var old = structsFromSchemas.put(structId, new OwnedStructDataType(struct.name(), doc.name())); + assert(old == null); + } + } + } + } + + void fillAnnotationStruct(ParsedAnnotation annotation) { + var withStruct = annotation.getStruct(); + if (withStruct.isPresent()) { + var doc = annotation.getOwnerDoc(); + var toFill = findStructFromParsed(withStruct.get()); + for (ParsedField field : withStruct.get().getFields()) { + var t = resolveFromContext(field.getType(), doc); + var f = field.hasIdOverride() + ? new com.yahoo.document.Field(field.name(), field.idOverride(), t) + : new com.yahoo.document.Field(field.name(), t); + toFill.addField(f); + } + for (var parent : annotation.getResolvedInherits()) { + parent.getStruct().ifPresent + (ps -> toFill.inherit(findStructFromParsed(ps))); + } + var at = findAnnotationFromParsed(annotation); + at.setDataType(toFill); + } + } + + private void fillDataTypes() { + for (var schema : orderedInput) { + var doc = schema.getDocument(); + for (var annotation : doc.getAnnotations()) { + var at = findAnnotationFromParsed(annotation); + for (var parent : annotation.getResolvedInherits()) { + at.inherit(findAnnotationFromParsed(parent)); + } + fillAnnotationStruct(annotation); + } + for (var struct : doc.getStructs()) { + var toFill = findStructFromParsed(struct); + // evil ugliness + for (ParsedField field : struct.getFields()) { + if (! 
field.hasIdOverride()) { + var t = resolveFromContext(field.getType(), doc); + var f = new com.yahoo.document.Field(field.name(), t); + toFill.addField(f); + } + } + for (ParsedField field : struct.getFields()) { + if (field.hasIdOverride()) { + var t = resolveFromContext(field.getType(), doc); + var f = new com.yahoo.document.Field(field.name(), field.idOverride(), t); + toFill.addField(f); + } + } + for (var inherit : struct.getResolvedInherits()) { + var parent = findStructFromParsed(inherit); + // ensure a nice, compatible exception message + for (var field : toFill.getFields()) { + if (parent.hasField(field)) { + for (var base : parent.getInheritedTypes()) { + if (base.hasField(field)) { + parent = base; + } + } + throw new IllegalArgumentException + ("In document " + doc.name() + ": struct " + struct.name() + + " cannot inherit from " + parent.getName() + " and redeclare field " + field.getName()); + } + } + toFill.inherit(parent); + } + } + var docToFill = documentsFromSchemas.get(doc.name()); + Map<String, Collection<String>> fieldSets = new HashMap<>(); + List<String> inDocFields = new ArrayList<>(); + for (var docField : doc.getFields()) { + String name = docField.name(); + var t = resolveFromContext(docField.getType(), doc); + var f = new com.yahoo.document.Field(docField.name(), t); + docToFill.addField(f); + if (docField.hasIdOverride()) { + f.setId(docField.idOverride(), docToFill); + } + inDocFields.add(name); + } + fieldSets.put("[document]", inDocFields); + for (var extraField : schema.getFields()) { + String name = extraField.name(); + if (docToFill.hasField(name)) continue; + var t = resolveFromContext(extraField.getType(), doc); + var f = new com.yahoo.document.Field(name, t); + docToFill.addField(f); + } + for (var fieldset : schema.getFieldSets()) { + fieldSets.put(fieldset.name(), fieldset.getFieldNames()); + } + docToFill.addFieldSets(fieldSets); + for (String inherit : doc.getInherited()) { + docToFill.inherit(findDocFromSchemas(inherit)); 
+ } + } + } + + private StructDataType findStructFromParsed(ParsedStruct resolved) { + String structId = resolved.getOwnerName() + "->" + resolved.name(); + var struct = structsFromSchemas.get(structId); + assert(struct != null); + return struct; + } + + private StructDataType findStructFromSchemas(String name, ParsedDocument context) { + var resolved = context.findParsedStruct(name); + if (resolved == null) { + throw new IllegalArgumentException("no struct named " + name + " in context " + context); + } + return findStructFromParsed(resolved); + } + + private SDAnnotationType findAnnotationFromSchemas(String name, ParsedDocument context) { + var resolved = context.findParsedAnnotation(name); + String annotationId = resolved.getOwnerName() + "->" + resolved.name(); + var annotation = annotationsFromSchemas.get(annotationId); + if (annotation == null) { + throw new IllegalArgumentException("no annotation named " + name + " in context " + context); + } + return annotation; + } + + private SDAnnotationType findAnnotationFromParsed(ParsedAnnotation resolved) { + String annotationId = resolved.getOwnerName() + "->" + resolved.name(); + var annotation = annotationsFromSchemas.get(annotationId); + if (annotation == null) { + throw new IllegalArgumentException("no annotation " + resolved.name() + " in " + resolved.getOwnerName()); + } + return annotation; + } + + private DataType createArray(ParsedType pType, ParsedDocument context) { + DataType nested = resolveFromContext(pType.nestedType(), context); + return DataType.getArray(nested); + } + + private DataType createWset(ParsedType pType, ParsedDocument context) { + DataType nested = resolveFromContext(pType.nestedType(), context); + boolean cine = pType.getCreateIfNonExistent(); + boolean riz = pType.getRemoveIfZero(); + return new WeightedSetDataType(nested, cine, riz); + } + + private DataType createMap(ParsedType pType, ParsedDocument context) { + DataType kt = resolveFromContext(pType.mapKeyType(), context); + 
DataType vt = resolveFromContext(pType.mapValueType(), context); + return DataType.getMap(kt, vt); + } + + private DocumentType findDocFromSchemas(String name) { + var dt = documentsFromSchemas.get(name); + if (dt == null) { + throw new IllegalArgumentException("missing document type for: " + name); + } + return dt; + } + + private DataType createAnnRef(ParsedType pType, ParsedDocument context) { + SDAnnotationType annotation = findAnnotationFromSchemas(pType.getNameOfReferencedAnnotation(), context); + return new AnnotationReferenceDataType(annotation); + } + + private DataType createDocRef(ParsedType pType) { + var ref = pType.getReferencedDocumentType(); + assert(ref.getVariant() == ParsedType.Variant.DOCUMENT); + return new NewDocumentReferenceDataType(findDocFromSchemas(ref.name())); + } + + private DataType getBuiltinType(String name) { + switch (name) { + case "bool": return DataType.BOOL; + case "byte": return DataType.BYTE; + case "int": return DataType.INT; + case "long": return DataType.LONG; + case "string": return DataType.STRING; + case "float": return DataType.FLOAT; + case "double": return DataType.DOUBLE; + case "uri": return DataType.URI; + case "predicate": return DataType.PREDICATE; + case "raw": return DataType.RAW; + case "tag": return DataType.TAG; + case "float16": return DataType.FLOAT16; + default: + throw new IllegalArgumentException("Unknown builtin type: "+name); + } + } + + private DataType resolveFromContext(ParsedType pType, ParsedDocument context) { + String name = pType.name(); + switch (pType.getVariant()) { + case NONE: return DataType.NONE; + case BUILTIN: return getBuiltinType(name); + case POSITION: return PositionDataType.INSTANCE; + case ARRAY: return createArray(pType, context); + case WSET: return createWset(pType, context); + case MAP: return createMap(pType, context); + case TENSOR: return DataType.getTensor(pType.getTensorType()); + case DOC_REFERENCE: return createDocRef(pType); + case ANN_REFERENCE: return 
createAnnRef(pType, context); + case DOCUMENT: return findDocFromSchemas(name); + case STRUCT: return findStructFromSchemas(name, context); + case UNKNOWN: + // fallthrough + } + // unknown is probably struct + var found = context.findParsedStruct(name); + if (found != null) { + pType.setVariant(ParsedType.Variant.STRUCT); + return findStructFromSchemas(name, context); + } + if (documentsFromSchemas.containsKey(name)) { + pType.setVariant(ParsedType.Variant.DOCUMENT); + return findDocFromSchemas(name); + } + throw new IllegalArgumentException("unknown type named '" + name + "' in context "+context); + } + + @SuppressWarnings("deprecation") + private void registerDataTypes() { + for (DataType t : structsFromSchemas.values()) { + docMan.register(t); + } + for (DocumentType t : documentsFromSchemas.values()) { + docMan.registerDocumentType(t); + } + for (SDAnnotationType t : annotationsFromSchemas.values()) { + docMan.getAnnotationTypeRegistry().register(t); + } + } + + public class TypeResolver { + private final ParsedDocument context; + public DataType resolveType(ParsedType parsed) { + return resolveFromContext(parsed, context); + } + public DataType resolveStruct(ParsedStruct parsed) { + String structId = context.name() + "->" + parsed.name(); + var r = structsFromSchemas.get(structId); + if (r == null) { + throw new IllegalArgumentException("no datatype found for struct: " + structId); + } + return r; + } + public SDAnnotationType resolveAnnotation(String name) { + return findAnnotationFromSchemas(name, context); + } + TypeResolver(ParsedDocument context) { + this.context = context; + } + } + + public TypeResolver makeContext(ParsedDocument doc) { + return new TypeResolver(doc); + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ConvertSchemaCollection.java b/config-model/src/main/java/com/yahoo/schema/parser/ConvertSchemaCollection.java new file mode 100644 index 00000000000..5509d11885c --- /dev/null +++ 
b/config-model/src/main/java/com/yahoo/schema/parser/ConvertSchemaCollection.java @@ -0,0 +1,212 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import com.yahoo.config.application.api.ApplicationPackage; +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.config.application.api.FileRegistry; +import com.yahoo.config.model.api.ModelContext; +import com.yahoo.config.model.application.provider.BaseDeployLogger; +import com.yahoo.config.model.application.provider.MockFileRegistry; +import com.yahoo.config.model.deploy.TestProperties; +import com.yahoo.config.model.test.MockApplicationPackage; +import com.yahoo.document.DocumentTypeManager; +import com.yahoo.schema.RankProfileRegistry; +import com.yahoo.schema.Schema; + +import java.util.ArrayList; +import java.util.List; + +/** + * Class converting a collection of schemas from the intermediate format. + * + * @author arnej27959 + **/ +public class ConvertSchemaCollection { + + private final IntermediateCollection input; + private final List<ParsedSchema> orderedInput = new ArrayList<>(); + private final DocumentTypeManager docMan; + private final ApplicationPackage applicationPackage; + private final FileRegistry fileRegistry; + private final DeployLogger deployLogger; + private final ModelContext.Properties properties; + private final RankProfileRegistry rankProfileRegistry; + private final boolean documentsOnly; + + // for unit test + ConvertSchemaCollection(IntermediateCollection input, + DocumentTypeManager documentTypeManager) + { + this(input, documentTypeManager, + MockApplicationPackage.createEmpty(), + new MockFileRegistry(), + new BaseDeployLogger(), + new TestProperties(), + new RankProfileRegistry(), + true); + } + + public ConvertSchemaCollection(IntermediateCollection input, + DocumentTypeManager documentTypeManager, + ApplicationPackage applicationPackage, + FileRegistry fileRegistry, + 
DeployLogger deployLogger, + ModelContext.Properties properties, + RankProfileRegistry rankProfileRegistry, + boolean documentsOnly) + { + this.input = input; + this.docMan = documentTypeManager; + this.applicationPackage = applicationPackage; + this.fileRegistry = fileRegistry; + this.deployLogger = deployLogger; + this.properties = properties; + this.rankProfileRegistry = rankProfileRegistry; + this.documentsOnly = documentsOnly; + + input.resolveInternalConnections(); + order(); + pushTypesToDocuments(); + } + + void order() { + var map = input.getParsedSchemas(); + for (var schema : map.values()) { + findOrdering(schema); + } + } + + void findOrdering(ParsedSchema schema) { + if (orderedInput.contains(schema)) return; + for (var parent : schema.getAllResolvedInherits()) { + findOrdering(parent); + } + orderedInput.add(schema); + } + + void pushTypesToDocuments() { + for (var schema : orderedInput) { + for (var struct : schema.getStructs()) { + schema.getDocument().addStruct(struct); + } + for (var annotation : schema.getAnnotations()) { + schema.getDocument().addAnnotation(annotation); + } + } + } + + private ConvertParsedTypes typeConverter; + + public void convertTypes() { + typeConverter = new ConvertParsedTypes(orderedInput, docMan); + typeConverter.convert(true); + } + + public List<Schema> convertToSchemas() { + resolveStructInheritance(); + resolveAnnotationInheritance(); + addMissingAnnotationStructs(); + var converter = new ConvertParsedSchemas(orderedInput, + docMan, + applicationPackage, + fileRegistry, + deployLogger, + properties, + rankProfileRegistry, + documentsOnly); + return converter.convertToSchemas(); + } + + private void resolveStructInheritance() { + List<ParsedStruct> all = new ArrayList<>(); + for (var schema : orderedInput) { + var doc = schema.getDocument(); + for (var struct : doc.getStructs()) { + for (String inherit : struct.getInherited()) { + var parent = doc.findParsedStruct(inherit); + if (parent == null) { + throw new 
IllegalArgumentException("Can not find parent for "+struct+" in "+doc); + } + struct.resolveInherit(inherit, parent); + } + all.add(struct); + } + } + List<String> seen = new ArrayList<>(); + for (ParsedStruct struct : all) { + inheritanceCycleCheck(struct, seen); + } + } + + private void resolveAnnotationInheritance() { + List<ParsedAnnotation> all = new ArrayList(); + for (var schema : orderedInput) { + var doc = schema.getDocument(); + for (var annotation : doc.getAnnotations()) { + for (String inherit : annotation.getInherited()) { + var parent = doc.findParsedAnnotation(inherit); + if (parent == null) { + throw new IllegalArgumentException("Can not find parent for "+annotation+" in "+doc); + } + annotation.resolveInherit(inherit, parent); + } + all.add(annotation); + } + } + List<String> seen = new ArrayList<>(); + for (ParsedAnnotation annotation : all) { + inheritanceCycleCheck(annotation, seen); + } + } + + private void fixupAnnotationStruct(ParsedAnnotation parsed) { + for (var parent : parsed.getResolvedInherits()) { + fixupAnnotationStruct(parent); + parent.getStruct().ifPresent(ps -> { + var myStruct = parsed.ensureStruct(); + if (! 
myStruct.getInherited().contains(ps.name())) { + myStruct.inherit(ps.name()); + myStruct.resolveInherit(ps.name(), ps); + } + }); + } + } + + private void addMissingAnnotationStructs() { + for (var schema : orderedInput) { + var doc = schema.getDocument(); + for (var annotation : doc.getAnnotations()) { + fixupAnnotationStruct(annotation); + } + } + } + + private void inheritanceCycleCheck(ParsedStruct struct, List<String> seen) { + String name = struct.name(); + if (seen.contains(name)) { + seen.add(name); + throw new IllegalArgumentException("Inheritance/reference cycle for structs: " + + String.join(" -> ", seen)); + } + seen.add(name); + for (ParsedStruct parent : struct.getResolvedInherits()) { + inheritanceCycleCheck(parent, seen); + } + seen.remove(name); + } + + private void inheritanceCycleCheck(ParsedAnnotation annotation, List<String> seen) { + String name = annotation.name(); + if (seen.contains(name)) { + seen.add(name); + throw new IllegalArgumentException("Inheritance/reference cycle for annotations: " + + String.join(" -> ", seen)); + } + seen.add(name); + for (ParsedAnnotation parent : annotation.getResolvedInherits()) { + inheritanceCycleCheck(parent, seen); + } + seen.remove(name); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/DictionaryOption.java b/config-model/src/main/java/com/yahoo/schema/parser/DictionaryOption.java new file mode 100644 index 00000000000..3acb51ace3f --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/DictionaryOption.java @@ -0,0 +1,5 @@ +package com.yahoo.schema.parser; + +public enum DictionaryOption { + HASH, BTREE, CASED, UNCASED +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/InheritanceResolver.java b/config-model/src/main/java/com/yahoo/schema/parser/InheritanceResolver.java new file mode 100644 index 00000000000..ad9acf2f095 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/InheritanceResolver.java @@ -0,0 +1,130 @@ +// Copyright 
Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Class resolving some inheritance relationships. + * + * @author arnej27959 + **/ +public class InheritanceResolver { + + private final Map<String, ParsedSchema> parsedSchemas; + private final Map<String, ParsedDocument> parsedDocs = new HashMap<>(); + private final Map<String, ParsedSchema> schemaForDocs = new HashMap<>(); + + public InheritanceResolver(Map<String, ParsedSchema> parsedSchemas) { + this.parsedSchemas = parsedSchemas; + } + + private void inheritanceCycleCheck(ParsedSchema schema, List<String> seen) { + String name = schema.name(); + if (seen.contains(name)) { + seen.add(name); + throw new IllegalArgumentException("Inheritance/reference cycle for schemas: " + + String.join(" -> ", seen)); + } + seen.add(name); + for (ParsedSchema parent : schema.getAllResolvedInherits()) { + inheritanceCycleCheck(parent, seen); + } + seen.remove(name); + } + + private void resolveSchemaInheritance() { + for (ParsedSchema schema : parsedSchemas.values()) { + for (String inherit : schema.getInherited()) { + var parent = parsedSchemas.get(inherit); + if (parent == null) { + throw new IllegalArgumentException("schema '" + schema.name() + "' inherits '" + inherit + "', but this schema does not exist"); + } + schema.resolveInherit(inherit, parent); + } + } + } + + private void checkSchemaCycles() { + List<String> seen = new ArrayList<>(); + for (ParsedSchema schema : parsedSchemas.values()) { + inheritanceCycleCheck(schema, seen); + } + } + + private void resolveDocumentInheritance() { + for (ParsedSchema schema : parsedSchemas.values()) { + if (! 
schema.hasDocument()) { + throw new IllegalArgumentException("For schema '" + schema.name() + + "': A search specification must have an equally named document inside of it."); + } + ParsedDocument doc = schema.getDocument(); + var old = parsedDocs.put(doc.name(), doc); + if (old != null) { + throw new IllegalArgumentException("duplicate document declaration for " + doc.name()); + } + schemaForDocs.put(doc.name(), schema); + for (String docInherit : doc.getInherited()) { + schema.inheritByDocument(docInherit); + } + for (String docReferenced : doc.getReferencedDocuments()) { + schema.inheritByDocument(docReferenced); + } + } + for (ParsedDocument doc : parsedDocs.values()) { + for (String inherit : doc.getInherited()) { + var parentDoc = parsedDocs.get(inherit); + if (parentDoc == null) { + throw new IllegalArgumentException("document " + doc.name() + " inherits from unavailable document " + inherit); + } + doc.resolveInherit(inherit, parentDoc); + } + for (String docRefName : doc.getReferencedDocuments()) { + var refDoc = parsedDocs.get(docRefName); + if (refDoc == null) { + throw new IllegalArgumentException("document " + doc.name() + " references unavailable document " + docRefName); + } + doc.resolveReferenced(refDoc); + } + } + for (ParsedSchema schema : parsedSchemas.values()) { + for (String docName : schema.getInheritedByDocument()) { + var parent = schemaForDocs.get(docName); + assert(parent.hasDocument()); + assert(parent.getDocument().name().equals(docName)); + schema.resolveInheritByDocument(docName, parent); + } + } + } + + private void inheritanceCycleCheck(ParsedDocument document, List<String> seen) { + String name = document.name(); + if (seen.contains(name)) { + seen.add(name); + throw new IllegalArgumentException("Inheritance/reference cycle for documents: " + + String.join(" -> ", seen)); + } + seen.add(name); + for (ParsedDocument parent : document.getAllResolvedParents()) { + inheritanceCycleCheck(parent, seen); + } + seen.remove(name); + } + + 
private void checkDocumentCycles() { + List<String> seen = new ArrayList<>(); + for (ParsedDocument doc : parsedDocs.values()) { + inheritanceCycleCheck(doc, seen); + } + } + + public void resolveInheritance() { + resolveSchemaInheritance(); + resolveDocumentInheritance(); + checkDocumentCycles(); + checkSchemaCycles(); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/IntermediateCollection.java b/config-model/src/main/java/com/yahoo/schema/parser/IntermediateCollection.java new file mode 100644 index 00000000000..8bb9bca3249 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/IntermediateCollection.java @@ -0,0 +1,159 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import com.yahoo.config.application.api.ApplicationPackage; +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.config.model.api.ModelContext; +import com.yahoo.config.model.application.provider.BaseDeployLogger; +import com.yahoo.config.model.deploy.TestProperties; +import com.yahoo.io.IOUtils; +import com.yahoo.io.reader.NamedReader; +import com.yahoo.yolean.Exceptions; + +import java.io.File; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * Class wrapping parsing of schema files and holding a collection of + * schemas in the intermediate format. 
+ * + * @author arnej27959 + **/ +public class IntermediateCollection { + + private final DeployLogger deployLogger; + private final ModelContext.Properties modelProperties; + + private Map<String, ParsedSchema> parsedSchemas = new LinkedHashMap<>(); + + IntermediateCollection() { + this.deployLogger = new BaseDeployLogger(); + this.modelProperties = new TestProperties(); + } + + public IntermediateCollection(DeployLogger logger, ModelContext.Properties properties) { + this.deployLogger = logger; + this.modelProperties = properties; + } + + public Map<String, ParsedSchema> getParsedSchemas() { return Collections.unmodifiableMap(parsedSchemas); } + + public ParsedSchema getParsedSchema(String name) { return parsedSchemas.get(name); } + + public ParsedSchema addSchemaFromString(String input) throws ParseException { + var stream = new SimpleCharStream(input); + var parser = new SchemaParser(stream, deployLogger, modelProperties); + try { + var schema = parser.schema(); + if (parsedSchemas.containsKey(schema.name())) { + throw new IllegalArgumentException("Duplicate schemas named: " + schema.name()); + } + parsedSchemas.put(schema.name(), schema); + return schema; + } catch (TokenMgrException e) { + throw new ParseException("Unknown symbol: " + e.getMessage()); + } catch (ParseException pe) { + throw new ParseException(stream.formatException(Exceptions.toMessageString(pe))); + } + } + + private String addSchemaFromStringWithFileName(String input, String fileName) throws ParseException { + var parsed = addSchemaFromString(input); + String nameFromFile = baseName(fileName); + if (! 
parsed.name().equals(nameFromFile)) { + throw new IllegalArgumentException("The file containing schema '" + + parsed.name() + "' must be named '" + + parsed.name() + ApplicationPackage.SD_NAME_SUFFIX + + "', was '" + stripDirs(fileName) + "'"); + } + return parsed.name(); + } + + private String baseName(String filename) { + int pos = filename.lastIndexOf('/'); + if (pos != -1) { + filename = filename.substring(pos + 1); + } + pos = filename.lastIndexOf('.'); + if (pos != -1) { + filename = filename.substring(0, pos); + } + return filename; + } + + private String stripDirs(String filename) { + int pos = filename.lastIndexOf('/'); + if (pos != -1) { + return filename.substring(pos + 1); + } + return filename; + } + + /** + * parse a schema from the given reader and add result to collection + **/ + public String addSchemaFromReader(NamedReader reader) throws ParseException { + try { + var nameParsed = addSchemaFromStringWithFileName(IOUtils.readAll(reader.getReader()), reader.getName()); + reader.close(); + return nameParsed; + } catch (ParseException ex) { + throw new ParseException("Failed parsing schema from " + reader.getName() + ": " + ex.getMessage()); + } catch (java.io.IOException ex) { + throw new IllegalArgumentException("Failed reading from " + reader.getName() + ": " + ex.getMessage()); + } + } + + /** for unit tests */ + public String addSchemaFromFile(String fileName) throws ParseException { + try { + // return addSchemaFromStringWithFileName(IOUtils.readFile(new File(fileName)), fileName); + var parsed = addSchemaFromString(IOUtils.readFile(new File(fileName))); + return parsed.name(); + } catch (ParseException ex) { + throw new ParseException("Failed parsing schema from " + fileName + ": " + ex.getMessage()); + } catch (java.io.IOException ex) { + throw new IllegalArgumentException("Could not read file " + fileName + ": " + ex.getMessage()); + } + } + + /** + * parse a rank profile from the given reader and add to the schema identified by name. 
+ * note: the named schema must have been parsed already. + **/ + public void addRankProfileFile(String schemaName, NamedReader reader) throws ParseException { + try { + ParsedSchema schema = parsedSchemas.get(schemaName); + if (schema == null) { + throw new IllegalArgumentException("No schema named: " + schemaName); + } + var stream = new SimpleCharStream(IOUtils.readAll(reader.getReader())); + var parser = new SchemaParser(stream, deployLogger, modelProperties); + try { + parser.rankProfile(schema); + } catch (ParseException pe) { + throw new ParseException("Failed parsing rank-profile from " + reader.getName() + ": " + + stream.formatException(Exceptions.toMessageString(pe))); + } + } catch (java.io.IOException ex) { + throw new IllegalArgumentException("Failed reading from " + reader.getName() + ": " + ex.getMessage()); + } + } + + // for unit test + void addRankProfileFile(String schemaName, String fileName) throws ParseException { + try { + var reader = IOUtils.createReader(fileName, "UTF-8"); + addRankProfileFile(schemaName, new NamedReader(fileName, reader)); + } catch (java.io.IOException ex) { + throw new IllegalArgumentException("Could not read file " + fileName + ": " + ex.getMessage()); + } + } + + void resolveInternalConnections() { + var resolver = new InheritanceResolver(parsedSchemas); + resolver.resolveInheritance(); + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedAnnotation.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedAnnotation.java new file mode 100644 index 00000000000..c36656838f7 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedAnnotation.java @@ -0,0 +1,57 @@ +package com.yahoo.schema.parser; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +/** + * This class holds the extracted information after parsing a + * "annotation" block, using simple data structures as far as + * possible. Do not put advanced logic here! 
+ * @author arnej27959 + **/ +class ParsedAnnotation extends ParsedBlock { + + private ParsedStruct wrappedStruct = null; + private final List<String> inherited = new ArrayList<>(); + private final List<ParsedAnnotation> resolvedInherits = new ArrayList<>(); + private ParsedDocument ownedBy = null; + + ParsedAnnotation(String name) { + super(name, "annotation"); + } + + public List<String> getInherited() { return List.copyOf(inherited); } + public List<ParsedAnnotation> getResolvedInherits() { + assert(inherited.size() == resolvedInherits.size()); + return List.copyOf(resolvedInherits); + } + + + public Optional<ParsedStruct> getStruct() { return Optional.ofNullable(wrappedStruct); } + public ParsedDocument getOwnerDoc() { return ownedBy; } + public String getOwnerName() { return ownedBy.name(); } + + public ParsedStruct ensureStruct() { + if (wrappedStruct == null) { + wrappedStruct = new ParsedStruct("annotation." + name()); + wrappedStruct.tagOwner(ownedBy); + } + return wrappedStruct; + } + void setStruct(ParsedStruct struct) { this.wrappedStruct = struct; } + + void inherit(String other) { inherited.add(other); } + + void tagOwner(ParsedDocument owner) { + verifyThat(ownedBy == null, "already owned by", ownedBy); + this.ownedBy = owner; + getStruct().ifPresent(s -> s.tagOwner(owner)); + } + + void resolveInherit(String name, ParsedAnnotation parsed) { + verifyThat(inherited.contains(name), "resolveInherit for non-inherited name", name); + verifyThat(name.equals(parsed.name()), "resolveInherit name mismatch for", name); + resolvedInherits.add(parsed); + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedAttribute.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedAttribute.java new file mode 100644 index 00000000000..be8d20fbe93 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedAttribute.java @@ -0,0 +1,69 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +package com.yahoo.schema.parser; + +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +/** + * This class holds the extracted information after parsing a + * "attribute" block, using simple data structures as far as + * possible. Do not put advanced logic here! + * @author arnej27959 + **/ +class ParsedAttribute extends ParsedBlock { + + private boolean enableBitVectors = false; + private boolean enableOnlyBitVector = false; + private boolean enableFastAccess = false; + private boolean enableFastRank = false; + private boolean enableFastSearch = false; + private boolean enableHuge = false; + private boolean enableMutable = false; + private boolean enablePaged = false; + private final Map<String, String> aliases = new LinkedHashMap<>(); + private ParsedSorting sortSettings = null; + private String distanceMetric = null; + + ParsedAttribute(String name) { + super(name, "attribute"); + } + + List<String> getAliases() { return List.copyOf(aliases.keySet()); } + String lookupAliasedFrom(String alias) { return aliases.get(alias); } + Optional<String> getDistanceMetric() { return Optional.ofNullable(distanceMetric); } + boolean getEnableBitVectors() { return this.enableBitVectors; } + boolean getEnableOnlyBitVector() { return this.enableOnlyBitVector; } + boolean getFastAccess() { return this.enableFastAccess; } + boolean getFastRank() { return this.enableFastRank; } + boolean getFastSearch() { return this.enableFastSearch; } + boolean getHuge() { return this.enableHuge; } + boolean getMutable() { return this.enableMutable; } + boolean getPaged() { return this.enablePaged; } + Optional<ParsedSorting> getSorting() { return Optional.ofNullable(sortSettings); } + + void addAlias(String from, String to) { + verifyThat(! 
aliases.containsKey(to), "already has alias", to); + aliases.put(to, from); + } + + void setDistanceMetric(String value) { + verifyThat(distanceMetric == null, "already has distance-metric", distanceMetric); + this.distanceMetric = value; + } + + ParsedSorting sortInfo() { + if (sortSettings == null) sortSettings = new ParsedSorting(name(), "attribute.sorting"); + return this.sortSettings; + } + + void setEnableBitVectors(boolean value) { this.enableBitVectors = value; } + void setEnableOnlyBitVector(boolean value) { this.enableOnlyBitVector = value; } + void setFastAccess(boolean value) { this.enableFastAccess = true; } + void setFastRank(boolean value) { this.enableFastRank = true; } + void setFastSearch(boolean value) { this.enableFastSearch = true; } + void setHuge(boolean value) { this.enableHuge = true; } + void setMutable(boolean value) { this.enableMutable = true; } + void setPaged(boolean value) { this.enablePaged = true; } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedBlock.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedBlock.java new file mode 100644 index 00000000000..c20abf52bf3 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedBlock.java @@ -0,0 +1,36 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +/** + * Common methods for various Parsed* classes. + * @author arnej27959 + **/ +public class ParsedBlock { + private final String name; + private final String blockType; + + public ParsedBlock(String name, String blockType) { + this.name = name; + this.blockType = blockType; + } + + public final String name() { return name; } + public final String blockType() { return blockType; } + + protected void verifyThat(boolean check, String msg, Object ... 
msgDetails) { + if (check) return; + var buf = new StringBuilder(); + buf.append(blockType).append(" '").append(name).append("' error: "); + buf.append(msg); + for (Object detail : msgDetails) { + buf.append(" "); + buf.append(detail.toString()); + } + throw new IllegalArgumentException(buf.toString()); + } + + public String toString() { + return blockType + " '" + name + "'"; + } +} + diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedDocument.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedDocument.java new file mode 100644 index 00000000000..281e7989885 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedDocument.java @@ -0,0 +1,127 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * This class holds the extracted information after parsing a + * "document" block in a schema (.sd) file, using simple data + * structures as far as possible. Do not put advanced logic here! 
+ * @author arnej27959 + **/ +public class ParsedDocument extends ParsedBlock { + private final List<String> inherited = new ArrayList<>(); + private final Map<String, ParsedDocument> resolvedInherits = new LinkedHashMap(); + private final Map<String, ParsedDocument> resolvedReferences = new LinkedHashMap(); + private final Map<String, ParsedField> docFields = new LinkedHashMap<>(); + private final Map<String, ParsedStruct> docStructs = new LinkedHashMap<>(); + private final Map<String, ParsedAnnotation> docAnnotations = new LinkedHashMap<>(); + + public ParsedDocument(String name) { + super(name, "document"); + } + + List<String> getInherited() { return List.copyOf(inherited); } + List<ParsedAnnotation> getAnnotations() { return List.copyOf(docAnnotations.values()); } + List<ParsedDocument> getResolvedInherits() { + assert(inherited.size() == resolvedInherits.size()); + return List.copyOf(resolvedInherits.values()); + } + List<ParsedDocument> getResolvedReferences() { + return List.copyOf(resolvedReferences.values()); + } + List<ParsedDocument> getAllResolvedParents() { + List<ParsedDocument> all = new ArrayList<>(); + all.addAll(getResolvedInherits()); + all.addAll(getResolvedReferences()); + return all; + } + List<ParsedField> getFields() { return List.copyOf(docFields.values()); } + List<ParsedStruct> getStructs() { return List.copyOf(docStructs.values()); } + ParsedStruct getStruct(String name) { return docStructs.get(name); } + ParsedAnnotation getAnnotation(String name) { return docAnnotations.get(name); } + + List<String> getReferencedDocuments() { + var result = new ArrayList<String>(); + for (var field : docFields.values()) { + var type = field.getType(); + if (type.getVariant() == ParsedType.Variant.DOC_REFERENCE) { + var docType = type.getReferencedDocumentType(); + assert(docType.getVariant() == ParsedType.Variant.DOCUMENT); + result.add(docType.name()); + } + } + return result; + } + + void inherit(String other) { inherited.add(other); } + + void 
addField(ParsedField field) { + String fieldName = field.name().toLowerCase(); + verifyThat(! docFields.containsKey(fieldName), + "Duplicate (case insensitively) " + field + " in document type '" + this.name() + "'"); + docFields.put(fieldName, field); + } + + void addStruct(ParsedStruct struct) { + String sName = struct.name(); + verifyThat(! docStructs.containsKey(sName), "already has struct", sName); + docStructs.put(sName, struct); + struct.tagOwner(this); + } + + void addAnnotation(ParsedAnnotation annotation) { + String annName = annotation.name(); + verifyThat(! docAnnotations.containsKey(annName), "already has annotation", annName); + docAnnotations.put(annName, annotation); + annotation.tagOwner(this); + } + + void resolveInherit(String name, ParsedDocument parsed) { + verifyThat(inherited.contains(name), "resolveInherit for non-inherited name", name); + verifyThat(name.equals(parsed.name()), "resolveInherit name mismatch for", name); + verifyThat(! resolvedInherits.containsKey(name), "double resolveInherit for", name); + resolvedInherits.put(name, parsed); + } + + void resolveReferenced(ParsedDocument parsed) { + var old = resolvedReferences.put(parsed.name(), parsed); + assert(old == null || old == parsed); + } + + ParsedStruct findParsedStruct(String name) { + ParsedStruct found = getStruct(name); + if (found != null) return found; + for (var parent : getAllResolvedParents()) { + var fromParent = parent.findParsedStruct(name); + if (fromParent == null) continue; + if (fromParent == found) continue; + if (found == null) { + found = fromParent; + } else { + throw new IllegalArgumentException("conflicting values for struct " + name + " in " +this); + } + } + return found; + } + + ParsedAnnotation findParsedAnnotation(String name) { + ParsedAnnotation found = docAnnotations.get(name); + if (found != null) return found; + for (var parent : getResolvedInherits()) { + var fromParent = parent.findParsedAnnotation(name); + if (fromParent == null) continue; + if 
(fromParent == found) continue; + if (found == null) { + found = fromParent; + } else { + throw new IllegalArgumentException("conflicting values for annotation " + name + " in " +this); + } + } + return found; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedDocumentSummary.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedDocumentSummary.java new file mode 100644 index 00000000000..93469a86fe3 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedDocumentSummary.java @@ -0,0 +1,49 @@ + +package com.yahoo.schema.parser; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * This class holds the extracted information after parsing a + * "document-summary" block, using simple data structures as far as + * possible. Do not put advanced logic here! + * @author arnej27959 + **/ +class ParsedDocumentSummary extends ParsedBlock { + + private boolean omitSummaryFeatures; + private boolean fromDisk; + private final List<String> inherited = new ArrayList<>(); + private final Map<String, ParsedSummaryField> fields = new LinkedHashMap<>(); + + ParsedDocumentSummary(String name) { + super(name, "document-summary"); + } + + boolean getOmitSummaryFeatures() { return omitSummaryFeatures; } + boolean getFromDisk() { return fromDisk; } + List<ParsedSummaryField> getSummaryFields() { return List.copyOf(fields.values()); } + List<String> getInherited() { return List.copyOf(inherited); } + + ParsedSummaryField addField(ParsedSummaryField field) { + String fieldName = field.name(); + // TODO disallow this on Vespa 8 + // verifyThat(! 
fields.containsKey(fieldName), "already has field", fieldName); + return fields.put(fieldName, field); + } + + void setFromDisk(boolean value) { + this.fromDisk = value; + } + + void setOmitSummaryFeatures(boolean value) { + this.omitSummaryFeatures = value; + } + + void inherit(String other) { + inherited.add(other); + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedField.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedField.java new file mode 100644 index 00000000000..a4df2ac6dc2 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedField.java @@ -0,0 +1,159 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import com.yahoo.schema.document.Stemming; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +/** + * This class holds the extracted information after parsing a "field" + * block, using simple data structures as far as possible. Do not put + * advanced logic here! 
+ * @author arnej27959 + **/ +class ParsedField extends ParsedBlock { + + private ParsedType type; + private boolean hasBolding = false; + private boolean isFilter = false; + private int overrideId = 0; + private boolean isLiteral = false; + private boolean isNormal = false; + private Integer weight; + private String normalizing = null; + private final ParsedMatchSettings matchInfo = new ParsedMatchSettings(); + private Stemming stemming = null; + private ParsedIndexingOp indexingOp = null; + private ParsedSorting sortSettings = null; + private final Map<String, ParsedAttribute> attributes = new LinkedHashMap<>(); + private final Map<String, ParsedIndex> fieldIndexes = new LinkedHashMap<>(); + private final Map<String, String> aliases = new LinkedHashMap<>(); + private final Map<String, String> rankTypes = new LinkedHashMap<>(); + private final Map<String, ParsedField> structFields = new LinkedHashMap<>(); + private final Map<String, ParsedSummaryField> summaryFields = new LinkedHashMap<>(); + private final List<DictionaryOption> dictionaryOptions = new ArrayList<>(); + private final List<String> queryCommands = new ArrayList<>(); + + ParsedField(String name, ParsedType type) { + super(name, "field"); + this.type = type; + } + + ParsedType getType() { return this.type; } + boolean hasBolding() { return this.hasBolding; } + boolean hasFilter() { return this.isFilter; } + boolean hasLiteral() { return this.isLiteral; } + boolean hasNormal() { return this.isNormal; } + boolean hasIdOverride() { return overrideId != 0; } + int idOverride() { return overrideId; } + List<DictionaryOption> getDictionaryOptions() { return List.copyOf(dictionaryOptions); } + List<ParsedAttribute> getAttributes() { return List.copyOf(attributes.values()); } + List<ParsedIndex> getIndexes() { return List.copyOf(fieldIndexes.values()); } + List<ParsedSummaryField> getSummaryFields() { return List.copyOf(summaryFields.values()); } + List<ParsedField> getStructFields() { return 
List.copyOf(structFields.values()); } + List<String> getAliases() { return List.copyOf(aliases.keySet()); } + List<String> getQueryCommands() { return List.copyOf(queryCommands); } + String lookupAliasedFrom(String alias) { return aliases.get(alias); } + ParsedMatchSettings matchSettings() { return this.matchInfo; } + Optional<Integer> getWeight() { return Optional.ofNullable(weight); } + Optional<Stemming> getStemming() { return Optional.ofNullable(stemming); } + Optional<String> getNormalizing() { return Optional.ofNullable(normalizing); } + Optional<ParsedIndexingOp> getIndexing() { return Optional.ofNullable(indexingOp); } + Optional<ParsedSorting> getSorting() { return Optional.ofNullable(sortSettings); } + Map<String, String> getRankTypes() { return Collections.unmodifiableMap(rankTypes); } + + /** get an existing summary field for modification, or create it */ + ParsedSummaryField summaryFieldFor(String name) { + if (summaryFields.containsKey(name)) { + return summaryFields.get(name); + } + var sf = new ParsedSummaryField(name, getType()); + summaryFields.put(name, sf); + return sf; + } + + /** get an existing summary field for modification, or create it */ + ParsedSummaryField summaryFieldFor(String name, ParsedType type) { + if (summaryFields.containsKey(name)) { + var sf = summaryFields.get(name); + if (sf.getType() == null) { + sf.setType(type); + } else { + // TODO check that types are properly equal here + String oldName = sf.getType().name(); + String newName = type.name(); + verifyThat(newName.equals(oldName), "type mismatch for summary field", name, ":", oldName, "/", newName); + } + return sf; + } + var sf = new ParsedSummaryField(name, type); + summaryFields.put(name, sf); + return sf; + } + + void addAlias(String from, String to) { + verifyThat(! aliases.containsKey(to), "already has alias", to); + aliases.put(to, from); + } + + void addIndex(ParsedIndex index) { + String idxName = index.name(); + verifyThat(! 
fieldIndexes.containsKey(idxName), "already has index", idxName); + fieldIndexes.put(idxName, index); + } + + void addRankType(String index, String rankType) { + rankTypes.put(index, rankType); + } + + void dictionary(DictionaryOption option) { + dictionaryOptions.add(option); + } + + void setBolding(boolean value) { this.hasBolding = value; } + void setFilter(boolean value) { this.isFilter = value; } + void setId(int id) { this.overrideId = id; } + void setLiteral(boolean value) { this.isLiteral = value; } + void setNormal(boolean value) { this.isNormal = value; } + void setNormalizing(String value) { this.normalizing = value; } + void setStemming(Stemming stemming) { this.stemming = stemming; } + void setWeight(int weight) { this.weight = weight; } + + ParsedAttribute attributeFor(String attrName) { + return attributes.computeIfAbsent(attrName, n -> new ParsedAttribute(n)); + } + + void setIndexingOperation(ParsedIndexingOp idxOp) { + verifyThat(indexingOp == null, "already has indexing"); + indexingOp = idxOp; + } + + ParsedSorting sortInfo() { + if (sortSettings == null) sortSettings = new ParsedSorting(name(), "field.sorting"); + return this.sortSettings; + } + + void addQueryCommand(String command) { + queryCommands.add(command); + } + + void addStructField(ParsedField structField) { + String fieldName = structField.name(); + verifyThat(! structFields.containsKey(fieldName), "already has struct-field", fieldName); + structFields.put(fieldName, structField); + } + + void addSummaryField(ParsedSummaryField summaryField) { + String fieldName = summaryField.name(); + verifyThat(! 
summaryFields.containsKey(fieldName), "already has summary field", fieldName); + if (summaryField.getType() == null) { + summaryField.setType(getType()); + } + summaryFields.put(fieldName, summaryField); + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedFieldSet.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedFieldSet.java new file mode 100644 index 00000000000..9e8906a41a4 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedFieldSet.java @@ -0,0 +1,36 @@ +package com.yahoo.schema.parser; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +/** + * This class holds the extracted information after parsing a "fieldset" + * block, using simple data structures as far as possible. Do not put + * advanced logic here! + * @author arnej27959 + **/ +class ParsedFieldSet extends ParsedBlock { + + private final List<String> fields = new ArrayList<>(); + private final List<String> queryCommands = new ArrayList<>(); + private ParsedMatchSettings matchInfo = null; + + ParsedFieldSet(String name) { + super(name, "fieldset"); + } + + ParsedMatchSettings matchSettings() { + if (matchInfo == null) matchInfo = new ParsedMatchSettings(); + return this.matchInfo; + } + + List<String> getQueryCommands() { return List.copyOf(queryCommands); } + List<String> getFieldNames() { return List.copyOf(fields); } + Optional<ParsedMatchSettings> getMatchSettings() { + return Optional.ofNullable(this.matchInfo); + } + + void addField(String field) { fields.add(field); } + void addQueryCommand(String command) { queryCommands.add(command); } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedIndex.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedIndex.java new file mode 100644 index 00000000000..cf70168e8d2 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedIndex.java @@ -0,0 +1,79 @@ +// Copyright Yahoo. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import com.yahoo.schema.document.HnswIndexParams; +import com.yahoo.schema.document.Stemming; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +/** + * This class holds the extracted information after parsing an "index" + * block, using simple data structures as far as possible. Do not put + * advanced logic here! + * @author arnej27959 + **/ +class ParsedIndex extends ParsedBlock { + + private Boolean enableBm25 = null; + private Boolean isPrefix = null; + private HnswIndexParams hnswParams = null; + private final List<String> aliases = new ArrayList<>(); + private Stemming stemming = null; + private Integer arity = null; + private Long lowerBound = null; + private Long upperBound = null; + private Double densePLT = null; + + ParsedIndex(String name) { + super(name, "index"); + } + + Optional<Boolean> getEnableBm25() { return Optional.ofNullable(this.enableBm25); } + Optional<Boolean> getPrefix() { return Optional.ofNullable(this.isPrefix); } + Optional<HnswIndexParams> getHnswIndexParams() { return Optional.ofNullable(this.hnswParams); } + List<String> getAliases() { return List.copyOf(aliases); } + boolean hasStemming() { return stemming != null; } + Optional<Stemming> getStemming() { return Optional.ofNullable(stemming); } + Optional<Integer> getArity() { return Optional.ofNullable(this.arity); } + Optional<Long> getLowerBound() { return Optional.ofNullable(this.lowerBound); } + Optional<Long> getUpperBound() { return Optional.ofNullable(this.upperBound); } + Optional<Double> getDensePostingListThreshold() { return Optional.ofNullable(this.densePLT); } + + void addAlias(String alias) { + aliases.add(alias); + } + + void setArity(int arity) { + this.arity = arity; + } + + void setDensePostingListThreshold(double threshold) { + this.densePLT = threshold; + } + + void setEnableBm25(boolean value) { + 
this.enableBm25 = value; + } + + void setHnswIndexParams(HnswIndexParams params) { + this.hnswParams = params; + } + + void setLowerBound(long value) { + this.lowerBound = value; + } + + void setPrefix(boolean value) { + this.isPrefix = value; + } + + void setStemming(Stemming stemming) { + this.stemming = stemming; + } + + void setUpperBound(long value) { + this.upperBound = value; + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedIndexingOp.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedIndexingOp.java new file mode 100644 index 00000000000..3a2df2aac4c --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedIndexingOp.java @@ -0,0 +1,37 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import com.yahoo.vespa.indexinglanguage.ExpressionSearcher; +import com.yahoo.vespa.indexinglanguage.expressions.AttributeExpression; +import com.yahoo.vespa.indexinglanguage.expressions.Expression; +import com.yahoo.vespa.indexinglanguage.expressions.IndexExpression; +import com.yahoo.vespa.indexinglanguage.expressions.LowerCaseExpression; +import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; +import com.yahoo.vespa.indexinglanguage.expressions.SummaryExpression; + +/** + * This class wraps an indexing script expression, with some helper + * methods for extracting information from it + * @author arnej27959 + **/ +class ParsedIndexingOp { + + private final ScriptExpression script; + + ParsedIndexingOp(ScriptExpression script) { + this.script = script; + } + + ScriptExpression script() { return this.script; } + + public boolean doesAttributing() { return containsExpression(AttributeExpression.class); } + public boolean doesIndexing() { return containsExpression(IndexExpression.class); } + public boolean doesLowerCasing() { return containsExpression(LowerCaseExpression.class); } + public boolean 
doesSummarying() { return containsExpression(SummaryExpression.class); } + + private <T extends Expression> boolean containsExpression(Class<T> searchFor) { + var searcher = new ExpressionSearcher<>(searchFor); + var expr = searcher.searchIn(script); + return (expr != null); + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedMatchSettings.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedMatchSettings.java new file mode 100644 index 00000000000..4d3c45ad67f --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedMatchSettings.java @@ -0,0 +1,38 @@ +package com.yahoo.schema.parser; + +import com.yahoo.schema.document.Case; +import com.yahoo.schema.document.MatchType; +import com.yahoo.schema.document.MatchAlgorithm; + +import java.util.Optional; + +/** + * This class holds the extracted information after parsing a "match" + * block, using simple data structures as far as possible. Do not put + * advanced logic here! + * @author arnej27959 + **/ +public class ParsedMatchSettings { + + private MatchType matchType = null; + private Case matchCase = null; + private MatchAlgorithm matchAlgorithm = null; + private String exactTerminator = null; + private Integer gramSize = null; + private Integer maxLength = null; + + Optional<MatchType> getMatchType() { return Optional.ofNullable(matchType); } + Optional<Case> getMatchCase() { return Optional.ofNullable(matchCase); } + Optional<MatchAlgorithm> getMatchAlgorithm() { return Optional.ofNullable(matchAlgorithm); } + Optional<String> getExactTerminator() { return Optional.ofNullable(exactTerminator); } + Optional<Integer> getGramSize() { return Optional.ofNullable(gramSize); } + Optional<Integer> getMaxLength() { return Optional.ofNullable(maxLength); } + + // TODO - consider allowing each set only once: + void setType(MatchType value) { this.matchType = value; } + void setCase(Case value) { this.matchCase = value; } + void setAlgorithm(MatchAlgorithm value) { 
this.matchAlgorithm = value; } + void setExactTerminator(String value) { this.exactTerminator = value; } + void setGramSize(int value) { this.gramSize = value; } + void setMaxLength(int value) { this.maxLength = value; } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedRankFunction.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedRankFunction.java new file mode 100644 index 00000000000..73f1316d468 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedRankFunction.java @@ -0,0 +1,39 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import java.util.ArrayList; +import java.util.List; + +/** + * This class holds the extracted information after parsing a + * "function" block in a rank-profile, using simple data structures as + * far as possible. Do not put advanced logic here! + * @author arnej27959 + **/ +class ParsedRankFunction extends ParsedBlock { + + private boolean inline; + private String expression; + private final List<String> parameters = new ArrayList<>(); + + ParsedRankFunction(String name) { + super(name, "function"); + } + + boolean getInline() { return this.inline; } + String getExpression() { return this.expression; } + List<String> getParameters() { return List.copyOf(parameters); } + + void addParameter(String param) { + verifyThat(! 
parameters.contains(param), "cannot have parameter", param, "twice"); + parameters.add(param); + } + + void setInline(boolean value) { + this.inline = value; + } + + void setExpression(String value) { + this.expression = value; + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedRankProfile.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedRankProfile.java new file mode 100644 index 00000000000..64dd8dd0ad4 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedRankProfile.java @@ -0,0 +1,220 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import com.yahoo.schema.OnnxModel; +import com.yahoo.schema.RankProfile; +import com.yahoo.schema.RankProfile.MatchPhaseSettings; +import com.yahoo.schema.RankProfile.MutateOperation; +import com.yahoo.searchlib.rankingexpression.FeatureList; +import com.yahoo.searchlib.rankingexpression.Reference; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +/** + * This class holds the extracted information after parsing a + * rank-profile block in a schema (.sd) file, using simple data + * structures as far as possible. Do not put advanced logic here! 
+ * + * @author arnej27959 + */ +class ParsedRankProfile extends ParsedBlock { + + private boolean ignoreDefaultRankFeatures = false; + private Double rankScoreDropLimit = null; + private Double termwiseLimit = null; + private Double postFilterThreshold = null; + private Double approximateThreshold = null; + private final List<FeatureList> matchFeatures = new ArrayList<>(); + private final List<FeatureList> rankFeatures = new ArrayList<>(); + private final List<FeatureList> summaryFeatures = new ArrayList<>(); + private Integer keepRankCount = null; + private Integer minHitsPerThread = null; + private Integer numSearchPartitions = null; + private Integer numThreadsPerSearch = null; + private Integer reRankCount = null; + private MatchPhaseSettings matchPhaseSettings = null; + private String firstPhaseExpression = null; + private String inheritedSummaryFeatures = null; + private String inheritedMatchFeatures = null; + private String secondPhaseExpression = null; + private Boolean strict = null; + private final List<MutateOperation> mutateOperations = new ArrayList<>(); + private final List<String> inherited = new ArrayList<>(); + private final Map<String, Boolean> fieldsRankFilter = new LinkedHashMap<>(); + private final Map<String, Integer> fieldsRankWeight = new LinkedHashMap<>(); + private final Map<String, ParsedRankFunction> functions = new LinkedHashMap<>(); + private final Map<String, String> fieldsRankType = new LinkedHashMap<>(); + private final Map<String, List<String>> rankProperties = new LinkedHashMap<>(); + private final Map<Reference, RankProfile.Constant> constants = new LinkedHashMap<>(); + private final Map<Reference, RankProfile.Input> inputs = new LinkedHashMap<>(); + private final List<OnnxModel> onnxModels = new ArrayList<>(); + + ParsedRankProfile(String name) { + super(name, "rank-profile"); + } + + boolean getIgnoreDefaultRankFeatures() { return this.ignoreDefaultRankFeatures; } + Optional<Double> getRankScoreDropLimit() { return 
Optional.ofNullable(this.rankScoreDropLimit); } + Optional<Double> getTermwiseLimit() { return Optional.ofNullable(this.termwiseLimit); } + Optional<Double> getPostFilterThreshold() { return Optional.ofNullable(this.postFilterThreshold); } + Optional<Double> getApproximateThreshold() { return Optional.ofNullable(this.approximateThreshold); } + List<FeatureList> getMatchFeatures() { return List.copyOf(this.matchFeatures); } + List<FeatureList> getRankFeatures() { return List.copyOf(this.rankFeatures); } + List<FeatureList> getSummaryFeatures() { return List.copyOf(this.summaryFeatures); } + Optional<Integer> getKeepRankCount() { return Optional.ofNullable(this.keepRankCount); } + Optional<Integer> getMinHitsPerThread() { return Optional.ofNullable(this.minHitsPerThread); } + Optional<Integer> getNumSearchPartitions() { return Optional.ofNullable(this.numSearchPartitions); } + Optional<Integer> getNumThreadsPerSearch() { return Optional.ofNullable(this.numThreadsPerSearch); } + Optional<Integer> getReRankCount() { return Optional.ofNullable(this.reRankCount); } + Optional<MatchPhaseSettings> getMatchPhaseSettings() { return Optional.ofNullable(this.matchPhaseSettings); } + Optional<String> getFirstPhaseExpression() { return Optional.ofNullable(this.firstPhaseExpression); } + Optional<String> getInheritedMatchFeatures() { return Optional.ofNullable(this.inheritedMatchFeatures); } + List<ParsedRankFunction> getFunctions() { return List.copyOf(functions.values()); } + List<MutateOperation> getMutateOperations() { return List.copyOf(mutateOperations); } + List<String> getInherited() { return List.copyOf(inherited); } + + Map<String, Boolean> getFieldsWithRankFilter() { return Collections.unmodifiableMap(fieldsRankFilter); } + Map<String, Integer> getFieldsWithRankWeight() { return Collections.unmodifiableMap(fieldsRankWeight); } + Map<String, String> getFieldsWithRankType() { return Collections.unmodifiableMap(fieldsRankType); } + Map<String, List<String>> 
getRankProperties() { return Collections.unmodifiableMap(rankProperties); } + Map<Reference, RankProfile.Constant> getConstants() { return Collections.unmodifiableMap(constants); } + Map<Reference, RankProfile.Input> getInputs() { return Collections.unmodifiableMap(inputs); } + List<OnnxModel> getOnnxModels() { return List.copyOf(onnxModels); } + + Optional<String> getInheritedSummaryFeatures() { return Optional.ofNullable(this.inheritedSummaryFeatures); } + Optional<String> getSecondPhaseExpression() { return Optional.ofNullable(this.secondPhaseExpression); } + Optional<Boolean> isStrict() { return Optional.ofNullable(this.strict); } + + void addSummaryFeatures(FeatureList features) { this.summaryFeatures.add(features); } + void addMatchFeatures(FeatureList features) { this.matchFeatures.add(features); } + void addRankFeatures(FeatureList features) { this.rankFeatures.add(features); } + + void inherit(String other) { inherited.add(other); } + + void setInheritedSummaryFeatures(String other) { + verifyThat(inheritedSummaryFeatures == null, "already inherits summary-features"); + this.inheritedSummaryFeatures = other; + } + + void add(RankProfile.Constant constant) { + verifyThat(! constants.containsKey(constant.name()), "already has constant", constant.name()); + constants.put(constant.name(), constant); + } + + void addInput(Reference name, RankProfile.Input input) { + verifyThat(! inputs.containsKey(name), "already has input", name); + inputs.put(name, input); + } + + void add(OnnxModel model) { + onnxModels.add(model); + } + + void addFieldRankFilter(String field, boolean filter) { + fieldsRankFilter.put(field, filter); + } + + void addFieldRankType(String field, String type) { + verifyThat(! fieldsRankType.containsKey(field), "already has rank type for field", field); + fieldsRankType.put(field, type); + } + + void addFieldRankWeight(String field, int weight) { + verifyThat(! 
fieldsRankWeight.containsKey(field), "already has weight for field", field); + // the duplicate check above consults the weight map itself, so a rank-type on the same field is not a conflict + fieldsRankWeight.put(field, weight); + } + + ParsedRankFunction addOrReplaceFunction(ParsedRankFunction func) { + // allowed with warning + // verifyThat(! functions.containsKey(func.name()), "already has function", func.name()); + return functions.put(func.name(), func); + } + + void addMutateOperation(MutateOperation.Phase phase, String attrName, String operation) { + mutateOperations.add(new MutateOperation(phase, attrName, operation)); + } + + void addRankProperty(String key, String value) { + List<String> values = rankProperties.computeIfAbsent(key, k -> new ArrayList<String>()); + values.add(value); + } + + void setFirstPhaseRanking(String expression) { + verifyThat(firstPhaseExpression == null, "already has first-phase expression"); + this.firstPhaseExpression = expression; + } + + void setIgnoreDefaultRankFeatures(boolean value) { + this.ignoreDefaultRankFeatures = value; + } + + void setInheritedMatchFeatures(String other) { + this.inheritedMatchFeatures = other; + } + + void setKeepRankCount(int count) { + // message names this setting, not rerank-count (which setRerankCount reports) + verifyThat(keepRankCount == null, "already has keep-rank-count"); + this.keepRankCount = count; + } + + void setMatchPhaseSettings(MatchPhaseSettings settings) { + verifyThat(matchPhaseSettings == null, "already has match-phase"); + this.matchPhaseSettings = settings; + } + + void setMinHitsPerThread(int minHits) { + verifyThat(minHitsPerThread == null, "already has min-hits-per-thread"); + this.minHitsPerThread = minHits; + } + + void setNumSearchPartitions(int numParts) { + verifyThat(numSearchPartitions == null, "already has num-search-partitions"); + this.numSearchPartitions = numParts; + } + + void setNumThreadsPerSearch(int threads) { + verifyThat(numThreadsPerSearch == null, "already has num-threads-per-search"); + this.numThreadsPerSearch = threads; + } + + void setRankScoreDropLimit(double limit) { + verifyThat(rankScoreDropLimit == null, "already has 
rank-score-drop-limit"); + this.rankScoreDropLimit = limit; + } + + void setRerankCount(int count) { + verifyThat(reRankCount == null, "already has rerank-count"); + this.reRankCount = count; + } + + void setSecondPhaseRanking(String expression) { + verifyThat(secondPhaseExpression == null, "already has second-phase expression"); + this.secondPhaseExpression = expression; + } + + void setStrict(boolean strict) { + verifyThat(this.strict == null, "already has strict"); + this.strict = strict; + } + + void setTermwiseLimit(double limit) { + verifyThat(termwiseLimit == null, "already has termwise-limit"); + this.termwiseLimit = limit; + } + + void setPostFilterThreshold(double threshold) { + verifyThat(postFilterThreshold == null, "already has post-filter-threshold"); + this.postFilterThreshold = threshold; + } + + void setApproximateThreshold(double threshold) { + verifyThat(approximateThreshold == null, "already has approximate-threshold"); + this.approximateThreshold = threshold; + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedSchema.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedSchema.java new file mode 100644 index 00000000000..5ee483db044 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedSchema.java @@ -0,0 +1,176 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import com.yahoo.schema.OnnxModel; +import com.yahoo.schema.RankProfile; +import com.yahoo.schema.document.Stemming; +import com.yahoo.searchlib.rankingexpression.Reference; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +/** + * This class holds the extracted information after parsing + * one schema (.sd) file, using simple data structures + * as far as possible. + * + * Do not put complicated logic here! 
+ * + * @author arnej27959 + */ +public class ParsedSchema extends ParsedBlock { + + public static class ImportedField { + public final String asFieldName; + public final String refFieldName; + public final String foreignFieldName; + public ImportedField(String asField, String refField, String foreignField) { + this.asFieldName = asField; + this.refFieldName = refField; + this.foreignFieldName = foreignField; + } + } + + private boolean documentWithoutSchema = false; + private Boolean rawAsBase64 = null; + private ParsedDocument myDocument = null; + private Stemming defaultStemming = null; + private final List<ImportedField> importedFields = new ArrayList<>(); + private final List<OnnxModel> onnxModels = new ArrayList<>(); + private final Map<Reference, RankProfile.Constant> constants = new LinkedHashMap<>(); + private final List<String> inherited = new ArrayList<>(); + private final List<String> inheritedByDocument = new ArrayList<>(); + private final Map<String, ParsedSchema> resolvedInherits = new LinkedHashMap<>(); + private final Map<String, ParsedSchema> allResolvedInherits = new LinkedHashMap<>(); + private final Map<String, ParsedAnnotation> extraAnnotations = new LinkedHashMap<>(); + private final Map<String, ParsedDocumentSummary> docSums = new LinkedHashMap<>(); + private final Map<String, ParsedField> extraFields = new LinkedHashMap<>(); + private final Map<String, ParsedFieldSet> fieldSets = new LinkedHashMap<>(); + private final Map<String, ParsedIndex> extraIndexes = new LinkedHashMap<>(); + private final Map<String, ParsedRankProfile> rankProfiles = new LinkedHashMap<>(); + private final Map<String, ParsedStruct> extraStructs = new LinkedHashMap<>(); + + public ParsedSchema(String name) { + super(name, "schema"); + } + + boolean getDocumentWithoutSchema() { return documentWithoutSchema; } + Optional<Boolean> getRawAsBase64() { return Optional.ofNullable(rawAsBase64); } + boolean hasDocument() { return myDocument != null; } + ParsedDocument 
getDocument() { return myDocument; } + boolean hasStemming() { return defaultStemming != null; } + Stemming getStemming() { return defaultStemming; } + List<ImportedField> getImportedFields() { return List.copyOf(importedFields); } + List<OnnxModel> getOnnxModels() { return List.copyOf(onnxModels); } + List<ParsedAnnotation> getAnnotations() { return List.copyOf(extraAnnotations.values()); } + List<ParsedDocumentSummary> getDocumentSummaries() { return List.copyOf(docSums.values()); } + List<ParsedField> getFields() { return List.copyOf(extraFields.values()); } + List<ParsedFieldSet> getFieldSets() { return List.copyOf(fieldSets.values()); } + List<ParsedIndex> getIndexes() { return List.copyOf(extraIndexes.values()); } + List<ParsedStruct> getStructs() { return List.copyOf(extraStructs.values()); } + List<String> getInherited() { return List.copyOf(inherited); } + List<String> getInheritedByDocument() { return List.copyOf(inheritedByDocument); } + List<ParsedRankProfile> getRankProfiles() { return List.copyOf(rankProfiles.values()); } + List<ParsedSchema> getResolvedInherits() { return List.copyOf(resolvedInherits.values()); } + List<ParsedSchema> getAllResolvedInherits() { return List.copyOf(allResolvedInherits.values()); } + List<RankProfile.Constant> getConstants() { return List.copyOf(constants.values()); } + + void addAnnotation(ParsedAnnotation annotation) { + String annName = annotation.name(); + verifyThat(! extraAnnotations.containsKey(annName), "already has annotation", annName); + extraAnnotations.put(annName, annotation); + } + + void addDocument(ParsedDocument document) { + verifyThat(myDocument == null, + "already has", myDocument, "so cannot add", document); + // TODO - disallow? 
+ // verifyThat(name().equals(document.name()), + // "schema " + name() + " can only contain document named " + name() + ", was: "+ document.name()); + this.myDocument = document; + } + + void setDocumentWithoutSchema() { this.documentWithoutSchema = true; } + + void addDocumentSummary(ParsedDocumentSummary docsum) { + String dsName = docsum.name(); + verifyThat(! docSums.containsKey(dsName), "already has document-summary", dsName); + docSums.put(dsName, docsum); + } + + void addField(ParsedField field) { + String fieldName = field.name(); + verifyThat(! extraFields.containsKey(fieldName), "already has field", fieldName); + extraFields.put(fieldName, field); + } + + void addFieldSet(ParsedFieldSet fieldSet) { + String fsName = fieldSet.name(); + verifyThat(! fieldSets.containsKey(fsName), "already has fieldset", fsName); + fieldSets.put(fsName, fieldSet); + } + + void addImportedField(String asFieldName, String refFieldName, String foregnFieldName) { + importedFields.add(new ImportedField(asFieldName, refFieldName, foregnFieldName)); + } + + void addIndex(ParsedIndex index) { + String idxName = index.name(); + verifyThat(! extraIndexes.containsKey(idxName), "already has index", idxName); + extraIndexes.put(idxName, index); + } + + void add(OnnxModel model) { + onnxModels.add(model); + } + + void addRankProfile(ParsedRankProfile profile) { + String rpName = profile.name(); + verifyThat(! rankProfiles.containsKey(rpName), "already has rank-profile", rpName); + rankProfiles.put(rpName, profile); + } + + void add(RankProfile.Constant constant) { + constants.put(constant.name(), constant); + } + + void addStruct(ParsedStruct struct) { + String sName = struct.name(); + verifyThat(! 
extraStructs.containsKey(sName), "already has struct", sName); + extraStructs.put(sName, struct); + } + + void enableRawAsBase64(boolean value) { + this.rawAsBase64 = value; + } + + void inherit(String other) { inherited.add(other); } + + void inheritByDocument(String other) { inheritedByDocument.add(other); } + + void setStemming(Stemming value) { + verifyThat((defaultStemming == null) || (defaultStemming == value), + "already has stemming", defaultStemming, "cannot also set", value); + defaultStemming = value; + } + + void resolveInherit(String name, ParsedSchema parsed) { + verifyThat(inherited.contains(name), "resolveInherit for non-inherited name", name); + verifyThat(name.equals(parsed.name()), "resolveInherit name mismatch for", name); + verifyThat(! resolvedInherits.containsKey(name), "double resolveInherit for", name); + resolvedInherits.put(name, parsed); + var old = allResolvedInherits.put("schema " + name, parsed); + verifyThat(old == null || old == parsed, "conflicting resolveInherit for", name); + } + + void resolveInheritByDocument(String name, ParsedSchema parsed) { + verifyThat(inheritedByDocument.contains(name), + "resolveInheritByDocument for non-inherited name", name); + var old = allResolvedInherits.put("document " + name, parsed); + verifyThat(old == null || old == parsed, "conflicting resolveInheritByDocument for", name); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedSorting.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedSorting.java new file mode 100644 index 00000000000..af84bbbb5bd --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedSorting.java @@ -0,0 +1,48 @@ + +package com.yahoo.schema.parser; + +import com.yahoo.schema.document.Sorting.Function; +import com.yahoo.schema.document.Sorting.Strength; + +import java.util.Optional; + +/** + * This class holds the extracted information after parsing a "sorting" + * block, using simple data structures as far as 
possible. Do not put + * advanced logic here! + * @author arnej27959 + **/ +class ParsedSorting extends ParsedBlock { + + private boolean ascending = true; + private Function sortFunction = null; + private Strength sortStrength = null; + private String sortLocale = null; + + ParsedSorting(String blockName, String blockType) { + super(blockName, blockType); + } + + boolean getAscending() { return this.ascending; } + boolean getDescending() { return ! this.ascending; } + Optional<Function> getFunction() { return Optional.ofNullable(sortFunction); } + Optional<Strength> getStrength() { return Optional.ofNullable(sortStrength); } + Optional<String> getLocale() { return Optional.ofNullable(sortLocale); } + + void setAscending() { this.ascending = true; } + + void setDescending() { this.ascending = false; } + + void setLocale(String value) { + verifyThat(sortLocale == null, "sorting already has locale", sortLocale); + this.sortLocale = value; + } + void setFunction(Function value) { + verifyThat(sortFunction == null, "sorting already has function", sortFunction); + this.sortFunction = value; + } + void setStrength(Strength value) { + verifyThat(sortStrength == null, "sorting already has strength", sortStrength); + this.sortStrength = value; + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedStruct.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedStruct.java new file mode 100644 index 00000000000..abe14b3689f --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedStruct.java @@ -0,0 +1,60 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * This class holds the extracted information after parsing a "struct" + * block, using simple data structures as far as possible. Do not put + * advanced logic here! 
+ * @author arnej27959 + **/ +public class ParsedStruct extends ParsedBlock { + private final List<String> inherited = new ArrayList<>(); + private final List<ParsedStruct> resolvedInherits = new ArrayList<>(); + private final Map<String, ParsedField> fields = new LinkedHashMap<>(); + private final ParsedType asParsedType; + private ParsedDocument ownedBy = null; + + public ParsedStruct(String name) { + super(name, "struct"); + this.asParsedType = ParsedType.fromName(name); + asParsedType.setVariant(ParsedType.Variant.STRUCT); + } + + List<ParsedField> getFields() { return List.copyOf(fields.values()); } + List<String> getInherited() { return List.copyOf(inherited); } + ParsedDocument getOwnerDoc() { return ownedBy; } + String getOwnerName() { return ownedBy.name(); } + List<ParsedStruct> getResolvedInherits() { + assert(inherited.size() == resolvedInherits.size()); + return List.copyOf(resolvedInherits); + } + + void addField(ParsedField field) { + String fieldName = field.name(); + verifyThat(! fields.containsKey(fieldName), "already has field", fieldName); + fields.put(fieldName, field); + } + + void inherit(String other) { + verifyThat(! 
name().equals(other), "cannot inherit from itself"); + inherited.add(other); + } + + void tagOwner(ParsedDocument document) { + verifyThat(ownedBy == null, "already owned by document "+ownedBy); + this.ownedBy = document; + } + + void resolveInherit(String name, ParsedStruct parsed) { + verifyThat(inherited.contains(name), "resolveInherit for non-inherited name", name); + verifyThat(name.equals(parsed.name()), "resolveInherit name mismatch for", name); + resolvedInherits.add(parsed); + } + +} + diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedSummaryField.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedSummaryField.java new file mode 100644 index 00000000000..38ee52c9d06 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedSummaryField.java @@ -0,0 +1,51 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import java.util.ArrayList; +import java.util.List; + +/** + * This class holds the extracted information after parsing a summary + * field declaration, either from "field" inside "document-summary" or + * "summary" inside "field". Using simple data structures as far as + * possible. Do not put advanced logic here! 
+ * @author arnej27959 + **/ +class ParsedSummaryField extends ParsedBlock { + + private ParsedType type; + private boolean isDyn = false; + private boolean isMEO = false; + private boolean isFull = false; + private boolean isBold = false; + private final List<String> sources = new ArrayList<>(); + private final List<String> destinations = new ArrayList<>(); + + ParsedSummaryField(String name) { + this(name, null); + } + + ParsedSummaryField(String name, ParsedType type) { + super(name, "summary field"); + this.type = type; + } + + ParsedType getType() { return type; } + List<String> getDestinations() { return List.copyOf(destinations); } + List<String> getSources() { return List.copyOf(sources); } + boolean getBolded() { return isBold; } + boolean getDynamic() { return isDyn; } + boolean getFull() { return isFull; } + boolean getMatchedElementsOnly() { return isMEO; } + + void addDestination(String dst) { destinations.add(dst); } + void addSource(String src) { sources.add(src); } + void setBold(boolean value) { this.isBold = value; } + void setDynamic() { this.isDyn = true; } + void setFull() { this.isFull = true; } + void setMatchedElementsOnly() { this.isMEO = true; } + void setType(ParsedType value) { + verifyThat(type == null, "Cannot change type from ", type, "to", value); + this.type = value; + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedType.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedType.java new file mode 100644 index 00000000000..9c3206a333a --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedType.java @@ -0,0 +1,224 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import com.yahoo.tensor.TensorType; + +/** + * This class holds the extracted information after parsing a type + * declaration (typically for a field). 
Since types can be complex, + * struct names (known or unknown), or even document names, this class + * is somewhat complicated. + * @author arnej27959 + **/ +class ParsedType { + public enum Variant { + NONE, + BUILTIN, + POSITION, + TENSOR, + ARRAY, WSET, MAP, + DOC_REFERENCE, + ANN_REFERENCE, + STRUCT, + DOCUMENT, + UNKNOWN + } + + private final String name; + private final ParsedType keyType; + private final ParsedType valType; + private final TensorType tensorType; + private Variant variant; + private boolean createIfNonExistent = false; + private boolean removeIfZero = false; + + public String toString() { + var buf = new StringBuilder(); + buf.append("[type ").append(variant).append("] {"); + switch (variant) { + case NONE: + break; + case BUILTIN: + buf.append(name); + break; + case POSITION: + buf.append(name); + break; + case TENSOR: + buf.append(tensorType.toString()); + break; + case ARRAY: buf + .append(" array<") + .append(valType.toString()) + .append("> "); + break; + case WSET: buf + .append(" weightedset<") + .append(valType.toString()) + .append(">"); + if (createIfNonExistent) buf.append(",createIfNonExistent"); + if (removeIfZero) buf.append(",removeIfZero"); + buf.append(" "); + break; + case MAP: buf + .append(" map<") + .append(keyType.toString()) + .append(",") + .append(valType.toString()) + .append("> "); + break; + case DOC_REFERENCE: buf + .append(" reference<") + .append(valType.toString()) + .append("> "); + break; + case ANN_REFERENCE: buf + .append(" ") + .append(toString()) + .append(" "); + break; + case STRUCT: + case DOCUMENT: + case UNKNOWN: + buf.append(" ").append(name).append(" "); + break; + } + buf.append("}"); + return buf.toString(); + } + + private static Variant guessVariant(String name) { + switch (name) { + case "bool": return Variant.BUILTIN; + case "byte": return Variant.BUILTIN; + case "int": return Variant.BUILTIN; + case "long": return Variant.BUILTIN; + case "string": return Variant.BUILTIN; + case "float": 
return Variant.BUILTIN; + case "double": return Variant.BUILTIN; + case "uri": return Variant.BUILTIN; + case "predicate": return Variant.BUILTIN; + case "raw": return Variant.BUILTIN; + case "tag": return Variant.BUILTIN; + case "position": return Variant.POSITION; + case "float16": return Variant.BUILTIN; + } + return Variant.UNKNOWN; + } + + public String name() { return name; } + public Variant getVariant() { return variant; } + public ParsedType mapKeyType() { assert(variant == Variant.MAP); return keyType; } + public ParsedType mapValueType() { assert(variant == Variant.MAP); return valType; } + public ParsedType nestedType() { assert(variant == Variant.ARRAY || variant == Variant.WSET); assert(valType != null); return valType; } + public boolean getCreateIfNonExistent() { assert(variant == Variant.WSET); return this.createIfNonExistent; } + public boolean getRemoveIfZero() { assert(variant == Variant.WSET); return this.removeIfZero; } + public ParsedType getReferencedDocumentType() { assert(variant == Variant.DOC_REFERENCE); return valType; } + public TensorType getTensorType() { assert(variant == Variant.TENSOR); return tensorType; } + + public String getNameOfReferencedAnnotation() { + assert(variant == Variant.ANN_REFERENCE); + String prefix = "annotationreference<"; + int fromPos = prefix.length(); + int toPos = name.length() - 1; + return name.substring(fromPos, toPos); + } + + private ParsedType(String name, Variant variant) { + this(name, variant, null, null, null); + } + private ParsedType(String name, Variant variant, ParsedType vt) { + this(name, variant, null, vt, null); + } + private ParsedType(String name, Variant variant, ParsedType kt, ParsedType vt) { + this(name, variant, kt, vt, null); + } + private ParsedType(String name, Variant variant, ParsedType kt, ParsedType vt, TensorType tType) { + this.name = name; + this.variant = variant; + this.keyType = kt; + this.valType = vt; + this.tensorType = tType; + } + + static ParsedType 
mapType(ParsedType kt, ParsedType vt) { + assert(kt != null); + assert(vt != null); + String name = "map<" + kt.name() + "," + vt.name() + ">"; + return new ParsedType(name, Variant.MAP, kt, vt); + } + static ParsedType arrayOf(ParsedType vt) { + assert(vt != null); + return new ParsedType("array<" + vt.name() + ">", Variant.ARRAY, vt); + } + static ParsedType wsetOf(ParsedType vt) { + assert(vt != null); + if (vt.getVariant() != Variant.BUILTIN) { + throw new IllegalArgumentException("weightedset of complex type '" + vt + "' is not supported"); + } + switch (vt.name()) { + // allowed types: + case "bool": + case "byte": + case "int": + case "long": + case "string": + case "uri": + break; + case "predicate": + case "raw": + case "tag": + throw new IllegalArgumentException("weightedset of complex type '" + vt + "' is not supported"); + case "float16": + case "float": + case "double": + /* TODO Vespa 8: + throw new IllegalArgumentException("weightedset of inexact type '" + vt + "' is not supported"); + */ + break; + default: + throw new IllegalArgumentException("weightedset of unknown type '" + vt + "' is not supported"); + } + return new ParsedType("weightedset<" + vt.name() + ">", Variant.WSET, vt); + } + static ParsedType documentRef(ParsedType docType) { + assert(docType != null); + return new ParsedType("reference<" + docType.name + ">", Variant.DOC_REFERENCE, docType); + } + static ParsedType annotationRef(String name) { + return new ParsedType("annotationreference<" + name + ">", Variant.ANN_REFERENCE); + } + static ParsedType tensorType(TensorType tType) { + assert(tType != null); + return new ParsedType(tType.toString(), Variant.TENSOR, null, null, tType); + } + static ParsedType fromName(String name) { + return new ParsedType(name, guessVariant(name)); + } + static ParsedType documentType(String name) { + return new ParsedType(name, Variant.DOCUMENT); + } + + void setCreateIfNonExistent(boolean value) { + if (variant != Variant.WSET) { + throw new 
IllegalArgumentException("CreateIfNonExistent only valid for weightedset, not " + variant); + } + this.createIfNonExistent = value; + } + + void setRemoveIfZero(boolean value) { + if (variant != Variant.WSET) { + throw new IllegalArgumentException("RemoveIfZero only valid for weightedset, not " + variant); + } + this.removeIfZero = value; + } + + void setVariant(Variant value) { + if (variant == value) return; // already OK + if (variant != Variant.UNKNOWN) { + throw new IllegalArgumentException("setVariant(" + value + ") only valid for UNKNOWN, not: " + variant); + } + // maybe even more checking would be useful + this.variant = value; + } +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/SimpleCharStream.java b/config-model/src/main/java/com/yahoo/schema/parser/SimpleCharStream.java new file mode 100644 index 00000000000..0a53e0477ac --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/SimpleCharStream.java @@ -0,0 +1,16 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.schema.parser; + +import com.yahoo.javacc.FastCharStream; + +/** + * @author Simon Thoresen Hult + */ +public class SimpleCharStream extends FastCharStream implements com.yahoo.schema.parser.CharStream, + com.yahoo.vespa.indexinglanguage.parser.CharStream { + + public SimpleCharStream(String input) { + super(input); + } + +} diff --git a/config-model/src/main/java/com/yahoo/schema/parser/Utils.java b/config-model/src/main/java/com/yahoo/schema/parser/Utils.java new file mode 100644 index 00000000000..cdb299c92df --- /dev/null +++ b/config-model/src/main/java/com/yahoo/schema/parser/Utils.java @@ -0,0 +1,15 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
/**
 * Small static helpers for the schema parser.
 *
 * @author bjorncs
 */
class Utils {

    private Utils() {}

    // Kept in a separate class: the original note here says javacc does not accept
    // Java code using lambdas, so helpers live outside the grammar file.

    /** Returns how many times ch occurs in str. */
    static int count(String str, char ch) {
        int occurrences = 0;
        for (int i = 0; i < str.length(); i++) {
            if (str.charAt(i) == ch) {
                occurrences++;
            }
        }
        return occurrences;
    }

}