// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

// Generator options. These must be on their own lines: a '//' comment runs to the end of the
// physical line, so nothing else may share a line with the licence header above.
options {
    UNICODE_INPUT    = true;
    CACHE_TOKENS     = false;
    DEBUG_PARSER     = false;
    ERROR_REPORTING  = true;
    FORCE_LA_CHECK   = true;
    USER_CHAR_STREAM = true;
}

PARSER_BEGIN(SchemaParser)

package com.yahoo.schema.parser;

import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.config.model.api.ModelContext;
import com.yahoo.language.Linguistics;
import com.yahoo.language.process.Embedder;
import com.yahoo.language.simple.SimpleLinguistics;
import com.yahoo.search.query.ranking.Diversity;
import com.yahoo.schema.DistributableResource;
import com.yahoo.schema.OnnxModel;
import com.yahoo.schema.RankProfile.DiversitySettings;
import com.yahoo.schema.RankProfile.MatchPhaseSettings;
import com.yahoo.schema.RankProfile;
import com.yahoo.schema.Schema;
import com.yahoo.schema.document.Case;
import com.yahoo.schema.document.MatchType;
import com.yahoo.schema.document.MatchAlgorithm;
import com.yahoo.schema.document.HnswIndexParams;
import com.yahoo.schema.document.Sorting;
import com.yahoo.schema.document.Stemming;
import com.yahoo.schema.document.SDField;
import com.yahoo.schema.FeatureNames;
import com.yahoo.schema.fieldoperation.IndexingOperation;
import com.yahoo.search.schema.RankProfile.InputType;
import com.yahoo.searchlib.rankingexpression.FeatureList;
import com.yahoo.searchlib.rankingexpression.Reference;
import com.yahoo.searchlib.rankingexpression.evaluation.TensorValue;
import com.yahoo.searchlib.rankingexpression.evaluation.Value;
import com.yahoo.tensor.Tensor;
import com.yahoo.tensor.IndexedTensor;
import com.yahoo.tensor.MixedTensor;
import com.yahoo.tensor.TensorType;
import com.yahoo.tensor.TensorAddress;

import java.util.Optional;
import java.util.Map;
import java.util.List;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.logging.Level;

/**
 * The schema parser
 *
 * NOTE: When this grammar is changed, also change
integration/intellij/src/main/bnf/ai/vespa/intellij/schema/parser/sd.bnf
 *
 * @author bratseth
 */
public class SchemaParser {

    private DeployLogger deployLogger;
    private ModelContext.Properties properties;

    /**
     * Creates a parser.
     *
     * @param stream       the character stream to parse
     * @param deployLogger the logger that deprecation warnings are reported to
     * @param properties   the model context properties kept by this parser
     */
    public SchemaParser(SimpleCharStream stream, DeployLogger deployLogger, ModelContext.Properties properties) {
        this(stream);
        this.deployLogger = deployLogger;
        this.properties = properties;
    }

    /**
     * Consumes an indexing language script which will use the simple linguistics implementation
     * for testing, by taking input from the current input stream.
     *
     * @param multiline Whether or not to allow multi-line expressions.
     */
    @SuppressWarnings("deprecation")
    private IndexingOperation newIndexingOperation(boolean multiline) throws ParseException {
        return newIndexingOperation(multiline, new SimpleLinguistics(), Embedder.throwsOnUse.asMap());
    }

    /**
     * Consumes an indexing language script from the current input stream.
     *
     * @param multiline   Whether or not to allow multi-line expressions.
     * @param linguistics What to use for tokenizing.
     * @param embedders   The embedders handed on to the indexing script parser.
     */
    private IndexingOperation newIndexingOperation(boolean multiline, Linguistics linguistics,
                                                   Map<String, Embedder> embedders) throws ParseException {
        SimpleCharStream input = (SimpleCharStream)token_source.input_stream;
        if (token.next != null) {
            // A lookahead token has already been read: push its characters back so that
            // the indexing script parser gets to see them.
            input.backup(token.next.image.length());
        }
        try {
            return IndexingOperation.fromStream(input, multiline, linguistics, embedders);
        } finally {
            // Drop the cached lookahead; the stream position has moved underneath this parser.
            token.next = null;
            jj_ntk = -1;
        }
    }

    /**
     * Parses the given token image as a ranking expression feature list.
     *
     * @param image The token image to parse.
     * @return The consumed feature list.
     * @throws ParseException Thrown if the image could not be parsed.
*/
    private FeatureList getFeatureList(String image) throws ParseException {
        try {
            return new FeatureList(image);
        }
        catch (com.yahoo.searchlib.rankingexpression.parser.ParseException e) {
            // Rethrow as this parser's own ParseException, keeping the original as the cause.
            throw (ParseException) new ParseException("Could not parse feature list '" + image + "' at line " +
                                                      token_source.input_stream.getBeginLine() + ", column " +
                                                      token_source.input_stream.getBeginColumn() + ".").initCause(e);
        }
    }

}

PARSER_END(SchemaParser)


// --------------------------------------------------------------------------------
//
// Token declarations.
//
// --------------------------------------------------------------------------------

// Declare white space characters. These do not include newline because it has
// special meaning in several of the production rules.
SKIP :
{
  " " | "\t" | "\r" | "\f"
}

// Declare all tokens to be recognized. When a word token is added it MUST be
// added to the identifier() production rule.
TOKEN :
{
  < NL: "\n" >
| < ANNOTATION: "annotation" >
| < ANNOTATIONREFERENCE: "annotationreference" >
| < SCHEMA: "schema" >
| < SEARCH: "search" >
| < DIVERSITY: "diversity" >
| < MIN_GROUPS: "min-groups" >
| < CUTOFF_FACTOR: "cutoff-factor" >
| < CUTOFF_STRATEGY: "cutoff-strategy" >
| < LOOSE: "loose" >
| < STRICT: "strict" >
| < DOCUMENT: "document" >
| < OPERATION: "operation" >
| < ON_MATCH: "on-match" >
| < ON_FIRST_PHASE: "on-first-phase" >
| < ON_SECOND_PHASE: "on-second-phase" >
| < ON_SUMMARY: "on-summary" >
| < STRUCT: "struct" >
| < INHERITS: "inherits" >
| < FIELD: "field" >
| < FIELDS: "fields" >
| < FIELDSET: "fieldset" >
| < STRUCT_FIELD: "struct-field" >
| < IMPORT: "import" >
| < AS: "as" >
| < INDEXING: "indexing" >
| < SUMMARY_TO: "summary-to" >
| < DOCUMENT_SUMMARY: "document-summary" >
| < RANK_TYPE: "rank-type" >
| < WEIGHT: "weight" >
| < TYPE: "type" >
| < INDEX: "index" >
| < INPUTS: "inputs">
| < MTOKEN: "token" >
| < TEXT: "text" >
| < WORD: "word" >
| < GRAM: "gram" >
| < GRAM_SIZE: "gram-size" >
| < MAX_LENGTH: "max-length" >
| < MAX_OCCURRENCES: "max-occurrences" >
| < PREFIX: "prefix" >
| < SUBSTRING: "substring" >
| < SUFFIX: "suffix" >
| < CONSTANT: "constant">
| < ONNX_MODEL: "onnx-model">
| < INTRAOP_THREADS: "intraop-threads">
| < INTEROP_THREADS: "interop-threads">
| < GPU_DEVICE: "gpu-device">
| < EXECUTION_MODE: "execution-mode">
| < PARALLEL: "parallel">
| < SEQUENTIAL: "sequential">
| < MODEL: "model" >
| < MUTATE: "mutate" >
| < QUERY: "query" >
| < RANK_PROFILE: "rank-profile" >
| < RAW_AS_BASE64_IN_SUMMARY: "raw-as-base64-in-summary" >
| < SUMMARY: "summary" >
| < FULL: "full" >
| < STATIC: "static" >
| < DYNAMIC: "dynamic" >
| < TOKENS: "tokens" >
| < MATCHED_ELEMENTS_ONLY: "matched-elements-only" >
| < SSCONTEXTUAL: "contextual" >
| < SSOVERRIDE: "override" >
| < SSTITLE: "title" >
| < SSURL: "url" >
| < PROPERTIES: "properties" >
| < ATTRIBUTE: "attribute" >
| < SORTING: "sorting" >
| < DICTIONARY: "dictionary" >
| < ASCENDING: "ascending" >
| < DESCENDING: "descending" >
| < UCA: "uca" >
| < RAW: "raw" >
| < LOWERCASE: "lowercase" >
| < FUNCTION: "function" >
| < LOCALE: "locale" >
| < STRENGTH: "strength" >
| < PRIMARY: "primary" >
| < SECONDARY: "secondary" >
| < TERTIARY: "tertiary" >
| < QUATERNARY: "quaternary" >
| < IDENTICAL: "identical" >
| < STEMMING: "stemming" >
| < NORMALIZING: "normalizing" >
| < HASH: "hash" >
| < BTREE: "btree" >
| < CASED: "cased" >
| < UNCASED: "uncased" >
| < BOLDING: "bolding" >
| < NONE: "none" >
| < ON: "on" >
| < OFF: "off" >
| < TRUE: "true" >
| < FALSE: "false" >
| < SYMMETRIC: "symmetric" >
| < QUERY_COMMAND: "query-command" >
| < ALIAS: "alias" >
| < MATCH: "match" >
| < RANK: "rank" >
| < LITERAL: "literal" >
| < EXACT: "exact" >
| < FILTER: "filter" >
| < NORMAL: "normal" >
| < EXACT_TERMINATOR: "exact-terminator" >
| < IGNORE_DEFAULT_RANK_FEATURES: "ignore-default-rank-features" >
| < ID: "id" >
| < SOURCE: "source" >
| < TO: "to" >
| < DIRECT: "direct" >
| < FROM_DISK: "from-disk" >
| <
OMIT_SUMMARY_FEATURES: "omit-summary-features" >
| < ALWAYS: "always" >
| < ON_DEMAND: "on-demand" >
| < NEVER: "never" >
| < ENABLE_BIT_VECTORS: "enable-bit-vectors" >
| < ENABLE_ONLY_BIT_VECTOR: "enable-only-bit-vector" >
| < FAST_ACCESS: "fast-access" >
| < MUTABLE: "mutable" >
| < PAGED: "paged" >
| < FAST_RANK: "fast-rank" >
| < FAST_SEARCH: "fast-search" >
| < TENSOR_TYPE: "tensor" ("<" (~["<",">"])+ ">")? "(" (~["(",")"])* ")" >
// The tensor value tokens reuse the private brace-matching tokens declared further down.
| < TENSOR_VALUE_SL: "value" (" ")* ":" (" ")* ("{"<BRACE_SL_LEVEL_1>) ("\n")? >
| < TENSOR_VALUE_ML: "value" (<SEARCHLIB_SKIP>)? "{" (["\n"," "])* ("{"<BRACE_ML_LEVEL_1>) (["\n"," "])* "}" ("\n")? >
| < LBRACE: "{" >
| < RBRACE: "}" >
| < COLON: ":" >
| < DOT: "." >
| < COMMA: "," >
| < ARRAY: "array" >
| < WEIGHTEDSET: "weightedset" >
| < MAP: "map" >
| < REFERENCE: "reference" >
| < QUESTIONMARK: "?" >
| < CREATE_IF_NONEXISTENT: "create-if-nonexistent" >
| < REMOVE_IF_ZERO: "remove-if-zero" >
| < MATCH_PHASE: "match-phase" >
| < EVALUATION_POINT: "evaluation-point" >
| < PRE_POST_FILTER_TIPPING_POINT: "pre-post-filter-tipping-point" >
| < ORDER: "order" >
| < MAX_FILTER_COVERAGE: "max-filter-coverage" >
| < MAX_HITS: "max-hits" >
| < FIRST_PHASE: "first-phase" >
| < SECOND_PHASE: "second-phase" >
| < GLOBAL_PHASE: "global-phase" >
| < MACRO: "macro" >
| < INLINE: "inline" >
| < ARITY: "arity" >
| < LOWER_BOUND: "lower-bound" >
| < UPPER_BOUND: "upper-bound" >
| < DENSE_POSTING_LIST_THRESHOLD: "dense-posting-list-threshold" >
| < ENABLE_BM25: "enable-bm25" >
| < HNSW: "hnsw" >
| < MAX_LINKS_PER_NODE: "max-links-per-node" >
| < DOUBLE_KEYWORD: "double" >
| < FLOAT_KEYWORD: "float" >
| < LONG_KEYWORD: "long" >
| < STRING_KEYWORD: "string" >
| < DISTANCE_METRIC: "distance-metric" >
| < NEIGHBORS_TO_EXPLORE_AT_INSERT: "neighbors-to-explore-at-insert" >
| < MULTI_THREADED_INDEXING: "multi-threaded-indexing" >
| < MATCHFEATURES_SL: "match-features" (" ")* ":" (~["}","\n"])* ("\n")? >
| < MATCHFEATURES_ML: "match-features" (<SEARCHLIB_SKIP>)? "{" (~["}"])* "}" >
// NOTE(review): the name reference after "inherits " is restored as IDENTIFIER_WITH_DASH - confirm against upstream.
| < MATCHFEATURES_ML_INHERITS: "match-features inherits " (<IDENTIFIER_WITH_DASH>) (<SEARCHLIB_SKIP>)? "{" (~["}"])* "}" >
| < SUMMARYFEATURES_SL: "summary-features" (" ")* ":" (~["}","\n"])* ("\n")? >
| < SUMMARYFEATURES_ML: "summary-features" (<SEARCHLIB_SKIP>)? "{" (~["}"])* "}" >
| < SUMMARYFEATURES_ML_INHERITS: "summary-features inherits " (<IDENTIFIER_WITH_DASH>) (<SEARCHLIB_SKIP>)? "{" (~["}"])* "}" >
| < RANKFEATURES_SL: "rank-features" (" ")* ":" (~["}","\n"])* ("\n")? >
| < RANKFEATURES_ML: "rank-features" (<SEARCHLIB_SKIP>)? "{" (~["}"])* "}" >
| < EXPRESSION_SL: "expression" (" ")* ":" (("{"<BRACE_SL_LEVEL_1>)|<BRACE_SL_CONTENT>)* ("\n")? >
| < EXPRESSION_ML: "expression" (<SEARCHLIB_SKIP>)? "{" (("{"<BRACE_ML_LEVEL_1>)|<BRACE_ML_CONTENT>)* "}" >
// Private helper tokens matching nested braces, up to three levels deep.
// The _SL variants stop at a newline, the _ML variants do not.
| < #BRACE_SL_LEVEL_1: (("{"<BRACE_SL_LEVEL_2>)|<BRACE_SL_CONTENT>)* "}" >
| < #BRACE_SL_LEVEL_2: (("{"<BRACE_SL_LEVEL_3>)|<BRACE_SL_CONTENT>)* "}" >
| < #BRACE_SL_LEVEL_3: <BRACE_SL_CONTENT> "}" >
| < #BRACE_SL_CONTENT: (~["{","}","\n"])* >
| < #BRACE_ML_LEVEL_1: (("{"<BRACE_ML_LEVEL_2>)|<BRACE_ML_CONTENT>)* "}" >
| < #BRACE_ML_LEVEL_2: (("{"<BRACE_ML_LEVEL_3>)|<BRACE_ML_CONTENT>)* "}" >
| < #BRACE_ML_LEVEL_3: <BRACE_ML_CONTENT> "}" >
| < #BRACE_ML_CONTENT: (~["{","}"])* >
| < #SEARCHLIB_SKIP: ([" ","\f","\n","\r","\t"])+ >
| < RANK_PROPERTIES: "rank-properties" >
| < RERANK_COUNT: "rerank-count" >
| < NUM_THREADS_PER_SEARCH: "num-threads-per-search" >
| < MIN_HITS_PER_THREAD: "min-hits-per-thread" >
| < NUM_SEARCH_PARTITIONS: "num-search-partitions" >
| < TERMWISE_LIMIT: "termwise-limit" >
| < POST_FILTER_THRESHOLD: "post-filter-threshold" >
| < APPROXIMATE_THRESHOLD: "approximate-threshold" >
| < TARGET_HITS_MAX_ADJUSTMENT_FACTOR: "target-hits-max-adjustment-factor" >
| < KEEP_RANK_COUNT: "keep-rank-count" >
| < RANK_SCORE_DROP_LIMIT: "rank-score-drop-limit" >
| < CONSTANTS: "constants" >
| < FILE: "file" >
| < URI: "uri" >
| < IDENTIFIER: ["a"-"z","A"-"Z", "_"] (["a"-"z","A"-"Z","0"-"9","_"])* >
| < IDENTIFIER_WITH_DASH: ["a"-"z","A"-"Z", "_"] (["a"-"z","A"-"Z","0"-"9","_","-"])* >
| < DOUBLEQUOTEDSTRING: "\"" ( ~["\""] )* "\"" >
| < SINGLEQUOTEDSTRING: "'" ( ~["'"] )* "'" >
| < CONTEXT: ["a"-"z","A"-"Z"] (["a"-"z", "A"-"Z", "0"-"9"])* >
| < DOUBLE: ("-")? (["0"-"9"])+ "." (["0"-"9"])+ >
| < INTEGER: ("-")? (["0"-"9"])+ >
| < LONG: ("-")?
(["0"-"9"])+"L" >
| < STRING: (["a"-"z","A"-"Z","_","0"-"9","."])+ >
| < FILE_PATH: ["a"-"z","A"-"Z", "_"] (["a"-"z","A"-"Z","0"-"9","_","-", "/", "."])+ >
| < HTTP: ["h","H"] ["t","T"] ["t","T"] ["p","P"] (["s","S"])? >
// NOTE(review): the <HTTP> <COLON> prefix and the <IDENTIFIER> after "$" are restored
// token references - confirm both against the upstream grammar.
| < URI_PATH: <HTTP> <COLON> ("//")? (["a"-"z","A"-"Z","0"-"9","_","-", "/", ".",":"])+ >
| < LESSTHAN: "<" >
| < GREATERTHAN: ">" >
| < VARIABLE: "$" <IDENTIFIER> >
| < ONNX_INPUT_SL: "input" (" ")* (<IDENTIFIER>|<DOUBLEQUOTEDSTRING>) (" ")* ":" (" ")* (~["\n"])* ("\n")? >
| < ONNX_OUTPUT_SL: "output" (" ")* (<IDENTIFIER>|<DOUBLEQUOTEDSTRING>) (" ")* ":" (" ")* (~["\n"])* ("\n")? >
}

// Declare a special skip token for comments.
// NOTE(review): the comment token definition is restored - confirm against the upstream grammar.
SPECIAL_TOKEN :
{
  <SINGLE_LINE_COMMENT: "#" (~["\n","\r"])* >
}


// --------------------------------------------------------------------------------
//
// Production rules.
//
// --------------------------------------------------------------------------------

/**
 * The rule consumes any schema and returns the corresponding object. This is the only production that should
 * ever consume leading newlines.
 *
 * @return the schema object
 */
ParsedSchema schema() :
{
    ParsedSchema schema;
}
{
    (<NL>)* (schema = rootSchema() | schema = rootDocument())
    { return schema; }
}

/**
 * This rule consumes a proper schema block. This and rootDocument() are the only rules that should ever consume
 * trailing newline tokens.
 *
 * @return the schema definition object.
 */
ParsedSchema rootSchema() :
{
    String name;
    String inherited = null;
    ParsedSchema schema;
}
{
    ( ( <SCHEMA> | <SEARCH> ) name = identifier() ( <INHERITS> inherited = identifier() )? {
        schema = new ParsedSchema(name);
        if (inherited != null) schema.inherit(inherited);
      }
      lbrace() (rootSchemaItem(schema) (<NL>)*)* <RBRACE> (<NL>)* <EOF> )
    { return schema; }
}

/**
 * Consumes an element of a schema block. This and rootSearch() are the only rules that should ever consume
 * trailing newline tokens.
 *
 * @param schema the schema object to modify.
*/
void rootSchemaItem(ParsedSchema schema) : { }
{
    ( document(schema)
    | rawAsBase64(schema)
    | searchStemming(schema)
    | importField(schema)
    | rankingConstant(schema)    // Deprecated: TODO: Vespa > 8: Emit warning
    | rankProfile(schema)
    | documentSummary(schema)
    | fieldOutsideDoc(schema)
    | indexOutsideDoc(schema)
    | structOutside(schema)
    | annotationOutside(schema)
    | fieldSet(schema)
    | onnxModelInSchema(schema)  // Deprecated: TODO: Vespa > 8: Emit warning
    )
}

/**
 * Consumes a schema definition that contains only documents to be used for inheritance, etc.
 *
 * @return the schema definition object, or null if no document was consumed.
 */
ParsedSchema rootDocument() :
{
    ParsedSchema schema = null;
}
{
    ( (schema = rootDocumentItem(schema) (<NL>)*)* <EOF> )
    { return schema; }
}

/**
 * Consumes a single item from within a root document node.
 *
 * @param schema the schema object to modify, or null to have one created
 *               with the name of the consumed document.
 * @return the schema the document was added to.
 */
ParsedSchema rootDocumentItem(ParsedSchema schema) :
{
    ParsedDocument doc = null;
}
{
    ( doc = namedDocument()
      {
          if (schema == null) schema = new ParsedSchema(doc.name());
          schema.addDocument(doc);
          schema.setDocumentWithoutSchema();
          return schema;
      }
    )
}

/**
 * Consumes a document element. The name defaults to the schema's name, but may be set.
 *
 * @param schema the schema object to add content to.
 */
void document(ParsedSchema schema) :
{
    String name = schema.name();
    ParsedDocument document;
}
{
    ( <DOCUMENT> (name = identifier())? (<NL>)* { document = new ParsedDocument(name); }
      [ inheritsDocument(document) (<NL>)* ]
      <LBRACE> (<NL>)* (documentBody(document) (<NL>)*)* <RBRACE> )
    { schema.addDocument(document); }
}

/**
 * Consumes a document element, explicitly named
 *
 * @return the consumed document.
 */
ParsedDocument namedDocument() :
{
    String name;
    ParsedDocument document;
}
{
    ( <DOCUMENT> name = identifier() (<NL>)* { document = new ParsedDocument(name); }
      [ inheritsDocument(document) (<NL>)* ]
      <LBRACE> (<NL>)* (documentBody(document) (<NL>)*)* <RBRACE> )
    { return document; }
}

/**
 * Consumes a document body block
 *
 * @param document the document type to modify.
*/
void documentBody(ParsedDocument document) : { }
{
    ( annotation(document)
    | structInside(document)
    | fieldInsideDoc(document)
    )
}

/**
 * Consumes a raw-as-base64-in-summary statement. The setting is enabled unless
 * it is explicitly given the value false.
 *
 * @param schema the schema to modify.
 */
void rawAsBase64(ParsedSchema schema) :
{
    boolean enabled = false;
}
{
    <RAW_AS_BASE64_IN_SUMMARY>
    { enabled = true; }
    [ <COLON> ( <TRUE> | ( <FALSE> { enabled = false; } ) ) ]
    { schema.enableRawAsBase64(enabled); }
}

/**
 * Consumes struct inheritance
 *
 * @param struct The struct type to modify.
 */
void inheritsStruct(ParsedStruct struct) :
{
    String name;
}
{
    <INHERITS> name = identifier() { struct.inherit(name); }
    ( <COMMA> name = identifier() { struct.inherit(name); } )*
}

/**
 * Consumes a document inheritance statement.
 *
 * @param document The document type to modify.
 */
void inheritsDocument(ParsedDocument document) :
{
    String name;
}
{
    <INHERITS> name = identifier() { document.inherit(name); }
    ( <COMMA> name = identifier() { document.inherit(name); } )*
}

/**
 * Consumes a field block from within a document element.
 *
 * @param document the document type to modify
 */
void fieldInsideDoc(ParsedDocument document) :
{
    ParsedField field;
}
{
    field = field() { document.addField(field); }
}

/**
 * Consumes a field block from outside a document element.
 *
 * @param schema the schema to modify
 */
void fieldOutsideDoc(ParsedSchema schema) :
{
    ParsedField field;
}
{
    field = field() { schema.addField(field); }
}

/**
 * Consumes a field block
 *
 * @return the consumed field.
 * @throws IllegalArgumentException if the field name is a reserved name.
 */
ParsedField field() :
{
    String name;
    ParsedField field;
    ParsedType type;
}
{
    <FIELD> name = identifier() <TYPE> type = dataType()
    {
        if (name != null && Schema.isReservedName(name.toLowerCase())) {
            throw new IllegalArgumentException("Reserved name '" + name + "' can not be used as a field name.");
        }
        field = new ParsedField(name, type);
    }
    lbrace() (fieldBody(field) (<NL>)*)* <RBRACE>
    { return field; }
}

/** Consumes a fieldset block */
void fieldSet(ParsedSchema schema) :
{
    String name;
    String queryCommand;
    ParsedFieldSet fieldSet;
}
{
    <FIELDSET> name = identifier() lbrace()
    { fieldSet = new ParsedFieldSet(name); }
    ((
        ( <FIELDS> <COLON> name = identifier() { fieldSet.addField(name); }
          ( <COMMA> name = identifier() { fieldSet.addField(name); } )* )
      |
        ( <QUERY_COMMAND> <COLON> (queryCommand = identifierWithDash() | queryCommand = quotedString()))
        { fieldSet.addQueryCommand(queryCommand); }
      |
        matchSettings(fieldSet.matchSettings())
    ) (<NL>)* )+ <RBRACE>
    { schema.addFieldSet(fieldSet); }
}

/**
 * This rule consumes a annotation block from within a schema element.
 *
 * @param schema the schema object to add content to
 */
void annotationOutside(ParsedSchema schema) :
{
    String name;
    ParsedAnnotation type;
}
{
    <ANNOTATION> name = identifier()
    { type = new ParsedAnnotation(name); }
    [ inheritsAnnotation(type) (<NL>)* ]
    lbrace() annotationBody(type) <RBRACE>
    { schema.addAnnotation(type); }
}

/**
 * Consumes an annotation block from within a document element
 *
 * @param document the document object to add content to
 */
void annotation(ParsedDocument document) :
{
    String name;
    ParsedAnnotation type;
}
{
    <ANNOTATION> name = identifier()
    { type = new ParsedAnnotation(name); }
    [ inheritsAnnotation(type) (<NL>)* ]
    lbrace() annotationBody(type) <RBRACE>
    { document.addAnnotation(type); }
}

/**
 * Consumes the fields of an annotation body block. The struct holding them is
 * only set on the annotation if at least one field was consumed.
 *
 * @param type the annotation to modify.
 */
void annotationBody(ParsedAnnotation type) :
{
    ParsedStruct struct = new ParsedStruct("annotation." + type.name());
    boolean seenField = false;
}
{
    (structFieldDefinition(struct) { seenField = true; } (<NL>)*)*
    { if (seenField) type.setStruct(struct); }
}

/**
 * Consumes an annotation inheritance statement, naming a single parent.
 *
 * @param annotation the annotation to modify.
 */
void inheritsAnnotation(ParsedAnnotation annotation) :
{
    String name;
}
{
    <INHERITS> name = identifier() { annotation.inherit(name); }
}

/**
 * This rule consumes a struct block from within a document element.
 * @param document the document object to add content to
 */
void structInside(ParsedDocument document) :
{
    ParsedStruct struct;
}
{
    struct = structDefinition() { document.addStruct(struct); }
}

/**
 * This rule consumes a struct block from within a schema element.
* @param schema the schema object to add content to
 */
void structOutside(ParsedSchema schema) :
{
    ParsedStruct struct;
}
{
    struct = structDefinition() { schema.addStruct(struct); }
}

/**
 * This rule consumes a struct declaration block
 *
 * @return the consumed struct.
 */
ParsedStruct structDefinition() :
{
    String name;
    ParsedStruct struct;
}
{
    ( <STRUCT> name = identifier() (<NL>)* { struct = new ParsedStruct(name); }
      [ inheritsStruct(struct) (<NL>)* ]
      lbrace() (structFieldDefinition(struct) (<NL>)*)* <RBRACE> )
    { return struct; }
}

/**
 * This rule consumes a data type block from within a field element.
 * The old style "name[]" array syntax is accepted, but logged as deprecated.
 *
 * @return the consumed data type
 */
ParsedType dataType() :
{
    String typeName = null;
    boolean isArrayOldStyle = false;
    ParsedType mapType = null;
    ParsedType arrayType = null;
    ParsedType wsetType = null;
    TensorType tensorType;
    ParsedType referenceType;
}
{
    // Every keyword here may also be used as a plain type name, so each
    // alternative needs explicit lookahead to tell the two apart.
    ( LOOKAHEAD(<ARRAY> <LESSTHAN>)
      ( <ARRAY> <LESSTHAN> arrayType = dataType() <GREATERTHAN> { return ParsedType.arrayOf(arrayType); } )
    | LOOKAHEAD(<WEIGHTEDSET> <LESSTHAN>)
      ( <WEIGHTEDSET> <LESSTHAN> wsetType = dataType() <GREATERTHAN> { return ParsedType.wsetOf(wsetType); } )
    | LOOKAHEAD(<MAP> <LESSTHAN>)
      ( mapType = mapDataType() { return mapType; } )
    | LOOKAHEAD(<ANNOTATIONREFERENCE> <LESSTHAN>)
      ( mapType = annotationRefDataType() { return mapType; } )
    | LOOKAHEAD(<TENSOR_TYPE>)
      ( tensorType = tensorType("Field type") { return ParsedType.tensorType(tensorType); } )
    | LOOKAHEAD(<REFERENCE>)
      ( <REFERENCE> <LESSTHAN> referenceType = referenceType() <GREATERTHAN> { return ParsedType.documentRef(referenceType); } )
    | ( typeName = identifier() ["[]" { isArrayOldStyle = true; }] )
    )
    {
        ParsedType type = ParsedType.fromName(typeName);
        if (isArrayOldStyle) {
            deployLogger.logApplicationPackage(Level.WARNING, "Data type syntax '" + typeName + "[]' is deprecated, use 'array<" + typeName + ">' instead.");
            type = ParsedType.arrayOf(type);
        }
        return type;
    }
}

/**
 * Consumes the document type name inside a reference type.
 *
 * @return the referenced document type.
 */
ParsedType referenceType() :
{
    String documentName;
}
{
    ( documentName = identifier() )
    { return ParsedType.documentType(documentName); }
}

/**
 * Consumes an annotationreference type.
 *
 * @return an annotation reference to the named annotation.
 */
ParsedType annotationRefDataType() :
{
    String targetName;
}
{
    ( <ANNOTATIONREFERENCE> <LESSTHAN> targetName = identifier() <GREATERTHAN> )
    { return ParsedType.annotationRef(targetName); }
}

/**
 * Consumes a map type with its key and value types.
 *
 * @return the consumed map type.
 */
ParsedType mapDataType() :
{
    ParsedType keyType;
    ParsedType valType;
}
{
    ( <MAP> <LESSTHAN> keyType = dataType() <COMMA> valType = dataType() <GREATERTHAN> )
    { return ParsedType.mapType(keyType, valType); }
}

/**
 * This rule consumes a field block of a struct body.
 *
 * @param struct The struct to modify.
 * @throws IllegalArgumentException if the field name is a reserved name.
 */
void structFieldDefinition(ParsedStruct struct) :
{
    String name;
    ParsedType type;
    ParsedField field;
}
{
    <FIELD> name = identifier() <TYPE> type = dataType() {
        if (name != null && Schema.isReservedName(name.toLowerCase())) {
            throw new IllegalArgumentException("Reserved name '" + name + "' can not be used as a field name.");
        }
        field = new ParsedField(name, type);
    }
    lbrace() (id(field) (<NL>)*)? (matchSettings(field.matchSettings()) (<NL>)*)* <RBRACE>
    { struct.addField(field); }
}

/**
 * This rule consumes a struct subfield from a document field body. This is not to be confused with a document
 * struct's fields, but rather this is a subfield of a document field of type struct.
 *
 * @param field the field to modify
 * @throws IllegalArgumentException if the subfield name is a reserved name.
 */
void structField(ParsedField field) :
{
    String name;
    ParsedField structField;
}
{
    <STRUCT_FIELD> name = identifier() {
        if (name != null && Schema.isReservedName(name.toLowerCase())) {
            throw new IllegalArgumentException("Reserved name '" + name + "' can not be used as a field name.");
        }
        // The subfield is created without a type.
        structField = new ParsedField(name, null);
    }
    lbrace() (structFieldBody(structField) (<NL>)*)* <RBRACE>
    { field.addStructField(structField); }
}

/**
 * This rule consumes a single element of a field body block.
 *
 * @param field the field being built
 */
void fieldBody(ParsedField field) : { }
{
    ( alias(field)
    | attribute(field)
    | bolding(field)
    | dictionary(field)
    | fieldStemming(field)
    | id(field)
    | summaryInField(field)
    | indexInsideField(field)
    | indexing(field)
    | matchSettings(field.matchSettings())
    | normalizing(field)
    | queryCommand(field)
    | rank(field)
    | rankType(field)
    | fieldSorting(field)
    | structField(field)
    | summaryTo(field)
    | weight(field)
    | weightedset(field.getType())
    )
}

/**
 * This rule consumes a single element of a struct subfield body block.
* Only elements that are supported in streaming schema and indexed schema (with complex attributes) are allowed.
 *
 * @param field the field being built
 */
void structFieldBody(ParsedField field) : { }
{
    ( summaryInField(field)
    | indexing(field)
    | attribute(field)
    | matchSettings(field.matchSettings())
    | queryCommand(field)
    | rank(field)
    | structField(field)
    | summaryTo(field)
    )
}

/**
 * This rule consumes an indexing block of a field element.
 * A colon introduces a single-line script; otherwise the script is read as multi-line.
 *
 * @param field The field to modify.
 */
void indexing(ParsedField field) : { }
{
    ( <INDEXING> ( (<COLON> indexingOperation(field, false)) | indexingOperation(field, true) ) )
}

/**
 * This rule consumes an IL script block. This is expected to consume trailing newlines.
 *
 * @param field     The field to modify.
 * @param multiLine Whether the script may span several lines.
 */
void indexingOperation(ParsedField field, boolean multiLine) : { }
{
    {
        // No tokens are matched here: the script is read directly off the input stream.
        IndexingOperation oldOp = newIndexingOperation(multiLine);
        ParsedIndexingOp newOp = new ParsedIndexingOp(oldOp.getScript());
        field.setIndexingOperation(newOp);
    }
}

/**
 * This rule consumes a summary-to statement of a field element.
 * The summary field name defaults to the name of the field.
 *
 * @param field The field to modify.
 */
void summaryTo(ParsedField field) :
{
    String name = field.name();
    String destination;
    ParsedSummaryField psf;
}
{
    <SUMMARY_TO> [name = identifier()] <COLON> destination = identifier()
    {
        psf = field.summaryFieldFor(name);
        psf.addDestination(destination);
    }
    ( <COMMA> destination = identifier() { psf.addDestination(destination); } )*
}

/**
 * This rule consumes a weight statement of a field element.
 *
 * @param field The field to modify.
 */
void weight(ParsedField field) :
{
    int num;
}
{
    <WEIGHT> <COLON> num = integer()
    { field.setWeight(num); }
}

/**
 * This rule consumes a weighted set statement of a field element.
 *
 * @param fieldType The field type to modify.
 */
void weightedset(ParsedType fieldType) : { }
{
    <WEIGHTEDSET> ( (<COLON> weightedsetBody(fieldType))
                  | (lbrace() (weightedsetBody(fieldType) (<NL>)*)* <RBRACE>) )
}

/**
 * This rule consumes one body item of a weighted set block.
 *
 * @param type The field type to modify.
*/
void weightedsetBody(ParsedType type) : { }
{
    ( <CREATE_IF_NONEXISTENT> { type.setCreateIfNonExistent(true); }
    | <REMOVE_IF_ZERO>        { type.setRemoveIfZero(true); }
    )
}

/**
 * This rule consumes a rank-type statement of a field element.
 * The index name is optional and defaults to the empty string.
 *
 * @param field The field to modify.
 */
void rankType(ParsedField field) :
{
    String typeName;
    String indexName = "";
}
{
    <RANK_TYPE> [indexName = identifier()] <COLON> typeName = identifier()
    { field.addRankType(indexName, typeName); }
}

/**
 * This rule consumes an attribute statement of a field element.
 * The attribute name defaults to the field name; giving a different name is
 * deprecated and logged as a warning.
 *
 * @param field The field to modify.
 */
void attribute(ParsedField field) :
{
    String name = field.name();
}
{
    <ATTRIBUTE> [name = identifier()]
    {
        // TODO: Remove support for attribute with different name than field name in Vespa 9
        if ( ! name.equals(field.name()))
            deployLogger.logApplicationPackage(Level.WARNING, "Creating an attribute for field '" + field.name() +
                                               "' with a different name '" + name + "' than the field name" +
                                               " is deprecated, and support will be removed in Vespa 9. Define a field with the wanted name outside the document instead.");
        // Declared in an action so that the settings below can refer to it.
        ParsedAttribute attr = field.attributeFor(name);
    }
    ( (<COLON> attributeSetting(attr))
    | (lbrace() (attributeSetting(attr) (<NL>)*)* <RBRACE>) )
}

/* pick up sorting in field block */
void fieldSorting(ParsedField field) : { }
{
    sorting(field.sortInfo())
}

/* pick up sorting in attribute block */
void attributeSorting(ParsedAttribute attribute) : { }
{
    sorting(attribute.sortInfo())
}

/**
 * Consumes a sorting statement, either a single setting after a colon or a block of settings.
 *
 * @param sort The sort settings to modify.
 */
void sorting(ParsedSorting sort) : { }
{
    <SORTING>
    ( (<COLON> sortingSetting(sort))
    | (lbrace() (sortingSetting(sort) (<NL>)*)* <RBRACE>) )
}

/**
 * Consumes a single sorting setting: direction, function, strength or locale.
 *
 * @param sorting The sort settings to modify.
 */
void sortingSetting(ParsedSorting sorting) :
{
    String locale;
}
{
    (
        <ASCENDING>   { sorting.setAscending(); }
      | <DESCENDING>  { sorting.setDescending(); }
      | <FUNCTION> <COLON> (
            <UCA>        { sorting.setFunction(Sorting.Function.UCA); }
          | <RAW>        { sorting.setFunction(Sorting.Function.RAW); }
          | <LOWERCASE>  { sorting.setFunction(Sorting.Function.LOWERCASE); }
        )
      | <STRENGTH> <COLON> (
            <PRIMARY>     { sorting.setStrength(Sorting.Strength.PRIMARY); }
          | <SECONDARY>   { sorting.setStrength(Sorting.Strength.SECONDARY); }
          | <TERTIARY>    { sorting.setStrength(Sorting.Strength.TERTIARY); }
          | <QUATERNARY>  { sorting.setStrength(Sorting.Strength.QUATERNARY); }
          | <IDENTICAL>   { sorting.setStrength(Sorting.Strength.IDENTICAL); }
        )
      | <LOCALE> <COLON> locale = identifierWithDash() { sorting.setLocale(locale); }
    )
}

/**
 * This rule consumes a single attribute setting statement of an attribute element.
 *
 * @param attribute The attribute to change.
 */
void attributeSetting(ParsedAttribute attribute) :
{
    String str;
}
{
    (
        <FAST_RANK>               { attribute.setFastRank(true); }
      | <FAST_SEARCH>             { attribute.setFastSearch(true); }
      | <FAST_ACCESS>             { attribute.setFastAccess(true); }
      | <MUTABLE>                 { attribute.setMutable(true); }
      | <PAGED>                   { attribute.setPaged(true); }
      | <ENABLE_BIT_VECTORS>      { deployLogger.logApplicationPackage(Level.WARNING, "'enable-bit-vectors' is deprecated and void -> remove it. Will be removed in vespa-9"); }
      | <ENABLE_ONLY_BIT_VECTOR>  { attribute.setEnableOnlyBitVector(true); }
      | attributeSorting(attribute)
      | <ALIAS> { String alias; String aliasedName=attribute.name(); }
        [aliasedName = identifier()] <COLON> alias = identifierWithDash()
        { attribute.addAlias(aliasedName, alias); }
      | <DISTANCE_METRIC> <COLON> str = identifierWithDash()
        { attribute.setDistanceMetric(str); }
    )
}

/**
 * This rule consumes a summary statement defined inside a document-summary block.
 * A warning is logged if the summary field is already defined in the document summary.
 *
 * @param docsum The document summary to modify.
 */
void summaryInDocument(ParsedDocumentSummary docsum) :
{
    String name;
    ParsedType type = null;
    ParsedSummaryField psf;
}
{
    <SUMMARY> name = identifierWithDash()
    ( <TYPE> type = dataType())?
    lbrace()
    {
        psf = new ParsedSummaryField(name, type);
        if (type != null) {
            psf.setHasExplicitType();
        }
    }
    (summaryItem(psf) (<NL>)*)* <RBRACE>
    {
        var old = docsum.addField(psf);
        if (old != null) {
            deployLogger.logApplicationPackage(Level.WARNING, "Summary field '" + psf.name() +
                                               "' is defined twice in document-summary '" + docsum.name() + "'");
        }
    }
}

/**
 * The rule consumes a summary statement defined inside a field.
 *
 * @param field The field to modify.
 */
void summaryInField(ParsedField field) : { }
{
    ( LOOKAHEAD(2) summaryInFieldShort(field)
    | summaryInFieldLong(field) )
}

/**
 * This rule consumes a single-line summary field.
*/
void summaryInFieldShort(ParsedField field) :
{
    String name = field.name();
    ParsedSummaryField psf;
}
{
    <SUMMARY> [ name = identifier() ] <COLON>
    { psf = field.summaryFieldFor(name); }
    ( <DYNAMIC>                { psf.setDynamic(); }
    | <MATCHED_ELEMENTS_ONLY>  { psf.setMatchedElementsOnly(); }
    | (<FULL> | <STATIC>)      { psf.setFull(); }
    | <TOKENS>                 { psf.setTokens(); }
    )
}

/**
 * This rule consumes a multi-line summary field.
 * The summary field name defaults to the field's name and its type to the field's type.
 *
 * @param field The field to modify.
 */
void summaryInFieldLong(ParsedField field) :
{
    String name = field.name();
    ParsedType type = field.getType();
    boolean explicitType = false;
    ParsedSummaryField psf;
}
{
    ( <SUMMARY> [ name = identifier() [ <TYPE> { type = dataType(); explicitType = true; } ] ]
      lbrace()
      {
          psf = field.summaryFieldFor(name, type);
          if (explicitType) {
              psf.setHasExplicitType();
          }
      }
      (summaryItem(psf) (<NL>)*)* <RBRACE> )
}

/**
 * This rule consumes an item of a summary field block.
 *
 * @param field The field to modify.
 */
void summaryItem(ParsedSummaryField field) : { }
{
    ( summaryTransform(field)
    | summaryBolding(field)
    | summarySourceList(field)
    | summaryDestinationList(field)
    )
}

/**
 * This rule consumes a transform statement for a summary field element.
 *
 * @param field The field to modify.
 */
void summaryTransform(ParsedSummaryField field) : { }
{
    ( <DYNAMIC>                { field.setDynamic(); }
    | <MATCHED_ELEMENTS_ONLY>  { field.setMatchedElementsOnly(); }
    | (<FULL> | <STATIC>)      { field.setFull(); }
    | <TOKENS>                 { field.setTokens(); }
    )
}

/**
 * This rule consumes a bolding statement for a summary field element.
 *
 * @param field The summary field to modify.
 */
void summaryBolding(ParsedSummaryField field) :
{
    boolean bold;
}
{
    <BOLDING> <COLON> bold = bool()
    { field.setBold(bold); }
}

/**
 * This rule consumes a source-list statement for a summary field element.
 *
 * @param field The summary field to modify.
 */
void summarySourceList(ParsedSummaryField field) :
{
    String str;
}
{
    ( <SOURCE> <COLON> str = identifier() { field.addSource(str); }
      ( <COMMA> str = identifier() { field.addSource(str); } )*
    ) +
}

/**
 * This rule consumes a destination-list statement for a summary field element.
 *
 * @param field The summary field to modify.
*/
void summaryDestinationList(ParsedSummaryField field) :
{
    String str;
}
{
    <TO> <COLON> str = identifier() { field.addDestination(str); }
    ( <COMMA> str = identifier() { field.addDestination(str); } )*
}

/**
 * This rule consumes a stemming block of a field element.
 *
 * @param field The field to modify.
 */
void fieldStemming(ParsedField field) :
{
    String setting;
}
{
    <STEMMING> <COLON> setting = identifierWithDash()
    { field.setStemming(Stemming.get(setting)); }
}

/**
 * This rule consumes a stemming statement for a schema element.
 *
 * @param schema the schema to modify
 */
void searchStemming(ParsedSchema schema) :
{
    String setting;
}
{
    <STEMMING> <COLON> setting = identifierWithDash()
    { schema.setStemming(Stemming.get(setting)); }
}

/**
 * This rule consumes a normalizing statement of a field element.
 * At the moment, this can only be used to turn off normalizing.
 *
 * @param field The field to modify.
 */
void normalizing(ParsedField field) :
{
    String setting;
}
{
    <NORMALIZING> <COLON> setting = identifierWithDash()
    { field.setNormalizing(setting); }
}

/**
 * This rule consumes a bolding statement of a field element.
 *
 * @param field The field to modify.
 */
void bolding(ParsedField field) :
{
    boolean bold;
}
{
    <BOLDING> <COLON> bold = bool()
    { field.setBolding(bold); }
}

/**
 * This rule consumes a dictionary statement of a field element.
 *
 * @param field The field to modify.
*/ void dictionary(ParsedField field) : { } { ( ( dictionarySetting(field)) | (lbrace() (dictionarySetting(field) ()*)* )) { } } /** * This rule consumes a single dictionary setting and registers the matching DictionaryOption (HASH, BTREE, * CASED or UNCASED) on the field. * * @param field The field to modify. */ void dictionarySetting(ParsedField field) : { } { ( { field.dictionary(DictionaryOption.HASH); } | { field.dictionary(DictionaryOption.BTREE); } | { field.dictionary(DictionaryOption.CASED); } | { field.dictionary(DictionaryOption.UNCASED); } ) } /** * This rule consumes a query command statement of a field element. The command is given either as an * identifier (dashes allowed) or as a quoted string, and is added to the field as parsed. * * @param field The field to modify. */ void queryCommand(ParsedField field) : { String command; } { ( command = identifierWithDash() | command = quotedString() ) { field.addQueryCommand(command); } } /** * This rule consumes an alias statement of a field element. The aliased name defaults to the name of the * field when no explicit name precedes the alias. * * @param field The field to modify. */ void alias(ParsedField field) : { String aliasedName = field.name(); String alias; } { [aliasedName = identifier()] alias = identifierWithDash() { field.addAlias(aliasedName, alias); } } /** * This rule consumes a match statement, holding either a single match type or a block of match items. * * @param matchInfo The settings to modify. */ void matchSettings(ParsedMatchSettings matchInfo) : { } { ( ( matchType(matchInfo)) | (lbrace() (matchItem(matchInfo) ()*)* ) ) } /** * This rule consumes a single match setting. Depending on the alternative matched it sets the match type * (TEXT, WORD, EXACT or GRAM), the case handling (CASED or UNCASED) or the match algorithm (PREFIX, SUBSTRING * or SUFFIX) of the given settings. Two alternatives map to MatchType.TEXT. * * @param matchInfo The settings to modify. */ void matchType(ParsedMatchSettings matchInfo) : { } { ( { matchInfo.setType(MatchType.TEXT); } // Deprecated synonym to TEXT | { matchInfo.setType(MatchType.TEXT); } | { matchInfo.setType(MatchType.WORD); } | { matchInfo.setType(MatchType.EXACT); } | { matchInfo.setType(MatchType.GRAM); } | { matchInfo.setCase(Case.CASED); } | { matchInfo.setCase(Case.UNCASED); } | { matchInfo.setAlgorithm(MatchAlgorithm.PREFIX); } | { matchInfo.setAlgorithm(MatchAlgorithm.SUBSTRING); } | { matchInfo.setAlgorithm(MatchAlgorithm.SUFFIX); } ) } /** * This rule consumes a single match item for a match block. * * @param matchInfo The settings to modify. 
*/ void matchItem(ParsedMatchSettings matchInfo) : { } { ( matchType(matchInfo) | exactTerminator(matchInfo) | gramSize(matchInfo) | matchSize(matchInfo) | maxTermOccurrences(matchInfo)) } /** * This rule consumes an exact-terminator statement of a match block. The terminator is given as a quoted * string and stored without its quotes. * * @param matchInfo The settings to modify. */ void exactTerminator(ParsedMatchSettings matchInfo) : { String terminator; } { terminator = quotedString() { matchInfo.setExactTerminator(terminator); } } /** * This rule consumes a gram size statement of a match block. * * @param matchInfo The settings to modify. */ void gramSize(ParsedMatchSettings matchInfo) : { int gramSize; } { gramSize = integer() { matchInfo.setGramSize(gramSize); } } /** * This rule consumes a match size statement of a match block. The parsed integer is stored as the max length * of the settings. * * @param matchInfo The settings to modify. */ void matchSize(ParsedMatchSettings matchInfo) : { int matchSize; } { matchSize = integer() { matchInfo.setMaxLength(matchSize); } } /** * This rule consumes a max term occurrences statement of a match block. * * @param matchInfo The settings to modify. */ void maxTermOccurrences(ParsedMatchSettings matchInfo) : { int maxTermOccurrences; } { maxTermOccurrences = integer() { matchInfo.setMaxTermOccurrences(maxTermOccurrences); } } /** * Consumes a rank statement of a field element, holding either a single rank setting or a block of them. * * @param field The field to modify. */ void rank(ParsedField field) : { } { ( ( rankSetting(field)) | (lbrace() (rankSetting(field) ()*)* ) ) } /** * Consumes a single rank setting of a rank statement, turning on the literal, normal or filter flag of the * field depending on the alternative matched. * * @param field The field to modify. */ void rankSetting(ParsedField field) : { } { ( { field.setLiteral(true); } | { field.setNormal(true); } | { field.setFilter(true); } ) } /** * Consumes an id statement of a field body block. * * @param field The field to modify. */ void id(ParsedField field) : { int fieldId; } { fieldId = integer() { field.setId(fieldId); } } /** * Consumes a document-summary block from within a schema block. The summary is created from its name, may * inherit other summaries, and is added to the schema once the whole block has been consumed. * * @param schema the schema object to add content to */ void documentSummary(ParsedSchema schema) : { String name; ParsedDocumentSummary summary; } { ( name = identifierWithDash() { summary = new ParsedDocumentSummary(name); } [inheritsDocumentSummary(summary)] lbrace() ( { summary.setFromDisk(true); } | { summary.setOmitSummaryFeatures(true); } | documentSummaryItem(summary) | )* ) { schema.addDocumentSummary(summary); } } /** * This rule consumes an inherits statement of a document summary. 
* Every name in the list is registered on the document summary through inherit(). * * @param documentSummary the document summary to modify */ void inheritsDocumentSummary(ParsedDocumentSummary documentSummary) : { String name; } { name = identifierWithDash() { documentSummary.inherit(name); } ( name = identifierWithDash() { documentSummary.inherit(name); } )* } /** * Consumes a single document-summary item by delegating to summaryInDocument(). * * @param summary The document summary to modify. */ void documentSummaryItem(ParsedDocumentSummary summary) : { } { summaryInDocument(summary) } /** * Consumes an index block in a schema element. The index body is given either as a sequence of index * statements or as a block of them, and the resulting index is added to the schema. * * @param schema the schema object to add content to */ void indexOutsideDoc(ParsedSchema schema) : { ParsedIndex op; String indexName; } { indexName = identifier() { op = new ParsedIndex(indexName); } ( ( indexBody(op) ( indexBody(op))*) | (lbrace() (indexBody(op) ()*)* ) ) { schema.addIndex(op); } } /** * Consumes an index block for a field element. The index name defaults to the name of the field. When a * different name is given explicitly, a deprecation warning is logged through the deploy logger. * * @param field the field to modify */ void indexInsideField(ParsedField field) : { ParsedIndex op; String indexName = field.name(); } { [indexName = identifier()] { // TODO: Remove support for index with different name than field name in Vespa 9 if ( ! indexName.equals(field.name())) deployLogger.logApplicationPackage(Level.WARNING, "Creating an index for field '" + field.name() + "' with a different name '" + indexName + "' than the field name" + " is deprecated, and support will be removed in Vespa 9. Define a field with the wanted name outside the document instead."); op = new ParsedIndex(indexName); } ( ( indexBody(op) ( indexBody(op))*) | (lbrace() (indexBody(op) ()*)* ) ) { field.addIndex(op); } } /** * Consumes a single index statement for an index block. * * @param index The index to modify. 
*/ void indexBody(ParsedIndex index) : { String str; int arity; long num; double threshold; } { ( { index.setPrefix(true); } | str = identifierWithDash() { index.addAlias(str); } | str = identifierWithDash() { index.setStemming(Stemming.get(str)); } | arity = integer() { index.setArity(arity); } | num = longValue() { index.setLowerBound(num); } | num = longValue() { index.setUpperBound(num); } | threshold = floatValue() { index.setDensePostingListThreshold(threshold); } | { index.setEnableBm25(true); } | hnswIndex(index) { } ) } /** * Consumes an hnsw statement of an index block, with or without a block of hnsw parameters. The parameters * are always built and stored on the index, so an index without a parameter block receives whatever a fresh * HnswIndexParams.Builder builds. * * @param index The index to modify. */ void hnswIndex(ParsedIndex index) : { HnswIndexParams.Builder params = new HnswIndexParams.Builder(); } { ( LOOKAHEAD( lbrace()) ( (lbrace() (hnswIndexBody(params) ()*)* ) ) | ) { index.setHnswIndexParams(params.build()); } } /** * Consumes a single statement of an hnsw parameter block, setting max links per node, neighbors to explore * at insert, or multi-threaded indexing on the builder. * * @param params The hnsw parameter builder to modify. */ void hnswIndexBody(HnswIndexParams.Builder params) : { int num; boolean bool; } { ( num = integer() { params.setMaxLinksPerNode(num); } | num = integer() { params.setNeighborsToExploreAtInsert(num); } | bool = bool() { params.setMultiThreadedIndexing(bool); } ) } /** * Consumes an onnx-model block and adds the resulting model to the schema. * * @param schema the schema object to add content to */ void onnxModelInSchema(ParsedSchema schema) : { OnnxModel onnxModel; } { onnxModel = onnxModel() { schema.add(onnxModel); } } /** * Consumes an onnx-model block and adds the resulting model to the rank profile. * * @param profile the rank profile to modify */ void onnxModelInProfile(ParsedRankProfile profile) : { OnnxModel onnxModel; } { onnxModel = onnxModel() { profile.add(onnxModel); } } /** * Consumes an onnx-model block. The block must contain at least one onnx-model item. * * @return the model created from the name and items of the block */ OnnxModel onnxModel() : { String name; OnnxModel onnxModel; } { ( name = identifier() { onnxModel = new OnnxModel(name); } lbrace() (onnxModelItem(onnxModel) ()*)+ ) { return onnxModel; } } /** * Consumes a single item of an onnx-model block. 
* An item sets the file or uri of the model, its gpu device, its intra-op or inter-op thread count, its * execution mode ("parallel" or "sequential"), or adds an input or output name mapping. * * @param onnxModel the onnxModel to modify */ void onnxModelItem(OnnxModel onnxModel) : { String path = null; int num; } { ( (path = fileItem()) { onnxModel.setFileName(path); } | (path = uriItem()) { onnxModel.setUri(path); } | num = integer() { onnxModel.setGpuDevice(num, false); } | num = integer() { onnxModel.setStatelessIntraOpThreads(num); } | num = integer() { onnxModel.setStatelessInterOpThreads(num); } | ( { onnxModel.setStatelessExecutionMode("parallel"); } | { onnxModel.setStatelessExecutionMode("sequential"); } ) | () { /* The mapped name is the token image from offset 5 up to the last ':', with surrounding quotes removed; the source is everything after the last ':'. NOTE(review): offset 5 presumably skips an "input" keyword - confirm against the token definition. */ String name = token.image.substring(5, token.image.lastIndexOf(":")).trim(); if (name.startsWith("\"")) { name = name.substring(1, name.length() - 1); } String source = token.image.substring(token.image.lastIndexOf(":") + 1).trim(); onnxModel.addInputNameMapping(name, source); } | () { /* Same layout as the input mapping, but the name starts at offset 6. NOTE(review): presumably skipping an "output" keyword - confirm against the token definition. */ String name = token.image.substring(6, token.image.lastIndexOf(":")).trim(); if (name.startsWith("\"")) { name = name.substring(1, name.length() - 1); } String as = token.image.substring(token.image.lastIndexOf(":") + 1).trim(); onnxModel.addOutputNameMapping(name, as); } ) } /** * Consumes a constant block of a schema element. The block must supply both a tensor type and a file or uri; * when several file or uri items are given, the last one parsed is used. * * @param schema the schema object to add content to * @throws IllegalArgumentException if the block has no type, or neither a file nor a uri */ void rankingConstant(ParsedSchema schema) : { String name; String path = null; DistributableResource.PathType pathType = DistributableResource.PathType.FILE; TensorType type = null; } { ( name = identifier() lbrace() (path = fileItem() { pathType = DistributableResource.PathType.FILE; } | path = uriItem() { pathType = DistributableResource.PathType.URI; } // Undocumented. Remove? 
| type = tensorTypeWithPrefix(rankingConstantErrorMessage(name)) ()* )+ ) { if (type == null) throw new IllegalArgumentException("constant '" + name + "' must have a type"); if (path == null) throw new IllegalArgumentException("constant '" + name + "' must have a file"); schema.add(new RankProfile.Constant(FeatureNames.asConstantFeature(name), type, path, pathType)); } } /** * Consumes a file item and returns its path. The token image is parsed by com.yahoo.path.Path and returned * in its relative form. * * @return the relative path of the file */ String fileItem() : { String path; } { ( ( | | ) { path = com.yahoo.path.Path.fromString(token.image).getRelative(); } { } ()*) { return path; } } /** * Consumes a uri item and returns the token image unchanged. * * @return the uri as written */ String uriItem() : { String path; } { ( ( ) { path = token.image; } ()*) { return path; } } /** * Builds the prefix used in error messages about the type of a ranking constant. This rule consumes no * input. The constant name is quoted without padding, matching the other error messages of this parser. * * @param name the name of the ranking constant * @return the error message prefix */ String rankingConstantErrorMessage(String name) : {} { { return "For ranking constant '" + name + "'"; } } /** * Consumes a rank-profile block of a schema element. The profile is created from its name, may inherit other * profiles, and is added to the schema after all its items have been consumed. * * @param schema the schema object to add content to */ void rankProfile(ParsedSchema schema) : { String name; ParsedRankProfile profile; } { ( | ) name = identifierWithDash() { profile = new ParsedRankProfile(name); } [inheritsRankProfile(profile)] lbrace() (rankProfileItem(schema, profile) ()*)* { schema.addRankProfile(profile); } } /** * This rule consumes a single statement for a rank-profile block. * * @param profile The rank profile to modify. 
*/ /* The schema argument is only passed on to constants(); every other alternative works on the profile alone. */ void rankProfileItem(ParsedSchema schema, ParsedRankProfile profile) : { } { ( fieldRankType(profile) | fieldWeight(profile) | fieldRankFilter(profile) | firstPhase(profile) | matchPhase(profile) | function(profile) | mutate(profile) | ignoreRankFeatures(profile) | numThreadsPerSearch(profile) | minHitsPerThread(profile) | numSearchPartitions(profile) | termwiseLimit(profile) | postFilterThreshold(profile) | approximateThreshold(profile) | targetHitsMaxAdjustmentFactor(profile) | rankFeatures(profile) | rankProperties(profile) | secondPhase(profile) | globalPhase(profile) | inputs(profile) | constants(schema, profile) | matchFeatures(profile) | summaryFeatures(profile) | onnxModelInProfile(profile) | strict(profile) ) } /** * Consumes an inherits statement of a rank-profile. Every name in the list is registered on the profile * through inherit(). * * @param profile the profile to modify */ void inheritsRankProfile(ParsedRankProfile profile) : { String name; } { name = identifierWithDash() { profile.inherit(name); } ( name = identifierWithDash() { profile.inherit(name); } )* } /** * This rule consumes a mutate statement of a rank-profile. The block must hold at least one mutate operation. * * @param profile The profile to modify. */ void mutate(ParsedRankProfile profile) : { } { lbrace() (mutate_operation(profile) )+ } /** * This rule consumes a single mutate operation. The alternative matched first selects the phase (on_match, * on_first_phase, on_second_phase or on_summary); the block then names the attribute and the operation to * apply to it, and the three are added to the profile together. * * @param profile The profile to modify. */ void mutate_operation(ParsedRankProfile profile) : { String attribute, operation; RankProfile.MutateOperation.Phase phase; } { ( { phase = RankProfile.MutateOperation.Phase.on_match; } | { phase = RankProfile.MutateOperation.Phase.on_first_phase; } | { phase = RankProfile.MutateOperation.Phase.on_second_phase; } | { phase = RankProfile.MutateOperation.Phase.on_summary; } ) lbrace() attribute = identifier() operation = mutate_expr() ()* { profile.addMutateOperation(phase, attribute, operation); } } /** * This rule consumes a mutate expression: one of the operators "+=", "-=" or "=" followed by a number. * * @return the operator image immediately followed by the string form of the number */ String mutate_expr() : { String op; Number constant = null; } { (("+=" | "-=" | "=") { op = token.image; } constant = number()) { /* number() is not optional in this rule, so the null check below only guards the initial value */ return constant != null ? (op + constant) : op; } } /** * This rule consumes a function statement of a rank-profile. 
* The function gets its name, an optional parameter list and an expression, and may be marked as inline. * * @param profile The profile to modify. * @throws IllegalArgumentException if the profile already has a function with the same name */ void function(ParsedRankProfile profile) : { String name, expression, parameter; boolean inline = false; ParsedRankFunction func; } { ( ( | ) inline = inline() name = identifier() [ "$" { /* a trailing "$" is kept as part of the function name */ name = name + token.image; } ] "(" { func = new ParsedRankFunction(name); } [ parameter = identifier() { func.addParameter(parameter); } ( parameter = identifier() { func.addParameter(parameter); } )* ] ")" lbrace() expression = expression() ()* ) { func.setExpression(expression); func.setInline(inline); var old = profile.addOrReplaceFunction(func); if (old != null) { throw new IllegalArgumentException("Function '" + func.name() + "' is defined twice in rank profile '" + profile.name() + "'"); } } } /** * Consumes the optional leading token of this rule. NOTE(review): presumably the inline keyword - confirm * against the token definition. * * @return true if the optional token was present, false otherwise */ boolean inline() : { } { ( { return true; } ) ? { return false; } } /** * This rule consumes a match-phase block of a rank profile. The collected settings are checked with * checkValid() before they are stored on the profile. * * @param profile The rank profile to modify. */ void matchPhase(ParsedRankProfile profile) : { MatchPhaseSettings settings = new MatchPhaseSettings(); } { lbrace() (matchPhaseItem(settings) ()*)* { settings.checkValid(); profile.setMatchPhaseSettings(settings); } } /** * This rule consumes a single statement of a match-phase block: the attribute, a diversity block, the sort * order, max hits, max filter coverage, the evaluation point or the pre/post filter tipping point. * * @param settings The match-phase settings to modify. */ void matchPhaseItem(MatchPhaseSettings settings) : { String str; int num; double multiplier; double coverage; } { ( str = identifier() { settings.setAttribute(str); } | diversity(settings) | ( { settings.setAscending(true); } | { settings.setAscending(false); } ) | num = integer() { settings.setMaxHits(num); } | coverage = floatValue() { settings.setMaxFilterCoverage(coverage); } | multiplier = floatValue() { settings.setEvaluationPoint(multiplier); } | multiplier = floatValue() { settings.setPrePostFilterTippingPoint(multiplier); } ) } /** * This rule consumes a diversity block of a match-phase block. * * @param profile The match-phase settings to modify. 
*/ /* Note that the parameter named profile holds match-phase settings, not a rank profile. */ void diversity(MatchPhaseSettings profile) : { DiversitySettings settings = new DiversitySettings(); } { lbrace() (diversityItem(settings) ()*)* { profile.setDiversity(settings); } } /** * This rule consumes a single statement of a diversity block: the attribute, the minimum number of groups, * the cutoff factor or the cutoff strategy (strict or loose). * * @param settings The diversity settings to modify. */ void diversityItem(DiversitySettings settings) : { String str; int num; double multiplier; } { ( str = identifier() { settings.setAttribute(str); } | num = integer() { settings.setMinGroups(num); } | multiplier = floatValue() { settings.setCutoffFactor(multiplier); } | ( { settings.setCutoffStrategy(Diversity.CutoffStrategy.strict); } | { settings.setCutoffStrategy(Diversity.CutoffStrategy.loose); } ) ) } /** * Consumes the first-phase block of a rank profile. * NOTE(review): the local variable exp is declared but never used by this rule. * * @param profile The rank profile to modify. */ void firstPhase(ParsedRankProfile profile) : { String exp; } { lbrace() (firstPhaseItem(profile) ()*)* } /** * Consumes a statement for a first-phase block: the ranking expression, the keep rank count or the rank score * drop limit. * * @param profile The rank profile to modify. */ void firstPhaseItem(ParsedRankProfile profile) : { String expression; int keepRankCount; double dropLimit; } { ( expression = expression() { profile.setFirstPhaseRanking(expression); } | ( keepRankCount = integer()) { profile.setKeepRankCount(keepRankCount); } | ( dropLimit = floatValue()) { profile.setRankScoreDropLimit(dropLimit); } ) } /** * Consumes the second-phase block of a rank profile. * * @param profile The rank profile to modify. */ void secondPhase(ParsedRankProfile profile) : { } { lbrace() (secondPhaseItem(profile) ()*)* } /** * Consumes a statement for a second-phase block: the ranking expression or the rerank count. * * @param profile The rank profile to modify. */ void secondPhaseItem(ParsedRankProfile profile) : { String expression; int rerankCount; } { ( expression = expression() { profile.setSecondPhaseRanking(expression); } | ( rerankCount = integer()) { profile.setRerankCount(rerankCount); } ) } /** * Consumes the global-phase block of a rank profile. * * @param profile The rank profile to modify. */ void globalPhase(ParsedRankProfile profile) : { } { lbrace() (globalPhaseItem(profile) ()*)* } /** * Consumes a statement for a global-phase block: the ranking expression or the rerank count. * * @param profile The rank profile to modify. 
*/ void globalPhaseItem(ParsedRankProfile profile) : { String expression; int rerankCount; } { ( expression = expression() { profile.setGlobalPhaseExpression(expression); } | ( rerankCount = integer()) { profile.setGlobalPhaseRerankCount(rerankCount); } ) } /** * Consumes an inputs block of a rank profile. Each input is recorded in seenInputs, and a null entry is * recorded for every separator token consumed after an input. After the block is consumed, a warning is logged * for each pair of inputs that are adjacent in that list, meaning no separator was found between them. * NOTE(review): the separator is presumably a newline, as the warning text suggests - confirm against the * token definition. The local variable type is declared but never used by this rule. * * @param profile the rank profile to modify */ void inputs(ParsedRankProfile profile) : { Reference reference; TensorType type; List seenInputs = new ArrayList<>(); } { ()* ( reference = input(profile) { seenInputs.add(reference); } ( { seenInputs.add(null); })* )* { Reference last = null; for (Reference current : seenInputs) { if (last != null && current != null) { deployLogger.logApplicationPackage(Level.WARNING, "Expected newline between inputs " + last + " and " + current); } last = current; } } } /** * Consumes a single input of an inputs block: its name, an optional type and an optional default value. * When no type is given, the input gets an InputType created from TensorType.empty; the default value, when * given, is parsed as a tensor of the input's tensor type. * * @param profile the rank profile to add the input to * @return the reference of the input which was added */ Reference input(ParsedRankProfile profile) : { Reference reference; InputType type = new InputType(TensorType.empty, false); Tensor defaultValue = null; } { reference = inputName() ( type = valueType(reference))? ( ()* defaultValue = tensorValue(type.tensorType()) )? { profile.addInput(reference, new RankProfile.Input(reference, type, Optional.ofNullable(defaultValue))); return reference; } } /** Returns the reference "query(name)" for both "query(name)" and "name". 
*/ Reference inputName() : { String name; } { ( ( "(" name = identifier() ")" ) | name = identifier() ) { return FeatureNames.asQueryFeature(name); } } /** * Consumes the type of an input or constant. A tensor type is parsed from its spec and used as is. Every other * alternative yields an InputType created from TensorType.empty; two of these log a warning saying that * 'long' respectively 'float' is not possible and is treated as 'double', and the last one passes true rather * than false as the second InputType argument. * * @param reference the reference of the value being typed, used in error and warning messages * @return the parsed type */ InputType valueType(Reference reference) : { TensorType type; InputType result; } { ( ( type = tensorType("Type of " + reference) { result = new InputType(type, false); } ) | ( { result = new InputType(TensorType.empty, false); } ) | ( { result = new InputType(TensorType.empty, false); deployLogger.logApplicationPackage(Level.WARNING, "Input " + reference + ": 'long' is not possible, treated as 'double'"); } ) | ( { result = new InputType(TensorType.empty, false); deployLogger.logApplicationPackage(Level.WARNING, "Input " + reference + ": 'float' is not possible, treated as 'double'"); } ) | ( { result = new InputType(TensorType.empty, true); } ) ) { return result; } } /** * Consumes a summary-features block of a rank profile. The feature list is cut out of the token image: either * everything after the first ':', or everything between the first '{' and the last '}'. In the third form * the name following "inherits " (up to the next space) is also stored on the profile as the inherited * summary features. * NOTE(review): the local variable inherited is declared but never used by this rule. * * @param profile the rank profile to modify */ void summaryFeatures(ParsedRankProfile profile) : { String features; String inherited = null; } { ( { features = token.image.substring(token.image.indexOf(":") + 1).trim(); } | { features = token.image.substring(token.image.indexOf("{") + 1, token.image.lastIndexOf("}")).trim(); } | { int inheritsIndex = token.image.indexOf("inherits "); String rest = token.image.substring(inheritsIndex + "inherits ".length()); profile.setInheritedSummaryFeatures(rest.substring(0, rest.indexOf(" ")).trim()); features = token.image.substring(token.image.indexOf("{") + 1, token.image.lastIndexOf("}")).trim(); } ) { profile.addSummaryFeatures(getFeatureList(features)); } } /** * Consumes a strict statement of a rank profile, turning strict mode on or off depending on the alternative * matched. * * @param profile the rank profile to modify */ void strict(ParsedRankProfile profile) : {} { ( ( { profile.setStrict(true); } ) | ( { profile.setStrict(false); } ) ) } /** * Consumes a match-features block of a rank profile. 
* The feature list is cut out of the token image: either everything after the first ':', or everything between * the first '{' and the last '}'. In the third form the name following "inherits " (up to the next space) is * also stored on the profile as the inherited match features. * * @param profile the rank profile to modify */ void matchFeatures(ParsedRankProfile profile) : { String features; } { ( { features = token.image.substring(token.image.indexOf(":") + 1).trim(); } | { features = token.image.substring(token.image.indexOf("{") + 1, token.image.lastIndexOf("}")).trim(); } | { int inheritsIndex = token.image.indexOf("inherits "); String rest = token.image.substring(inheritsIndex + "inherits ".length()); profile.setInheritedMatchFeatures(rest.substring(0, rest.indexOf(" ")).trim()); features = token.image.substring(token.image.indexOf("{") + 1, token.image.lastIndexOf("}")).trim(); } ) { profile.addMatchFeatures(getFeatureList(features)); } } /** * Consumes a rank-features block of a rank profile. The feature list is either everything after the first * ':' of the token image, or everything between its first '{' and last '}'. * * @param profile the rank profile to modify */ void rankFeatures(ParsedRankProfile profile) : { String features; } { ( { features = token.image.substring(token.image.indexOf(":") + 1).trim(); } | { features = token.image.substring(token.image.indexOf("{") + 1, token.image.lastIndexOf("}")).trim(); } ) { profile.addRankFeatures(getFeatureList(features)); } } /** * This rule consumes an ignore-default-rank-features statement for a rank profile. * * @param profile The rank profile to modify. */ void ignoreRankFeatures(ParsedRankProfile profile) : { } { { profile.setIgnoreDefaultRankFeatures(true); } } /** * This rule consumes a num-threads-per-search statement for a rank profile. The value is parsed by integer(). * * @param profile The rank profile to modify. */ void numThreadsPerSearch(ParsedRankProfile profile) : { int num; } { ( num = integer()) { profile.setNumThreadsPerSearch(num); } } /** * This rule consumes a min-hits-per-thread statement for a rank profile. The value is parsed by integer(). * * @param profile The rank profile to modify. */ void minHitsPerThread(ParsedRankProfile profile) : { int num; } { ( num = integer()) { profile.setMinHitsPerThread(num); } } /** * This rule consumes a num-search-partitions statement for a rank profile. 
* The value is parsed by integer(). * * @param profile the rank profile to modify */ void numSearchPartitions(ParsedRankProfile profile) : { int num; } { ( num = integer()) { profile.setNumSearchPartitions(num); } } /** * This rule consumes a termwise-limit statement for a rank profile. The value is parsed by floatValue(). * * @param profile the rank profile to modify */ void termwiseLimit(ParsedRankProfile profile) : { double num; } { ( num = floatValue()) { profile.setTermwiseLimit(num); } } /** * This rule consumes a post-filter-threshold statement for a rank profile. The value is parsed by * floatValue(). * * @param profile the rank profile to modify */ void postFilterThreshold(ParsedRankProfile profile) : { double threshold; } { ( threshold = floatValue()) { profile.setPostFilterThreshold(threshold); } } /** * This rule consumes an approximate-threshold statement for a rank profile. The value is parsed by * floatValue(). * * @param profile the rank profile to modify */ void approximateThreshold(ParsedRankProfile profile) : { double threshold; } { ( threshold = floatValue()) { profile.setApproximateThreshold(threshold); } } /** * This rule consumes a target-hits-max-adjustment-factor statement for a rank profile. The value is parsed by * floatValue(). * * @param profile the rank profile to modify */ void targetHitsMaxAdjustmentFactor(ParsedRankProfile profile) : { double factor; } { ( factor = floatValue()) { profile.setTargetHitsMaxAdjustmentFactor(factor); } } /** * Consumes a rank-properties block of a rank profile. There * is a little trick within this rule to allow the final rank property * to skip the terminating newline token: properties followed by a terminator are consumed in the loop, * guarded by a syntactic lookahead, and a last property without a terminator is consumed by the optional * trailing rankProperty. * * @param profile the rank profile to modify */ void rankProperties(ParsedRankProfile profile) : { } { lbrace() (LOOKAHEAD(rankPropertyItem() rankPropertyItem() ) rankProperty(profile) ()+)* [rankProperty(profile)] } /** * Consumes a single rank property pair for a rank profile. 
* The pair consists of two rank property items, the key followed by the value. * * @param profile the rank profile to modify */ void rankProperty(ParsedRankProfile profile) : { String key, val; } { key = rankPropertyItem() val = rankPropertyItem() { profile.addRankProperty(key, val); } } /** * Consumes a single rank property for a rank-properties block. An item is one or more adjacent parts * (identifiers, quoted strings with their quotes removed, parentheses and other tokens) whose images are * concatenated without separators. * * @return the concatenated images of the consumed parts */ String rankPropertyItem() : { String image = null; String ret = ""; Token dToken = null; } { ( ( image = identifierWithDash() { ret += image; } | dToken = { ret += dToken.image; } | image = quotedString() { ret += image; } | ( "(" | ")" | | ) { ret += token.image; } )+ ) { return ret; } } /** * Consumes a field-weight statement of a rank profile, giving a field name and an integer weight. * * @param profile the rank profile to modify */ void fieldWeight(ParsedRankProfile profile) : { Integer num; String name; } { name = identifier() num = integer() { profile.addFieldRankWeight(name, num); } } /** * Consumes a rank-type statement of a rank profile, giving a field name and the name of a rank type. * * @param profile the rank profile to modify */ void fieldRankType(ParsedRankProfile profile) : { String name; String type; } { name = identifier() type = identifier() { profile.addFieldRankType(name, type); } } /** * Consumes a rank filter statement of a rank profile. The named field is always registered with the value * true. * * @param profile the rank profile to modify */ void fieldRankFilter(ParsedRankProfile profile) : { String name; } { name = identifier() { profile.addFieldRankFilter(name, true); } } /** * Consumes a set of constants available in ranking expressions in the enclosing profile. * * @param schema the schema, passed on to each constant * @param profile the rank profile to add the constants to */ void constants(ParsedSchema schema, ParsedRankProfile profile) : { String name; } { ()* ( constant(schema, profile) ()* )* } /** * Consumes a single constant of a constants block. In the current form the name is followed by an optional * type and then either a tensor value or a file; a constant with a value is added from name and value, while * a constant with a file is added from name, tensor type and path. Otherwise one of the deprecated forms is * tried. * * @param schema the schema (not used by the visible alternatives of this rule) * @param profile the rank profile to add the constant to */ void constant(ParsedSchema schema, ParsedRankProfile profile) : { Reference name = null; InputType type = new InputType(TensorType.empty, false); Tensor value = null; String valuePath = null; } { ( name = constantName() ( LOOKAHEAD(4) ( ( type = valueType(name) )? 
()* ( value = tensorValue(type.tensorType()) | valuePath = fileItem()) { if (value != null) profile.add(new RankProfile.Constant(name, value)); else profile.add(new RankProfile.Constant(name, type.tensorType(), valuePath)); } ) | // Deprecated forms (TODO: Vespa > 8: Add warning): ( constantValue(profile, name) | constantTensor(profile, name) ) ) ) } /** Returns the reference "constant(name)" for both "constant(name)" and "name". */ Reference constantName() : { String name; } { ( ( "(" name = identifier() ")" ) | name = identifier() ) { return FeatureNames.asConstantFeature(name); } } // Deprecated form /** * Consumes a constant given as a single token, whose image is parsed by Tensor.from(). * * @param profile the rank profile to add the constant to * @param name the reference of the constant */ void constantValue(ParsedRankProfile profile, Reference name) : { Token value; } { ( value = | value = | value = ) { profile.add(new RankProfile.Constant(name, Tensor.from(value.image))); } } // Deprecated form /** * Consumes a constant given as a block holding a tensor value string and, optionally, a tensor type. When a * type is given the value string is parsed with that type, otherwise on its own. * * @param profile the rank profile to add the constant to * @param name the reference of the constant */ void constantTensor(ParsedRankProfile profile, Reference name) : { String tensorString = ""; TensorType type = null; } { ()* (( tensorString = tensorValuePrefixedByValue() | type = tensorTypeWithPrefix(constantTensorErrorMessage(profile.name(), name)) ) ()* )* { profile.add(new RankProfile.Constant(name, type != null ? Tensor.from(type, tensorString) : Tensor.from(tensorString))); } } /** * Builds the prefix used in error messages about the type of a constant tensor. This rule consumes no input. * * @param rankProfileName the name of the rank profile holding the constant * @param name the reference of the constant * @return the error message prefix */ String constantTensorErrorMessage(String rankProfileName, Reference name) : {} { { return "For constant tensor '" + name + "' in rank profile '" + rankProfileName + "'"; } } /** * Parses a tensor written in a tensor literal form, * https://docs.vespa.ai/en/reference/tensor.html#tensor-literal-form * The value is either a mapped or mixed tensor, an indexed tensor, or a single number. A single number is only * accepted when the given type has rank 0. * * @param type the type of the tensor to build * @return the tensor built from the parsed value * @throws IllegalArgumentException if a single number is given for a type with rank above 0 */ Tensor tensorValue(TensorType type) : { Tensor.Builder builder = Tensor.Builder.of(type); Number doubleValue = null; } { ( mappedTensorValue(builder) | indexedTensorValues(builder) | doubleValue = number() ) { if (doubleValue != null) { if (type.rank() > 0) throw new IllegalArgumentException("A tensor of type " + type + " cannot be a number"); builder.cell(doubleValue.doubleValue()); } return builder.build(); } } /** A mapped or mixed tensor value, written as a sequence of blocks enclosed in curly braces. 
*/ void mappedTensorValue(Tensor.Builder builder) : {} { "{" ( mappedTensorBlock(builder) )* ( ()* mappedTensorBlock(builder) )* "}" } /** * A single block of a mapped or mixed tensor value: a tensor address followed by either one cell value or * an array of values for that address. * * @param builder the builder of the tensor being parsed */ void mappedTensorBlock(Tensor.Builder builder) : { TensorAddress mappedAddress; } { mappedAddress = tensorAddress(builder.type()) ()* ( mappedTensorCellValue(mappedAddress, builder) | indexedTensorBlockValues(mappedAddress, builder) ) } /** * The array of values belonging to one mapped address of a mixed tensor. The values are collected into a * double array and added as a block at the address. The builder is cast to MixedTensor.BoundBuilder, so this * rule fails with a ClassCastException for any other kind of builder. * * @param mappedAddress the address the values belong to * @param builder the builder of the tensor being parsed */ void indexedTensorBlockValues(TensorAddress mappedAddress, Tensor.Builder builder) : { List values = new ArrayList(); } { arrayTensorValues(values) { MixedTensor.BoundBuilder boundBuilder = (MixedTensor.BoundBuilder)builder; double[] arrayValues = new double[values.size()]; for (int i = 0; i < values.size(); i++ ) { arrayValues[i] = values.get(i); } boundBuilder.block(mappedAddress, arrayValues); } } /** * The values of an indexed tensor. The values are collected into a double array which fills the builder. The * builder is cast to IndexedTensor.BoundBuilder, so this rule fails with a ClassCastException for any other * kind of builder. * * @param builder the builder of the tensor being parsed */ void indexedTensorValues(Tensor.Builder builder) : { List values = new ArrayList(); } { arrayTensorValues(values) { IndexedTensor.BoundBuilder boundBuilder = (IndexedTensor.BoundBuilder)builder; double[] arrayValues = new double[values.size()]; for (int i = 0; i < values.size(); i++ ) { arrayValues[i] = values.get(i); } boundBuilder.fill(arrayValues); } } /** Tensor array values. Using sub-bracketing for multiple dimensions is optional and therefore ignored here: * the values of nested arrays are appended to the same flat list, in the order they are written. 
*/ void arrayTensorValues(List values) : {} { "[" ( ( indexedTensorValue(values) | arrayTensorValues(values)) )* ( ()* ( indexedTensorValue(values) | arrayTensorValues(values)) )* "]" } /** * A single number of a tensor array, appended to the given list as a double. * * @param values the list to append the value to */ void indexedTensorValue(List values) : { Number value; } { value = number() { values.add(value.doubleValue()); } } /** * The value of a single cell of a mapped tensor, added to the builder at the given address. * * @param address the address of the cell * @param builder the builder of the tensor being parsed */ void mappedTensorCellValue(TensorAddress address, Tensor.Builder builder) : { double value; } { value = tensorCellValue() { builder.cell(address, value); } } /** * A tensor address, written either as a single label or as dimension-label pairs enclosed in curly braces. * The address is built by a TensorAddress.PartialBuilder created from the given type. * * @param type the type of the tensor the address belongs to * @return the parsed address */ TensorAddress tensorAddress(TensorType type) : { TensorAddress.Builder builder = new TensorAddress.PartialBuilder(type); String label; } { ( label = tensorAddressLabel() { builder.add(label); } | ( "{" ( tensorAddressElement(builder) )* ( tensorAddressElement(builder) )* "}" ) ) { return builder.build(); } } /** * A single dimension-label pair of a tensor address. * * @param builder the address builder to add the pair to */ void tensorAddressElement(TensorAddress.Builder builder) : { String dimension; String label; } { dimension = identifier() ()* label = tensorAddressLabel() { builder.add(dimension, label); } } /** * A tensor address label, written as an identifier or as a quoted string. * * @return the label, without quotes */ String tensorAddressLabel() : { String label; } { ( label = identifier() | label = quotedString() ) { return label; } } /** * A tensor cell value. * * @return the parsed number as a double */ double tensorCellValue() : { Number value; } { value = number() { return value.doubleValue(); } } /** * Undocumented syntax for supplying a tensor constant value by a string prefixed by "value". * * @return everything after the first ':' of the token image, or everything between its first '{' and last * '}', without trimming */ String tensorValuePrefixedByValue() : { String tensor; } { ( { tensor = token.image.substring(token.image.indexOf(":") + 1); } | { tensor = token.image.substring(token.image.indexOf("{") + 1, token.image.lastIndexOf("}")); } ) { return tensor; } } /** * Consumes a tensor type which is preceded by additional tokens, delegating the type itself to tensorType(). * * @param errorMessage the prefix of the message of the exception thrown if the type spec is illegal * @return the parsed tensor type */ TensorType tensorTypeWithPrefix(String errorMessage) : { TensorType type; } { type = tensorType(errorMessage) { return type; } } /** * Consumes a tensor type token and parses its image by TensorType.fromSpec(). * * @param errorMessage the prefix of the message of the exception thrown if the type spec is illegal * @return the parsed tensor type * @throws IllegalArgumentException if the spec is illegal; the message is the given prefix followed by * ": Illegal tensor type spec: " and the message of the underlying exception */ TensorType tensorType(String errorMessage) : { String tensorTypeString; } { { tensorTypeString = token.image; } { TensorType tensorType; try { tensorType = TensorType.fromSpec(tensorTypeString); } catch (IllegalArgumentException e) { throw new IllegalArgumentException(errorMessage + ": Illegal tensor type spec: " + e.getMessage()); } return 
tensorType; } } /** * Consumes an import field statement of a schema. The field reference spec must contain exactly one '.': * the part before it names the document reference field and the part after it names the foreign field. * * @param schema the schema object to add content to * @throws IllegalArgumentException if the field reference spec does not contain exactly one '.' */ void importField(ParsedSchema schema) : { String fieldRefSpec; String aliasFieldName; } { fieldRefSpec = identifier() aliasFieldName = identifier() lbrace() { long nDots = Utils.count(fieldRefSpec, '.'); if (nDots != 1) { throw new IllegalArgumentException("Illegal field reference spec '" + fieldRefSpec + "': Does not include a single '.'"); } int indexOfDot = fieldRefSpec.indexOf('.'); String documentReferenceFieldName = fieldRefSpec.substring(0, indexOfDot); String foreignFieldName = fieldRefSpec.substring(indexOfDot + 1); schema.addImportedField(aliasFieldName, documentReferenceFieldName, foreignFieldName); } } /** * This rule consumes an expression token and returns the expression held by its image: either everything * after the first ':' or everything between the first '{' and the last '}'. * * @return The expression, with leading and trailing whitespace removed. */ String expression() : { String exp; } { ( { exp = token.image.substring(token.image.indexOf(":") + 1); } | { exp = token.image.substring(token.image.indexOf("{") + 1, token.image.lastIndexOf("}")); } ) { return exp.trim(); } } /** * Consumes an identifier which may contain dashes: either anything accepted by identifier(), or one of the * tokens listed in this rule. * * @return the identifier string */ String identifierWithDash() : { String identifier; } { ( identifier = identifier() { return identifier; } ) | ( | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | ) { return token.image; } } /** * Consumes an identifier. This must be kept in sync with all word tokens that should be parseable as * identifiers. * * @return the identifier string */ String identifier() : { } { ( | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | ) { return token.image; } } /** * Consumes a string token and returns the token image. * * @return The consumed token image. */ String string() : { } { { return token.image; } } /** * Consumes a quoted string token and returns the token image minus the quotes. This does not perform * unescaping of the content, it simply removes the first and last character of the image. However, the token itself can * contain anything but a double quote. * * @return the unquoted token image */ String quotedString() : { } { ( | ) { return token.image.substring(1, token.image.length() - 1); } } /** * A boolean value. The first group of alternatives yields true and the second group yields false. * * @return the parsed value */ Boolean bool() : { } { ( ( | ) { return true; } | ( | ) { return false; } ) } /** * Consumes an integer token and returns its numeric value, as parsed by Integer.parseInt(). * * @return the parsed value */ int integer() : { } { { return Integer.parseInt(token.image); } } /** * Consumes a long or integer token and returns its numeric value. For the second alternative the last * character of the image is dropped before parsing. NOTE(review): presumably a long suffix - confirm against * the token definition. * * @return the parsed value */ long longValue() : { } { ( { return Long.parseLong(token.image); } | { return Long.parseLong(token.image.substring(0, token.image.length()-1)); } ) } /** * Consumes a floating-point token and returns its numeric value, as parsed by Double.valueOf(). * * @return the parsed value */ double floatValue() : { } { { return Double.valueOf(token.image); } } /** * Consumes a number, which is either a floating-point value or a long value. * * @return the parsed value */ Number number() : { Number num; } { ( num = floatValue() | num = longValue() ) { return num; } } /** Consumes an opening brace with leading and trailing newline tokens. */ void lbrace() : { } { ()* ()* }