// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. // -------------------------------------------------------------------------------- // // JavaCC options. When this file is changed, run "mvn generate-sources" to rebuild // the parser classes. // // -------------------------------------------------------------------------------- options { UNICODE_INPUT = true; CACHE_TOKENS = false; STATIC = false; DEBUG_PARSER = false; ERROR_REPORTING = true; FORCE_LA_CHECK = true; USER_CHAR_STREAM = true; } // -------------------------------------------------------------------------------- // // Parser body. // // -------------------------------------------------------------------------------- PARSER_BEGIN(SDParser) package com.yahoo.searchdefinition.parser; import com.yahoo.document.*; import com.yahoo.documentmodel.*; import com.yahoo.compress.Compressor; import com.yahoo.compress.CompressionType; import com.yahoo.searchdefinition.document.*; import com.yahoo.searchdefinition.document.annotation.SDAnnotationType; import com.yahoo.searchdefinition.document.annotation.TemporaryAnnotationReferenceDataType; import com.yahoo.searchdefinition.RankingConstant; import com.yahoo.searchdefinition.OnnxModel; import com.yahoo.searchdefinition.Index; import com.yahoo.searchdefinition.RankProfile; import com.yahoo.searchdefinition.DocumentsOnlyRankProfile; import com.yahoo.searchdefinition.DefaultRankProfile; import com.yahoo.searchdefinition.RankProfileRegistry; import com.yahoo.searchdefinition.RankProfile.MatchPhaseSettings; import com.yahoo.searchdefinition.RankProfile.DiversitySettings; import com.yahoo.searchdefinition.Search; import com.yahoo.searchdefinition.DocumentOnlySearch; import com.yahoo.searchdefinition.UnrankedRankProfile; import com.yahoo.searchdefinition.fieldoperation.*; import com.yahoo.searchlib.rankingexpression.FeatureList; import com.yahoo.searchlib.rankingexpression.evaluation.Value; import com.yahoo.searchlib.rankingexpression.evaluation.TensorValue; import com.yahoo.tensor.Tensor; import com.yahoo.tensor.TensorType; import com.yahoo.vespa.documentmodel.DocumentSummary; import com.yahoo.vespa.documentmodel.SummaryField; import com.yahoo.vespa.documentmodel.SummaryTransform; import com.yahoo.config.model.test.MockApplicationPackage; import com.yahoo.config.application.api.ApplicationPackage; import com.yahoo.config.application.api.DeployLogger; import com.yahoo.config.application.api.FileRegistry; import com.yahoo.config.model.api.ModelContext; import com.yahoo.language.Linguistics; import com.yahoo.language.process.Embedder; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.search.query.ranking.Diversity; import java.util.Map; import java.util.List; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.logging.Level; /** * A search definition parser * * @author bratseth */ @SuppressWarnings("deprecation") public class SDParser { private DocumentTypeManager docMan = null; private ApplicationPackage app; private FileRegistry fileRegistry; private DeployLogger deployLogger; private ModelContext.Properties properties; private RankProfileRegistry rankProfileRegistry; private boolean documentsOnly; /** * Creates a parser * * @param documentsOnly true to only parse the document aspect of a search definition (e.g skip rank profiles) */ public SDParser(SimpleCharStream stream, FileRegistry fileRegistry, DeployLogger deployLogger, ModelContext.Properties properties, ApplicationPackage applicationPackage, 
RankProfileRegistry rankProfileRegistry, boolean documentsOnly) {
        this(stream);
        this.fileRegistry = fileRegistry;
        this.deployLogger = deployLogger;
        this.properties = properties;
        this.app = applicationPackage;
        this.rankProfileRegistry = rankProfileRegistry;
        this.documentsOnly = documentsOnly;
    }

    /**
     * Consumes an indexing language script from the current input stream, using the simple
     * linguistics implementation (intended for testing).
     *
     * @param multiline Whether or not to allow multi-line expressions.
     */
    @SuppressWarnings("deprecation")
    private IndexingOperation newIndexingOperation(boolean multiline) throws ParseException {
        return newIndexingOperation(multiline, new SimpleLinguistics(), Embedder.throwsOnUse);
    }

    /**
     * Consumes an indexing language script from the current input stream.
     *
     * @param multiline Whether or not to allow multi-line expressions.
     * @param linguistics What to use for tokenizing.
     * @param embedder What to use for embedding.
     */
    private IndexingOperation newIndexingOperation(boolean multiline, Linguistics linguistics, Embedder embedder) throws ParseException {
        SimpleCharStream input = (SimpleCharStream)token_source.input_stream;
        if (token.next != null) {
            input.backup(token.next.image.length());
        }
        try {
            return IndexingOperation.fromStream(input, multiline, linguistics, embedder);
        } finally {
            token.next = null;
            jj_ntk = -1;
        }
    }

    /**
     * Parses the given token image as a ranking expression feature list.
     *
     * @param image The token image to parse.
     * @return The consumed feature list.
     * @throws ParseException Thrown if the image could not be parsed.
     */
    private FeatureList getFeatureList(String image) throws ParseException {
        try {
            return new FeatureList(image);
        } catch (com.yahoo.searchlib.rankingexpression.parser.ParseException e) {
            throw (ParseException) new ParseException("Could not parse feature list '" + image +
                                                      "' at line " + token_source.input_stream.getBeginLine() +
                                                      ", column " + token_source.input_stream.getBeginColumn() + ".").initCause(e);
        }
    }

    /**
     * Sets the compression threshold in the given compression config.
     *
     * @param cfg The compression config to modify.
     * @param val The compression threshold to set.
     */
    private void setCompressionThreshold(CompressionConfig cfg, int val) {
        cfg.threshold = val;
    }

    /**
     * Sets the compression level in the given compression config.
     *
     * @param cfg The compression config to modify.
     * @param val The compression level to set.
     */
    private void setCompressionLevel(CompressionConfig cfg, int val) {
        cfg.compressionLevel = val;
    }
}

PARSER_END(SDParser)


// --------------------------------------------------------------------------------
//
// Token declarations.
//
// --------------------------------------------------------------------------------

// Declare white space characters. These do not include newline because it has
// special meaning in several of the production rules.
SKIP :
{
    " " | "\t" | "\r" | "\f"
}

// Declare all tokens to be recognized. When a word token is added it MUST be
// added to the identifier() production rule.
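
// For orientation: a minimal, illustrative sketch of the schema syntax that the tokens
// and production rules below are meant to accept (field and type names are made up, and
// the example has not been verified against this grammar):
//
//     schema music {
//         document music {
//             field artist type string {
//                 indexing: summary | index
//             }
//         }
//     }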
TOKEN : { < NL: "\n" > | < ANNOTATION: "annotation" > | < ANNOTATIONREFERENCE: "annotationreference" > | < SCHEMA: "schema" > | < SEARCH: "search" > | < DIVERSITY: "diversity" > | < MIN_GROUPS: "min-groups" > | < CUTOFF_FACTOR: "cutoff-factor" > | < CUTOFF_STRATEGY: "cutoff-strategy" > | < LOOSE: "loose" > | < STRICT: "strict" > | < DOCUMENT: "document" > | < EXECUTE: "execute" > | < OPERATION: "operation" > | < ON_MATCH: "on-match" > | < ON_RERANK: "on-rerank" > | < ON_SUMMARY: "on-summary" > | < STRUCT: "struct" > | < INHERITS: "inherits" > | < FIELD: "field" > | < FIELDS: "fields" > | < FIELDSET: "fieldset" > | < STRUCTFIELD: "struct-field" > | < IMPORT: "import" > | < AS: "as" > | < INDEXING: "indexing" > | < SUMMARYTO: "summary-to" > | < DOCUMENTSUMMARY: "document-summary" > | < RANKTYPE: "rank-type" > | < WEIGHT: "weight" > | < TYPE: "type" > | < INDEX: "index" > | < MTOKEN: "token" > | < TEXT: "text" > | < WORD: "word" > | < GRAM: "gram" > | < GRAMSIZE: "gram-size" > | < MAXLENGTH: "max-length" > | < PREFIX: "prefix" > | < SUBSTRING: "substring" > | < SUFFIX: "suffix" > | < CONSTANT: "constant"> | < ONNXMODEL: "onnx-model"> | < MODEL: "model" > | < RANKPROFILE: "rank-profile" > | < RANKDEGRADATIONFREQ: "rank-degradation-frequency" > | < RANKDEGRADATION: "rank-degradation" > | < RAW_AS_BASE64_IN_SUMMARY: "raw-as-base64-in-summary" > | < RPBINSIZE: "doc-frequency" > | < RPBINLOW: "min-fullrank-docs"> | < RPPOSBINSIZE: "occurrences-per-doc" > | < SUMMARY: "summary" > | < FULL: "full" > | < STATIC: "static" > | < DYNAMIC: "dynamic" > | < MATCHEDELEMENTSONLY: "matched-elements-only" > | < SSCONTEXTUAL: "contextual" > | < SSOVERRIDE: "override" > | < SSTITLE: "title" > | < SSURL: "url" > | < PROPERTIES: "properties" > | < ATTRIBUTE: "attribute" > | < SORTING: "sorting" > | < DICTIONARY: "dictionary" > | < ASCENDING: "ascending" > | < DESCENDING: "descending" > | < UCA: "uca" > | < RAW: "raw" > | < LOWERCASE: "lowercase" > | < FUNCTION: "function" > | < LOCALE: "locale" > | < STRENGTH: "strength" > | < PRIMARY: "primary" > | < SECONDARY: "secondary" > | < TERTIARY: "tertiary" > | < QUATERNARY: "quaternary" > | < IDENTICAL: "identical" > | < STEMMING: "stemming" > | < NORMALIZING: "normalizing" > | < HASH: "hash" > | < BTREE: "btree" > | < CASED: "cased" > | < UNCASED: "uncased" > | < BOLDING: "bolding" > | < BODY: "body" > | < HEADER: "header" > | < NONE: "none" > | < ON: "on" > | < OFF: "off" > | < TRUE: "true" > | < FALSE: "false" > | < SYMMETRIC: "symmetric" > | < QUERYCOMMAND: "query-command" > | < ALIAS: "alias" > | < MATCH: "match" > | < RANK: "rank" > | < LITERAL: "literal" > | < EXACT: "exact" > | < FILTER: "filter" > | < NORMAL: "normal" > | < EXACTTERMINATOR: "exact-terminator" > | < INDEXINGREWRITE: "indexing-rewrite" > | < IGNOREDEFAULTRANKFEATURES: "ignore-default-rank-features" > | < ID: "id" > | < SOURCE: "source" > | < TO: "to" > | < DIRECT: "direct" > | < FROMDISK: "from-disk" > | < OMITSUMMARYFEATURES: "omit-summary-features" > | < ALWAYS: "always" > | < ONDEMAND: "on-demand" > | < NEVER: "never" > | < ENABLEBITVECTORS: "enable-bit-vectors" > | < ENABLEONLYBITVECTOR: "enable-only-bit-vector" > | < FASTACCESS: "fast-access" > | < MUTABLE: "mutable" > | < PAGED: "paged" > | < FASTSEARCH: "fast-search" > | < HUGE: "huge" > | < TENSOR_TYPE: "tensor" ("<" (~["<",">"])+ ">")? "(" (~["(",")"])+ ")" > | < TENSOR_VALUE_SL: "value" (" ")* ":" (" ")* ("{") ("\n")? > | < TENSOR_VALUE_ML: "value" ()? "{" (["\n"," "])* ("{") (["\n"," "])* "}" ("\n")? 
> | < COMPRESSION: "compression" > | < COMPRESSIONLEVEL: "level" > | < COMPRESSIONTHRESHOLD: "threshold" > | < LZ4: "lz4" > | < USEDOCUMENT: "use-document" > | < LBRACE: "{" > | < RBRACE: "}" > | < COLON: ":" > | < DOT: "." > | < COMMA: "," > | < ARRAY: "array" > | < WEIGHTEDSET: "weightedset" > | < MAP: "map" > | < REFERENCE: "reference" > | < QUESTIONMARK: "?" > | < CREATEIFNONEXISTENT: "create-if-nonexistent" > | < REMOVEIFZERO: "remove-if-zero" > | < MATCHPHASE: "match-phase" > | < EVALUATION_POINT: "evaluation-point" > | < PRE_POST_FILTER_TIPPING_POINT: "pre-post-filter-tipping-point" > | < ORDER: "order" > | < MAXFILTERCOVERAGE: "max-filter-coverage" > | < MAXHITS: "max-hits" > | < FIRSTPHASE: "first-phase" > | < SECONDPHASE: "second-phase" > | < MACRO: "macro" > | < INLINE: "inline" > | < ARITY: "arity" > | < LOWERBOUND: "lower-bound" > | < UPPERBOUND: "upper-bound" > | < DENSEPOSTINGLISTTHRESHOLD: "dense-posting-list-threshold" > | < ENABLE_BM25: "enable-bm25" > | < HNSW: "hnsw" > | < MAXLINKSPERNODE: "max-links-per-node" > | < DISTANCEMETRIC: "distance-metric" > | < NEIGHBORSTOEXPLOREATINSERT: "neighbors-to-explore-at-insert" > | < MULTITHREADEDINDEXING: "multi-threaded-indexing" > | < SUMMARYFEATURES_SL: "summary-features" (" ")* ":" (~["}","\n"])* ("\n")? > | < SUMMARYFEATURES_ML: "summary-features" ()? "{" (~["}"])* "}" > | < SUMMARYFEATURES_ML_INHERITS: "summary-features inherits " () ()? "{" (~["}"])* "}" > | < RANKFEATURES_SL: "rank-features" (" ")* ":" (~["}","\n"])* ("\n")? > | < RANKFEATURES_ML: "rank-features" ()? "{" (~["}"])* "}" > | < EXPRESSION_SL: "expression" (" ")* ":" (("{")|)* ("\n")? > | < EXPRESSION_ML: "expression" ()? "{" (("{")|)* "}" > | < #BRACE_SL_LEVEL_1: (("{")|)* "}" > | < #BRACE_SL_LEVEL_2: (("{")|)* "}" > | < #BRACE_SL_LEVEL_3: "}" > | < #BRACE_SL_CONTENT: (~["{","}","\n"])* > | < #BRACE_ML_LEVEL_1: (("{")|)* "}" > | < #BRACE_ML_LEVEL_2: (("{")|)* "}" > | < #BRACE_ML_LEVEL_3: "}" > | < #BRACE_ML_CONTENT: (~["{","}"])* > | < #SEARCHLIB_SKIP: ([" ","\f","\n","\r","\t"])+ > | < RANKPROPERTIES: "rank-properties" > | < RERANKCOUNT: "rerank-count" > | < NUMTHREADSPERSEARCH: "num-threads-per-search" > | < MINHITSPERTHREAD: "min-hits-per-thread" > | < NUMSEARCHPARTITIONS: "num-search-partitions" > | < TERMWISELIMIT: "termwise-limit" > | < KEEPRANKCOUNT: "keep-rank-count" > | < RANKSCOREDROPLIMIT: "rank-score-drop-limit" > | < CONSTANTS: "constants" > | < FILE: "file" > | < URI: "uri" > | < IDENTIFIER: ["a"-"z","A"-"Z", "_"] (["a"-"z","A"-"Z","0"-"9","_"])* > | < IDENTIFIER_WITH_DASH: ["a"-"z","A"-"Z", "_"] (["a"-"z","A"-"Z","0"-"9","_","-"])* > | < QUOTEDSTRING: "\"" ( ~["\""] )* "\"" > | < CONTEXT: ["a"-"z","A"-"Z"] (["a"-"z", "A"-"Z", "0"-"9"])* > | < DOUBLE: ("-")? (["0"-"9"])+ "." (["0"-"9"])+ > | < INTEGER: ("-")? (["0"-"9"])+ > | < LONG: ("-")? (["0"-"9"])+"L" > | < STRING: (["a"-"z","A"-"Z","_","0"-"9","."])+ > | < FILE_PATH: ["a"-"z","A"-"Z", "_"] (["a"-"z","A"-"Z","0"-"9","_","-", "/", "."])+ > | < HTTP: ["h","H"] ["t","T"] ["t","T"] ["p","P"] (["s","S"])? > | < URI_PATH: ("//")? (["a"-"z","A"-"Z","0"-"9","_","-", "/", ".",":"])+ > | < LESSTHAN: "<" > | < GREATERTHAN: ">" > | < VARIABLE: "$" > | < ONNX_INPUT_SL: "input" (" ")* (|) (" ")* ":" (" ")* (~["\n"])* ("\n")? > | < ONNX_OUTPUT_SL: "output" (" ")* (|) (" ")* ":" (" ")* (~["\n"])* ("\n")? > } // Declare a special skip token for comments. SPECIAL_TOKEN : { } // -------------------------------------------------------------------------------- // // Production rules. 
// // -------------------------------------------------------------------------------- /** * The rule consumes any search definition and returns the corresponding object. This is the only production that should * ever consume leading newlines. * * @param dir The directory containing the file being parsed. * @return The search definition object. */ Search search(DocumentTypeManager docMan, String dir) : { this.docMan = docMan; Search search; } { ()* (search = rootSchema(dir) | search = rootDocument(dir)) { return search; } } /** * This rule consumes a proper schema block. This and rootDocument() are the only rules that should ever consume * trailing newline tokens. * * @param dir the directory containing the file being parsed. * @return the schema definition object. */ Search rootSchema(String dir) : { String name; Search search; } { ( ( | ) name = identifier() { search = new Search(name, app, fileRegistry,deployLogger, properties); rankProfileRegistry.add(new DefaultRankProfile(search, rankProfileRegistry, search.rankingConstants())); rankProfileRegistry.add(new UnrankedRankProfile(search, rankProfileRegistry, search.rankingConstants()));} lbrace() (rootSchemaItem(search) ()*)* ()* ) { return search; } } /** * Consumes an element of a schema block. This and rootSearch() are the only rules that should ever consume * trailing newline tokens. * * @param search The search object to modify. * @return Null. */ Object rootSchemaItem(Search search) : { } { ( document(search) | rawAsBase64(search) | documentSummary(search) | field(null, search) | index(search, null) | rankingConstant(search) | rankProfile(search) | searchStemming(search) | useDocument(search) | structOutside(search) | annotationOutside(search) | fieldSet(search) | importField(search) | onnxModel(search) ) { return null; } } /** * Consumes a schema definition that contains only documents to be used for inheritance, etc. * * @param dir the directory containing the file being parsed. * @return the schema definition object. */ Search rootDocument(String dir) : { Search search = new DocumentOnlySearch(app, fileRegistry, deployLogger, properties); } { ( (rootDocumentItem(search) ()*)* ) { return search; } } /** * Consumes a single item from within a root document node. * * @param search The search object to modify. * @return Null. */ Object rootDocumentItem(Search search) : { } { ( namedDocument(search) ) { return null; } } /** * Consumes a use-document statement. This currently does nothing. * * @param search the search object to modify. */ void useDocument(Search search) : { } { identifier() } /** * Consumes a document element. The name defaults to the search's name, but may be set. * * @param search the search object to add content to. */ void document(Search search) : { String name=search.getName(); SDDocumentType document; } { ( (name = identifier())? ()* { document = new SDDocumentType(name, search); } [ inheritsDocument(document) ()* ] ()* (documentBody(document, search) ()*)* ) { search.addDocument(document); } } /** * Consumes a document element, explicitly named * * @param search the search object to add content to. */ void namedDocument(Search search) : { String name; SDDocumentType document; } { ( name = identifier() ()* { document = new SDDocumentType(name, search); } [ inheritsDocument(document) ()* ] ()* (documentBody(document, search) ()*)* ) { search.addDocument(document); } } /** * Consumes a document body block * * @param document The document type to modify. * @param search The search object to add content to. 
 * @return Null.
 */
Object documentBody(SDDocumentType document, Search search) :
{
}
{
    ( annotation(search, document) |
      compression(document, null) |
      headercfg(document) |
      bodycfg(document) |
      structInside(document, search) |
      field(document, search) )
    { return null; }
}

void rawAsBase64(Search search) : {}
{
    <RAW_AS_BASE64_IN_SUMMARY> { search.enableRawAsBase64(); }
}

/**
 * Consumes a document header block.
 *
 * @param document The document type to modify.
 */
void headercfg(SDDocumentType document) :
{
}
{
lbrace() [compression(document, "header") ()*] } /** * Consumes a document body block. * * @param document The document type to modify. */ void bodycfg(SDDocumentType document) : { } { lbrace() [compression(document, "body") ()*] } /** * Consumes a compression block. This can be set in both document header and -body block. * * @param document The document type to modify. * @param name The name of the document block to modify. */ void compression(SDDocumentType document, String name) : { deployLogger.logApplicationPackage(Level.WARNING, "'compression' for a document is deprecated and ignored"); CompressionConfig cfg = new CompressionConfig(CompressionType.LZ4); } { lbrace() (cfg = compressionItem(cfg) ()*)* { if (name == null || name.equals("header")) { document.getDocumentType().contentStruct().setCompressionConfig(cfg); } } } /** * Consumes the body of a compression block. * * @param cfg The compression config to modify. */ CompressionConfig compressionItem(CompressionConfig cfg) : { int val = -1; } { ( ( { cfg = new CompressionConfig(CompressionType.LZ4, cfg.compressionLevel, cfg.threshold); } ) | ( val = integer()) { setCompressionThreshold(cfg, val); } | ( val = integer()) { setCompressionLevel(cfg, val); } ) { return cfg; } } /** * Consumes a document inheritance statement. * * @param document The document type to modify. */ void inheritsDocument(SDDocumentType document) : { String name; } { name = identifier() { document.inherit(new DataTypeName(name)); } ( name = identifier() { document.inherit(new DataTypeName(name)); } )* } /** * Consumes a field block from within a document element. * * @param document The document type to modify. * @param search The search object to add content to. */ void field(SDDocumentType document, Search search) : { String name; SDField field; DataType type; } { name = identifier() type = dataType() { if (name != null && com.yahoo.searchdefinition.Search.isReservedName(name.toLowerCase())) { throw new IllegalArgumentException("Reserved name '" + name + "' can not be used as a field name."); } field = new TemporarySDField(name, type, document); } lbrace() (fieldBody(field, search, document) ()*)* { if (document != null) { document.addField(field); } else { search.addExtraField(field); } } } void fieldSet(Search search) : { String setName; String field; String queryCommand; List queryCommands = new ArrayList(); FieldOperationContainer matchSetting; List matchSettings = new ArrayList(); } {
setName = identifier() lbrace() (( ( field = identifier() { search.fieldSets().addUserFieldSetItem(setName, field); } ( field = identifier() { search.fieldSets().addUserFieldSetItem(setName, field); } )* ) | ( (queryCommand = identifierWithDash() | queryCommand = quotedString())) { queryCommands.add(queryCommand); } | ( matchSetting = match(new SDField(setName, DataType.STRING)) ) { matchSettings.add(matchSetting); } )()*)+ { // Apply settings after parsing since all user field items must be set first for (Object command : queryCommands) search.fieldSets().userFieldSets().get(setName).queryCommands().add((String)command); for (Object setting : matchSettings) { ((SDField)setting).applyOperations(); search.fieldSets().userFieldSets().get(setName).setMatching(((SDField)setting).getMatching()); } } } /** * This rule consumes a annotation block from within either a document element or a search element. * @param search the search object to add content to. */ void annotationOutside(Search search) : { String name; SDAnnotationType type; } { name = identifier() { type = new SDAnnotationType(name.trim()); } [ inheritsAnnotation(type) ()* ] lbrace() (type = annotationBody(search, type)) { if (search.getDocument()==null) throw new IllegalArgumentException("Can't add annotation '"+name+"' to a document type, define a document type first or declare the annotation inside of one."); search.addAnnotation(type); } } /** * This rule consumes a annotation block from within either a document element. * * @param document The document object to add content to. */ void annotation(Search search, SDDocumentType document) : { String name; SDAnnotationType type; } { name = identifier() { type = new SDAnnotationType(name.trim()); } [ inheritsAnnotation(type) ()* ] lbrace() (type = annotationBody(search, type)) { document.addAnnotation(type); } } /** * This rule consumes a single element of an annotation body block. * * @param search The search object to add content to. * @param type The type being built. * @return a modified or new AnnotationType instance */ SDAnnotationType annotationBody(Search search, SDAnnotationType type) : { SDDocumentType struct = new SDDocumentType("annotation." + type.getName(), search); } { (structFieldDefinition(struct) ()*)* { if (struct.getFieldCount() > 0) { // Must account for the temporary TemporarySDField. type = new SDAnnotationType(type.getName(), struct, type.getInherits()); struct.setStruct(null); } return type; } } void inheritsAnnotation(SDAnnotationType annotation) : { String name; } { name = identifier() { annotation.inherit(name); } } /** * This rule consumes a struct block from within a document element. * * @param search The search object to add content to. */ void structInside(SDDocumentType document, Search search) : { SDDocumentType struct; } { ( struct = structDefinition(search, document) ) { document.addType(struct); } } /** * This rule consumes a struct block from within a document element. * * @param search The search object to add content to. */ void structOutside(Search search) : { SDDocumentType struct; } { ( struct = structDefinition(search, search.getDocument()) ) { search.addType(struct); } } /** * This rule consumes a struct block from within a document element. * * @param search The search object to add content to. 
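 *
 * Illustrative example of the struct syntax this rule is meant to accept (a sketch
 * using assumed field names, not verified against this grammar):
 *
 *     struct position {
 *         field lat type double { }
 *         field lon type double { }
 *     }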
*/ SDDocumentType structDefinition(Search search, SDDocumentType repo) : { String name; SDDocumentType struct; } { name = identifier() { struct = new SDDocumentType(name, search); } lbrace() (structFieldDefinition(struct) ()*)* { try { docMan.getDataType(name); throw new ParseException("Reserved name '" + name + "' can not be used to declare a struct."); } catch (IllegalArgumentException e) { // empty } if (repo==null) throw new IllegalArgumentException("Can't add struct '"+name+"' to a document type, define a document type first or declare the struct inside of one."); SDDocumentType sdtype = repo.getOwnedType(struct.getDocumentName()); DataType stype = sdtype != null ? sdtype.getStruct() : TemporaryStructuredDataType.create(struct.getName()); struct.setStruct(stype); return struct; } } /** * This rule consumes a data type block from within a field element. * * @return The consumed data type. */ DataType dataType() : { String typeName = null; boolean isArrayOldStyle = false; DataType mapType = null; DataType arrayType = null; DataType wsetType = null; TensorType tensorType; TemporaryStructuredDataType referenceType; } { ( LOOKAHEAD( ) ( arrayType = dataType() { return DataType.getArray(arrayType); } ) | LOOKAHEAD( ) ( wsetType = dataType() { return DataType.getWeightedSet(wsetType); } ) | LOOKAHEAD( ) ( mapType = mapDataType() { return mapType; } ) | LOOKAHEAD( ) ( mapType = annotationRefDataType() { return mapType; } ) | LOOKAHEAD() ( tensorType = tensorType("Field type") { return DataType.getTensor(tensorType); } ) | LOOKAHEAD() ( referenceType = referenceType() { return ReferenceDataType.createWithInferredId(referenceType); } ) | ( typeName = identifier() ["[]" { isArrayOldStyle = true; }] ) ) { DataType type = VespaDocumentType.INSTANCE.getDataType(typeName); if (type == null) { // we are basically creating TemporaryStructDataType instances for ANYTHING here!! // we must do this and clean them up later. type = TemporaryStructuredDataType.create(typeName); } if (isArrayOldStyle) { deployLogger.logApplicationPackage(Level.WARNING, "Data type syntax '" + typeName + "[]' is deprecated, use 'array<" + typeName + ">' instead."); type = DataType.getArray(type); } if ("tag".equalsIgnoreCase(typeName) && type instanceof WeightedSetDataType) ((WeightedSetDataType)type).setTag(true); return type; } } TemporaryStructuredDataType referenceType() : { String documentName; } { ( documentName = identifier() ) { return TemporaryStructuredDataType.create(documentName); } } DataType annotationRefDataType() : { DataType dataType; String targetName; } { ( targetName = identifier() ) { return new TemporaryAnnotationReferenceDataType(targetName); } } DataType mapDataType() : { DataType keyType; DataType valType; } { ( keyType = dataType() valType = dataType() ) { return DataType.getMap(keyType, valType); } } /* Note: not currently used, remove when decided that map type will not support polymorphism */ DataType wildCardType() : { } { () { return DataType.NONE; } } /** * This rule consumes a field block of a struct body. * * @param struct The struct to modify. */ void structFieldDefinition(SDDocumentType struct) : { String name; SDField field; DataType type; } { name = identifier() type = dataType() { if (name != null && com.yahoo.searchdefinition.Search.isReservedName(name.toLowerCase())) { throw new IllegalArgumentException("Reserved name '" + name + "' can not be used as a field name."); } field = new TemporarySDField(name, type, struct); struct.addField(field); } lbrace() (id(field,struct) ()*)? 
(match(field) ()*)* { } } /** * This rule consumes a struct subfield from a document field body. This is not to be confused with a document * struct's fields, but rather this is a subfield of a document field of type struct. * * @param field The field to modify. * @param search The search object to add content to. * @param document The document type to modify. */ void structField(FieldOperationContainer field, Search search,SDDocumentType document) : { String name; SDField structField; } { name = identifier() { if (name != null && com.yahoo.searchdefinition.Search.isReservedName(name.toLowerCase())) { throw new IllegalArgumentException("Reserved name '" + name + "' can not be used as a field name."); } FieldOperationContainer structFieldOp = new StructFieldOperation(name); field.addOperation((StructFieldOperation) structFieldOp); } lbrace() (structFieldBody(structFieldOp, search, document) ()*)* } /** * This rule consumes a single element of a field body block. * * @param field The field being built. * @param search The search object to add content to. * @param document The owning document, or null if this is a search field. * @return Null. */ String fieldBody(SDField field, Search search, SDDocumentType document) : { } { ( alias(field) | attribute(field) | body(field) | bolding(field) | dictionary(field) | fieldStemming(field) | header(field) | id(field, document) | summaryInField(field) | index(search, field) | indexing(field) | indexingRewrite(field) | match(field) | normalizing(field) | queryCommand(field) | rank(field) | rankType(field) | sorting(field, field.getName()) | structField(field, search, document) | summaryTo(field) | weight(field) | weightedset(field) ) { return null; } } /** * This rule consumes a single element of a struct subfield body block. * Only elements that are supported in streaming search and indexed search (with complex attributes) are allowed. * * @param field The field being built. * @param search The search object to add content to. * @param document The owning document, or null if this is a search field. * @return Null. */ String structFieldBody(FieldOperationContainer field, Search search, SDDocumentType document) : { } { ( summaryInField(field) | indexing(field) | attribute(field) | match(field) | queryCommand(field) | structField(field, search, document) | summaryTo(field) ) { return null; } } /** * This rule consumes an indexing block of a field element. * * @param field The field to modify. * @return Null. */ Object indexing(FieldOperationContainer field) : { } { ( ( ( indexingOperation(field, false)) | indexingOperation(field, true) ) ) { return null; } } /** * This rule consumes an IL script block. This is expected to consume trailing newlines. * * @param field The field to modify. */ void indexingOperation(FieldOperationContainer field, boolean multiLine) : { } { { field.addOperation(newIndexingOperation(multiLine)); } } /** * This rule consumes a summary-to statement of a field element. * * @param field The field to modify. */ void summaryTo(FieldOperationContainer field) : { SummaryToOperation op = new SummaryToOperation(); String destination; String name = field.getName(); } { [name = identifier()] destination = identifier() { op.setName(name); op.addDestination(destination); } ( destination = identifier() {op.addDestination(destination); } )* { field.addOperation(op); } } /** * This rule consumes a weight statement of a field element. * * @param field The field to modify. 
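 *
 * Illustrative example inside a field block (assumed value, not verified against
 * this grammar):
 *
 *     weight: 200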
*/ void weight(FieldOperationContainer field) : { int num; } { num = integer() { WeightOperation op = new WeightOperation(); op.setWeight(num); field.addOperation(op); } } /** * This rule consumes a weighted set statement of a field element. * * @param field The field to modify. * @return Null. */ Object weightedset(FieldOperationContainer field) : { WeightedSetOperation op = new WeightedSetOperation(); } { ( ( weightedsetBody(op)) | (lbrace() (weightedsetBody(op) ()*)* ) ) { field.addOperation(op); return null; } } /** * This rule consumes one body item of a weighted set block. * * @param field The field to modify. * @return Null. */ Object weightedsetBody(WeightedSetOperation field) : { } { ( { field.setCreateIfNonExistent(true); } | { field.setRemoveIfZero(true); } ) { return null; } } /** * This rule consumes a rank-type statement of a field element. * * @param field The field to modify. */ void rankType(FieldOperationContainer field) : { String typeName; String indexName = null; } { [indexName = identifier()] typeName = identifier() { RankTypeOperation op = new RankTypeOperation(); op.setType(RankType.fromString(typeName)); op.setIndexName(indexName); field.addOperation(op); } } /** * This rule consumes an attribute statement of a field element. * * @param field The field to modify. * @return Null. */ Object attribute(FieldOperationContainer field) : { String name = field.getName(); } { [name = identifier()] { AttributeOperation op = new AttributeOperation(name); } ( ( attributeSetting(field, op, name)) | (lbrace() (attributeSetting(field, op, name) ()*)* ) ) { field.addOperation(op); return null; } } Object sorting(FieldOperationContainer field, String name) : { SortingOperation op = new SortingOperation(name); } { ( ( sortingSetting(op, name)) | (lbrace() (sortingSetting(op, name) ()*)* ) ) { field.addOperation(op); return null; } } Object sortingSetting(SortingOperation sorting, String attributeName) : { String locale; } { ( { sorting.setAscending(); } | { sorting.setDescending(); } | ( { sorting.setFunction(Sorting.Function.UCA); } | { sorting.setFunction(Sorting.Function.RAW); } | { sorting.setFunction(Sorting.Function.LOWERCASE); } ) | ( { sorting.setStrength(Sorting.Strength.PRIMARY); } | { sorting.setStrength(Sorting.Strength.SECONDARY); } | { sorting.setStrength(Sorting.Strength.TERTIARY); } | { sorting.setStrength(Sorting.Strength.QUATERNARY); } | { sorting.setStrength(Sorting.Strength.IDENTICAL); } ) | locale = identifierWithDash() { sorting.setLocale(locale); } ) { return null; } } /** * This rule consumes a single attribute setting statement of an attribute element. * * @param field The field to modify. * @param attributeName The name of the attribute to change. * @return Null. 
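 *
 * Illustrative example of attribute settings inside a field block (a sketch using
 * assumed settings, not verified against this grammar):
 *
 *     attribute {
 *         fast-search
 *         fast-access
 *     }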
*/ Object attributeSetting(FieldOperationContainer field, AttributeOperation attribute, String attributeName) : { String str; } { ( { attribute.setHuge(true); } | { attribute.setFastSearch(true); } | { attribute.setFastAccess(true); } | { attribute.setMutable(true); } | { attribute.setPaged(true); } | { attribute.setEnableBitVectors(true); } | { attribute.setEnableOnlyBitVector(true); } | sorting(field, attributeName) | { String alias; String aliasedName=attributeName; } [aliasedName = identifier()] alias = identifierWithDash() { attribute.setDoAlias(true); attribute.setAlias(alias); attribute.setAliasedName(aliasedName); } | attributeTensorType(attribute) | str = identifierWithDash() { attribute.setDistanceMetric(str); } ) { return null; } } /** * This rule consumes a tensor type statement for an attribute element. * * @param attribute The attribute to modify. * @return Null. */ Object attributeTensorType(AttributeOperation attribute) : { TensorType tensorType; } { tensorType = tensorType("For attribute field '" + attribute.getName() + "'") { // TODO: Remove on Vespa 8 deployLogger.logApplicationPackage(Level.WARNING, "In field '" + attribute.getName() + "': Specifying tensor type on the attribute is deprecated and has no effect."); } { return null; } } /** * This rule consumes a summary statement defined inside a document-summary block. * * @param document The document summary to modify. * @return Null. */ Object summaryInDocument(DocumentSummary document) : { String name; DataType type; SummaryField summary; } { name = identifierWithDash() { } type = dataType() { summary = new SummaryField(name, type); summary.setVsmCommand(SummaryField.VsmCommand.FLATTENSPACE); SummaryInFieldLongOperation op = new SummaryInFieldLongOperation(); } lbrace() (summaryItem(op) ()*)* { if (op.destinationIterator().hasNext()) { throw new ParseException("Summaries defined in a document-summary section " + "can not have a 'to' line."); } op.applyToSummary(summary); document.add(summary); return null; } } /** * The rule consumes a summary statement defined inside a field. * * @param field The field to modify. * @return Null. */ Object summaryInField(FieldOperationContainer field) : { SummaryInFieldOperation summary; } { ( ( LOOKAHEAD(2) summary = summaryInFieldShort(field) | summary = summaryInFieldLong(field)) ) { field.addOperation(summary); return null; } } /** * This rule consumes a single-line summary field. * * @param field The field to modify. * @return The consumed summary field. */ SummaryInFieldOperation summaryInFieldShort(FieldOperationContainer field) : { String name = field.getName(); SummaryField ret; } { [ name = identifier() ] { SummaryInFieldShortOperation op = new SummaryInFieldShortOperation(name); } ( { op.setTransform(SummaryTransform.DYNAMICTEASER); op.addSource(name); } | { op.setTransform(SummaryTransform.MATCHED_ELEMENTS_FILTER); } | ( | ) { op.setTransform(SummaryTransform.NONE); } ) { return op; } } /** * This rule consumes a multi-line summary field. * * @return The consumed summary field. */ SummaryInFieldOperation summaryInFieldLong(FieldOperationContainer field) : { String name = field.getName(); DataType type = null; } { ( [ name = identifier() [ type = dataType() ] ] lbrace() { SummaryInFieldLongOperation op = new SummaryInFieldLongOperation(name); op.setType(type); } (summaryItem(op) ()*)* ) { return op; } } /** * This rule consumes an item of a summary field block. * * @param field The field to modify. * @return Null. 
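 *
 * Illustrative example of summary items inside a field's summary block (a sketch
 * with assumed names, not verified against this grammar):
 *
 *     summary snippet type string {
 *         source: body
 *         dynamic
 *     }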
*/ Object summaryItem(SummaryInFieldLongOperation field) : { } { ( summaryTransform(field) | summaryBolding(field) | summarySourceList(field) | summaryDestinationList(field) | summaryProperties(field) ) { return null; } } /** * This rule consumes a transform statement for a summary field element. * * @param field The field to modify. * @return Null. */ Object summaryTransform(SummaryInFieldOperation field) : { } { ( { field.setTransform(SummaryTransform.DYNAMICTEASER); } | { field.setTransform(SummaryTransform.MATCHED_ELEMENTS_FILTER); } | ( | ) { field.setTransform(SummaryTransform.NONE); } ) { return null; } } /** * This rule consumes a bolding statement for a summary field element. * * @param field The summary field to modify. */ void summaryBolding(SummaryInFieldLongOperation field) : { boolean bold; } { bold = bool() { field.setBold(bold); } } /** * This rule consumes a source-list statement for a summary field element. * * @param field The summary field to modify. */ void summarySourceList(SummaryInFieldOperation field) : { String str; } { ( str = identifier() { field.addSource(str); } ( str = identifier() { field.addSource(str); } )* ) + } /** * This rule consumes a destination-list statement for a summary field element. * * @param field The summary field to modify. */ void summaryDestinationList(SummaryInFieldLongOperation field) : { String str; } { str = identifier() { field.addDestination(str); } ( str = identifier() { field.addDestination(str); } )* } /** * This rule consumes properties for a summary field element. * * @param field The summary field to modify. */ void summaryProperties(SummaryInFieldLongOperation field) : { } { lbrace() (summaryProperty(field) )+ } /** * This rule consumes a single summary property pair for a summary field element. * * @param field The summary field to modify. */ void summaryProperty(SummaryInFieldLongOperation field) : { String name, value; } { name = identifierWithDash() (value = identifierWithDash() | value = quotedString()) { field.addProperty(new SummaryField.Property(name, value)); } } /** * This rule consumes a stemming block of a field element. * * @param field The field to modify. */ void fieldStemming(FieldOperationContainer field) : { String setting; StemmingOperation op = new StemmingOperation(); } { setting = identifierWithDash() { op.setSetting(setting); field.addOperation(op); } } /** * This rule consumes a stemming statement for a search element. * * @param search The search to modify. */ void searchStemming(Search search) : { String setting; } { setting = identifierWithDash() { search.setStemming(Stemming.get(setting)); } } /** * This rule consumes a normalizing statement of a field element. At the moment, this can only be used to turn off * normalizing. * * @param field The field to modify. */ void normalizing(FieldOperationContainer field) : { String setting; } { setting = identifierWithDash() { field.addOperation(new NormalizingOperation(setting)); } } /** * This rule consumes a bolding statement of a field element. * * @param field The field to modify. */ void bolding(FieldOperationContainer field) : { boolean bold; } { bold = bool() { field.addOperation(new BoldingOperation(bold)); } } /** * This rule consumes a dictionary statement of a field element. * * @param field The field to modify. 
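 *
 * Illustrative example inside a field block (a sketch, not verified against this
 * grammar):
 *
 *     dictionary {
 *         hash
 *     }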
*/ void dictionary(FieldOperationContainer field) : { } { ( ( dictionarySetting(field)) | (lbrace() (dictionarySetting(field) ()*)* )) { } } void dictionarySetting(FieldOperationContainer field) : { Dictionary.Type type; } { ( { field.addOperation(new DictionaryOperation(DictionaryOperation.Operation.HASH)); } | { field.addOperation(new DictionaryOperation(DictionaryOperation.Operation.BTREE)); } | { field.addOperation(new DictionaryOperation(DictionaryOperation.Operation.CASED)); } | { field.addOperation(new DictionaryOperation(DictionaryOperation.Operation.UNCASED)); }) { } } /** * This rule consumes a body statement of a field element. * * @param field The field to modify. */ void body(SDField field) : { } { { deployLogger.logApplicationPackage(Level.WARNING, field + ": 'header/body' is deprecated and has no effect."); } } /** * This rule consumes a header statement of a field element. * * @param field The field to modify. */ void header(SDField field) : { } {
{ deployLogger.logApplicationPackage(Level.WARNING, field + ": 'header/body' is deprecated and has no effect."); } } void queryCommand(FieldOperationContainer container) : { String command; QueryCommandOperation field = new QueryCommandOperation(); } { ( command = identifierWithDash() | command = quotedString() ) { field.addQueryCommand(command); container.addOperation(field); } } void alias(FieldOperationContainer container) : { String aliasedName = null; String alias; } { [aliasedName = identifier()] alias = identifierWithDash() { AliasOperation op = new AliasOperation(aliasedName, alias); container.addOperation(op); } } FieldOperationContainer match(FieldOperationContainer field) : { } { ( ( matchType(field)) | (lbrace() (matchItem(field) ()*)* ) ) { return field; } } /** * This rule consumes a single match item for a match block. * * @param field The field to modify. * @return Null. */ Object matchItem(FieldOperationContainer field) : { } { ( matchType(field) | exactTerminator(field) | gramSize(field) | matchSize(field) ) { return null; } } Object matchType(FieldOperationContainer container) : { MatchOperation matchOp = new MatchOperation(); } { ( { matchOp.setMatchingType(Matching.Type.TEXT); } // Deprecated synonym to TEXT | { matchOp.setMatchingType(Matching.Type.TEXT); } | { matchOp.setMatchingType(Matching.Type.WORD); } | { matchOp.setMatchingType(Matching.Type.EXACT); } | { matchOp.setMatchingType(Matching.Type.GRAM); } | { matchOp.setCase(Case.CASED); } | { matchOp.setCase(Case.UNCASED); } | { matchOp.setMatchingAlgorithm(Matching.Algorithm.PREFIX); } | { matchOp.setMatchingAlgorithm(Matching.Algorithm.SUBSTRING); } | { matchOp.setMatchingAlgorithm(Matching.Algorithm.SUFFIX); } ) { container.addOperation(matchOp); return null; } } void exactTerminator(FieldOperationContainer container) : { String terminator; MatchOperation field = new MatchOperation(); } { terminator = quotedString() { field.setExactMatchTerminator(terminator); container.addOperation(field); } } void gramSize(FieldOperationContainer container) : { int gramSize; MatchOperation field = new MatchOperation(); } { gramSize = integer() { field.setGramSize(gramSize); container.addOperation(field); } } void matchSize(FieldOperationContainer container) : { int matchSize; MatchOperation field = new MatchOperation(); } { matchSize = integer() { field.setMaxLength(matchSize); container.addOperation(field); } } /** * Consumes a rank statement of a field element. * * @param field The field to modify. * @return Null. */ Object rank(FieldOperationContainer field) : { RankOperation op = new RankOperation(); } { ( ( rankSetting(op)) | (lbrace() (rankSetting(op) ()*)* ) ) { field.addOperation(op); return null; } } /** * Consumes a single rank setting of a rank statement. * * @param field The field to modify. * @return Null. */ Object rankSetting(RankOperation field) : { } { ( { field.setLiteral(true); } | { field.setNormal(true); } | { field.setFilter(true); } ) { return null; } } /** * Consumes an id statement of a field body block. * * @param field The field to modify. * @param document The document type to modify. */ void id(FieldOperationContainer field, SDDocumentType document) : { int fieldId; IdOperation op = new IdOperation(); } { fieldId = integer() { op.setDocument(document); op.setFieldId(fieldId); field.addOperation(op); } } /** * Consumes an indexing-rewrite statement of a field body block. * * @param field The field to modify. 
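 *
 * Assumed form of this statement inside a field block (a sketch only, not verified
 * against this grammar):
 *
 *     indexing-rewrite: none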
*/ void indexingRewrite(FieldOperationContainer field) : { } { { field.addOperation(new IndexingRewriteOperation()); } } /** * Consumes a document-summary block from within a search block. * * @param search The search object to add content to. * @return Null. */ Object documentSummary(Search search) : { String name; DocumentSummary summary; } { ( name = identifierWithDash() { search.addSummary(summary = new DocumentSummary(name)); } [inheritsDocumentSummary(summary, search)] lbrace() ( { summary.setFromDisk(true); } | { summary.setOmitSummaryFeatures(true); } | documentSummaryItem(summary) | )* ) { return null; } } /** * This rule consumes an inherits statement of a document summary. * * @param documentSummary The document summary to modify. * @param search The search object documentSummary is being added to. */ void inheritsDocumentSummary(DocumentSummary documentSummary, Search search) : { String name; } { name = identifierWithDash() { documentSummary.setInherited(search.getSummaries().get(name)); } } /** * Consumes a single document-summary item. * * @param summary The document summary to modify. * @return Null. */ Object documentSummaryItem(DocumentSummary summary) : { } { summaryInDocument(summary) { return null; } } /** * Consumes an index block for a field element. * * @param search The search object to add content to. * @param field The field to modify. * @return Null. */ Object index(Search search, FieldOperationContainer field) : { IndexOperation op = new IndexOperation(); String indexName = (field != null) ? field.getName() : null; } { [indexName = identifier()] { if (indexName == null) { throw new ParseException("Index statements outside fields must have an explicit name."); } op.setIndexName(indexName); } ( ( indexBody(op) ( indexBody(op))*) | (lbrace() (indexBody(op) ()*)* ) ) { if (field == null) { Index index = new Index(indexName); op.applyToIndex(index); search.addIndex(index); } else { field.addOperation(op); } return null; } } /** * Consumes a single index statement for an index block. * * @param index The index to modify. * @return Null. */ Object indexBody(IndexOperation index) : { String str; int arity; long num; double threshold; } { ( { index.setPrefix(true); } | str = identifierWithDash() { index.addAlias(str); } | str = identifierWithDash() { index.setStemming(str); } | arity = integer() { index.setArity(arity); } | num = consumeLong() { index.setLowerBound(num); } | num = consumeLong() { index.setUpperBound(num); } | threshold = consumeFloat() { index.setDensePostingListThreshold(threshold); } | { index.setEnableBm25(true); } | hnswIndex(index) { } ) { return null; } } void hnswIndex(IndexOperation index) : { HnswIndexParams.Builder params = new HnswIndexParams.Builder(); } { ( LOOKAHEAD( lbrace()) ( (lbrace() (hnswIndexBody(params) ()*)* ) ) | ) { index.setHnswIndexParams(params); } } void hnswIndexBody(HnswIndexParams.Builder params) : { int num; boolean bool; } { ( num = integer() { params.setMaxLinksPerNode(num); } | num = integer() { params.setNeighborsToExploreAtInsert(num); } | bool = bool() { params.setMultiThreadedIndexing(bool); } ) } /** * Consumes a onnx-model block of a search element. * * @param search The search object to add content to. */ void onnxModel(Search search) : { String name; OnnxModel onnxModel; } { ( name = identifier() { onnxModel = new OnnxModel(name); } lbrace() (onnxModelItem(onnxModel) ()*)+ ) { if (documentsOnly) return; search.onnxModels().add(onnxModel); } } /** * This rule consumes an onnx-model block. 
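 *
 * Illustrative example of an onnx-model block (a sketch with assumed file and
 * input/output names, not verified against this grammar):
 *
 *     onnx-model my_ranker {
 *         file: files/my_ranker.onnx
 *         input  input_ids: attribute(token_ids)
 *         output score: my_score
 *     }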
* * @param onnxModel The onnxModel to modify. * @return Null. */ Object onnxModelItem(OnnxModel onnxModel) : { String path = null; } { ( ( path = filePath() { } ()*) { onnxModel.setFileName(path); } | ( path = uriPath() { } ()*) { onnxModel.setUri(path); } | () { String name = token.image.substring(5, token.image.lastIndexOf(":")).trim(); if (name.startsWith("\"")) { name = name.substring(1, name.length() - 1); } String source = token.image.substring(token.image.lastIndexOf(":") + 1).trim(); onnxModel.addInputNameMapping(name, source); } | () { String name = token.image.substring(6, token.image.lastIndexOf(":")).trim(); if (name.startsWith("\"")) { name = name.substring(1, name.length() - 1); } String as = token.image.substring(token.image.lastIndexOf(":") + 1).trim(); onnxModel.addOutputNameMapping(name, as); } ) { return null; } } /** * Consumes a constant block of a search element. * * @param search The search object to add content to. */ void rankingConstant(Search search) : { String name; RankingConstant constant; } { ( name = identifier() { constant = new RankingConstant(name); } lbrace() (rankingConstantItem(constant) ()*)+ ) { if (documentsOnly) return; search.rankingConstants().add(constant); } } /** * This rule consumes a constant block. * * @param constant The constant to modify. * @return Null. */ Object rankingConstantItem(RankingConstant constant) : { String path = null; TensorType type = null; } { ( ( path = filePath() { } ()*) { constant.setFileName(path); } | ( path = uriPath() { } ()*) { constant.setUri(path); } | type = tensorTypeWithPrefix(rankingConstantErrorMessage(constant.getName())) ()* { constant.setType(type); } ) { return null; } } String rankingConstantErrorMessage(String name) : {} { { return "For ranking constant ' " + name + "'"; } } String filePath() : { } { ( | | ) { return token.image; } } String uriPath() : { } { ( ) { return token.image; } } /** * Consumes a rank-profile block of a search element. * * @param search The search object to add content to. */ void rankProfile(Search search) : { String name; RankProfile profile; } { ( ( | ) name = identifierWithDash() { if (documentsOnly) { profile = new DocumentsOnlyRankProfile(name, search, rankProfileRegistry, search.rankingConstants()); } else if ("default".equals(name)) { profile = rankProfileRegistry.get(search, "default"); } else { profile = new RankProfile(name, search, rankProfileRegistry, search.rankingConstants()); } } [inheritsRankProfile(profile)] lbrace() (rankProfileItem(profile) ()*)* ) { if (documentsOnly) return; rankProfileRegistry.add(profile); } } /** * This rule consumes a single statement for a rank-profile block. * * @param profile The rank profile to modify. * @return Null. */ Object rankProfileItem(RankProfile profile) : { } { ( fieldRankType(profile) | fieldWeight(profile) | fieldRankFilter(profile) | firstPhase(profile) | matchPhase(profile) | function(profile) | execute(profile) | ignoreRankFeatures(profile) | numThreadsPerSearch(profile) | minHitsPerThread(profile) | numSearchPartitions(profile) | termwiseLimit(profile) | rankFeatures(profile) | rankProperties(profile) | secondPhase(profile) | rankDegradation(profile) | constants(profile) | summaryFeatures(profile) ) { return null; } } /** * This rule consumes an inherits statement of a rank-profile. * * @param profile The profile to modify. 
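 *
 * Illustrative example (a sketch, not verified against this grammar); this rule
 * consumes only the "inherits default" part:
 *
 *     rank-profile mobile inherits default {
 *         first-phase {
 *             expression: nativeRank(title)
 *         }
 *     }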
*/ void inheritsRankProfile(RankProfile profile) : { String str; } { str = identifierWithDash() { profile.setInherited(str); } } /** * This rule consumes an execute statement of a rank-profile. * * @param profile The profile to modify. */ void execute(RankProfile profile) : { } { lbrace() (execute_operation(profile) )+ { } } void execute_operation(RankProfile profile) : { String attribute, operation; RankProfile.ExecuteOperation.Phase phase; } { ( { phase = RankProfile.ExecuteOperation.Phase.onmatch; } | { phase = RankProfile.ExecuteOperation.Phase.onrerank; } | { phase = RankProfile.ExecuteOperation.Phase.onsummary; } ) lbrace() attribute = identifier() operation = execute_expr() ()* { profile.addExecuteOperation(phase, attribute, operation); } } String execute_expr() : { String op; Number constant = null; } { (("++" | "--") { op = token.image; } | ("+=" | "-=" | "*=" | "/=" | "%=" | "=") { op = token.image; } constant = consumeNumber()) { return constant != null ? (op + constant) : op; } } /** * This rule consumes a function statement of a rank-profile. * * @param profile The profile to modify. */ void function(RankProfile profile) : { String name, expression, parameter; List parameters = new ArrayList(); boolean inline = false; } { ( ( | ) inline = inline() name = identifier() [ "$" { name = name + token.image; } ] "(" [ parameter = identifier() { parameters.add(parameter); } ( parameter = identifier() { parameters.add(parameter); } )* ] ")" lbrace() expression = expression() ()* ) { profile.addFunction(name, parameters, expression, inline); } } boolean inline() : { } { ( { return true; } ) ? { return false; } } /** * This rule consumes a match-phase block of a rank profile. * * @param profile The rank profile to modify. */ void matchPhase(RankProfile profile) : { MatchPhaseSettings settings = new MatchPhaseSettings(); } { lbrace() (matchPhaseItem(settings) ()*)* { settings.checkValid(); profile.setMatchPhaseSettings(settings); } } void matchPhaseItem(MatchPhaseSettings settings) : { String str; int num; double multiplier; double coverage; } { ( str = identifier() { settings.setAttribute(str); } | diversity(settings) | ( { settings.setAscending(true); } | { settings.setAscending(false); } ) | num = integer() { settings.setMaxHits(num); } | coverage = consumeFloat() { settings.setMaxFilterCoverage(coverage); } | multiplier = consumeFloat() { settings.setEvaluationPoint(multiplier); } | multiplier = consumeFloat() { settings.setPrePostFilterTippingPoint(multiplier); } ) { return; } } /** * This rule consumes a diversity block of a rank profile. * * @param profile The rank profile to modify. */ void diversity(MatchPhaseSettings profile) : { DiversitySettings settings = new DiversitySettings(); } { lbrace() (diversityItem(settings) ()*)* { profile.setDiversity(settings); } } void diversityItem(DiversitySettings settings) : { String str; int num; double multiplier; } { ( str = identifier() { settings.setAttribute(str); } | num = integer() { settings.setMinGroups(num); } | multiplier = consumeFloat() { settings.setCutoffFactor(multiplier); } | ( { settings.setCutoffStrategy(Diversity.CutoffStrategy.strict); } | { settings.setCutoffStrategy(Diversity.CutoffStrategy.loose); } ) ) { return; } } /** * Consumes the first-phase block of a rank profile. * * @param profile The rank profile to modify. 
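 *
 * Illustrative example (a sketch with an assumed expression, not verified against
 * this grammar):
 *
 *     first-phase {
 *         expression: nativeRank(title) + attribute(popularity)
 *     }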
*/ void firstPhase(RankProfile profile) : { String exp; } { lbrace() (firstPhaseItem(profile) ()*)* } Object firstPhaseItem(RankProfile profile) : { String expression; int rerankCount; double dropLimit; } { ( expression = expression() { profile.setFirstPhaseRanking(expression); } | ( rerankCount = integer()) { profile.setKeepRankCount(rerankCount); } | ( dropLimit = consumeFloat()) { profile.setRankScoreDropLimit(dropLimit); } ) { return null; } } /** * Consumes the second-phase block of a rank profile. * * @param profile The rank profile to modify. */ void secondPhase(RankProfile profile) : { } { lbrace() (secondPhaseItem(profile) ()*)* } /** * Consumes a statement for a second-phase block. * * @param profile The rank profile to modify. * @return Null. */ Object secondPhaseItem(RankProfile profile) : { String expression; int rerankCount; } { ( expression = expression() { profile.setSecondPhaseRanking(expression); } | ( rerankCount = integer()) { profile.setRerankCount(rerankCount); } ) { return null; } } /** * This rule consumes a summary-features block of a rank profile. * * @param profile The rank profile to modify. * @return Null. */ Object summaryFeatures(RankProfile profile) : { String features; String inherited = null; } { ( { features = token.image.substring(token.image.indexOf(":") + 1).trim(); } | { features = token.image.substring(token.image.indexOf("{") + 1, token.image.lastIndexOf("}")).trim(); } | { int inheritsIndex = token.image.indexOf("inherits "); String rest = token.image.substring(inheritsIndex + "inherits ".length()); profile.setInheritedSummaryFeatures(rest.substring(0, rest.indexOf(" ")).trim()); features = token.image.substring(token.image.indexOf("{") + 1, token.image.lastIndexOf("}")).trim(); } ) { profile.addSummaryFeatures(getFeatureList(features)); return null; } } /** Consumes a rank-features block of a rank profile */ Object rankFeatures(RankProfile profile) : { String features; } { ( { features = token.image.substring(token.image.indexOf(":") + 1).trim(); } | { features = token.image.substring(token.image.indexOf("{") + 1, token.image.lastIndexOf("}")).trim(); } ) { profile.addRankFeatures(getFeatureList(features)); return null; } } /** * This rule consumes a ignore-default-rank-features statement for a rank profile. * * @param profile The rank profile to modify. */ void ignoreRankFeatures(RankProfile profile) : { } { { profile.setIgnoreDefaultRankFeatures(true); } } /** * This rule consumes a num-threads-per-search statement for a rank profile. * * @param profile The rank profile to modify. */ void numThreadsPerSearch(RankProfile profile) : { int num; } { ( num = integer()) { profile.setNumThreadsPerSearch(num); } } /** * This rule consumes a min-hits-per-thread statement for a rank profile. * * @param profile The rank profile to modify. */ void minHitsPerThread(RankProfile profile) : { int num; } { ( num = integer()) { profile.setMinHitsPerThread(num); } } /** * This rule consumes a num-search-partitions statement for a rank profile. * * @param profile The rank profile to modify. */ void numSearchPartitions(RankProfile profile) : { int num; } { ( num = integer()) { profile.setNumSearchPartitions(num); } } /** * This rule consumes a num-threads-per-search statement for a rank profile. * * @param profile The rank profile to modify. */ void termwiseLimit(RankProfile profile) : { double num; } { ( num = consumeFloat()) { profile.setTermwiseLimit(num); } } /** * This rule consumes a rank-properties block of a rank profile. 
/**
 * This rule consumes a rank-properties block of a rank profile. There is a little trick within
 * this rule to allow the final rank property to skip the terminating newline token.
 *
 * @param profile The rank profile to modify.
 */
void rankProperties(RankProfile profile) : { }
{
    <RANKPROPERTIES> lbrace()
    (LOOKAHEAD(rankPropertyItem() <COLON> rankPropertyItem() <NL>) rankProperty(profile) (<NL>)+)*
    [rankProperty(profile)]
    <RBRACE>
}

/**
 * This rule consumes a single rank property pair for a rank profile.
 *
 * @param profile The rank profile to modify.
 */
void rankProperty(RankProfile profile) :
{
    String key, val;
}
{
    key = rankPropertyItem() <COLON> val = rankPropertyItem()
    { profile.addRankProperty(key, val); }
}

/**
 * This rule consumes a single rank property for a rank-properties block.
 *
 * @return The token image of the consumed item.
 */
String rankPropertyItem() :
{
    String image, ret = "";
}
{
    ( (   image = identifierWithDash() { ret += image; }
        | image = quotedString() { ret += image; }
        | ( "(" | ")" | <DOT> | <COMMA> ) { ret += token.image; }
      )+
    )
    { return ret; }
}

/**
 * This rule consumes a field-weight statement of a rank profile.
 *
 * @param profile The rank profile to modify.
 */
void fieldWeight(RankProfile profile) :
{
    Integer num;
    String name;
}
{
    <WEIGHT> name = identifier() <COLON> num = integer()
    { profile.addRankSetting(name, RankProfile.RankSetting.Type.WEIGHT, num); }
}

/**
 * This rule consumes a rank-type statement of a rank profile.
 *
 * @param profile The rank profile to modify.
 */
void fieldRankType(RankProfile profile) :
{
    String name;
    String type;
}
{
    <RANKTYPE> name = identifier() <COLON> type = identifier()
    { profile.addRankSetting(name, RankProfile.RankSetting.Type.RANKTYPE, RankType.fromString(type)); }
}

/**
 * This rule consumes a rank filter statement of a rank profile.
 *
 * @param profile The rank profile to modify.
 */
void fieldRankFilter(RankProfile profile) :
{
    String name;
}
{
    <RANK> name = identifier() <COLON> <FILTER>
    { profile.addRankSetting(name, RankProfile.RankSetting.Type.PREFERBITVECTOR, Boolean.TRUE); }
}

/**
 * This rule consumes part of a rank-degradation statement of a rank profile.
 */
void rankDegradationBinSize() :
{
    double freq;
}
{
    <DOCFREQUENCY> <COLON> freq = consumeFloat()
    { deployLogger.logApplicationPackage(Level.WARNING, "Specifying 'doc-frequency' in 'rank-degradation' is deprecated and has no effect."); }
}

/**
 * This rule consumes part of a rank-degradation statement of a rank profile.
 */
void rankDegradationBinLow() :
{
    int n;
}
{
    <MINFULLRANKDOCS> <COLON> n = integer()
    { deployLogger.logApplicationPackage(Level.WARNING, "Specifying 'min-fullrank-docs' in 'rank-degradation' is deprecated and has no effect."); }
}

/**
 * This rule consumes part of a rank-degradation statement of a rank profile.
 */
void rankDegradationPosbinSize() :
{
    double avgOcc;
}
{
    <OCCURRENCESPERDOC> <COLON> avgOcc = consumeFloat()
    { deployLogger.logApplicationPackage(Level.WARNING, "Specifying 'occurrences-per-doc' in 'rank-degradation' is deprecated and has no effect."); }
}

/**
 * This rule consumes part of a rank-degradation statement of a rank profile.
 */
Object rankDegradationItem() : { }
{
    ( rankDegradationBinSize() | rankDegradationBinLow() | rankDegradationPosbinSize() )
    { return null; }
}

/**
 * This rule consumes a rank-degradation statement of a rank profile.
 *
 * @param profile The rank profile to modify.
 */
Object rankDegradation(RankProfile profile) :
{
    double freq;
}
{
    <RANKDEGRADATION>
    (   <COLON> freq = consumeFloat()
        { deployLogger.logApplicationPackage(Level.WARNING, "Specifying 'rank-degradation-frequency' in 'rank-profile' is deprecated and has no effect."); }
      | lbrace() ( rankDegradationItem() (<NL>)*)+ <RBRACE>
    )
    { return null; }
}
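// Illustrative sketch of the per-field rank settings and the rank-properties block parsed above;
// the field names are hypothetical and the property key is only a placeholder for whatever
// key/value pairs should be forwarded to the rank backend:
//
//     weight title: 200
//     rank-type body: about
//     rank body: filter
//     rank-properties {
//         some.property.name: "some value"
//     }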
/**
 * Consumes a set of constants available in ranking expressions in the enclosing profile.
 */
void constants(RankProfile profile) :
{
    String name;
}
{
    <CONSTANTS> <LBRACE> (<NL>)*
    ( name = identifier() ( constantValue(profile, name) | constantTensor(profile, name) ) (<NL>)* )*
    <RBRACE>
}

void constantValue(RankProfile profile, String name) :
{
    String value;
}
{
    <COLON> value = identifier() { profile.addConstant(name, Value.parse(value)); }
}

void constantTensor(RankProfile profile, String name) :
{
    String tensorString = "";
    TensorType tensorType = null;
}
{
    <LBRACE> (<NL>)*
    (   (   tensorString = tensorValue()
          | tensorType = tensorTypeWithPrefix(constantTensorErrorMessage(profile.getName(), name))
        ) (<NL>)*
    )*
    <RBRACE>
    {
        if (tensorType != null) {
            profile.addConstantTensor(name, new TensorValue(Tensor.from(tensorType, tensorString)));
        } else {
            profile.addConstantTensor(name, new TensorValue(Tensor.from(tensorString)));
        }
    }
}

String constantTensorErrorMessage(String rankProfileName, String constantTensorName) : { }
{
    { return "For constant tensor '" + constantTensorName + "' in rank profile '" + rankProfileName + "'"; }
}

String tensorValue() :
{
    String tensor;
}
{
    (   <TENSOR_VALUE_SL> { tensor = token.image.substring(token.image.indexOf(":") + 1); }
      | <TENSOR_VALUE_ML> { tensor = token.image.substring(token.image.indexOf("{") + 1, token.image.lastIndexOf("}")); }
    )
    { return tensor; }
}

TensorType tensorTypeWithPrefix(String errorMessage) :
{
    TensorType type;
}
{
    <TYPE> <COLON> type = tensorType(errorMessage)
    { return type; }
}

TensorType tensorType(String errorMessage) :
{
    String tensorTypeString;
}
{
    ( <TENSOR_TYPE> ) { tensorTypeString = token.image; }
    {
        TensorType tensorType;
        try {
            tensorType = TensorType.fromSpec(tensorTypeString);
        } catch (IllegalArgumentException e) {
            throw new IllegalArgumentException(errorMessage + ": Illegal tensor type spec: " + e.getMessage());
        }
        return tensorType;
    }
}

void importField(Search search) :
{
    String fieldRefSpec;
    String aliasFieldName;
}
{
    <IMPORT> <FIELD> fieldRefSpec = identifier() <AS> aliasFieldName = identifier() lbrace() <RBRACE>
    {
        long nDots = Utils.count(fieldRefSpec, '.');
        if (nDots != 1) {
            throw new IllegalArgumentException("Illegal field reference spec '" + fieldRefSpec + "': Does not include a single '.'");
        }
        int indexOfDot = fieldRefSpec.indexOf('.');
        String documentReferenceFieldName = fieldRefSpec.substring(0, indexOfDot);
        String foreignFieldName = fieldRefSpec.substring(indexOfDot + 1);
        TemporaryImportedFields importedFields = search.temporaryImportedFields().get();
        if (importedFields.hasField(aliasFieldName)) {
            throw new IllegalArgumentException("For search '" + search.getName() + "', import field as '" + aliasFieldName + "': Field already imported");
        }
        importedFields.add(new TemporaryImportedField(aliasFieldName, documentReferenceFieldName, foreignFieldName));
    }
}

/**
 * This rule consumes an expression token and returns its image.
 *
 * @return The consumed token image.
 */
String expression() :
{
    String exp;
}
{
    (   <EXPRESSION_SL> { exp = token.image.substring(token.image.indexOf(":") + 1); }
      | <EXPRESSION_ML> { exp = token.image.substring(token.image.indexOf("{") + 1, token.image.lastIndexOf("}")); }
    )
    { return exp; }
}

String identifierWithDash() :
{
    String identifier;
}
{
    ( identifier = identifier() { return identifier; } )
  | ( <IDENTIFIERWITHDASH> { return token.image; } )
}

/**
 * Consumes an identifier. This must be kept in sync with all word tokens that should be parseable as
 * identifiers.
 *
 * @return the identifier string
 */
String identifier() : { }
{
    ( | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
| | | | | | | |
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | )
    { return token.image; }
}

/**
 * Consumes a string token and returns the token image.
 *
 * @return The consumed token image.
 */
String string() : { }
{
    <STRING> { return token.image; }
}

/**
 * Consumes a quoted string token and returns the token image minus the quotes. This does not perform
 * unescaping of the content; it simply removes the first and last character of the image. However, the
 * token itself can contain anything but a double quote.
 *
 * @return The unquoted token image.
 */
String quotedString() : { }
{
    <DOUBLEQUOTEDSTRING> { return token.image.substring(1, token.image.length() - 1); }
}

/**
 * This rule consumes a boolean value.
 *
 * @return The consumed boolean value.
 */
Boolean bool() : { }
{
    (   ( <TRUE> | <ON> )   { return true; }
      | ( <FALSE> | <OFF> ) { return false; }
    )
}

/**
 * This rule consumes an integer token and returns its numeric value.
 *
 * @return The consumed integer value.
 */
int integer() : { }
{
    <INTEGER> { return Integer.parseInt(token.image); }
}

/**
 * This rule consumes a long or integer token and returns its numeric value.
 *
 * @return The consumed long value.
 */
long consumeLong() : { }
{
    (   <INTEGER> { return Long.parseLong(token.image); }
      | <LONG> { return Long.parseLong(token.image.substring(0, token.image.length()-1)); }
    )
}

/**
 * This rule consumes a floating-point token and returns its numeric value.
 *
 * @return The consumed value.
 */
double consumeFloat() : { }
{
    <DOUBLE> { return Double.valueOf(token.image); }
}

Number consumeNumber() :
{
    Number num;
}
{
    ( num = consumeFloat() | num = consumeLong() ) { return num; }
}

/**
 * This rule consumes an opening brace with leading and trailing newline tokens.
 */
void lbrace() : { }
{
    (<NL>)* <LBRACE> (<NL>)*
}
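// Hedged note on the literal helper rules above: bool() is written to accept two keyword spellings
// per value (assumed here to be true/on and false/off), consumeLong() accepts either a plain integer
// or a suffixed long literal whose final character it strips (an 'L' suffix, by assumption), and
// consumeFloat() accepts a single floating-point token. Illustrative uses from the rules above:
//
//     keep-rank-count: 200          (integer())
//     termwise-limit: 0.05          (consumeFloat())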