diff options
author | Jon Bratseth <bratseth@gmail.com> | 2021-11-18 23:04:30 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2021-11-18 23:04:30 +0100 |
commit | c978c9e29652b24b7f31ed545c1c0e48a17464ec (patch) | |
tree | c347e96d74bcb9d180346d90385a2dceb6fbcad5 /integration/intellij/src/main/jflex | |
parent | 28b80bf7669ff14f1af913ef7bcee8659ac555a2 (diff) |
Move and rename
Diffstat (limited to 'integration/intellij/src/main/jflex')
-rw-r--r-- | integration/intellij/src/main/jflex/ai/vespa/intellij/schema/lexer/sd.flex | 239 |
1 files changed, 239 insertions, 0 deletions
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package ai.vespa.intellij.schema.lexer;

import com.intellij.lexer.FlexLexer;
import com.intellij.psi.tree.IElementType;
import com.intellij.ui.components.MultiColumnList;
import ai.vespa.intellij.schema.psi.SdTokenType;

import static ai.vespa.intellij.schema.psi.SdTypes.*; // That is the class which is specified as `elementTypeHolderClass` in bnf
                                                      // grammar file. This will contain all other tokens which we will use.
import static com.intellij.psi.TokenType.BAD_CHARACTER; // Pre-defined bad character token.
import static com.intellij.psi.TokenType.WHITE_SPACE; // Pre-defined whitespace character token.

/*
 * Vespa schema parser lexer.
 *
 * JFlex specification from which the SdLexer class is generated. The generated class implements FlexLexer and
 * exposes a single scanning method, advance(), which returns the IElementType of the next token.
 *
 * Matching follows the usual JFlex policy: the longest match wins, and among matches of equal length the rule
 * listed first wins. This is why all keyword rules are listed before the generic {ID} / {ID_WITH_DASH} rules.
 *
 * @author Shahar Ariel
 */

%%

%public
%class SdLexer
%implements FlexLexer
%function advance
%type IElementType
%unicode

//**--------- REGEXES ---------**//
// If some character sequence is matched to this regex, it will be treated as an IDENTIFIER.
ID=[a-zA-Z_][a-zA-Z0-9_]*
// Same as ID, but dashes are also allowed after the first character.
ID_WITH_DASH = [a-zA-Z_][a-zA-Z0-9_-]*
// If some character sequence is matched to this regex, it will be treated as a WHITE_SPACE.
WHITE_SPACE=[ \t\n\x0B\f\r]+

// A comment starts with '#' and runs to the end of the line.
COMMENT=#.*
SYMBOL= [!$|:{}(),.\[\]]
INTEGER = [0-9]+
// NOTE(review): the trailing [e]? accepts a bare 'e' with no exponent digits (e.g. "1.5e"), and "1.5e10" is
// split into "1.5e" followed by "10". Confirm against sd.bnf whether a full exponent was intended.
FLOAT = {INTEGER}[.][0-9]+[e]?
COMPARISON_OPERATOR = [<>]|(==)|(<=)|(>=)|(\~=)
ARITHMETIC_OPERATOR = [\-+*/]
// A double-quoted string; a backslash escapes the character that follows it.
STRING = \"([^\"\\]*(\\.[^\"\\]*)*)\"
WORD = \w+


%%

<YYINITIAL> {
  /**
   In here, we match keywords. So if a keyword is found, this returns a token which corresponds to that keyword.
   These tokens are generated using the 'sd.bnf' file and located in the SdTypes class.
   The parser uses these return values to match a token sequence to a parser rule.
   */

  /**
   This list of keywords has to be synchronized with sd.bnf file. If you add a keyword here, you should add it to the
   sd.bnf file as well (to the rule KeywordOrIdentifier / KeywordNotIdentifier).
   Each keyword must be listed only once: a repeated rule can never be matched, since the first one always wins.
   */

  "search"                         { return SEARCH; }
  "schema"                         { return SCHEMA; }
  "document"                       { return DOCUMENT; }
  "inherits"                       { return INHERITS; }
  "struct"                         { return STRUCT; }
  "field"                          { return FIELD; }
  "type"                           { return TYPE; }
  "struct-field"                   { return STRUCT_FIELD; }
  "match"                          { return MATCH; }

  "indexing"                       { return INDEXING; }
  "summary"                        { return SUMMARY; }
  "attribute"                      { return ATTRIBUTE; }
  "set_language"                   { return SET_LANGUAGE; }

  "array"                          { return ARRAY; }
  "raw"                            { return RAW; }
  "uri"                            { return URI; }
  "reference"                      { return REFERENCE; }
  "annotationreference"            { return ANNOTATIONREFERENCE; }
  "weightedset"                    { return WEIGHTEDSET; }
  "map"                            { return MAP; }

  "text"                           { return TEXT; }
  "exact"                          { return EXACT; }
  "exact-terminator"               { return EXACT_TERMINATOR; }
  "word"                           { return WORD; }
  "prefix"                         { return PREFIX; }
  "cased"                          { return CASED; }
  "uncased"                        { return UNCASED; }
  "substring"                      { return SUBSTRING; }
  "suffix"                         { return SUFFIX; }
  "max-length"                     { return MAX_LENGTH; }
  "gram"                           { return GRAM; }
  "gram-size"                      { return GRAM_SIZE; }

  "fast-search"                    { return FAST_SEARCH; }
  "fast-access"                    { return FAST_ACCESS; }
  "alias"                          { return ALIAS; }
  "sorting"                        { return SORTING; }
  "uca"                            { return UCA; }
  "lowercase"                      { return LOWERCASE; }
  "paged"                          { return PAGED; }
  "strength"                       { return STRENGTH; }
  "primary"                        { return PRIMARY; }
  "secondary"                      { return SECONDARY; }
  "tertiary"                       { return TERTIARY; }
  "quaternary"                     { return QUATERNARY; }
  "identical"                      { return IDENTICAL; }
  "distance-metric"                { return DISTANCE_METRIC; }

  "rank"                           { return RANK; }
  "filter"                         { return FILTER; }
  "normal"                         { return NORMAL; }
  "literal"                        { return LITERAL; }
  "indexing-rewrite"               { return INDEXING_REWRITE; }
  "none"                           { return NONE; }
  "query-command"                  { return QUERY_COMMAND; }
  "full"                           { return FULL; }
  "static"                         { return STATIC; }
  "dynamic"                        { return DYNAMIC; }
  "source"                         { return SOURCE; }
  "to"                             { return TO; }
  "matched-elements-only"          { return MATCHED_ELEMENTS_ONLY; }

  "input"                          { return INPUT; }
  "mutable"                        { return MUTABLE; }
  "enable-bit-vectors"             { return ENABLE_BIT_VECTORS; }
  "enable-only-bit-vector"         { return ENABLE_ONLY_BIT_VECTOR; }
  "document-summary"               { return DOCUMENT_SUMMARY; }
  "from-disk"                      { return FROM_DISK; }
  "omit-summary-features"          { return OMIT_SUMMARY_FEATURES; }
  "import"                         { return IMPORT; }
  "as"                             { return AS; }

  "rank-profile"                   { return RANK_PROFILE; }
  "model"                          { return MODEL; }
  "match-phase"                    { return MATCH_PHASE; }
  "order"                          { return ORDER; }
  "ascending"                      { return ASCENDING; }
  "descending"                     { return DESCENDING; }
  "locale"                         { return LOCALE; }
  "max-hits"                       { return MAX_HITS; }
  "diversity"                      { return DIVERSITY; }
  "min-groups"                     { return MIN_GROUPS; }
  "cutoff-factor"                  { return CUTOFF_FACTOR; }
  "cutoff-strategy"                { return CUTOFF_STRATEGY; }
  "loose"                          { return LOOSE; }
  "strict"                         { return STRICT; }
  "rank-properties"                { return RANK_PROPERTIES; }

  "first-phase"                    { return FIRST_PHASE; }
  "keep-rank-count"                { return KEEP_RANK_COUNT; }
  "rank-score-drop-limit"          { return RANK_SCORE_DROP_LIMIT; }
  "expression"                     { return EXPRESSION; }
  "file"                           { return FILE; }
  "num-threads-per-search"         { return NUM_THREADS_PER_SEARCH; }
  "termwise-limit"                 { return TERMWISE_LIMIT; }
  "ignore-default-rank-features"   { return IGNORE_DEFAULT_RANK_FEATURES; }
  "min-hits-per-thread"            { return MIN_HITS_PER_THREAD; }
  "num-search-partition"           { return NUM_SEARCH_PARTITION; }
  "constants"                      { return CONSTANTS; }
  "second-phase"                   { return SECOND_PHASE; }
  "rerank-count"                   { return RERANK_COUNT; }

  "weight"                         { return WEIGHT; }
  "index"                          { return INDEX; }
  "bolding"                        { return BOLDING; }
  "on"                             { return ON; }
  "off"                            { return OFF; }
  "true"                           { return TRUE; }
  "false"                          { return FALSE; }
  "id"                             { return ID; }
  "normalizing"                    { return NORMALIZING; }
  "stemming"                       { return STEMMING; }
  "arity"                          { return ARITY; }
  "lower-bound"                    { return LOWER_BOUND; }
  "upper-bound"                    { return UPPER_BOUND; }
  "dense-posting-list-threshold"   { return DENSE_POSTING_LIST_THRESHOLD; }
  "enable-bm25"                    { return ENABLE_BM25; }
  "hnsw"                           { return HNSW; }
  "max-links-per-node"             { return MAX_LINKS_PER_NODE; }
  "neighbors-to-explore-at-insert" { return NEIGHBORS_TO_EXPLORE_AT_INSERT; }
  "multi-threaded-indexing"        { return MULTI_THREADED_INDEXING; }
  "create-if-nonexistent"          { return CREATE_IF_NONEXISTENT; }
  "remove-if-zero"                 { return REMOVE_IF_ZERO; }
  "dictionary"                     { return DICTIONARY; }
  "hash"                           { return HASH; }
  "btree"                          { return BTREE; }

  "fieldset"                       { return FIELDSET; }
  "fields"                         { return FIELDS; }
  "constant"                       { return CONSTANT; }
  "output"                         { return OUTPUT; }

  "annotation"                     { return ANNOTATION; }
  "rank-type"                      { return RANK_TYPE; }
  "onnx-model"                     { return ONNX_MODEL; }
  "raw-as-base64-in-summary"       { return RAW_AS_BASE64_IN_SUMMARY; }
  "on-match"                       { return ON_MATCH; }
  "on-rank"                        { return ON_RANK; }
  "on-summary"                     { return ON_SUMMARY; }

  "function"                       { return FUNCTION; }
  "macro"                          { return MACRO; }
  "inline"                         { return INLINE; }

  "summary-features"               { return SUMMARY_FEATURES; }
  "match-features"                 { return MATCH_FEATURES; }
  "rank-features"                  { return RANK_FEATURES; }

  "body"                           { return BODY; }
  "header"                         { return HEADER; }
  "summary-to"                     { return SUMMARY_TO; }

  "evaluation-point"               { return EVALUATION_POINT; }
  "pre-post-filter-tipping-point"  { return PRE_POST_FILTER_TIPPING_POINT; }

  // In here, we check for character sequences which match the regular expressions defined above.
  // These rules come after the keywords so that a keyword wins over an identifier of the same length.
  {ID}                             { return ID_REG; }
  {ID_WITH_DASH}                   { return ID_WITH_DASH_REG; }

  {WHITE_SPACE}                    { return WHITE_SPACE; }

  {COMMENT}                        { return COMMENT; }
  {SYMBOL}                         { return SYMBOL; }
  {INTEGER}                        { return INTEGER_REG; }
  {FLOAT}                          { return FLOAT_REG; }
  {ARITHMETIC_OPERATOR}            { return ARITHMETIC_OPERATOR; }
  {COMPARISON_OPERATOR}            { return COMPARISON_OPERATOR; }
  {WORD}                           { return WORD_REG; }
  {STRING}                         { return STRING_REG; }

}

// If the character sequence does not match any of the above rules, we return BAD_CHARACTER which indicates that
// there is an error in the character sequence. This is used to highlight errors.
[^] { return BAD_CHARACTER; }
\ No newline at end of file |