aboutsummaryrefslogtreecommitdiffstats
path: root/sd-plugin/src/main/java/org/intellij/sdk/language/sd.flex
diff options
context:
space:
mode:
Diffstat (limited to 'sd-plugin/src/main/java/org/intellij/sdk/language/sd.flex')
-rw-r--r--sd-plugin/src/main/java/org/intellij/sdk/language/sd.flex202
1 files changed, 202 insertions, 0 deletions
diff --git a/sd-plugin/src/main/java/org/intellij/sdk/language/sd.flex b/sd-plugin/src/main/java/org/intellij/sdk/language/sd.flex
new file mode 100644
index 00000000000..c657af4f658
--- /dev/null
+++ b/sd-plugin/src/main/java/org/intellij/sdk/language/sd.flex
@@ -0,0 +1,202 @@
+package org.intellij.sdk.language.lexer;
+
+import com.intellij.lexer.FlexLexer;
+import com.intellij.psi.tree.IElementType;import com.intellij.ui.components.MultiColumnList;import org.intellij.sdk.language.psi.SdTokenType;
+
+import static com.intellij.psi.TokenType.BAD_CHARACTER; // Pre-defined bad character token.
+import static com.intellij.psi.TokenType.WHITE_SPACE; // Pre-defined whitespace character token.
+import static org.intellij.sdk.language.psi.SdTypes.*; // That is the class which is specified as `elementTypeHolderClass` in bnf
+ // grammar file. This will contain all other tokens which we will use.
+
+%%
+
+// JFlex options that control the shape of the generated scanner class.
+// Generate the scanner as a public class ...
+%public
+// ... named SdLexer ...
+%class SdLexer
+// ... that implements the FlexLexer interface imported above.
+%implements FlexLexer
+// Name the scanning method `advance` and make it return an IElementType.
+%function advance
+%type IElementType
+// Scan the input using the Unicode character set.
+%unicode
+
+//**--------- REGEXES ---------**//
+// Named macros; the lexical rules section below refers to them as {NAME}.
+// If some character sequence is matched to this regex, it will be treated as an IDENTIFIER.
+ID=[a-zA-Z_][a-zA-Z0-9_]*
+// Same as ID, except that dashes are also allowed after the first character.
+ID_WITH_DASH = [a-zA-Z_][a-zA-Z0-9_-]*
+// If some character sequence is matched to this regex, it will be treated as a WHITE_SPACE.
+WHITE_SPACE=[ \t\n\x0B\f\r]+
+
+// A comment starts at `#` and runs to the end of the line.
+COMMENT=#.*
+// A single punctuation character.
+SYMBOL= [|:{}(),.\[\]]
+// One or more decimal digits (no sign).
+INTEGER = [0-9]+
+// Digits, a dot, digits, then an optional single trailing `e`.
+// NOTE(review): `[e]?` accepts "1.5e" but no exponent digits, so "1.5e10" is not lexed as one FLOAT.
+// Confirm whether a full exponent was intended.
+FLOAT = {INTEGER}[.][0-9]+[e]?
+// One of: <  >  ==  <=  >=  ~=
+COMPARISON_OPERATOR = [<>]|(==)|(<=)|(>=)|(\~=)
+// One of: -  +  *  /
+ARITHMETIC_OPERATOR = [\-+*/]
+// Double-quoted text on a single line. There is no escape syntax: the first `"` after the opening one ends it.
+STRING = [\"][^\"\n]*[\"]
+// One or more word characters (`\w`).
+WORD = \w+
+
+
+%%
+
+<YYINITIAL> {
+ // Keyword rules. When a keyword is found, the rule returns the token that corresponds to that keyword.
+ // These tokens are generated from the 'sd.bnf' file and are located in the SdTypes class.
+ // The parser uses the returned tokens to match the token sequence against its grammar rules.
+ //
+ // JFlex picks the longest match and, on a tie, the rule that is listed first. The keyword rules must
+ // therefore stay above the generic {ID} / {ID_WITH_DASH} / {WORD} rules at the bottom of this block,
+ // and every keyword literal must appear only once (a repeated literal can never be matched).
+
+ "search" { return SEARCH; }
+ "document" { return DOCUMENT; }
+ "inherits" { return INHERITS; }
+ "struct" { return STRUCT; }
+ "field" { return FIELD; }
+ "type" { return TYPE; }
+ "struct-field" { return STRUCT_FIELD; }
+ "match" { return MATCH; }
+
+ "indexing" { return INDEXING; }
+ "summary" { return SUMMARY; }
+ "attribute" { return ATTRIBUTE; }
+
+ "array" { return ARRAY; }
+ "raw" { return RAW; }
+ "uri" { return URI; }
+ "reference" { return REFERENCE; }
+ "annotationreference" { return ANNOTATIONREFERENCE; }
+ "weightedset" { return WEIGHTEDSET; }
+ "map" { return MAP; }
+
+ "text" { return TEXT; }
+ "exact" { return EXACT; }
+ "exact-terminator" { return EXACT_TERMINATOR; }
+ "word" { return WORD; }
+ "prefix" { return PREFIX; }
+ "cased" { return CASED; }
+ "uncased" { return UNCASED; }
+ "substring" { return SUBSTRING; }
+ "suffix" { return SUFFIX; }
+ "max-length" { return MAX_LENGTH; }
+ "gram" { return GRAM; }
+ "gram-size" { return GRAM_SIZE; }
+
+ "fast-search" { return FAST_SEARCH; }
+ "fast-access" { return FAST_ACCESS; }
+ "alias" { return ALIAS; }
+ "sorting" { return SORTING; }
+ "uca" { return UCA; }
+ "lowercase" { return LOWERCASE; }
+ "paged" { return PAGED; }
+ "strength" { return STRENGTH; }
+ "primary" { return PRIMARY; }
+ "secondary" { return SECONDARY; }
+ "tertiary" { return TERTIARY; }
+ "quaternary" { return QUATERNARY; }
+ "identical" { return IDENTICAL; }
+ "distance-metric" { return DISTANCE_METRIC; }
+
+ "rank" { return RANK; }
+ "filter" { return FILTER; }
+ "normal" { return NORMAL; }
+ "indexing-rewrite" { return INDEXING_REWRITE; }
+ "none" { return NONE; }
+ "query-command" { return QUERY_COMMAND; }
+ "full" { return FULL; }
+ // Was misspelled as "dinamic", so the real keyword never produced the DYNAMIC token.
+ "dynamic" { return DYNAMIC; }
+ "source" { return SOURCE; }
+ "to" { return TO; }
+ "matched-elements-only" { return MATCHED_ELEMENTS_ONLY; }
+
+ "input" { return INPUT; }
+ "mutable" { return MUTABLE; }
+ "document-summary" { return DOCUMENT_SUMMARY; }
+ "from-disk" { return FROM_DISK; }
+ "omit-summary-features" { return OMIT_SUMMARY_FEATURES; }
+ "import" { return IMPORT; }
+ "as" { return AS; }
+
+ "rank-profile" { return RANK_PROFILE; }
+ "match-phase" { return MATCH_PHASE; }
+ "order" { return ORDER; }
+ "ascending" { return ASCENDING; }
+ "descending" { return DESCENDING; }
+ "max-hits" { return MAX_HITS; }
+ "diversity" { return DIVERSITY; }
+ "min-groups" { return MIN_GROUPS; }
+ "rank-properties" { return RANK_PROPERTIES; }
+
+ "first-phase" { return FIRST_PHASE; }
+ "keep-rank-count" { return KEEP_RANK_COUNT; }
+ "rank-score-drop-limit" { return RANK_SCORE_DROP_LIMIT; }
+ "expression" { return EXPRESSION; }
+ "file" { return FILE; }
+ "num-threads-per-search" { return NUM_THREADS_PER_SEARCH; }
+ "termwise-limit" { return TERMWISE_LIMIT; }
+ "ignore-default-rank-features" { return IGNORE_DEFAULT_RANK_FEATURES; }
+ "min-hits-per-thread" { return MIN_HITS_PER_THREAD; }
+ "num-search-partition" { return NUM_SEARCH_PARTITION; }
+ "constants" { return CONSTANTS; }
+ "literal" { return LITERAL; }
+ "second-phase" { return SECOND_PHASE; }
+ "rerank-count" { return RERANK_COUNT; }
+
+ "weight" { return WEIGHT; }
+ "index" { return INDEX; }
+ "bolding" { return BOLDING; }
+ "on" { return ON; }
+ "off" { return OFF; }
+ "true" { return TRUE; }
+ "false" { return FALSE; }
+ "id" { return ID; }
+ "normalizing" { return NORMALIZING; }
+ "stemming" { return STEMMING; }
+ "arity" { return ARITY; }
+ "lower-bound" { return LOWER_BOUND; }
+ "upper-bound" { return UPPER_BOUND; }
+ "dense-posting-list-threshold" { return DENSE_POSTING_LIST_THRESHOLD; }
+ "enable-bm25" { return ENABLE_BM25; }
+ "hnsw" { return HNSW; }
+ "max-links-per-node" { return MAX_LINKS_PER_NODE; }
+ "neighbors-to-explore-at-insert" { return NEIGHBORS_TO_EXPLORE_AT_INSERT; }
+ "multi-threaded-indexing" { return MULTI_THREADED_INDEXING; }
+ "create-if-nonexistent" { return CREATE_IF_NONEXISTENT; }
+ "remove-if-zero" { return REMOVE_IF_ZERO; }
+ "dictionary" { return DICTIONARY; }
+ "hash" { return HASH; }
+ "btree" { return BTREE; }
+
+ "fieldset" { return FIELDSET; }
+ "fields" { return FIELDS; }
+ "constant" { return CONSTANT; }
+ "output" { return OUTPUT; }
+
+ "annotation" { return ANNOTATION; }
+ "rank-type" { return RANK_TYPE; }
+ "onnx-model" { return ONNX_MODEL; }
+ "raw-as-base64-in-summary" { return RAW_AS_BASE64_IN_SUMMARY; }
+
+ "function" { return FUNCTION; }
+ "macro" { return MACRO; }
+ "inline" { return INLINE; }
+
+ "summary-features" { return SUMMARY_FEATURES; }
+
+ "body" { return BODY; }
+ "header" { return HEADER; }
+
+ // In here, we check for character sequences which match the regular expressions defined above.
+ // Text that is exactly a keyword never reaches these rules, because the keyword rule is listed first.
+ {ID} { return ID_REG; }
+ {ID_WITH_DASH} { return ID_WITH_DASH_REG; }
+
+ {WHITE_SPACE} { return WHITE_SPACE; }
+
+ {COMMENT} { return COMMENT; }
+ {SYMBOL} { return SYMBOL; }
+ {INTEGER} { return INTEGER_REG; }
+ {FLOAT} { return FLOAT_REG; }
+ {ARITHMETIC_OPERATOR} { return ARITHMETIC_OPERATOR; }
+ {COMPARISON_OPERATOR} { return COMPARISON_OPERATOR; }
+ {WORD} { return WORD_REG; }
+ {STRING} { return STRING; }
+
+}
+
+// Fallback rule. It is declared outside the <YYINITIAL> block, and `[^]` matches any single character, so a
+// character that does not match any of the above rules is returned as BAD_CHARACTER, which indicates that
+// there is an error in the character sequence. This is used to highlight errors.
+[^] { return BAD_CHARACTER; } \ No newline at end of file