// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
// --------------------------------------------------------------------------------
//
// JavaCC options.
//
// --------------------------------------------------------------------------------
options {
    CACHE_TOKENS     = false;
    DEBUG_PARSER     = false;
    ERROR_REPORTING  = true;
    USER_CHAR_STREAM = true;
}

// --------------------------------------------------------------------------------
//
// Parser body.
//
// --------------------------------------------------------------------------------
PARSER_BEGIN(IndexingParser)

package com.yahoo.vespa.indexinglanguage.parser;

import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.util.LinkedHashMap;
import com.yahoo.collections.Pair;
import com.yahoo.document.datatypes.*;
import com.yahoo.text.StringUtilities;
import com.yahoo.vespa.indexinglanguage.expressions.*;
import com.yahoo.vespa.indexinglanguage.linguistics.AnnotatorConfig;
import com.yahoo.language.process.Embedder;
import com.yahoo.language.Linguistics;

/**
 * Parser for indexing language expressions. The setters return this parser so that
 * configuration calls can be chained before a production is invoked.
 *
 * @author Simon Thoresen Hult
 */
public class IndexingParser {

    /** Field name used by productions whose field name argument is optional. */
    private String defaultFieldName;

    /** Passed on to the ngram, normalize and tokenize expressions. */
    private Linguistics linguistics;

    // NOTE(review): raw Map as shown in this view; the Embedder import suggests a
    // parameterized type was intended - confirm against the canonical source.
    /** Passed on to embed expressions. */
    private Map embedders;

    /** Base annotator config; read by the exact and tokenize productions. */
    private AnnotatorConfig annotatorCfg;

    /** Creates a parser reading from the given string. */
    public IndexingParser(String str) {
        this(new IndexingInput(str));
    }

    /** Sets the field name used when a production omits its field name. Returns this. */
    public IndexingParser setDefaultFieldName(String fieldName) {
        defaultFieldName = fieldName;
        return this;
    }

    /** Sets the linguistics instance handed to created expressions. Returns this. */
    public IndexingParser setLinguistics(Linguistics linguistics) {
        this.linguistics = linguistics;
        return this;
    }

    /** Sets the embedders handed to embed expressions. Returns this. */
    public IndexingParser setEmbedders(Map embedders) {
        this.embedders = embedders;
        return this;
    }

    /** Sets the base annotator config. Returns this. */
    public IndexingParser setAnnotatorConfig(AnnotatorConfig cfg) {
        annotatorCfg = cfg;
        return this;
    }

    /** Converts a decimal literal to a double field value. */
    private static FieldValue parseDouble(String str) {
        return new DoubleFieldValue(new BigDecimal(str).doubleValue());
    }

    /** Converts a decimal literal, with an optional trailing 'f' or 'F', to a float field value. */
    private static FieldValue parseFloat(String str) {
        if (str.endsWith("f") || str.endsWith("F")) {
            str = str.substring(0, str.length() - 1);
        }
        return new FloatFieldValue(new BigDecimal(str).floatValue());
    }

    /**
     * Converts an integer literal (decimal or "0x"-prefixed hexadecimal, optionally
     * preceded by '-') to an integer field value. Values outside the int range are
     * narrowed by {@link BigInteger#intValue()}.
     */
    private static FieldValue parseInteger(String str) {
        return new IntegerFieldValue(parseBigInteger(str).intValue());
    }

    /**
     * Converts a long literal (decimal or "0x"-prefixed hexadecimal, optionally preceded
     * by '-', with an optional trailing 'l' or 'L') to a long field value. Values outside
     * the long range are narrowed by {@link BigInteger#longValue()}.
     */
    private static FieldValue parseLong(String str) {
        if (str.endsWith("l") || str.endsWith("L")) {
            str = str.substring(0, str.length() - 1);
        }
        return new LongFieldValue(parseBigInteger(str).longValue());
    }

    /**
     * Parses an optionally negated decimal or "0x"-prefixed hexadecimal integer literal.
     *
     * numericValue() prepends "-" to the token image, so for a negative hexadecimal
     * literal the sign comes before the "0x" prefix. The sign is therefore removed
     * before the prefix check and re-applied afterwards; checking the prefix on the
     * signed string would send "-0x1F" down the decimal path and fail.
     *
     * @throws NumberFormatException if the remaining digits are not valid in the radix
     */
    private static BigInteger parseBigInteger(String str) {
        boolean negative = str.startsWith("-");
        String digits = negative ? str.substring(1) : str;
        BigInteger val = digits.startsWith("0x") ? new BigInteger(digits.substring(2), 16)
                                                 : new BigInteger(digits);
        return negative ? val.negate() : val;
    }
}

PARSER_END(IndexingParser)

// NOTE(review): the token sections below look incomplete in this view (empty
// alternatives, dangling "> fragments) - confirm against the canonical grammar
// before relying on them. They are reproduced as found.
SKIP : { " " | "\t" | "\r" | "\f" }

SPECIAL_TOKEN : { }

TOKEN : { | ["l","L"]> | | ["f", "F"]> }

TOKEN : { | | | | | | | | "> | ="> | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | }

// --------------------------------------------------------------------------------
//
// Production rules.
//
// --------------------------------------------------------------------------------

// NOTE(review): several expansions below contain empty groups such as "( )", "[ ]"
// and bare "|" alternatives; the token references that would normally appear there
// are not visible in this view - confirm against the canonical grammar.

/**
 * Entry production: parses one statement, then unwraps expression lists that hold
 * exactly one element until a non-list or multi-element expression remains.
 */
Expression root() :
{
    Expression exp;
}
{
    ( exp = statement() [ ] )
    {
        while (exp instanceof ExpressionList && ((ExpressionList)exp).size() == 1) exp = ((ExpressionList)exp).get(0);
        return exp;
    }
}

/** Parses one or more statements, separated by optional newlines, into a ScriptExpression. */
ScriptExpression script() :
{
    StatementExpression exp;
    List lst = new ArrayList();
}
{
    ( nl() exp = statement() { lst.add(exp); } nl()
      ( nl() [ exp = statement() { lst.add(exp); } nl() ] )* )
    { return new ScriptExpression(lst); }
}

/** Parses a sequence of one or more expressions into a StatementExpression. */
StatementExpression statement() :
{
    Expression exp;
    List lst = new ArrayList();
}
{
    ( exp = expression() { lst.add(exp); }
      ( nl() exp = expression() { lst.add(exp); } )* )
    { return new StatementExpression(lst); }
}

/**
 * Parses one or more non-choice expressions. A single one is returned as is;
 * several are wrapped in a ChoiceExpression.
 */
Expression expression() :
{
    Expression choice;
    List choices = new ArrayList();
}
{
    choice = nonChoiceExpression() { choices.add(choice); }
    ( choice = nonChoiceExpression() { choices.add(choice); } )*
    { return choices.size() == 1 ? choices.get(0) : new ChoiceExpression(choices); }
}

/**
 * Parses one or more math expressions. A single one is returned as is;
 * several are wrapped in a CatExpression.
 */
Expression nonChoiceExpression() :
{
    Expression exp;
    List lst = new ArrayList();
}
{
    ( exp = math() { lst.add(exp); }
      ( exp = math() { lst.add(exp); } )* )
    // With exactly one element, exp is still the element that was added to lst.
    { return lst.size() == 1 ? exp : new CatExpression(lst); }
}

/**
 * Parses a chain of values joined by arithmetic operators and lets MathResolver
 * build the resulting expression. The first operand is pushed with ADD; every later
 * operand is pushed with the operator selected by the alternative preceding it.
 */
Expression math() :
{
    ArithmeticExpression.Operator op = ArithmeticExpression.Operator.ADD;
    MathResolver math = new MathResolver();
    Expression exp;
}
{
    ( exp = value() { math.push(op, exp); }
      ( ( { op = ArithmeticExpression.Operator.ADD; } |
          { op = ArithmeticExpression.Operator.DIV; } |
          { op = ArithmeticExpression.Operator.MOD; } |
          { op = ArithmeticExpression.Operator.MUL; } |
          { op = ArithmeticExpression.Operator.SUB; } ) exp = value() { math.push(op, exp); } )* )
    { return math.resolve(); }
}

/**
 * Parses a single value: any one of the expression productions below, a script,
 * or a nested statement which is wrapped in a ParenthesisExpression.
 */
Expression value() :
{
    Expression val;
}
{
    ( val = attributeExp()         |
      val = base64DecodeExp()      |
      val = base64EncodeExp()      |
      val = busy_waitExp()         |
      val = clearStateExp()        |
      val = echoExp()              |
      val = embedExp()             |
      val = exactExp()             |
      val = flattenExp()           |
      val = forEachExp()           |
      val = getFieldExp()          |
      val = getVarExp()            |
      val = guardExp()             |
      val = hashExp()              |
      val = hexDecodeExp()         |
      val = hexEncodeExp()         |
      val = hostNameExp()          |
      val = ifThenExp()            |
      val = indexExp()             |
      val = inputExp()             |
      val = joinExp()              |
      val = lowerCaseExp()         |
      val = ngramExp()             |
      val = normalizeExp()         |
      val = nowExp()               |
      val = optimizePredicateExp() |
      val = passthroughExp()       |
      val = randomExp()            |
      val = script()               |
      val = selectInputExp()       |
      val = setLanguageExp()       |
      val = setValueExp()          |
      val = setVarExp()            |
      val = sleepExp()             |
      val = splitExp()             |
      val = substringExp()         |
      val = summaryExp()           |
      val = switchExp()            |
      val = thisExp()              |
      val = tokenizeExp()          |
      val = toArrayExp()           |
      val = toByteExp()            |
      val = toDoubleExp()          |
      val = toFloatExp()           |
      val = toIntExp()             |
      val = toLongExp()            |
      val = toPosExp()             |
      val = toEpochSecondExp()     |
      val = toStringExp()          |
      val = toWsetExp()            |
      val = toBoolExp()            |
      val = trimExp()              |
      val = literalBoolExp()       |
      val = zcurveExp()            |
      val = executionValueExp()    |
      ( val = statement() { val = new ParenthesisExpression(val); } ) )
    { return val; }
}

/** Creates an AttributeExpression for the given field name, or the default field name if omitted. */
Expression attributeExp() :
{
    String val = defaultFieldName;
}
{
    ( [ val = fieldName() ] )
    { return new AttributeExpression(val); }
}

/** Creates a Base64DecodeExpression. */
Expression base64DecodeExp() : { }
{
    ( )
    { return new Base64DecodeExpression(); }
}

/** Creates a Base64EncodeExpression. */
Expression base64EncodeExp() : { }
{
    ( )
    { return new Base64EncodeExpression(); }
}

/** Creates a BusyWaitExpression. */
Expression busy_waitExp() : { }
{
    ( )
    { return new BusyWaitExpression(); }
}

/** Creates a ClearStateExpression. */
Expression clearStateExp() : { }
{
    ( )
    { return new ClearStateExpression(); }
}

/** Creates an EchoExpression. */
Expression echoExp() : { }
{
    ( )
    { return new EchoExpression(); }
}

/**
 * Creates an EmbedExpression from an optional embedder id (empty string when omitted)
 * and zero or more further identifier arguments, using the configured embedders.
 */
Expression embedExp() :
{
    String embedderId = "";
    String embedderArgument;
    List embedderArguments = new ArrayList();
}
{
    ( [ LOOKAHEAD(2) embedderId = identifier() ]
      ( LOOKAHEAD(2) embedderArgument = identifier() { embedderArguments.add(embedderArgument); } )* )
    { return new EmbedExpression(embedders, embedderId, embedderArguments); }
}

/**
 * Creates an ExactExpression with the given max token length, defaulting to the value
 * in the annotator config. The default is read from annotatorCfg on entry, so this
 * production dereferences annotatorCfg even when an explicit length follows.
 */
Expression exactExp() :
{
    int maxTokenLength = annotatorCfg.getMaxTokenLength();
}
{
    ( [ maxTokenLength = integer() ] )
    { return new ExactExpression(maxTokenLength); }
}

/** Creates a FlattenExpression. */
Expression flattenExp() : { }
{
    ( )
    { return new FlattenExpression(); }
}

/** Creates a ForEachExpression around the enclosed statement. */
Expression forEachExp() :
{
    Expression val;
}
{
    ( nl() val = statement() nl() )
    { return new ForEachExpression(val); }
}

/** Creates a GetFieldExpression for the given identifier. */
Expression getFieldExp() :
{
    String val;
}
{
    ( val = identifier() )
    { return new GetFieldExpression(val); }
}

/** Creates a GetVarExpression for the given identifier. */
Expression getVarExp() :
{
    String val;
}
{
    ( val = identifier() )
    { return new GetVarExpression(val); }
}

/** Creates a GuardExpression around the enclosed script. */
Expression guardExp() :
{
    Expression val;
}
{
    ( val = script() )
    { return new GuardExpression(val); }
}

/** Creates a HashExpression. */
Expression hashExp() : { }
{
    ( )
    { return new HashExpression(); }
}

/** Creates a HexDecodeExpression. */
Expression hexDecodeExp() : { }
{
    ( )
    { return new HexDecodeExpression(); }
}

/** Creates a HexEncodeExpression. */
Expression hexEncodeExp() : { }
{
    ( )
    { return new HexEncodeExpression(); }
}

/** Creates a HostNameExpression. */
Expression hostNameExp() : { }
{
    ( )
    { return new HostNameExpression(); }
}

/**
 * Creates an IfThenExpression from a left operand, a comparator, a right operand,
 * a script for the true case and an optional script for the false case
 * (null when omitted).
 */
Expression ifThenExp() :
{
    Expression lhs, rhs, ifTrue, ifFalse = null;
    IfThenExpression.Comparator cmp;
}
{
    ( lhs = expression() cmp = ifThenCmp() rhs = expression()
      ifTrue = script() [ ifFalse = script() ] )
    { return new IfThenExpression(lhs, cmp, rhs, ifTrue, ifFalse); }
}

/** Maps the matched comparison alternative to an IfThenExpression.Comparator. */
IfThenExpression.Comparator ifThenCmp() :
{
    IfThenExpression.Comparator val = null;
}
{
    ( { val = IfThenExpression.Comparator.EQ; } |
      { val = IfThenExpression.Comparator.NE; } |
      { val = IfThenExpression.Comparator.LE; } |
      { val = IfThenExpression.Comparator.LT; } |
      { val = IfThenExpression.Comparator.GE; } |
      { val = IfThenExpression.Comparator.GT; } )
    { return val; }
}

/** Creates an IndexExpression for the given field name, or the default field name if omitted. */
Expression indexExp() :
{
    String val = defaultFieldName;
}
{
    ( [ val = fieldName() ] )
    { return new IndexExpression(val); }
}

/** Creates an InputExpression for the given identifier, or the default field name if omitted. */
Expression inputExp() :
{
    String val = defaultFieldName;
}
{
    ( [ val = identifier() ] )
    { return new InputExpression(val); }
}

/** Creates a JoinExpression with the given string argument. */
Expression joinExp() :
{
    String val;
}
{
    ( val = string() )
    { return new JoinExpression(val); }
}

/** Creates a LowerCaseExpression. */
Expression lowerCaseExp() : { }
{
    ( )
    { return new LowerCaseExpression(); }
}

/** Creates an NGramExpression with the configured linguistics and the given gram size. */
Expression ngramExp() :
{
    int gramSize;
}
{
    ( gramSize = integer() )
    { return new NGramExpression(linguistics, gramSize); }
}

/** Creates a NormalizeExpression with the configured linguistics. */
Expression normalizeExp() : { }
{
    ( )
    { return new NormalizeExpression(linguistics); }
}

/** Creates a NowExpression. */
Expression nowExp() : { }
{
    ( )
    { return new NowExpression(); }
}

/** Creates an OptimizePredicateExpression. */
Expression optimizePredicateExp() : { }
{
    ( )
    { return new OptimizePredicateExpression(); }
}

/** Creates a PassthroughExpression for the given field name, or the default field name if omitted. */
Expression passthroughExp() :
{
    String val = defaultFieldName;
}
{
    ( [ val = fieldName() ] )
    { return new PassthroughExpression(val); }
}

/** Creates a RandomExpression with an optional integer argument (null when omitted). */
Expression randomExp() :
{
    Integer val = null;
}
{
    ( [ LOOKAHEAD(2) val = integer() ] )
    { return new RandomExpression(val); }
}

/**
 * Creates a SelectInputExpression from one or more (identifier, statement) cases,
 * kept in the order they were parsed.
 */
Expression selectInputExp() :
{
    List> cases = new ArrayList>();
    Expression exp;
    String str;
}
{
    ( nl() ( str = identifier() exp = statement() nl() { cases.add(new Pair(str, exp)); } )+ )
    { return new SelectInputExpression(cases); }
}

/** Creates a SetLanguageExpression. */
Expression setLanguageExp() : { }
{
    ( )
    { return new SetLanguageExpression(); }
}

/** Creates a ConstantExpression holding the parsed literal field value. */
Expression setValueExp() :
{
    FieldValue val;
}
{
    ( val = fieldValue() )
    { return new ConstantExpression(val); }
}

/** Creates a SetVarExpression for the given identifier. */
Expression setVarExp() :
{
    String val;
}
{
    ( val = identifier() )
    { return new SetVarExpression(val); }
}

/** Creates a SleepExpression. */
Expression sleepExp() : { }
{
    ( )
    { return new SleepExpression(); }
}

/** Creates a SplitExpression with the given string argument. */
Expression splitExp() :
{
    String val;
}
{
    ( val = string() )
    { return new SplitExpression(val); }
}

/**
 * Creates a SubstringExpression from two integer arguments. The locals are declared
 * long but are assigned from integer(), which returns int, so the casts back to int
 * do not lose information.
 */
Expression substringExp() :
{
    long from, to;
}
{
    ( from = integer() to = integer() )
    { return new SubstringExpression((int)from, (int)to); }
}

/** Creates a SummaryExpression for the given field name, or the default field name if omitted. */
Expression summaryExp() :
{
    String val = defaultFieldName;
}
{
    ( [ val = fieldName() ] )
    { return new SummaryExpression(val); }
}

/**
 * Creates a SwitchExpression from one or more (string, statement) cases, kept in
 * insertion order by the LinkedHashMap, and an optional default statement
 * (null when omitted). A repeated case string replaces the earlier entry.
 */
Expression switchExp() :
{
    Map cases = new LinkedHashMap();
    Expression exp, defaultExp = null;
    String str;
}
{
    ( nl()
      ( str = string() exp = statement() { cases.put(str, exp); } nl() )+
      [ defaultExp = statement() nl() ] )
    { return new SwitchExpression(cases, defaultExp); }
}

/** Creates a ThisExpression. */
Expression thisExp() : { }
{
    ( )
    { return new ThisExpression(); }
}

/**
 * Creates a TokenizeExpression with the configured linguistics and either the base
 * annotator config or, when options follow, a config produced by tokenizeCfg().
 */
Expression tokenizeExp() :
{
    AnnotatorConfig cfg = annotatorCfg;
}
{
    ( [ cfg = tokenizeCfg() ] )
    { return new TokenizeExpression(linguistics, cfg); }
}

/**
 * Parses one or more tokenize options into a copy of the base annotator config:
 * stem mode (the string "SHORTEST" unless a string is given), max tokenize length,
 * max term occurrences, max token length, and accent removal.
 */
AnnotatorConfig tokenizeCfg() :
{
    AnnotatorConfig val = new AnnotatorConfig(annotatorCfg);
    String str = "SHORTEST";
    Integer maxLength;
    Integer maxTermOccurrences;
    Integer maxTokenLength;
}
{
    ( ( str = string() ) ? { val.setStemMode(str); } |
      maxLength = integer() { val.setMaxTokenizeLength(maxLength); } |
      maxTermOccurrences = integer() { val.setMaxTermOccurrences(maxTermOccurrences); } |
      maxTokenLength = integer() { val.setMaxTokenLength(maxTokenLength); } |
      { val.setRemoveAccents(true); } )+
    { return val; }
}

/** Creates a ToArrayExpression. */
Expression toArrayExp() : { }
{
    ( )
    { return new ToArrayExpression(); }
}

/** Creates a ToByteExpression. */
Expression toByteExp() : { }
{
    ( )
    { return new ToByteExpression(); }
}

/** Creates a ToDoubleExpression. */
Expression toDoubleExp() : { }
{
    ( )
    { return new ToDoubleExpression(); }
}

/** Creates a ToFloatExpression. */
Expression toFloatExp() : { }
{
    ( )
    { return new ToFloatExpression(); }
}

/** Creates a ToIntegerExpression. */
Expression toIntExp() : { }
{
    ( )
    { return new ToIntegerExpression(); }
}

/** Creates a ToLongExpression. */
Expression toLongExp() : { }
{
    ( )
    { return new ToLongExpression(); }
}

/** Creates a ToPositionExpression. */
Expression toPosExp() : { }
{
    ( )
    { return new ToPositionExpression(); }
}

/** Creates a ToEpochSecondExpression. */
Expression toEpochSecondExp() : { }
{
    ( )
    { return new ToEpochSecondExpression(); }
}

/** Creates a ToStringExpression. */
Expression toStringExp() : { }
{
    ( )
    { return new ToStringExpression(); }
}

/**
 * Creates a ToWsetExpression. Both flags start as false and are switched on by
 * their respective alternatives, which may appear any number of times in any order.
 */
Expression toWsetExp() :
{
    boolean createIfNonExistent = false;
    boolean removeIfZero = false;
}
{
    ( ( { createIfNonExistent = true; } |
        { removeIfZero = true; } )* )
    { return new ToWsetExpression(createIfNonExistent, removeIfZero); }
}

/** Creates a ToBoolExpression. */
Expression toBoolExp() : { }
{
    ( )
    { return new ToBoolExpression(); }
}

/** Creates a TrimExpression. */
Expression trimExp() : { }
{
    ( )
    { return new TrimExpression(); }
}

/** Creates a LiteralBoolExpression from the image of the matched token via Boolean.parseBoolean. */
Expression literalBoolExp() : { }
{
    ( | )
    { return new LiteralBoolExpression(Boolean.parseBoolean(token.image)); }
}

/** Creates a ZCurveExpression. */
Expression zcurveExp() : { }
{
    ( )
    { return new ZCurveExpression(); }
}

/** Creates an ExecutionValueExpression. */
Expression executionValueExp() : { }
{
    ( )
    { return new ExecutionValueExpression(); }
}

/**
 * Parses an identifier: either a quoted string (unescaped by string()) or one of the
 * listed tokens, whose image is returned verbatim.
 */
String identifier() :
{
    String val;
}
{
    ( val = string() |
      ( | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | ) { val = token.image; } )
    { return val; }
}

/**
 * Parses a field name: an identifier followed by zero or more (separator token,
 * identifier) pairs, concatenated in order with the separator's image included.
 */
String fieldName() :
{
    StringBuilder builder = new StringBuilder();
    String str;
}
{
    ( str = identifier() { builder.append(str); }
      ( LOOKAHEAD(2) { builder.append(token.image); } str = identifier() { builder.append(str); } )* )
    { return builder.toString(); }
}

/** Parses a literal field value: numeric or string. */
FieldValue fieldValue() :
{
    FieldValue val;
}
{
    ( val = numericValue() | val = stringValue() )
    { return val; }
}

/**
 * Parses an optionally signed numeric literal. When the second sign alternative
 * matches, "-" is prepended to the token image before it is handed to the matching
 * parseDouble/parseFloat/parseInteger/parseLong helper.
 */
FieldValue numericValue() :
{
    FieldValue val;
    String pre = "";
}
{
    ( [ | { pre = "-"; } ]
      ( { val = parseDouble(pre + token.image); } |
        { val = parseFloat(pre + token.image); } |
        { val = parseInteger(pre + token.image); } |
        { val = parseLong(pre + token.image); } ) )
    { return val; }
}

/** Parses a string literal into a StringFieldValue. */
FieldValue stringValue() :
{
    String val;
}
{
    ( val = string() )
    { return new StringFieldValue(val); }
}

/**
 * Returns the matched token's image with its first and last characters removed
 * and the remainder passed through StringUtilities.unescape.
 */
String string() : { }
{
    ( )
    { return StringUtilities.unescape(token.image.substring(1, token.image.length() - 1)); }
}

/**
 * Parses an optionally signed integer using Integer.parseInt on the token image
 * (with "-" prepended when the second sign alternative matches).
 */
int integer() :
{
    String pre = "";
    int val;
}
{
    ( [ | { pre = "-"; } ]
      { val = Integer.parseInt(pre + token.image); } )
    { return val; }
}

/** Matches zero or more repetitions of its (not visible here) token. */
void nl() : { }
{
    ( )*
}