// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. /** * When this file is changed, do "mvn generate-sources" to rebuild the parser. * * @author bratseth */ options { CACHE_TOKENS = true; STATIC = false; DEBUG_PARSER = false; USER_TOKEN_MANAGER = false; ERROR_REPORTING = true; USER_CHAR_STREAM = false; } PARSER_BEGIN(RankingExpressionParser) package com.yahoo.searchlib.rankingexpression.parser; import com.yahoo.searchlib.rankingexpression.rule.*; import com.yahoo.searchlib.rankingexpression.evaluation.Value; import com.yahoo.searchlib.rankingexpression.evaluation.StringValue; import com.yahoo.searchlib.rankingexpression.evaluation.TensorValue; import com.yahoo.tensor.*; import com.yahoo.tensor.functions.*; import java.util.Collections; import java.util.LinkedHashMap; import java.util.Arrays; import java.util.ArrayList; import java.util.List; import java.util.Optional; public class RankingExpressionParser { } PARSER_END(RankingExpressionParser) SKIP : { <[" ","\n","\r","\t"]> } TOKEN : { (["l","L"])? | (["l","L"])? | (["l","L"])?> | <#DECIMAL: ["1"-"9"] (["0"-"9"])*> | <#HEX: "0" ["x","X"] (["0"-"9","a"-"f","A"-"F"])+> | <#OCTAL: "0" (["0"-"7"])*> | )? (["f","F","d","D"])?> | <#EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+> } TOKEN : { | | | | | | | | | | | | | | | | | | ="> | "> | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | // MAX // MIN | | | | | | | | | | | | | | | | | | | | | | | } // Declare a special skip token for comments. 
SPECIAL_TOKEN : { } /* NOTE(review): in this copy the places that would normally reference tokens appear empty (for example the SPECIAL_TOKEN body just before this note) and List is used without type parameters; it looks like text in angle brackets was lost - confirm against the canonical grammar before regenerating the parser. */ /* Parses one or more feature() references and returns them, in parse order, as a list. */ List featureList() : { List ret = new ArrayList(); ReferenceNode exp; } { ( ( exp = feature() { ret.add(exp); } )+ ) { return ret; } } /* Parses a single expression() and returns its node unchanged. */ ExpressionNode rankingExpression() : { ExpressionNode ret; } { ( ret = expression() ) { return ret; } } /* Parses an arithmeticExpression() followed by zero or more suffixes, folding each one into left: a comparator() plus a right-hand arithmeticExpression() becomes a ComparisonNode, an expressionList() becomes a SetMembershipNode. */ ExpressionNode expression() : { ExpressionNode left, right; List rightList; TruthOperator comparatorOp; } { ( left = arithmeticExpression() ( ( comparatorOp = comparator() right = arithmeticExpression() { left = new ComparisonNode(left, comparatorOp, right); } ) | ( rightList = expressionList() { left = new SetMembershipNode(left, rightList); } ) ) * ) { return left; } } /* Parses a value() followed by zero or more pairs of arithmetic() operator and value(), combining each pair into left through ArithmeticNode.resolve. */ ExpressionNode arithmeticExpression() : { ExpressionNode left, right = null; ArithmeticOperator arithmeticOp; } { ( left = value() ( arithmeticOp = arithmetic() right = value() { left = ArithmeticNode.resolve(left, arithmeticOp, right); } ) * ) { return left; } } /* Returns the ArithmeticOperator constant of the matched alternative. Every alternative returns on its own, so the trailing return null is presumably only there to satisfy the Java compiler - confirm. */ ArithmeticOperator arithmetic() : { } { ( { return ArithmeticOperator.PLUS; } | { return ArithmeticOperator.MINUS; } |
{ return ArithmeticOperator.DIVIDE; } | { return ArithmeticOperator.MULTIPLY; } | { return ArithmeticOperator.MODULO; } | { return ArithmeticOperator.AND; } | { return ArithmeticOperator.OR; } | { return ArithmeticOperator.POWER; } ) { return null; } } /* Returns the TruthOperator constant of the matched alternative. As in arithmetic(), every alternative returns on its own and the trailing return null is presumably never reached - confirm. */ TruthOperator comparator() : { } { ( { return TruthOperator.SMALLEREQUAL; } | { return TruthOperator.SMALLER; } | { return TruthOperator.EQUAL; } | { return TruthOperator.NOTEQUAL; } | { return TruthOperator.APPROX_EQUAL; } | { return TruthOperator.LARGEREQUAL; } | { return TruthOperator.LARGER; } ) { return null; } } /* Parses one operand: two optional prefixes that set the not and neg flags, then one of constantPrimitive(), ifExpression(), function(), feature(), legacyQueryFeature(), or an expression() wrapped in EmbracedNode. The NotNode wrapper is applied before the NegativeNode wrapper, so with both flags set the result is a NegativeNode around a NotNode. */ ExpressionNode value() : { ExpressionNode ret; boolean neg = false; boolean not = false; } { ( [ { not = true; } ] [ LOOKAHEAD(2) { neg = true; } ] ( ret = constantPrimitive() | LOOKAHEAD(2) ret = ifExpression() | LOOKAHEAD(4) ret = function() | ret = feature() | ret = legacyQueryFeature() | ( ret = expression() { ret = new EmbracedNode(ret); } ) ) ) { ret = not ? new NotNode(ret) : ret; ret = neg ? new NegativeNode(ret) : ret; return ret; } } /* Parses the condition, ifTrue and ifFalse expressions plus an optional doubleNumber(); trueProbability stays null when that number is absent. */ IfNode ifExpression() : { ExpressionNode condition, ifTrue, ifFalse; Double trueProbability = null; } { ( ( condition = expression() ) ifTrue = expression() ifFalse = expression() ( trueProbability = doubleNumber() )? ) { return new IfNode(condition, ifTrue, ifFalse, trueProbability); } } /* Parses an identifier() with optional args() and optional outs(); args and out are handed to ReferenceNode as null when absent. */ ReferenceNode feature() : { List args = null; String name, out = null; } { ( name = identifier() [ args = args() ] [ out = outs() ] ) { return new ReferenceNode(name, args, out); } } // Query features can be referenced as "$name" instead of "query(name)". 
TODO: Warn this is deprecated /* NOTE(review): the words before this note read like the tail of the line comment that ends the preceding line; in this layout they are not inside any comment - confirm against the canonical grammar. */ /* Parses an identifier() and returns a ReferenceNode named query whose single argument is a NameNode holding that identifier; the output argument is null. */ ReferenceNode legacyQueryFeature() : { String name; } { ( name = identifier() ) { return new ReferenceNode("query", Arrays.asList((ExpressionNode)new NameNode(name)), null); } } /* Concatenates one out() with any number of further pairs into a single string; each pair contributes the image of the token matched before it (presumably a separator - confirm) followed by another out(). */ String outs() : { StringBuilder ret = new StringBuilder(); String str; } { ( str = out() { ret.append(str); } ( { ret.append(token.image); } str = out() { ret.append(str); } )* ) { return ret.toString(); } } /* Returns the image of the matched token for the first two alternatives, or the identifier() name for the third. The local fnc is declared but not used in this production. */ String out() : { Function fnc; String name; } { ( { return token.image; } | { return token.image; } | name = identifier() { return name; } ) { return null; } } /* Collects one or more arg() nodes into a list, in parse order. */ List args() : { List arguments = new ArrayList(); ExpressionNode argument; } { ( argument = arg() { arguments.add(argument); } ( argument = arg() { arguments.add(argument); } )* ) { return arguments; } } // TODO: Replace use of this for macro arguments with value() // For that to work with the current search execution framework // we need to generate another macro for the argument such that we can replace // instances of the argument with the reference to that macro in the same way // as we replace by constants/names today (this can make for some fun combinatorial explosion). // Simon also points out that we should stop doing macro expansion in the toString of a macro. 
// - Jon 2014-05-02 /* Parses one argument: a constantPrimitive(), a feature() (selected with LOOKAHEAD(2)), or otherwise an identifier() wrapped in a NameNode. The local fnc is declared but not used. */ ExpressionNode arg() : { ExpressionNode ret; String name; Function fnc; } { ( ret = constantPrimitive() | LOOKAHEAD(2) ret = feature() | name = identifier() { ret = new NameNode(name); } ) { return ret; } } /* Dispatches to scalarOrTensorFunction() (selected with LOOKAHEAD(2)) or else to tensorFunction(). */ ExpressionNode function() : { ExpressionNode function; } { ( LOOKAHEAD(2) function = scalarOrTensorFunction() | function = tensorFunction() ) { return function; } } /* Builds a FunctionNode from a unaryFunctionName() with one expression() argument, or from a binaryFunctionName() with two. */ FunctionNode scalarOrTensorFunction() : { Function function; ExpressionNode arg1, arg2; } { ( ( function = unaryFunctionName() arg1 = expression() ) { return new FunctionNode(function, arg1); } ) | ( ( function = binaryFunctionName() arg1 = expression() arg2 = expression() ) { return new FunctionNode(function, arg1, arg2); } ) } /* Returns the node produced by whichever of the listed tensor productions is selected. */ ExpressionNode tensorFunction() : { ExpressionNode tensorExpression; } { ( tensorExpression = tensorMap() | tensorExpression = tensorReduce() | tensorExpression = tensorReduceComposites() | tensorExpression = tensorJoin() | tensorExpression = tensorRename() | tensorExpression = tensorConcat() | tensorExpression = tensorGenerate() | tensorExpression = tensorRange() | tensorExpression = tensorDiag() | tensorExpression = tensorRandom() | tensorExpression = tensorL1Normalize() | tensorExpression = tensorL2Normalize() | tensorExpression = tensorMatmul() | tensorExpression = tensorSoftmax() | tensorExpression = tensorXwPlusB() | tensorExpression = tensorArgmax() | tensorExpression = tensorArgmin() ) { return tensorExpression; } } /* The productions from tensorMap() through tensorArgmin() share one shape: parse the arguments, pass every tensor expression through TensorFunctionNode.wrapArgument, and return a TensorFunctionNode around the matching function object (Map, Reduce, Join and so on). */ ExpressionNode tensorMap() : { ExpressionNode tensor; LambdaFunctionNode doubleMapper; } { tensor = expression() doubleMapper = lambdaFunction() { return new TensorFunctionNode(new Map(TensorFunctionNode.wrapArgument(tensor), doubleMapper.asDoubleUnaryOperator())); } } ExpressionNode tensorReduce() : { ExpressionNode tensor; Reduce.Aggregator aggregator; List dimensions = null; } { tensor = expression() aggregator = tensorReduceAggregator() dimensions = tagCommaLeadingList() { return new TensorFunctionNode(new 
Reduce(TensorFunctionNode.wrapArgument(tensor), aggregator, dimensions)); } } /* Builds the same Reduce node as tensorReduce(), but parses the aggregator before the tensor expression. */ ExpressionNode tensorReduceComposites() : { ExpressionNode tensor; Reduce.Aggregator aggregator; List dimensions = null; } { aggregator = tensorReduceAggregator() tensor = expression() dimensions = tagCommaLeadingList() { return new TensorFunctionNode(new Reduce(TensorFunctionNode.wrapArgument(tensor), aggregator, dimensions)); } } ExpressionNode tensorJoin() : { ExpressionNode tensor1, tensor2; LambdaFunctionNode doubleJoiner; } { tensor1 = expression() tensor2 = expression() doubleJoiner = lambdaFunction() { return new TensorFunctionNode(new Join(TensorFunctionNode.wrapArgument(tensor1), TensorFunctionNode.wrapArgument(tensor2), doubleJoiner.asDoubleBinaryOperator())); } } ExpressionNode tensorRename() : { ExpressionNode tensor; List fromDimensions, toDimensions; } { tensor = expression() fromDimensions = bracedIdentifierList() toDimensions = bracedIdentifierList() { return new TensorFunctionNode(new Rename(TensorFunctionNode.wrapArgument(tensor), fromDimensions, toDimensions)); } } ExpressionNode tensorConcat() : { ExpressionNode tensor1, tensor2; String dimension; } { tensor1 = expression() tensor2 = expression() dimension = tag() { return new TensorFunctionNode(new Concat(TensorFunctionNode.wrapArgument(tensor1), TensorFunctionNode.wrapArgument(tensor2), dimension)); } } /* Builds a Generate from a tensorTypeArgument() and a generator expression converted through GeneratorLambdaFunctionNode.asLongListToDoubleOperator(). */ ExpressionNode tensorGenerate() : { TensorType type; ExpressionNode generator; } { type = tensorTypeArgument() generator = expression() { return new TensorFunctionNode(new Generate(type, new GeneratorLambdaFunctionNode(type, generator).asLongListToDoubleOperator())); } } ExpressionNode tensorRange() : { TensorType type; } { type = tensorTypeArgument() { return new TensorFunctionNode(new Range(type)); } } ExpressionNode tensorDiag() : { TensorType type; } { type = tensorTypeArgument() { return new TensorFunctionNode(new Diag(type)); } } ExpressionNode tensorRandom() : { TensorType type; } { type = tensorTypeArgument() 
{ return new TensorFunctionNode(new Random(type)); } } ExpressionNode tensorL1Normalize() : { ExpressionNode tensor; String dimension; } { tensor = expression() dimension = identifier() { return new TensorFunctionNode(new L1Normalize(TensorFunctionNode.wrapArgument(tensor), dimension)); } } ExpressionNode tensorL2Normalize() : { ExpressionNode tensor; String dimension; } { tensor = expression() dimension = identifier() { return new TensorFunctionNode(new L2Normalize(TensorFunctionNode.wrapArgument(tensor), dimension)); } } ExpressionNode tensorMatmul() : { ExpressionNode tensor1, tensor2; String dimension; } { tensor1 = expression() tensor2 = expression() dimension = identifier() { return new TensorFunctionNode(new Matmul(TensorFunctionNode.wrapArgument(tensor1), TensorFunctionNode.wrapArgument(tensor2), dimension)); } } ExpressionNode tensorSoftmax() : { ExpressionNode tensor; String dimension; } { tensor = expression() dimension = identifier() { return new TensorFunctionNode(new Softmax(TensorFunctionNode.wrapArgument(tensor), dimension)); } } ExpressionNode tensorXwPlusB() : { ExpressionNode tensor1, tensor2, tensor3; String dimension; } { tensor1 = expression() tensor2 = expression() tensor3 = expression() dimension = identifier() { return new TensorFunctionNode(new XwPlusB(TensorFunctionNode.wrapArgument(tensor1), TensorFunctionNode.wrapArgument(tensor2), TensorFunctionNode.wrapArgument(tensor3), dimension)); } } ExpressionNode tensorArgmax() : { ExpressionNode tensor; String dimension; } { tensor = expression() dimension = identifier() { return new TensorFunctionNode(new Argmax(TensorFunctionNode.wrapArgument(tensor), dimension)); } } ExpressionNode tensorArgmin() : { ExpressionNode tensor; String dimension; } { tensor = expression() dimension = identifier() { return new TensorFunctionNode(new Argmin(TensorFunctionNode.wrapArgument(tensor), dimension)); } } /* Parses an identifierList() of variables and a body expression() into a LambdaFunctionNode. */ LambdaFunctionNode lambdaFunction() : { List variables; ExpressionNode functionExpression; } { ( 
variables = identifierList() functionExpression = expression() ) { return new LambdaFunctionNode(variables, functionExpression); } } /* Converts the image of the matched token to a Reduce.Aggregator through Reduce.Aggregator.valueOf(token.image). */ Reduce.Aggregator tensorReduceAggregator() : { } { ( | | | | | ) { return Reduce.Aggregator.valueOf(token.image); } } /* Builds a TensorType from zero or more tensorTypeDimension() entries, all added to one TensorType.Builder. */ TensorType tensorTypeArgument() : { TensorType.Builder builder = new TensorType.Builder(); } { ( tensorTypeDimension(builder) ) ? ( tensorTypeDimension(builder) ) * { return builder.build(); } } // NOTE: Only indexed bound dimensions are parsed currently, as that is what we need /* Adds one indexed dimension, given by identifier() and integerNumber(), to the builder. */ void tensorTypeDimension(TensorType.Builder builder) : { String name; int size; } { name = identifier() size = integerNumber() { builder.indexed(name, size); } } // This is needed not to parse tensor functions but for the "reserved names as literals" workaround kludge /* Returns token.image for each of the token alternatives, or aggregator.toString() for the final tensorReduceAggregator() alternative. */ String tensorFunctionName() : { Reduce.Aggregator aggregator; } { ( { return token.image; } ) | ( { return token.image; } ) | ( { return token.image; } ) | ( { return token.image; } ) | ( { return token.image; } ) | ( { return token.image; } ) | ( { return token.image; } ) | ( { return token.image; } ) | ( { return token.image; } ) | ( { return token.image; } ) | ( { return token.image; } ) | ( { return token.image; } ) | ( { return token.image; } ) | ( { return token.image; } ) | ( { return token.image; } ) | ( { return token.image; } ) | ( { return token.image; } ) | ( aggregator = tensorReduceAggregator() { return aggregator.toString(); } ) } /* Maps each matched token to the corresponding Function constant; scalarOrTensorFunction() uses these with one argument. */ Function unaryFunctionName() : { } { { return Function.abs; } | { return Function.acos; } | { return Function.asin; } | { return Function.atan; } | { return Function.ceil; } | { return Function.cos; } | { return Function.cosh; } | { return Function.elu; } | { return Function.exp; } | { return Function.fabs; } | { return Function.floor; } | { return Function.isNan; } | { return Function.log; } | { return Function.log10; } | { return Function.relu; } | { return Function.round; } | { return Function.sigmoid; } | { return Function.sign; } | { 
return Function.sin; } | { return Function.sinh; } | { return Function.square; } | { return Function.sqrt; } | { return Function.tan; } | { return Function.tanh; } } /* Maps each matched token to the corresponding Function constant; scalarOrTensorFunction() uses these with two arguments. */ Function binaryFunctionName() : { } { { return Function.atan2; } | { return Function.fmod; } | { return Function.ldexp; } | { return Function.max; } | { return Function.min; } | { return Function.pow; } } /* Parses one expression() followed by zero or more further expression() entries, each guarded by LOOKAHEAD(2), into a list. */ List expressionList() : { List list = new ArrayList(); ExpressionNode expression; } { expression=expression() { list.add(expression); } ( LOOKAHEAD(2) expression=expression() { list.add(expression); } ) * { return list; } } /* Parses an optionally negated number and returns Double.parseDouble of the sign plus the token image. */ double doubleNumber() : { String sign = ""; } { ( { sign = "-";} )? ( | ) { return Double.parseDouble(sign + token.image); } } /* Parses an optionally negated number and returns Integer.parseInt of the sign plus the token image. */ int integerNumber() : { String sign = ""; } { ( { sign = "-";} )? ( ) { return Integer.parseInt(sign + token.image); } } /* Returns a name as text: a tensorFunctionName(), the string form of a unary or binary Function, or the image of one of three further tokens. */ String identifier() : { String name; Function func; } { LOOKAHEAD(2) name = tensorFunctionName() { return name; } | func = unaryFunctionName() { return func.toString(); } | func = binaryFunctionName() { return func.toString(); } | { return token.image; } | { return token.image; } | { return token.image; } } /* Collects zero or more identifier() names into a list. */ List identifierList() : { List list = new ArrayList(); String element; } { ( element = identifier() { list.add(element); } )? ( element = identifier() { list.add(element); } ) * { return list; } } /* Returns either a singleton list holding one identifier(), or the list produced by identifierList(). */ List bracedIdentifierList() : { List list = new ArrayList(); String element; } { ( element = identifier() { return Collections.singletonList(element); } ) | ( list = identifierList() { return list; } ) } // An identifier or integer String tag() : { String name; } { name = identifier() { return name; } | { return token.image; } } /* Collects zero or more tag() values into a list. */ List tagCommaLeadingList() : { List list = new ArrayList(); String element; } { ( element = tag() { list.add(element); } ) * { return list; } } /* Parses an optional minus sign followed by a token image or a stringPath(), and returns a ConstantNode built from Value.parse of the signed text together with the signed text itself. */ ConstantNode constantPrimitive() : { String sign = ""; String value; } { ( { sign = "-";} ) ? 
( { value = token.image; } | { value = token.image; } | value = stringPath() ) { return new ConstantNode(Value.parse(sign + value),sign + value); } } // Strings separated by "/" /* Appends the first token image, then a slash and the next token image for every further segment. */ String stringPath() : { StringBuilder b = new StringBuilder(); } { { b.append(token.image); } ( LOOKAHEAD(2)
{ b.append("/").append(token.image); } ) * { return b.toString(); } } /* Parses an optional minus sign and one token, returning Value.parse of the signed image. */ Value primitiveValue() : { String sign = ""; } { ( { sign = "-";} ) ? ( | | ) { return Value.parse(sign + token.image); } }