// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. /** * A best-effort treenet parser. * * @author Simon Thoresen Hult * @version $Id: TreeNetParser.jj,v 1.1 2009-02-24 10:06:32 arnej Exp $ */ options { CACHE_TOKENS = true; STATIC = false; DEBUG_PARSER = false; IGNORE_CASE = true; // Flip for higher performance ERROR_REPORTING = true; } PARSER_BEGIN(TreeNetParser) package com.yahoo.searchlib.treenet.parser; import com.yahoo.searchlib.rankingexpression.parser.RankingExpressionParser; import com.yahoo.searchlib.treenet.rule.*; import java.io.StringReader; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; public class TreeNetParser { /** Throws a ParseException that names both variables unless expected equals actual. */ void verifyCategoricalVar(String expected, String actual) throws ParseException { if (!expected.equals(actual)) { throw new ParseException("Expected variable '" + expected + "', got '" + actual + "'."); } } /** Builds a ComparisonCondition on var whose threshold lies halfway between valA and valB; the label paired with the smaller value is passed first (lblB first when the values are equal). */ ComparisonCondition resolveCategoricalCondition(String var, Integer valA, Integer valB, String lblA, String lblB) { if (valA < valB) return new ComparisonCondition(var, valA + (valB - valA) / 2.0, lblA, lblB); else return new ComparisonCondition(var, valB + (valA - valB) / 2.0, lblB, lblA); } } PARSER_END(TreeNetParser) /* Skipped input: space, carriage return and tab characters, line comments introduced by two slashes or by a hash sign, and slash-star block comments. The newline character is not in the skipped character set. */ SKIP : { <[" ","\r","\t"]> | <"//" (~["\n","\r"])* ("\n" | "\r" | "\r\n")> | <"#" (~["\n","\r"])* ("\n" | "\r" | "\r\n")> | <"/*" (~["*"])* "*" ("*" | ~["*","/"] (~["*"])* "*")* "/"> } /* Numeric literal tokens. NOTE(review): apart from the private DECIMAL, HEX, OCTAL and EXPONENT helpers, the token names and references in this block are not visible in the text as shown - confirm against the original grammar. */ TOKEN : { (["l","L"])? | (["l","L"])? | (["l","L"])?> | <#DECIMAL: ["1"-"9"] (["0"-"9"])*> | <#HEX: "0" ["x","X"] (["0"-"9","a"-"f","A"-"F"])+> | <#OCTAL: "0" (["0"-"7"])*> | )? (["f","F","d","D"])? | "." (["0"-"9"])+ ()? (["f","F","d","D"])? | (["0"-"9"])+ (["f","F","d","D"])? | (["0"-"9"])+ ()? ["f","F","d","D"]> | <#EXPONENT: ["e","E"] (["+","-"])? 
(["0"-"9"])+> | } /* NOTE(review): the definitions of this token block are not visible in the text as shown, only the separators between them - confirm against the original grammar. */ TOKEN : { | | | | | | | | | | | | | | | | | | | | | | } /** Parses a complete model: optional ignoredCpp() sections, a preamble during which begin is captured from token.image, any number of tree() productions stored in a map under tree.getName(), and optional trailing ignoredCpp() sections. Returns new TreeNet(begin, trees). */ TreeNet treeNet() : { String begin, label; Tree tree; Map trees = new HashMap(); } { ( ( ignoredCpp() )* nl() { begin = token.image; } eol() eol() eol() ( tree = tree() { trees.put(tree.getName(), tree); } )* ( ignoredCpp() )* ) { return new TreeNet(begin, trees); } } /** C++ code outside the model which we can ignore */ void ignoredCpp() : { } { | | | | | | | | | | | | } /** Parses one tree: a name from label() and a value from tnScore(), accepted in either order, followed by zero or more labelled nodes. Each node is a condition() or a response() and is stored in a map under its label; the first node label seen becomes begin. Returns new Tree(name, value, begin, nodes). */ Tree tree() : { String name; String begin = null, label; Double value = null; TreeNode node; Map nodes = new HashMap(); } { ( ( ( value = tnScore() name = label() ) | ( name = label() value = tnScore() ) ) ( LOOKAHEAD(label() (condition() | response())) label = label() { if (begin == null) { begin = label; } } ( node = condition() { nodes.put(label, node); } | node = response() { nodes.put(label, node); } ) )* ) { return new Tree(name, value, begin, nodes); } } /** Returns the value from floatVal(), or null when the alternative without a float value is taken; either alternative is followed by eol(). */ Double tnScore() : { Double value = null; } { ( value = floatVal() | ( ) ) eol() { return value; } } /** Parses a feature() followed by one of three condition forms: continuous, single-value categorical (selected with a lookahead of 8), or set membership. Returns whichever condition was parsed. */ Condition condition() : { String var; Condition ret; } { ( var = feature() ( ret = continuousCondition(var) | LOOKAHEAD(8) ret = singleValueCategoricalCondition(var) | ret = setMembershipCondition(var) ) ) { return ret; } } /** Parses a float threshold and two jump() targets, each ended by eol(), and returns new ComparisonCondition(left, right, ift, iff). */ ComparisonCondition continuousCondition(String left) : { Double right; String ift, iff; } { ( right = floatVal() ift = jump() eol() iff = jump() eol()) { return new ComparisonCondition(left, right, ift, iff); } } // Handle single-value IN expression as a regular comparison. 
// This special case may be removed when IN support is implemented in ranking expressions in both C++ and Java /** Parses two consecutive value and jump pairs; the feature() read before the second pair must equal varA, which verifyCategoricalVar enforces. The two pairs are combined by resolveCategoricalCondition. */ ComparisonCondition singleValueCategoricalCondition(String varA) : { Integer valA, valB; String lblA, lblB, varB; } { ( valA = intVal() nl() lblA = jump() eol() varB = feature() { verifyCategoricalVar(varA, varB); } valB = intVal() nl() lblB = jump() eol() ) { return resolveCategoricalCondition(varA, valA, valB, lblA, lblB); } } /** Parses a valueList() and two jump() targets, each ended by eol(), and returns new SetMembershipCondition(testValue, setValues, trueLabel, falseLabel). */ SetMembershipCondition setMembershipCondition(String testValue) : { List setValues; String trueLabel, falseLabel; } { ( setValues = valueList() trueLabel = jump() eol() falseLabel = jump() eol() ) { return new SetMembershipCondition(testValue, setValues, trueLabel, falseLabel); } } /** Parses a float value and a jump() target, each ended by eol(), and returns new Response(val, lbl). */ Response response() : { Double val; String lbl; } { ( val = floatVal() eol() lbl = jump() eol() ) { return new Response(val, lbl); } } /** Parses a feature name with optional arguments and an optional output. Returns the name, followed by the arguments in parentheses when present, followed by a dot and the output when present. */ String feature() : { String name; String arguments = null; String output = null; } { ( name = identifier() [ arguments = featureArguments() ] [ output = featureOutputs() ] ) { return name + (arguments != null ? "(" + arguments + ")" : "") + (output !=null ? "." 
+ output : ""); } } /** Returns the results of one or more featureArgument() calls joined with commas. */ String featureArguments() : { String argument; StringBuilder arguments = new StringBuilder(); } { ( argument = featureArgument() { arguments.append(argument); } ( argument = featureArgument() { arguments.append(",").append(argument); } )* ) { return arguments.toString(); } } /** Returns a string(), a floatImage() or a nested feature(), whichever matches. */ String featureArgument() : { String argument; } { ( argument = string() | argument = floatImage() | argument = feature() ) { return argument; } } /** Returns the results of one or more featureOutput() calls appended without any separator. */ String featureOutputs() : { StringBuilder outputs = new StringBuilder(); String output; } { output = featureOutput() { outputs.append(output); } ( output = featureOutput() { outputs.append(output); } ) * { return outputs.toString(); } } /** Returns token.image for the first two alternatives (whose tokens are not visible in the text as shown) or the result of identifier() for the third. */ String featureOutput() : { String name; } { { return token.image; } | { return token.image; } | name = identifier() { return name; } } /** Returns the identifier() that precedes nl(). */ String label() : { String ret; } { ( ret = identifier() nl() ) { return ret; } } /** Delegates to nl(). */ void eol() : { } { nl() } /** One or more repetitions. NOTE(review): the repeated token is not visible in the text as shown - confirm against the original grammar. */ void nl() : { } { ( )+ } /** Returns the image of the current token. */ String jump() : { } { { return token.image; } } /** Returns the image of the current token. */ String identifier() : { } { ( /* | | | | | | | | | | |*/ ) { return token.image; } } /** Returns the results of one or more identifier() calls appended without any separator. */ String spaceSeparatedIdentifiers() : { StringBuilder identifiers = new StringBuilder(); String identifier; } { identifier = identifier() { identifiers.append(identifier); } ( identifier = identifier() { identifiers.append(identifier); } ) * { return identifiers.toString(); } } /** Returns a list holding the results of one or more value() calls in the order parsed. */ List valueList() : { List values = new ArrayList(); Object value; } { value = value() { values.add(value); } ( value = value() { values.add(value); } ) * { return values; } } /** Returns the result of spaceSeparatedIdentifiers(), intVal() or string(), whichever matches. */ Object value() : { Object value; } { ( value = spaceSeparatedIdentifiers() | value = intVal() | value = string() ) { return value; } } /** Returns the image of the current token. */ String string() : { } { { return token.image; } } /** Returns the image of the current token converted with Integer.valueOf. */ Integer intVal() : { } { { return Integer.valueOf(token.image); } } /** Returns the image of the current token converted with Double.valueOf. */ Double floatVal() : { } { ( | ) { return Double.valueOf(token.image); } } /** Returns the image of the current token without converting it. */ String floatImage() : { } { ( | ) { return token.image; } }