// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
// --------------------------------------------------------------------------------
//
// JavaCC options. When this file is changed, run "mvn generate-sources" to rebuild
// the parser classes.
//
// --------------------------------------------------------------------------------
options {
    // Per the JavaCC documentation this selects a Unicode-capable generated character stream;
    // NOTE(review): the same documentation says it is ignored when USER_CHAR_STREAM is true.
    UNICODE_INPUT = true;
    // Do not let the generated parser fetch tokens ahead of time.
    CACHE_TOKENS  = false;
    // No parser trace output.
    DEBUG_PARSER = false;
    // Keep detailed parse error messages.
    ERROR_REPORTING = true;
    // Run lookahead ambiguity checks also at choice points that carry an explicit LOOKAHEAD.
    FORCE_LA_CHECK = true;
    // The token manager reads from a caller-supplied character stream instead of a generated one.
    USER_CHAR_STREAM = true;
}

// --------------------------------------------------------------------------------
//
// Parser body.
//
// --------------------------------------------------------------------------------
PARSER_BEGIN(ModelParser)

package ai.vespa.rankingexpression.importer.vespa.parser;

import java.io.File;
import java.io.Reader;
import java.io.StringReader;
import java.util.List;
import java.util.ArrayList;
import ai.vespa.rankingexpression.importer.ImportedModel;
import com.yahoo.io.IOUtils;
import com.yahoo.tensor.Tensor;
import com.yahoo.tensor.IndexedTensor;
import com.yahoo.tensor.MixedTensor;
import com.yahoo.tensor.TensorAddress;
import com.yahoo.tensor.TensorType;
import com.yahoo.tensor.serialization.JsonFormat;
import com.yahoo.searchlib.rankingexpression.RankingExpression;

/**
 * Parser of Vespa ML model files: Ranking expression functions enclosed in brackets.
 * What is parsed is added to the {@link ImportedModel} handed to the constructor.
 *
 * @author bratseth
 */
public class ModelParser {

    /** The model receiving the inputs, constants and functions read by this parser */
    private ImportedModel model;

    /**
     * Creates a parser reading from a character stream.
     *
     * @param input the stream to read the model definition from
     * @param model the model to import into
     */
    public ModelParser(SimpleCharStream input, ImportedModel model) {
        // Delegates to the single-argument constructor (presumably the one JavaCC generates).
        this(input);
        this.model = model;
    }

    /**
     * Creates a parser reading from a string.
     *
     * @param input the model definition text
     * @param model the model to import into
     */
    public ModelParser(String input, ImportedModel model) {
        this(new SimpleCharStream(input), model);
    }

}

PARSER_END(ModelParser)


// --------------------------------------------------------------------------------
//
// Token declarations.
//
// --------------------------------------------------------------------------------

// Declare white space characters: space, tab, carriage return and form feed are dropped
// between tokens. These do not include newline because it has special meaning in several
// of the production rules, which match it explicitly as the <NL> token.
SKIP :
{
  " " | "\t" | "\r" | "\f"
}

// Declare all tokens to be recognized. When a word token is added it MUST be
// added to the identifier() production rule.
//
// Declaration order is significant: per the JavaCC documentation the token manager takes the
// longest match and, between matches of equal length, the token declared first. The keyword
// tokens are therefore declared before IDENTIFIER and STRING, which match the same text.
TOKEN :
{
  < NL: "\n" >
| < FUNCTION: "function" >
  // A complete tensor type spec, e.g. the text "tensor", an optional "<...>" part and a
  // parenthesized dimension list, lexed as a single token.
| < TENSOR_TYPE: "tensor" ("<" (~["<",">"])+ ">")? "(" (~["(",")"])* ")" >
  // Deprecated "value" prefixed tensor constants, lexed as one token including the braces:
  // the SL form is "value: {...}" on one line, the ML form is "value { {...} }" and may span lines.
| < TENSOR_VALUE_SL: "value" (" ")* ":" (" ")* ("{"<BRACE_SL_LEVEL_1>) ("\n")? >
| < TENSOR_VALUE_ML: "value" (<SEARCHLIB_SKIP>)? "{" (["\n"," "])* ("{"<BRACE_ML_LEVEL_1>) (["\n"," "])* "}" ("\n")? >
| < LBRACE: "{" >
| < RBRACE: "}" >
| < COLON: ":" >
| < DOT: "." >
| < COMMA: "," >
| < DOUBLE_KEYWORD: "double" >
| < INPUTS: "inputs" >
| < MODEL: "model" >
| < TYPE: "type" >
  // A whole expression lexed as one token; the expression() production cuts the expression
  // text out of the token image. SL is "expression: ..." up to the end of the line,
  // ML is "expression { ... }" and may span lines.
| < EXPRESSION_SL: "expression" (" ")* ":" (("{"<BRACE_SL_LEVEL_1>)|<BRACE_SL_CONTENT>)* ("\n")? >
| < EXPRESSION_ML: "expression" (<SEARCHLIB_SKIP>)? "{" (("{"<BRACE_ML_LEVEL_1>)|<BRACE_ML_CONTENT>)* "}" >
  // Private helper expressions (the '#' prefix), only usable inside other token definitions.
  // Each LEVEL_n matches the remainder of a brace group after its opening "{": LEVEL_1 may
  // contain LEVEL_2 groups, which may contain LEVEL_3 groups, which may not contain braces.
  // The SL variants exclude newline from their content, the ML variants allow it.
| < #BRACE_SL_LEVEL_1: (("{"<BRACE_SL_LEVEL_2>)|<BRACE_SL_CONTENT>)* "}" >
| < #BRACE_SL_LEVEL_2: (("{"<BRACE_SL_LEVEL_3>)|<BRACE_SL_CONTENT>)* "}" >
| < #BRACE_SL_LEVEL_3: <BRACE_SL_CONTENT> "}" >
| < #BRACE_SL_CONTENT: (~["{","}","\n"])* >
| < #BRACE_ML_LEVEL_1: (("{"<BRACE_ML_LEVEL_2>)|<BRACE_ML_CONTENT>)* "}" >
| < #BRACE_ML_LEVEL_2: (("{"<BRACE_ML_LEVEL_3>)|<BRACE_ML_CONTENT>)* "}" >
| < #BRACE_ML_LEVEL_3: <BRACE_ML_CONTENT> "}" >
| < #BRACE_ML_CONTENT: (~["{","}"])* >
| < #SEARCHLIB_SKIP: ([" ","\f","\n","\r","\t"])+ >
| < CONSTANT: "constant" >
| < CONSTANTS: "constants" >
| < FILE: "file" >
| < URI: "uri" >
| < IDENTIFIER:           ["a"-"z","A"-"Z", "_"] (["a"-"z","A"-"Z","0"-"9","_"])* >
| < DOUBLEQUOTEDSTRING: "\"" ( ~["\""] )* "\"" >
| < SINGLEQUOTEDSTRING: "'" ( ~["'"] )* "'" >
  // NOTE(review): every text matched by CONTEXT is also matched by IDENTIFIER, which is declared
  // earlier, so this token looks unreachable - confirm before relying on it.
| < CONTEXT: ["a"-"z","A"-"Z"] (["a"-"z", "A"-"Z", "0"-"9"])* >
  // Numeric literals are declared before STRING, which matches the same unsigned digit sequences.
| < DOUBLE: ("-")? (["0"-"9"])+ "." (["0"-"9"])+ >
| < INTEGER: ("-")? (["0"-"9"])+ >
| < LONG: ("-")? (["0"-"9"])+"L" >
| < STRING: (["a"-"z","A"-"Z","_","0"-"9","."])+ >
| < FILE_PATH: ["a"-"z","A"-"Z", "_"] (["a"-"z","A"-"Z","0"-"9","_","-", "/", "."])+ >
  // "http" or "https" in any letter case; used as the scheme part of URI_PATH below.
| < HTTP: ["h","H"] ["t","T"] ["t","T"] ["p","P"] (["s","S"])? >
| < URI_PATH: <HTTP> <COLON> ("//")? (["a"-"z","A"-"Z","0"-"9","_","-", "/", ".",":"])+ >
}

// Declare a special skip token for comments: a comment starts with '#' and runs up to, but not
// including, the end of the line. As a special token it is not passed to the production rules.
SPECIAL_TOKEN :
{
  <SINGLE_LINE_COMMENT: "#" (~["\n","\r"])* >
}


// --------------------------------------------------------------------------------
//
// Production rules.
//
// --------------------------------------------------------------------------------

/**
 * The root rule: consumes "model NAME { ... }" followed by end of input, with newlines
 * allowed around each part, and then verifies that the name of the model being imported into
 * ends with the declared model name.
 */
void model() :
{
    String declaredName;
}
{
    (<NL>)* <MODEL> (<NL>)* declaredName = identifier() (<NL>)*
    <LBRACE>
        modelContent()
    <RBRACE>
    (<NL>)* <EOF>
    {
        boolean nameMatches = model.name().endsWith(declaredName);
        if ( ! nameMatches) {
            String message = "Unexpected model name '" + model.name() +
                             "': Model '" + declaredName + "' must be saved in a file named '" + declaredName + ".model'";
            throw new IllegalArgumentException(message);
        }
    }
}

/**
 * Consumes the body of a model: any number of blank lines, constants blocks, large constants,
 * functions, inputs blocks and single input declarations, in any order.
 *
 * The order of the alternatives should be kept: identifier(), which input() starts with, also
 * accepts the <CONSTANTS>, <CONSTANT> and <INPUTS> keyword tokens, so input() overlaps with
 * constants(), largeConstant() and inputs() on their first token.
 * NOTE(review): the earlier alternative presumably wins these overlaps - confirm before reordering.
 */
void modelContent() :
{}
{
    ( <NL> |
      constants() |
      largeConstant() |
      function() |
      inputs() |
      input()
    )*
}

/** Consumes an "inputs { ... }" block holding any number of input declarations separated by newlines. */
void inputs() :
{}
{
    <INPUTS> lbrace()
    (
        input()
        (<NL>)*
    )*
    <RBRACE>
}

/**
 * Consumes one declared input variable (aka feature), written "name: tensor-type", and
 * registers it with the model. All non-scalar inputs must be declared.
 */
void input() :
{
    String inputName;
    TensorType inputType;
}
{
    inputName = identifier()
    <COLON>
    inputType = tensorType("Input parameter '" + inputName + "'")
    {
        model.input(inputName, inputType);
    }
}

/**
 * Consumes a function, "function name(arg, ...) { expression... }", and adds its expression
 * to the model under the function name. The argument names are parsed and collected, but only
 * the name and the expression are handed to the model.
 */
void function() :
{
    String functionName, body, argument;
    List< String > arguments = new ArrayList< String >();
}
{
    <FUNCTION> functionName = identifier()
    "("
        [
            argument = identifier() { arguments.add(argument); }
            (
                <COMMA> argument = identifier() { arguments.add(argument); }
            )*
        ]
    ")"
    lbrace() body = expression() (<NL>)* <RBRACE>
    {
        model.expression(functionName, body);
    }
}

/**
 * Consumes the constants of this model: a "constants { ... }" block holding any number of
 * constant declarations separated by newlines. Newlines are accepted on both sides of the
 * opening brace, as for the other blocks of a model.
 */
void constants() :
{}
{
    <CONSTANTS> lbrace()
    ( constant() (<NL>)* )*
    <RBRACE>
}

/**
 * Builds the error message prefix used when a constant tensor has an illegal type.
 * Consumes no tokens.
 *
 * @param constantTensorName the name of the constant being parsed
 * @return a message prefix naming the constant and the model
 */
String constantTensorErrorMessage(String constantTensorName) : {}
{
    {
        StringBuilder message = new StringBuilder("For constant tensor '");
        message.append(constantTensorName).append("' in '").append(model).append("'");
        return message.toString();
    }
}

/**
 * Consumes one constant of a constants block and adds it to the model.
 *
 * After the name and an optional colon, two families of syntax are accepted:
 * - an optional value type (defaulting to TensorType.empty), an optional colon and then either
 *   an inline tensor value or a "file:" item pointing to the value, or
 * - the deprecated forms handled by constantValue() and constantTensor().
 *
 * An inline value is added as a small constant. A file value is read relative to the model,
 * decoded as JSON using the value type, and added as a large constant; any failure while doing
 * so is rethrown as an IllegalArgumentException carrying the original exception as its cause.
 */
void constant() :
{
    String name = null;
    TensorType type = TensorType.empty;
    Tensor value = null;
    String valuePath = null;
}
{
    (
      name = identifier() (<COLON>)?
      (
        // Both families can start with ":" or "{", so several tokens of lookahead are used
        // to decide whether the first one applies.
        LOOKAHEAD(4) ( ( type = valueType(name) )? (<COLON>)? (<NL>)* ( value = tensorValue(type) | valuePath = fileItem())
        {
            if (value != null) {
                model.smallConstant(name, value);
            }
            else {
                // No inline value was given, so a file item was parsed instead
                try {
                    value = JsonFormat.decode(type, IOUtils.readFileBytes(model.relativeFile(valuePath, "constant '" + name + "'")));
                    model.largeConstant(name, value);
                }
                catch (Exception e) {
                    throw new IllegalArgumentException("Could not read constant '" + name + "'", e);
                }
            }
        }
        )
        | // Deprecated forms (TODO: Vespa > 8: Add warning ):
        ( constantValue(name) | constantTensor(name) )
      )
    )
}

/**
 * Deprecated form: consumes ": value" where the value is a double, an integer or an
 * identifier, and adds the tensor parsed from that text to the model as a small constant.
 */
void constantValue(String name) :
{
    Token literal;
}
{
    <COLON>
    (
        literal = <DOUBLE>
      | literal = <INTEGER>
      | literal = <IDENTIFIER>
    )
    {
        Tensor parsed = Tensor.from(literal.image);
        model.smallConstant(name, parsed);
    }
}

/**
 * Deprecated form: consumes "{ ... }" holding any number of "value" prefixed tensor strings
 * and tensor types, separated by newlines, and adds the resulting tensor to the model as a
 * small constant. When several values or types are given the last of each is used, and the
 * value string is empty when none is given.
 */
void constantTensor(String name) :
{
    String valueSpec = "";
    TensorType declaredType = null;
}
{
    <LBRACE> (<NL>)*
    (
        (
            valueSpec = tensorValuePrefixedByValue()
          | declaredType = tensorTypeWithPrefix(constantTensorErrorMessage(name))
        )
        (<NL>)*
    )*
    <RBRACE>
    {
        Tensor tensor;
        if (declaredType == null)
            tensor = Tensor.from(valueSpec);
        else
            tensor = Tensor.from(declaredType, valueSpec);
        model.smallConstant(name, tensor);
    }
}

/**
 * Consumes the type of a constant value: either a tensor type spec or the keyword "double",
 * which maps to TensorType.empty.
 *
 * @param name the name of the constant, used in error messages
 * @return the declared type
 */
TensorType valueType(String name) :
{
    TensorType declared;
}
{
    (
        declared = tensorType("Type of " + name) { return declared; }
      | <DOUBLE_KEYWORD>                         { return TensorType.empty; }
    )
}

/**
 * Consumes a tensor type token and converts it to a tensor type.
 *
 * @param errorMessage the prefix of the message to use if the type spec is illegal
 * @return the parsed tensor type
 * @throws IllegalArgumentException if the type spec is rejected by TensorType.fromSpec;
 *         the original exception is kept as the cause
 */
TensorType tensorType(String errorMessage) :
{
    String tensorTypeString;
}
{
    <TENSOR_TYPE> { tensorTypeString = token.image; }
    {
        TensorType tensorType;
        try {
            tensorType = TensorType.fromSpec(tensorTypeString);
        } catch (IllegalArgumentException e) {
            // Keep the original exception as cause so its stack trace is not lost
            throw new IllegalArgumentException(errorMessage + ": Illegal tensor type spec: " + e.getMessage(), e);
        }
        return tensorType;
    }
}

/**
 * Consumes a tensor written in a tensor literal form,
 * https://docs.vespa.ai/en/reference/tensor.html#tensor-literal-form
 * which is either a mapped/mixed value in braces, an indexed value in brackets, or a
 * single number. A single number is only accepted when the type has rank 0.
 *
 * @param type the type of the tensor to build
 * @return the tensor built from the parsed value
 */
Tensor tensorValue(TensorType type) :
{
    Tensor.Builder tensorBuilder = Tensor.Builder.of(type);
    Number scalar = null;
}
{
    (
        mappedTensorValue(tensorBuilder)
      | indexedTensorValues(tensorBuilder)
      | scalar = number()
    )
    {
        if (scalar == null)
            return tensorBuilder.build();

        if (type.rank() > 0)
            throw new IllegalArgumentException("A tensor of type " + type + " cannot be a number");
        tensorBuilder.cell(scalar.doubleValue());
        return tensorBuilder.build();
    }
}

/**
 * A mapped or mixed tensor value: brace-enclosed blocks, each adding cells to the builder.
 * Blocks may follow each other directly, or be separated by a comma and optional newlines.
 */
void mappedTensorValue(Tensor.Builder builder) : {}
{
    <LBRACE>
        ( mappedTensorBlock(builder) )*
        (
            <COMMA> (<NL>)*
            mappedTensorBlock(builder)
        )*
    <RBRACE>
}


/**
 * Consumes one "address: value" entry of a mapped or mixed tensor, where the value is either
 * a single cell value or a bracketed array of values for the given address.
 */
void mappedTensorBlock(Tensor.Builder builder) :
{
    TensorAddress blockAddress;
}
{
    blockAddress = tensorAddress(builder.type())
    <COLON> (<NL>)*
    (
        mappedTensorCellValue(blockAddress, builder)
      | indexedTensorBlockValues(blockAddress, builder)
    )
}

/**
 * Consumes a bracketed array of values and adds it to the builder as the block of values
 * belonging to the given mapped address.
 *
 * @param mappedAddress the address the values belong to
 * @param builder the builder to add to, which must be a MixedTensor.BoundBuilder
 * @throws IllegalArgumentException if the builder cannot take a block of values
 */
void indexedTensorBlockValues(TensorAddress mappedAddress, Tensor.Builder builder) :
{
    List<Double> values = new ArrayList<Double>();
}
{
    arrayTensorValues(values)
    {
        // Fail with a message naming the type instead of a ClassCastException
        if ( ! (builder instanceof MixedTensor.BoundBuilder))
            throw new IllegalArgumentException("Block values [n, n, ...] cannot be used for a tensor of type " +
                                               builder.type());
        MixedTensor.BoundBuilder boundBuilder = (MixedTensor.BoundBuilder)builder;
        double[] arrayValues = new double[values.size()];
        for (int i = 0; i < values.size(); i++ ) {
            arrayValues[i] = values.get(i);
        }
        boundBuilder.block(mappedAddress, arrayValues);
    }
}

/**
 * Consumes a bracketed array of values and fills the builder with them.
 *
 * @param builder the builder to fill, which must be an IndexedTensor.BoundBuilder
 * @throws IllegalArgumentException if the builder cannot be filled from an array of values
 */
void indexedTensorValues(Tensor.Builder builder) :
{
    List<Double> values = new ArrayList<Double>();
}
{
    arrayTensorValues(values)
    {
        // Fail with a message naming the type instead of a ClassCastException
        if ( ! (builder instanceof IndexedTensor.BoundBuilder))
            throw new IllegalArgumentException("Array values [n, n, ...] cannot be used for a tensor of type " +
                                               builder.type());
        IndexedTensor.BoundBuilder boundBuilder = (IndexedTensor.BoundBuilder)builder;
        double[] arrayValues = new double[values.size()];
        for (int i = 0; i < values.size(); i++ ) {
            arrayValues[i] = values.get(i);
        }
        boundBuilder.fill(arrayValues);
    }
}

/**
 * Tensor array values. Using sub-bracketing for multiple dimensions is optional and therefore ignored here:
 * nested arrays are consumed recursively and all numbers are appended to the same list, in the order
 * they are written. Elements may follow each other directly, or be separated by a comma and
 * optional newlines.
 */
void arrayTensorValues(List<Double> values) : {}
{
    "[" (                 ( indexedTensorValue(values) | arrayTensorValues(values)) )*
        ( <COMMA> (<NL>)* ( indexedTensorValue(values) | arrayTensorValues(values)) )*
    "]"
}

/** Consumes one number and appends it, as a double, to the given list of values. */
void indexedTensorValue(List<Double> values) :
{
    Number parsed;
}
{
    parsed = number()
    {
        double cell = parsed.doubleValue();
        values.add(cell);
    }
}

/** Consumes one cell value and adds it to the builder at the given address. */
void mappedTensorCellValue(TensorAddress address, Tensor.Builder builder) :
{
    double cellValue;
}
{
    cellValue = tensorCellValue()
    {
        builder.cell(address, cellValue);
    }
}

/**
 * Consumes a tensor address, which is either a single label, or brace-enclosed
 * "dimension: label" elements which may follow each other directly or be separated by commas.
 *
 * @param type the tensor type the address belongs to
 * @return the address built from the parsed labels
 */
TensorAddress tensorAddress(TensorType type) :
{
    TensorAddress.Builder addressBuilder = new TensorAddress.PartialBuilder(type);
    String singleLabel;
}
{
    (
        singleLabel = tensorAddressLabel() { addressBuilder.add(singleLabel); }
      |
        (
            <LBRACE>
                ( tensorAddressElement(addressBuilder) )*
                ( <COMMA> tensorAddressElement(addressBuilder) )*
            <RBRACE>
        )
    )
    {
        return addressBuilder.build();
    }
}

/** Consumes one "dimension: label" element of a tensor address and adds it to the address builder. */
void tensorAddressElement(TensorAddress.Builder builder) :
{
    String dimensionName;
    String dimensionLabel;
}
{
    dimensionName = identifier()
    <COLON> (<NL>)*
    dimensionLabel = tensorAddressLabel()
    {
        builder.add(dimensionName, dimensionLabel);
    }
}

/**
 * Consumes a tensor address label, written either as an identifier or as a quoted string.
 *
 * @return the label, without quotes
 */
String tensorAddressLabel() :
{
    String text;
}
{
    (
        text = identifier()   { return text; }
      | text = quotedString() { return text; }
    )
}

/**
 * Consumes a number and returns it as the value of a tensor cell.
 *
 * @return the parsed number as a double
 */
double tensorCellValue() :
{
    Number parsed;
}
{
    parsed = number()
    {
        return parsed.doubleValue();
    }
}

/**
 * Undocumented syntax for supplying a tensor constant value by a string prefixed by "value".
 * For the single line form the text after the first colon is returned, for the multi line form
 * the text between the first opening brace and the last closing brace.
 *
 * @return the tensor string, without the "value" prefix
 */
String tensorValuePrefixedByValue() :
{
    Token valueToken;
}
{
    (
        valueToken = <TENSOR_VALUE_SL>
        { return valueToken.image.substring(valueToken.image.indexOf(":") + 1); }
      |
        valueToken = <TENSOR_VALUE_ML>
        { return valueToken.image.substring(valueToken.image.indexOf("{") + 1,
                                            valueToken.image.lastIndexOf("}")); }
    )
}

/**
 * Consumes a tensor type, optionally prefixed by "type:".
 *
 * The type itself is parsed by tensorType(), so both rules report illegal type specs the same way.
 * NOTE(review): this rule used to accept only the bare type even though its name promises a
 * prefix; the prefix is accepted but not required here so that input which parsed before still
 * does - confirm whether the prefix should be mandatory.
 *
 * @param errorMessage the prefix of the message to use if the type spec is illegal
 * @return the parsed tensor type
 */
TensorType tensorTypeWithPrefix(String errorMessage) :
{
    TensorType type;
}
{
    ( <TYPE> <COLON> )?
    type = tensorType(errorMessage)
    { return type; }
}

/**
 * Consumes a large constant, "constant name { ... }", and adds the tensor read by
 * largeConstantBody() to the model under that name.
 */ // TODO: Remove on Vespa 9
void largeConstant() :
{
    String constantName;
    Tensor tensor;
}
{
    <CONSTANT> constantName = identifier()
    lbrace()
        tensor = largeConstantBody(constantName)
    <RBRACE>
    {
        model.largeConstant(constantName, tensor);
    }
}

// TODO: Add support in ImportedModel for passing a large tensor through as a file/Uri pointer instead of reading it here
/**
 * Consumes the body of a large constant: "file: path" and "type: tensor-type" entries and
 * newlines, in any order, and reads the tensor from the file, decoded as JSON.
 * If an entry is repeated the last one is used.
 *
 * @param name the name of the constant, used in error messages
 * @return the tensor read from the file
 * @throws IllegalArgumentException if no file is given, or if the file cannot be read or decoded
 */
Tensor largeConstantBody(String name) :
{
    String path = null;
    TensorType type = null;
}
{
    (   <FILE> <COLON> path = filePath()
//      | (<URI> <COLON> path = uriPath() TODO
      | <TYPE> <COLON> type = tensorType("Constant '" + name + "'")
      | <NL>
    )+
    {
        // The grammar does not force a file entry to be present, so say so explicitly instead
        // of failing on a null path further down.
        if (path == null)
            throw new IllegalArgumentException("Constant '" + name + "' must specify a 'file:' to read its value from");
        try {
            // NOTE(review): type is null here when no 'type:' entry was given - confirm that
            // JsonFormat.decode accepts that.
            return JsonFormat.decode(type, IOUtils.readFileBytes(model.relativeFile(path, "constant '" + name + "'")));
        }
        catch (Exception e) {
            throw new IllegalArgumentException("Could not read constant '" + name + "'", e);
        }
    }
}

/**
 * Consumes a file path, which may be lexed as a file path, string or identifier token.
 *
 * @return the path exactly as written
 */
String filePath() :
{
    Token pathToken;
}
{
    (
        pathToken = <FILE_PATH>
      | pathToken = <STRING>
      | pathToken = <IDENTIFIER>
    )
    { return pathToken.image; }
}

/**
 * Consumes a URI path token.
 *
 * @return the URI exactly as written
 */
String uriPath() :
{
    Token uriToken;
}
{
    uriToken = <URI_PATH>
    { return uriToken.image; }
}

/**
 * Consumes an expression token and returns the expression text contained in its image.
 * For the single line form this is the text after the first colon, for the multi line form
 * the text between the first opening brace and the last closing brace.
 *
 * @return the expression text, without the "expression" prefix
 */
String expression() :
{
    Token expressionToken;
}
{
    (
        expressionToken = <EXPRESSION_SL>
        { return expressionToken.image.substring(expressionToken.image.indexOf(":") + 1); }
      |
        expressionToken = <EXPRESSION_ML>
        { return expressionToken.image.substring(expressionToken.image.indexOf("{") + 1,
                                                 expressionToken.image.lastIndexOf("}")); }
    )
}

/**
 * Consumes an identifier and returns its text. Besides plain identifiers this accepts integers
 * and the listed keyword tokens, so that these words can also be used as names.
 * This must be kept in sync with all word tokens that should be parseable as identifiers.
 *
 * @return the image of the consumed token
 */
String identifier() :
{
    Token word;
}
{
    (
        word = <CONSTANT>
      | word = <CONSTANTS>
      | word = <DOUBLE_KEYWORD>
      | word = <FILE>
      | word = <IDENTIFIER>
      | word = <INPUTS>
      | word = <INTEGER>
      | word = <MODEL>
      | word = <TYPE>
      | word = <URI>
    )
    { return word.image; }
}

/**
 * Consumes a number, which is either a floating-point value or a long/integer value.
 *
 * @return a Double for floating-point values and a Long otherwise
 */
Number number() :
{
    double floating;
    long integral;
}
{
    (
        floating = floatValue() { return Double.valueOf(floating); }
      | integral = longValue()  { return Long.valueOf(integral); }
    )
}

/**
 * Consumes a long or integer token and returns its numeric value.
 * The trailing "L" of a long token is removed before parsing.
 *
 * @return the parsed value
 */
long longValue() :
{
    Token digits;
}
{
    (
        digits = <INTEGER>
        { return Long.parseLong(digits.image); }
      |
        digits = <LONG>
        { return Long.parseLong(digits.image.substring(0, digits.image.length() - 1)); }
    )
}

/**
 * Consumes a floating-point token and returns its numeric value.
 *
 * @return the parsed value
 */
double floatValue() :
{
    Token literal;
}
{
    literal = <DOUBLE>
    { return Double.parseDouble(literal.image); }
}

/** Consumes an opening brace, together with any newline tokens directly before and after it. */
void lbrace() :
{}
{
    (<NL>)*
    <LBRACE>
    (<NL>)*
}

/**
 * Consumes a "file: path" item and any newlines following it.
 *
 * @return the relative form of the path, as produced by com.yahoo.path.Path
 */
String fileItem() :
{
    String path;
}
{
    <FILE> <COLON> path = filePath()
    { path = com.yahoo.path.Path.fromString(path).getRelative(); }
    (<NL>)*
    { return path; }
}

/**
 * Consumes a double or single quoted string token and returns the token image minus the quotes.
 * No unescaping of the content is done: the first and last character of the image are simply
 * removed. The content can be anything except the quote character that encloses it.
 *
 * @return the unquoted token image
 */
String quotedString() :
{
    Token quoted;
}
{
    (
        quoted = <DOUBLEQUOTEDSTRING>
      | quoted = <SINGLEQUOTEDSTRING>
    )
    {
        String image = quoted.image;
        return image.substring(1, image.length() - 1);
    }
}