diff options
author | Jon Bratseth <bratseth@gmail.com> | 2022-05-18 11:38:27 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2022-05-18 11:38:27 +0200 |
commit | 619d924440939076e399f2504fa6850976d2a303 (patch) | |
tree | 83d680e0c0c516585b6d4e8e086d825dcb381365 /model-integration | |
parent | 16de8d32fd0394335ffa065b61f4943c4fd49542 (diff) |
Unify constant syntax across models and rank profiles
Diffstat (limited to 'model-integration')
-rw-r--r-- | model-integration/src/main/javacc/ModelParser.jj | 312 | ||||
-rw-r--r-- | model-integration/src/test/models/vespa/example.model | 6 |
2 files changed, 290 insertions, 28 deletions
diff --git a/model-integration/src/main/javacc/ModelParser.jj b/model-integration/src/main/javacc/ModelParser.jj index 6f6f3508beb..668fd017aa9 100644 --- a/model-integration/src/main/javacc/ModelParser.jj +++ b/model-integration/src/main/javacc/ModelParser.jj @@ -30,8 +30,11 @@ import java.util.List; import java.util.ArrayList; import ai.vespa.rankingexpression.importer.ImportedModel; import com.yahoo.io.IOUtils; -import com.yahoo.tensor.TensorType; import com.yahoo.tensor.Tensor; +import com.yahoo.tensor.IndexedTensor; +import com.yahoo.tensor.MixedTensor; +import com.yahoo.tensor.TensorAddress; +import com.yahoo.tensor.TensorType; import com.yahoo.tensor.serialization.JsonFormat; import com.yahoo.searchlib.rankingexpression.RankingExpression; @@ -80,8 +83,7 @@ TOKEN : { < NL: "\n" > | < FUNCTION: "function" > -| < TENSOR_TYPE: "tensor(" (~["(",")"])+ ")" > -| < TENSORVALUE: (" ")* ":" (" ")* ("{"<BRACE_SL_LEVEL_1>) ("\n")? > +| < TENSOR_TYPE: "tensor" ("<" (~["<",">"])+ ">")? "(" (~["(",")"])* ")" > | < TENSOR_VALUE_SL: "value" (" ")* ":" (" ")* ("{"<BRACE_SL_LEVEL_1>) ("\n")? > | < TENSOR_VALUE_ML: "value" (<SEARCHLIB_SKIP>)? "{" (["\n"," "])* ("{"<BRACE_ML_LEVEL_1>) (["\n"," "])* "}" ("\n")? > | < LBRACE: "{" > @@ -89,6 +91,7 @@ TOKEN : | < COLON: ":" > | < DOT: "." > | < COMMA: "," > +| < DOUBLE_KEYWORD: "double" > | < MODEL: "model" > | < TYPE: "type" > | < EXPRESSION_SL: "expression" (" ")* ":" (("{"<BRACE_SL_LEVEL_1>)|<BRACE_SL_CONTENT>)* ("\n")? > @@ -107,8 +110,12 @@ TOKEN : | < FILE: "file" > | < URI: "uri" > | < IDENTIFIER: ["a"-"z","A"-"Z", "_"] (["a"-"z","A"-"Z","0"-"9","_"])* > +| < DOUBLEQUOTEDSTRING: "\"" ( ~["\""] )* "\"" > +| < SINGLEQUOTEDSTRING: "'" ( ~["'"] )* "'" > | < CONTEXT: ["a"-"z","A"-"Z"] (["a"-"z", "A"-"Z", "0"-"9"])* > | < DOUBLE: ("-")? (["0"-"9"])+ "." (["0"-"9"])+ > +| < INTEGER: ("-")? (["0"-"9"])+ > +| < LONG: ("-")? (["0"-"9"])+"L" > | < STRING: (["a"-"z","A"-"Z","_","0"-"9","."])+ > | < FILE_PATH: ["a"-"z","A"-"Z", "_"] (["a"-"z","A"-"Z","0"-"9","_","-", "/", "."])+ > | < HTTP: ["h","H"] ["t","T"] ["t","T"] ["p","P"] (["s","S"])? > @@ -152,7 +159,12 @@ void modelContent() : { } { - ( <NL> | input() | constants() | largeConstant() | function() )* + ( <NL> | + constants() | + largeConstant() | + function() | + input() + )* } /** Declared input variables (aka features). All non-scalar inputs must be declared. */ @@ -191,36 +203,244 @@ void constants() : } { <CONSTANTS> <LBRACE> (<NL>)* - ( name = identifier() <COLON> ( constantDouble(name) | constantTensor(name) ) (<NL>)* )* + ( constant() (<NL>)* )* <RBRACE> } -void constantDouble(String name) : +String constantTensorErrorMessage(String constantTensorName) : {} +{ + { return "For constant tensor '" + constantTensorName + "' in '" + model + "'"; } +} + +void constant() : +{ + String name = null; + TensorType type = TensorType.empty; + Tensor value = null; + String valuePath = null; +} +{ + ( + name = identifier() (<COLON>)? + ( + LOOKAHEAD(4) ( ( type = valueType(name) )? (<COLON>)? (<NL>)* ( value = tensorValue(type) | valuePath = fileItem()) + { + if (value != null) { + model.smallConstant(name, value); + } + else { + try { + value = JsonFormat.decode(type, IOUtils.readFileBytes(model.relativeFile(valuePath, "constant '" + name + "'"))); + model.largeConstant(name, value); + } + catch (Exception e) { + throw new IllegalArgumentException("Could not read constant '" + name + "'", e); + } + } + } + ) + | // Deprecated forms (TODO: Add warning on Vespa 8): + ( constantValue(name) | constantTensor(name) ) + ) + ) +} + +// Deprecated form +void constantValue(String name) : { Token value; } { - value = <DOUBLE> { model.smallConstant(name, Tensor.from(Double.parseDouble(value.image))); } + <COLON> ( value = <DOUBLE> | value = <INTEGER> | value = <IDENTIFIER> ) + { model.smallConstant(name, Tensor.from(value.image)); } } +// Deprecated form void constantTensor(String name) : { + String tensorString = ""; + TensorType type = null; +} +{ + <LBRACE> (<NL>)* + (( tensorString = tensorValuePrefixedByValue() | + type = tensorTypeWithPrefix(constantTensorErrorMessage(name)) ) (<NL>)* )* <RBRACE> + { model.smallConstant(name, type != null ? Tensor.from(type, tensorString) : Tensor.from(tensorString)); } +} + +TensorType valueType(String name) : +{ TensorType type; - Token value; + +} +{ + ( + ( type = tensorType("Type of " + name) ) + | + ( <DOUBLE_KEYWORD> { type = TensorType.empty; } ) + ) + { return type; } +} + +TensorType tensorType(String errorMessage) : +{ + String tensorTypeString; +} +{ + <TENSOR_TYPE> { tensorTypeString = token.image; } + { + TensorType tensorType; + try { + tensorType = TensorType.fromSpec(tensorTypeString); + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException(errorMessage + ": Illegal tensor type spec: " + e.getMessage()); + } + return tensorType; + } +} + +/** + * Parses a tensor written in a tensor literal form, + * https://docs.vespa.ai/en/reference/tensor.html#tensor-literal-form + */ +Tensor tensorValue(TensorType type) : +{ + Tensor.Builder builder = Tensor.Builder.of(type); + Number doubleValue = null; } { - type = tensorType("constant '" + name + "'") value = <TENSORVALUE> - { - model.smallConstant(name, Tensor.from(type, value.image.substring(1))); - } + ( mappedTensorValue(builder) | indexedTensorValues(builder) | doubleValue = number() ) + { + if (doubleValue != null) { + if (type.rank() > 0) + throw new IllegalArgumentException("A tensor of type " + type + " cannot be a number"); + builder.cell(doubleValue.doubleValue()); + } + return builder.build(); + } } -String constantTensorErrorMessage(String model, String constantTensorName) : {} +/** A mapped or mixed tensor value. */ +void mappedTensorValue(Tensor.Builder builder) : {} { - { return "For constant tensor '" + constantTensorName + "' in model '" + model + "'"; } + "{" + ( mappedTensorBlock(builder) )* + ( <COMMA> (<NL>)* mappedTensorBlock(builder) )* + "}" } -String tensorValue() : + +void mappedTensorBlock(Tensor.Builder builder) : +{ + TensorAddress mappedAddress; +} +{ + mappedAddress = tensorAddress(builder.type()) <COLON> (<NL>)* + ( mappedTensorCellValue(mappedAddress, builder) | indexedTensorBlockValues(mappedAddress, builder) ) +} + +void indexedTensorBlockValues(TensorAddress mappedAddress, Tensor.Builder builder) : +{ + List<Double> values = new ArrayList<Double>(); +} +{ + arrayTensorValues(values) + { + MixedTensor.BoundBuilder boundBuilder = (MixedTensor.BoundBuilder)builder; + double[] arrayValues = new double[values.size()]; + for (int i = 0; i < values.size(); i++ ) { + arrayValues[i] = values.get(i); + } + boundBuilder.block(mappedAddress, arrayValues); + } +} + +void indexedTensorValues(Tensor.Builder builder) : +{ + List<Double> values = new ArrayList<Double>(); +} +{ + arrayTensorValues(values) + { + IndexedTensor.BoundBuilder boundBuilder = (IndexedTensor.BoundBuilder)builder; + double[] arrayValues = new double[values.size()]; + for (int i = 0; i < values.size(); i++ ) { + arrayValues[i] = values.get(i); + } + boundBuilder.fill(arrayValues); + } +} + +/** Tensor array values. Using sub-bracketing for multiple dimensions is optional and therefore ignored here. */ +void arrayTensorValues(List<Double> values) : {} +{ + "[" ( ( indexedTensorValue(values) | arrayTensorValues(values)) )* + ( <COMMA> (<NL>)* ( indexedTensorValue(values) | arrayTensorValues(values)) )* + "]" +} + +void indexedTensorValue(List<Double> values) : +{ + Number value; +} +{ + value = number() + { values.add(value.doubleValue()); } +} + +void mappedTensorCellValue(TensorAddress address, Tensor.Builder builder) : +{ + double value; +} +{ + value = tensorCellValue() + { builder.cell(address, value); } +} + +TensorAddress tensorAddress(TensorType type) : +{ + TensorAddress.Builder builder = new TensorAddress.PartialBuilder(type); + String label; +} +{ + ( + label = tensorAddressLabel() { builder.add(label); } + | + ( "{" ( tensorAddressElement(builder) )* ( <COMMA> tensorAddressElement(builder) )* "}" ) + ) + { return builder.build(); } +} + +void tensorAddressElement(TensorAddress.Builder builder) : +{ + String dimension; + String label; +} +{ + dimension = identifier() <COLON> (<NL>)* label = tensorAddressLabel() + { builder.add(dimension, label); } +} + +String tensorAddressLabel() : +{ + String label; +} +{ + ( label = identifier() | label = quotedString() ) + { return label; } +} + +double tensorCellValue() : +{ + Number value; +} +{ + value = number() + { return value.doubleValue(); } +} + +/** Undocumented syntax for supplying a tensor constant value by a string prefixed by "value" */ +String tensorValuePrefixedByValue() : { String tensor; } @@ -233,7 +453,7 @@ String tensorValue() : } } -TensorType tensorType(String errorMessage) : +TensorType tensorTypeWithPrefix(String errorMessage) : { String tensorTypeString; } @@ -250,7 +470,7 @@ TensorType tensorType(String errorMessage) : } } -/** Consumes a large constant. */ +/** Consumes a large constant. */ // TODO: Remove on Vespa 9 void largeConstant() : { String name; @@ -311,18 +531,64 @@ String expression() : String identifier() : { } { ( - <IDENTIFIER> - | <DOUBLE> - | <FILE> - | <URI> - | <MODEL> - | <TYPE> + <CONSTANT> | + <CONSTANTS> | + <DOUBLE_KEYWORD> | + <FILE> | + <IDENTIFIER> | + <INTEGER> | + <MODEL> | + <TYPE> | + <URI> ) { return token.image; } } +Number number() : +{ + Number num; +} +{ + (num = floatValue() | num = longValue() ) { return num; } +} + +/** Consumes a long or integer token and returns its numeric value. */ +long longValue() : { } +{ + ( <INTEGER> { return Long.parseLong(token.image); } | + <LONG> { return Long.parseLong(token.image.substring(0, token.image.length()-1)); } + ) +} + +/** Consumes a floating-point token and returns its numeric value. */ +double floatValue() : { } +{ + <DOUBLE> { return Double.valueOf(token.image); } +} + /** Consumes an opening brace with leading and trailing newline tokens. */ void lbrace() : { } { (<NL>)* <LBRACE> (<NL>)* } + +String fileItem() : +{ + String path; +} +{ + (<FILE> <COLON> ( <FILE_PATH> | <STRING> | <IDENTIFIER>) { path = com.yahoo.path.Path.fromString(token.image).getRelative(); } { } (<NL>)*) { return path; } +} + +/** + * Consumes a quoted string token and returns the token image minus the quotes. This does not perform + * unescaping of the content, it simply removes the first and last character of the image. However, the token itself can + * contain anything but a double quote. + * + * @return the unquoted token image + */ +String quotedString() : { } +{ + ( <DOUBLEQUOTEDSTRING> | <SINGLEQUOTEDSTRING> ) + { return token.image.substring(1, token.image.length() - 1); } +} diff --git a/model-integration/src/test/models/vespa/example.model b/model-integration/src/test/models/vespa/example.model index 269ed83b695..fd8565f2b92 100644 --- a/model-integration/src/test/models/vespa/example.model +++ b/model-integration/src/test/models/vespa/example.model @@ -7,11 +7,7 @@ model example { constants { constant1: tensor(x[3]):{{x:0}:0.5, {x:1}:1.5, {x:2}:2.5} constant2: 3.0 - } - - constant constant1asLarge { - type: tensor(x[3]) - file: constant1asLarge.json + constant1asLarge tensor(x[3]): file:constant1asLarge.json } function foo1() { |