diff options
author | Arnstein Ressem <aressem@gmail.com> | 2022-05-18 13:45:07 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-05-18 13:45:07 +0200 |
commit | c080a3c9b1ebd39b947aeddd1e5a0bf5e46d474c (patch) | |
tree | aad434533f9514747e80f339d87b984088e2f1f7 /model-integration | |
parent | a4dbfc43c7df534ee5b032204ef19a7b038d7e3e (diff) |
Revert "Bratseth/model syntax"
Diffstat (limited to 'model-integration')
4 files changed, 30 insertions, 339 deletions
diff --git a/model-integration/src/main/javacc/ModelParser.jj b/model-integration/src/main/javacc/ModelParser.jj index c850d223612..6f6f3508beb 100644 --- a/model-integration/src/main/javacc/ModelParser.jj +++ b/model-integration/src/main/javacc/ModelParser.jj @@ -30,11 +30,8 @@ import java.util.List; import java.util.ArrayList; import ai.vespa.rankingexpression.importer.ImportedModel; import com.yahoo.io.IOUtils; -import com.yahoo.tensor.Tensor; -import com.yahoo.tensor.IndexedTensor; -import com.yahoo.tensor.MixedTensor; -import com.yahoo.tensor.TensorAddress; import com.yahoo.tensor.TensorType; +import com.yahoo.tensor.Tensor; import com.yahoo.tensor.serialization.JsonFormat; import com.yahoo.searchlib.rankingexpression.RankingExpression; @@ -83,7 +80,8 @@ TOKEN : { < NL: "\n" > | < FUNCTION: "function" > -| < TENSOR_TYPE: "tensor" ("<" (~["<",">"])+ ">")? "(" (~["(",")"])* ")" > +| < TENSOR_TYPE: "tensor(" (~["(",")"])+ ")" > +| < TENSORVALUE: (" ")* ":" (" ")* ("{"<BRACE_SL_LEVEL_1>) ("\n")? > | < TENSOR_VALUE_SL: "value" (" ")* ":" (" ")* ("{"<BRACE_SL_LEVEL_1>) ("\n")? > | < TENSOR_VALUE_ML: "value" (<SEARCHLIB_SKIP>)? "{" (["\n"," "])* ("{"<BRACE_ML_LEVEL_1>) (["\n"," "])* "}" ("\n")? > | < LBRACE: "{" > @@ -91,8 +89,6 @@ TOKEN : | < COLON: ":" > | < DOT: "." > | < COMMA: "," > -| < DOUBLE_KEYWORD: "double" > -| < INPUTS: "inputs" > | < MODEL: "model" > | < TYPE: "type" > | < EXPRESSION_SL: "expression" (" ")* ":" (("{"<BRACE_SL_LEVEL_1>)|<BRACE_SL_CONTENT>)* ("\n")? > @@ -111,12 +107,8 @@ TOKEN : | < FILE: "file" > | < URI: "uri" > | < IDENTIFIER: ["a"-"z","A"-"Z", "_"] (["a"-"z","A"-"Z","0"-"9","_"])* > -| < DOUBLEQUOTEDSTRING: "\"" ( ~["\""] )* "\"" > -| < SINGLEQUOTEDSTRING: "'" ( ~["'"] )* "'" > | < CONTEXT: ["a"-"z","A"-"Z"] (["a"-"z", "A"-"Z", "0"-"9"])* > | < DOUBLE: ("-")? (["0"-"9"])+ "." (["0"-"9"])+ > -| < INTEGER: ("-")? (["0"-"9"])+ > -| < LONG: ("-")? (["0"-"9"])+"L" > | < STRING: (["a"-"z","A"-"Z","_","0"-"9","."])+ > | < FILE_PATH: ["a"-"z","A"-"Z", "_"] (["a"-"z","A"-"Z","0"-"9","_","-", "/", "."])+ > | < HTTP: ["h","H"] ["t","T"] ["t","T"] ["p","P"] (["s","S"])? > @@ -157,23 +149,10 @@ void model() : } void modelContent() : -{} { - ( <NL> | - constants() | - largeConstant() | - function() | - inputs() | - input() - )* } - -void inputs() : -{} { - <INPUTS> (<NL>)* <LBRACE> (<NL>)* - ( input() (<NL>)* )* - <RBRACE> + ( <NL> | input() | constants() | largeConstant() | function() )* } /** Declared input variables (aka features). All non-scalar inputs must be declared. */ @@ -212,244 +191,36 @@ void constants() : } { <CONSTANTS> <LBRACE> (<NL>)* - ( constant() (<NL>)* )* + ( name = identifier() <COLON> ( constantDouble(name) | constantTensor(name) ) (<NL>)* )* <RBRACE> } -String constantTensorErrorMessage(String constantTensorName) : {} -{ - { return "For constant tensor '" + constantTensorName + "' in '" + model + "'"; } -} - -void constant() : -{ - String name = null; - TensorType type = TensorType.empty; - Tensor value = null; - String valuePath = null; -} -{ - ( - name = identifier() (<COLON>)? - ( - LOOKAHEAD(4) ( ( type = valueType(name) )? (<COLON>)? (<NL>)* ( value = tensorValue(type) | valuePath = fileItem()) - { - if (value != null) { - model.smallConstant(name, value); - } - else { - try { - value = JsonFormat.decode(type, IOUtils.readFileBytes(model.relativeFile(valuePath, "constant '" + name + "'"))); - model.largeConstant(name, value); - } - catch (Exception e) { - throw new IllegalArgumentException("Could not read constant '" + name + "'", e); - } - } - } - ) - | // Deprecated forms (TODO: Add warning on Vespa 8): - ( constantValue(name) | constantTensor(name) ) - ) - ) -} - -// Deprecated form -void constantValue(String name) : +void constantDouble(String name) : { Token value; } { - <COLON> ( value = <DOUBLE> | value = <INTEGER> | value = <IDENTIFIER> ) - { model.smallConstant(name, Tensor.from(value.image)); } + value = <DOUBLE> { model.smallConstant(name, Tensor.from(Double.parseDouble(value.image))); } } -// Deprecated form void constantTensor(String name) : { - String tensorString = ""; - TensorType type = null; -} -{ - <LBRACE> (<NL>)* - (( tensorString = tensorValuePrefixedByValue() | - type = tensorTypeWithPrefix(constantTensorErrorMessage(name)) ) (<NL>)* )* <RBRACE> - { model.smallConstant(name, type != null ? Tensor.from(type, tensorString) : Tensor.from(tensorString)); } -} - -TensorType valueType(String name) : -{ TensorType type; - -} -{ - ( - ( type = tensorType("Type of " + name) ) - | - ( <DOUBLE_KEYWORD> { type = TensorType.empty; } ) - ) - { return type; } -} - -TensorType tensorType(String errorMessage) : -{ - String tensorTypeString; -} -{ - <TENSOR_TYPE> { tensorTypeString = token.image; } - { - TensorType tensorType; - try { - tensorType = TensorType.fromSpec(tensorTypeString); - } catch (IllegalArgumentException e) { - throw new IllegalArgumentException(errorMessage + ": Illegal tensor type spec: " + e.getMessage()); - } - return tensorType; - } -} - -/** - * Parses a tensor written in a tensor literal form, - * https://docs.vespa.ai/en/reference/tensor.html#tensor-literal-form - */ -Tensor tensorValue(TensorType type) : -{ - Tensor.Builder builder = Tensor.Builder.of(type); - Number doubleValue = null; -} -{ - ( mappedTensorValue(builder) | indexedTensorValues(builder) | doubleValue = number() ) - { - if (doubleValue != null) { - if (type.rank() > 0) - throw new IllegalArgumentException("A tensor of type " + type + " cannot be a number"); - builder.cell(doubleValue.doubleValue()); - } - return builder.build(); - } -} - -/** A mapped or mixed tensor value. */ -void mappedTensorValue(Tensor.Builder builder) : {} -{ - "{" - ( mappedTensorBlock(builder) )* - ( <COMMA> (<NL>)* mappedTensorBlock(builder) )* - "}" -} - - -void mappedTensorBlock(Tensor.Builder builder) : -{ - TensorAddress mappedAddress; -} -{ - mappedAddress = tensorAddress(builder.type()) <COLON> (<NL>)* - ( mappedTensorCellValue(mappedAddress, builder) | indexedTensorBlockValues(mappedAddress, builder) ) -} - -void indexedTensorBlockValues(TensorAddress mappedAddress, Tensor.Builder builder) : -{ - List<Double> values = new ArrayList<Double>(); -} -{ - arrayTensorValues(values) - { - MixedTensor.BoundBuilder boundBuilder = (MixedTensor.BoundBuilder)builder; - double[] arrayValues = new double[values.size()]; - for (int i = 0; i < values.size(); i++ ) { - arrayValues[i] = values.get(i); - } - boundBuilder.block(mappedAddress, arrayValues); - } -} - -void indexedTensorValues(Tensor.Builder builder) : -{ - List<Double> values = new ArrayList<Double>(); -} -{ - arrayTensorValues(values) - { - IndexedTensor.BoundBuilder boundBuilder = (IndexedTensor.BoundBuilder)builder; - double[] arrayValues = new double[values.size()]; - for (int i = 0; i < values.size(); i++ ) { - arrayValues[i] = values.get(i); - } - boundBuilder.fill(arrayValues); - } -} - -/** Tensor array values. Using sub-bracketing for multiple dimensions is optional and therefore ignored here. */ -void arrayTensorValues(List<Double> values) : {} -{ - "[" ( ( indexedTensorValue(values) | arrayTensorValues(values)) )* - ( <COMMA> (<NL>)* ( indexedTensorValue(values) | arrayTensorValues(values)) )* - "]" -} - -void indexedTensorValue(List<Double> values) : -{ - Number value; -} -{ - value = number() - { values.add(value.doubleValue()); } -} - -void mappedTensorCellValue(TensorAddress address, Tensor.Builder builder) : -{ - double value; -} -{ - value = tensorCellValue() - { builder.cell(address, value); } -} - -TensorAddress tensorAddress(TensorType type) : -{ - TensorAddress.Builder builder = new TensorAddress.PartialBuilder(type); - String label; -} -{ - ( - label = tensorAddressLabel() { builder.add(label); } - | - ( "{" ( tensorAddressElement(builder) )* ( <COMMA> tensorAddressElement(builder) )* "}" ) - ) - { return builder.build(); } -} - -void tensorAddressElement(TensorAddress.Builder builder) : -{ - String dimension; - String label; -} -{ - dimension = identifier() <COLON> (<NL>)* label = tensorAddressLabel() - { builder.add(dimension, label); } -} - -String tensorAddressLabel() : -{ - String label; + Token value; } { - ( label = identifier() | label = quotedString() ) - { return label; } + type = tensorType("constant '" + name + "'") value = <TENSORVALUE> + { + model.smallConstant(name, Tensor.from(type, value.image.substring(1))); + } } -double tensorCellValue() : -{ - Number value; -} +String constantTensorErrorMessage(String model, String constantTensorName) : {} { - value = number() - { return value.doubleValue(); } + { return "For constant tensor '" + constantTensorName + "' in model '" + model + "'"; } } -/** Undocumented syntax for supplying a tensor constant value by a string prefixed by "value" */ -String tensorValuePrefixedByValue() : +String tensorValue() : { String tensor; } @@ -462,7 +233,7 @@ String tensorValuePrefixedByValue() : } } -TensorType tensorTypeWithPrefix(String errorMessage) : +TensorType tensorType(String errorMessage) : { String tensorTypeString; } @@ -479,7 +250,7 @@ TensorType tensorTypeWithPrefix(String errorMessage) : } } -/** Consumes a large constant. */ // TODO: Remove on Vespa 9 +/** Consumes a large constant. */ void largeConstant() : { String name; @@ -540,65 +311,18 @@ String expression() : String identifier() : { } { ( - <CONSTANT> | - <CONSTANTS> | - <DOUBLE_KEYWORD> | - <FILE> | - <IDENTIFIER> | - <INPUTS> | - <INTEGER> | - <MODEL> | - <TYPE> | - <URI> + <IDENTIFIER> + | <DOUBLE> + | <FILE> + | <URI> + | <MODEL> + | <TYPE> ) { return token.image; } } -Number number() : -{ - Number num; -} -{ - (num = floatValue() | num = longValue() ) { return num; } -} - -/** Consumes a long or integer token and returns its numeric value. */ -long longValue() : { } -{ - ( <INTEGER> { return Long.parseLong(token.image); } | - <LONG> { return Long.parseLong(token.image.substring(0, token.image.length()-1)); } - ) -} - -/** Consumes a floating-point token and returns its numeric value. */ -double floatValue() : { } -{ - <DOUBLE> { return Double.valueOf(token.image); } -} - /** Consumes an opening brace with leading and trailing newline tokens. */ void lbrace() : { } { (<NL>)* <LBRACE> (<NL>)* } - -String fileItem() : -{ - String path; -} -{ - (<FILE> <COLON> ( <FILE_PATH> | <STRING> | <IDENTIFIER>) { path = com.yahoo.path.Path.fromString(token.image).getRelative(); } { } (<NL>)*) { return path; } -} - -/** - * Consumes a quoted string token and returns the token image minus the quotes. This does not perform - * unescaping of the content, it simply removes the first and last character of the image. However, the token itself can - * contain anything but a double quote. - * - * @return the unquoted token image - */ -String quotedString() : { } -{ - ( <DOUBLEQUOTEDSTRING> | <SINGLEQUOTEDSTRING> ) - { return token.image.substring(1, token.image.length() - 1); } -} diff --git a/model-integration/src/test/java/ai/vespa/rankingexpression/importer/vespa/VespaImportTestCase.java b/model-integration/src/test/java/ai/vespa/rankingexpression/importer/vespa/VespaImportTestCase.java index 25c51a75b0b..fc92883a90f 100644 --- a/model-integration/src/test/java/ai/vespa/rankingexpression/importer/vespa/VespaImportTestCase.java +++ b/model-integration/src/test/java/ai/vespa/rankingexpression/importer/vespa/VespaImportTestCase.java @@ -25,16 +25,7 @@ public class VespaImportTestCase { @Test public void testExample() { ImportedModel model = importModel("example"); - assertModel(model); - } - - @Test - public void testLegacySyntax() { - ImportedModel model = importModel("legacy_syntax"); - assertModel(model); - } - private void assertModel(ImportedModel model) { assertEquals(2, model.inputs().size()); assertEquals("tensor(name{},x[3])", model.inputs().get("input1").toString()); assertEquals("tensor(x[3])", model.inputs().get("input2").toString()); diff --git a/model-integration/src/test/models/vespa/example.model b/model-integration/src/test/models/vespa/example.model index 25d27033cfd..269ed83b695 100644 --- a/model-integration/src/test/models/vespa/example.model +++ b/model-integration/src/test/models/vespa/example.model @@ -1,15 +1,17 @@ model example { # All inputs that are not scalar (aka 0-dimensional tensor) must be declared - inputs { - input1: tensor(name{}, x[3]) - input2: tensor(x[3]) - } + input1: tensor(name{}, x[3]) + input2: tensor(x[3]) constants { constant1: tensor(x[3]):{{x:0}:0.5, {x:1}:1.5, {x:2}:2.5} constant2: 3.0 - constant1asLarge tensor(x[3]): file:constant1asLarge.json + } + + constant constant1asLarge { + type: tensor(x[3]) + file: constant1asLarge.json } function foo1() { diff --git a/model-integration/src/test/models/vespa/legacy_syntax.model b/model-integration/src/test/models/vespa/legacy_syntax.model deleted file mode 100644 index 2a5031a5ff9..00000000000 --- a/model-integration/src/test/models/vespa/legacy_syntax.model +++ /dev/null @@ -1,26 +0,0 @@ -model legacy_syntax { - - # Syntax not supported in rank profiles which probably should be removed on Vespa 9 - input1: tensor(name{}, x[3]) - input2: tensor(x[3]) - - constants { - constant1: tensor(x[3]):{{x:0}:0.5, {x:1}:1.5, {x:2}:2.5} - constant2: 3.0 - } - - # Syntax to be removed on Vespa 9 - constant constant1asLarge { - type: tensor(x[3]) - file: constant1asLarge.json - } - - function foo1() { - expression: file:test.expression - } - - function foo2() { - expression: reduce(sum(input1 * input2, name) * constant(constant1asLarge), max, x) * constant2 - } - -}
\ No newline at end of file |