summary refs log tree commit diff stats
path: root/model-integration
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@gmail.com> 2022-05-18 14:37:17 +0200
committer Jon Bratseth <bratseth@gmail.com> 2022-05-18 14:37:17 +0200
commit 4f8fa003117d3ad1f30b6dc0adcabf4e923b0f90 (patch)
tree ce780f3bac6be9f7a70b63f1853a96b2b9c322b8 /model-integration
parent 2c9cb8159adaced573fd1e34689e533946feea85 (diff)
Revert "Merge pull request #22642 from vespa-engine/revert-22637-bratseth/model-syntax"
This reverts commit 2c9cb8159adaced573fd1e34689e533946feea85, reversing changes made to a4dbfc43c7df534ee5b032204ef19a7b038d7e3e.
Diffstat (limited to 'model-integration')
-rw-r--r-- model-integration/src/main/javacc/ModelParser.jj 322
-rw-r--r-- model-integration/src/test/java/ai/vespa/rankingexpression/importer/vespa/VespaImportTestCase.java 9
-rw-r--r-- model-integration/src/test/models/vespa/example.model 12
-rw-r--r-- model-integration/src/test/models/vespa/legacy_syntax.model 26
4 files changed, 339 insertions, 30 deletions
diff --git a/model-integration/src/main/javacc/ModelParser.jj b/model-integration/src/main/javacc/ModelParser.jj
index 6f6f3508beb..c850d223612 100644
--- a/model-integration/src/main/javacc/ModelParser.jj
+++ b/model-integration/src/main/javacc/ModelParser.jj
@@ -30,8 +30,11 @@ import java.util.List;
import java.util.ArrayList;
import ai.vespa.rankingexpression.importer.ImportedModel;
import com.yahoo.io.IOUtils;
-import com.yahoo.tensor.TensorType;
import com.yahoo.tensor.Tensor;
+import com.yahoo.tensor.IndexedTensor;
+import com.yahoo.tensor.MixedTensor;
+import com.yahoo.tensor.TensorAddress;
+import com.yahoo.tensor.TensorType;
import com.yahoo.tensor.serialization.JsonFormat;
import com.yahoo.searchlib.rankingexpression.RankingExpression;
@@ -80,8 +83,7 @@ TOKEN :
{
< NL: "\n" >
| < FUNCTION: "function" >
-| < TENSOR_TYPE: "tensor(" (~["(",")"])+ ")" >
-| < TENSORVALUE: (" ")* ":" (" ")* ("{"<BRACE_SL_LEVEL_1>) ("\n")? >
+| < TENSOR_TYPE: "tensor" ("<" (~["<",">"])+ ">")? "(" (~["(",")"])* ")" >
| < TENSOR_VALUE_SL: "value" (" ")* ":" (" ")* ("{"<BRACE_SL_LEVEL_1>) ("\n")? >
| < TENSOR_VALUE_ML: "value" (<SEARCHLIB_SKIP>)? "{" (["\n"," "])* ("{"<BRACE_ML_LEVEL_1>) (["\n"," "])* "}" ("\n")? >
| < LBRACE: "{" >
@@ -89,6 +91,8 @@ TOKEN :
| < COLON: ":" >
| < DOT: "." >
| < COMMA: "," >
+| < DOUBLE_KEYWORD: "double" >
+| < INPUTS: "inputs" >
| < MODEL: "model" >
| < TYPE: "type" >
| < EXPRESSION_SL: "expression" (" ")* ":" (("{"<BRACE_SL_LEVEL_1>)|<BRACE_SL_CONTENT>)* ("\n")? >
@@ -107,8 +111,12 @@ TOKEN :
| < FILE: "file" >
| < URI: "uri" >
| < IDENTIFIER: ["a"-"z","A"-"Z", "_"] (["a"-"z","A"-"Z","0"-"9","_"])* >
+| < DOUBLEQUOTEDSTRING: "\"" ( ~["\""] )* "\"" >
+| < SINGLEQUOTEDSTRING: "'" ( ~["'"] )* "'" >
| < CONTEXT: ["a"-"z","A"-"Z"] (["a"-"z", "A"-"Z", "0"-"9"])* >
| < DOUBLE: ("-")? (["0"-"9"])+ "." (["0"-"9"])+ >
+| < INTEGER: ("-")? (["0"-"9"])+ >
+| < LONG: ("-")? (["0"-"9"])+"L" >
| < STRING: (["a"-"z","A"-"Z","_","0"-"9","."])+ >
| < FILE_PATH: ["a"-"z","A"-"Z", "_"] (["a"-"z","A"-"Z","0"-"9","_","-", "/", "."])+ >
| < HTTP: ["h","H"] ["t","T"] ["t","T"] ["p","P"] (["s","S"])? >
@@ -149,10 +157,23 @@ void model() :
}
void modelContent() :
+{}
{
+ ( <NL> |
+ constants() |
+ largeConstant() |
+ function() |
+ inputs() |
+ input()
+ )*
}
+
+void inputs() :
+{}
{
- ( <NL> | input() | constants() | largeConstant() | function() )*
+ <INPUTS> (<NL>)* <LBRACE> (<NL>)*
+ ( input() (<NL>)* )*
+ <RBRACE>
}
/** Declared input variables (aka features). All non-scalar inputs must be declared. */
@@ -191,36 +212,244 @@ void constants() :
}
{
<CONSTANTS> <LBRACE> (<NL>)*
- ( name = identifier() <COLON> ( constantDouble(name) | constantTensor(name) ) (<NL>)* )*
+ ( constant() (<NL>)* )*
<RBRACE>
}
-void constantDouble(String name) :
+String constantTensorErrorMessage(String constantTensorName) : {}
+{
+ { return "For constant tensor '" + constantTensorName + "' in '" + model + "'"; }
+}
+
+void constant() :
+{
+ String name = null;
+ TensorType type = TensorType.empty;
+ Tensor value = null;
+ String valuePath = null;
+}
+{
+ (
+ name = identifier() (<COLON>)?
+ (
+ LOOKAHEAD(4) ( ( type = valueType(name) )? (<COLON>)? (<NL>)* ( value = tensorValue(type) | valuePath = fileItem())
+ {
+ if (value != null) {
+ model.smallConstant(name, value);
+ }
+ else {
+ try {
+ value = JsonFormat.decode(type, IOUtils.readFileBytes(model.relativeFile(valuePath, "constant '" + name + "'")));
+ model.largeConstant(name, value);
+ }
+ catch (Exception e) {
+ throw new IllegalArgumentException("Could not read constant '" + name + "'", e);
+ }
+ }
+ }
+ )
+ | // Deprecated forms (TODO: Add warning on Vespa 8):
+ ( constantValue(name) | constantTensor(name) )
+ )
+ )
+}
+
+// Deprecated form
+void constantValue(String name) :
{
Token value;
}
{
- value = <DOUBLE> { model.smallConstant(name, Tensor.from(Double.parseDouble(value.image))); }
+ <COLON> ( value = <DOUBLE> | value = <INTEGER> | value = <IDENTIFIER> )
+ { model.smallConstant(name, Tensor.from(value.image)); }
}
+// Deprecated form
void constantTensor(String name) :
{
+ String tensorString = "";
+ TensorType type = null;
+}
+{
+ <LBRACE> (<NL>)*
+ (( tensorString = tensorValuePrefixedByValue() |
+ type = tensorTypeWithPrefix(constantTensorErrorMessage(name)) ) (<NL>)* )* <RBRACE>
+ { model.smallConstant(name, type != null ? Tensor.from(type, tensorString) : Tensor.from(tensorString)); }
+}
+
+TensorType valueType(String name) :
+{
TensorType type;
- Token value;
+
}
{
- type = tensorType("constant '" + name + "'") value = <TENSORVALUE>
- {
- model.smallConstant(name, Tensor.from(type, value.image.substring(1)));
- }
+ (
+ ( type = tensorType("Type of " + name) )
+ |
+ ( <DOUBLE_KEYWORD> { type = TensorType.empty; } )
+ )
+ { return type; }
}
-String constantTensorErrorMessage(String model, String constantTensorName) : {}
+TensorType tensorType(String errorMessage) :
{
- { return "For constant tensor '" + constantTensorName + "' in model '" + model + "'"; }
+ String tensorTypeString;
+}
+{
+ <TENSOR_TYPE> { tensorTypeString = token.image; }
+ {
+ TensorType tensorType;
+ try {
+ tensorType = TensorType.fromSpec(tensorTypeString);
+ } catch (IllegalArgumentException e) {
+ throw new IllegalArgumentException(errorMessage + ": Illegal tensor type spec: " + e.getMessage());
+ }
+ return tensorType;
+ }
}
-String tensorValue() :
+/**
+ * Parses a tensor written in a tensor literal form,
+ * https://docs.vespa.ai/en/reference/tensor.html#tensor-literal-form
+ */
+Tensor tensorValue(TensorType type) :
+{
+ Tensor.Builder builder = Tensor.Builder.of(type);
+ Number doubleValue = null;
+}
+{
+ ( mappedTensorValue(builder) | indexedTensorValues(builder) | doubleValue = number() )
+ {
+ if (doubleValue != null) {
+ if (type.rank() > 0)
+ throw new IllegalArgumentException("A tensor of type " + type + " cannot be a number");
+ builder.cell(doubleValue.doubleValue());
+ }
+ return builder.build();
+ }
+}
+
+/** A mapped or mixed tensor value. */
+void mappedTensorValue(Tensor.Builder builder) : {}
+{
+ "{"
+ ( mappedTensorBlock(builder) )*
+ ( <COMMA> (<NL>)* mappedTensorBlock(builder) )*
+ "}"
+}
+
+
+void mappedTensorBlock(Tensor.Builder builder) :
+{
+ TensorAddress mappedAddress;
+}
+{
+ mappedAddress = tensorAddress(builder.type()) <COLON> (<NL>)*
+ ( mappedTensorCellValue(mappedAddress, builder) | indexedTensorBlockValues(mappedAddress, builder) )
+}
+
+void indexedTensorBlockValues(TensorAddress mappedAddress, Tensor.Builder builder) :
+{
+ List<Double> values = new ArrayList<Double>();
+}
+{
+ arrayTensorValues(values)
+ {
+ MixedTensor.BoundBuilder boundBuilder = (MixedTensor.BoundBuilder)builder;
+ double[] arrayValues = new double[values.size()];
+ for (int i = 0; i < values.size(); i++ ) {
+ arrayValues[i] = values.get(i);
+ }
+ boundBuilder.block(mappedAddress, arrayValues);
+ }
+}
+
+void indexedTensorValues(Tensor.Builder builder) :
+{
+ List<Double> values = new ArrayList<Double>();
+}
+{
+ arrayTensorValues(values)
+ {
+ IndexedTensor.BoundBuilder boundBuilder = (IndexedTensor.BoundBuilder)builder;
+ double[] arrayValues = new double[values.size()];
+ for (int i = 0; i < values.size(); i++ ) {
+ arrayValues[i] = values.get(i);
+ }
+ boundBuilder.fill(arrayValues);
+ }
+}
+
+/** Tensor array values. Using sub-bracketing for multiple dimensions is optional and therefore ignored here. */
+void arrayTensorValues(List<Double> values) : {}
+{
+ "[" ( ( indexedTensorValue(values) | arrayTensorValues(values)) )*
+ ( <COMMA> (<NL>)* ( indexedTensorValue(values) | arrayTensorValues(values)) )*
+ "]"
+}
+
+void indexedTensorValue(List<Double> values) :
+{
+ Number value;
+}
+{
+ value = number()
+ { values.add(value.doubleValue()); }
+}
+
+void mappedTensorCellValue(TensorAddress address, Tensor.Builder builder) :
+{
+ double value;
+}
+{
+ value = tensorCellValue()
+ { builder.cell(address, value); }
+}
+
+TensorAddress tensorAddress(TensorType type) :
+{
+ TensorAddress.Builder builder = new TensorAddress.PartialBuilder(type);
+ String label;
+}
+{
+ (
+ label = tensorAddressLabel() { builder.add(label); }
+ |
+ ( "{" ( tensorAddressElement(builder) )* ( <COMMA> tensorAddressElement(builder) )* "}" )
+ )
+ { return builder.build(); }
+}
+
+void tensorAddressElement(TensorAddress.Builder builder) :
+{
+ String dimension;
+ String label;
+}
+{
+ dimension = identifier() <COLON> (<NL>)* label = tensorAddressLabel()
+ { builder.add(dimension, label); }
+}
+
+String tensorAddressLabel() :
+{
+ String label;
+}
+{
+ ( label = identifier() | label = quotedString() )
+ { return label; }
+}
+
+double tensorCellValue() :
+{
+ Number value;
+}
+{
+ value = number()
+ { return value.doubleValue(); }
+}
+
+/** Undocumented syntax for supplying a tensor constant value by a string prefixed by "value" */
+String tensorValuePrefixedByValue() :
{
String tensor;
}
@@ -233,7 +462,7 @@ String tensorValue() :
}
}
-TensorType tensorType(String errorMessage) :
+TensorType tensorTypeWithPrefix(String errorMessage) :
{
String tensorTypeString;
}
@@ -250,7 +479,7 @@ TensorType tensorType(String errorMessage) :
}
}
-/** Consumes a large constant. */
+/** Consumes a large constant. */ // TODO: Remove on Vespa 9
void largeConstant() :
{
String name;
@@ -311,18 +540,65 @@ String expression() :
String identifier() : { }
{
(
- <IDENTIFIER>
- | <DOUBLE>
- | <FILE>
- | <URI>
- | <MODEL>
- | <TYPE>
+ <CONSTANT> |
+ <CONSTANTS> |
+ <DOUBLE_KEYWORD> |
+ <FILE> |
+ <IDENTIFIER> |
+ <INPUTS> |
+ <INTEGER> |
+ <MODEL> |
+ <TYPE> |
+ <URI>
)
{ return token.image; }
}
+Number number() :
+{
+ Number num;
+}
+{
+ (num = floatValue() | num = longValue() ) { return num; }
+}
+
+/** Consumes a long or integer token and returns its numeric value. */
+long longValue() : { }
+{
+ ( <INTEGER> { return Long.parseLong(token.image); } |
+ <LONG> { return Long.parseLong(token.image.substring(0, token.image.length()-1)); }
+ )
+}
+
+/** Consumes a floating-point token and returns its numeric value. */
+double floatValue() : { }
+{
+ <DOUBLE> { return Double.valueOf(token.image); }
+}
+
/** Consumes an opening brace with leading and trailing newline tokens. */
void lbrace() : { }
{
(<NL>)* <LBRACE> (<NL>)*
}
+
+String fileItem() :
+{
+ String path;
+}
+{
+ (<FILE> <COLON> ( <FILE_PATH> | <STRING> | <IDENTIFIER>) { path = com.yahoo.path.Path.fromString(token.image).getRelative(); } { } (<NL>)*) { return path; }
+}
+
+/**
+ * Consumes a quoted string token and returns the token image minus the quotes. This does not perform
+ * unescaping of the content, it simply removes the first and last character of the image. A double-quoted
+ * token can contain anything but a double quote, and a single-quoted token anything but a single quote.
+ *
+ * @return the unquoted token image
+ */
+String quotedString() : { }
+{
+ ( <DOUBLEQUOTEDSTRING> | <SINGLEQUOTEDSTRING> )
+ { return token.image.substring(1, token.image.length() - 1); }
+}
diff --git a/model-integration/src/test/java/ai/vespa/rankingexpression/importer/vespa/VespaImportTestCase.java b/model-integration/src/test/java/ai/vespa/rankingexpression/importer/vespa/VespaImportTestCase.java
index fc92883a90f..25c51a75b0b 100644
--- a/model-integration/src/test/java/ai/vespa/rankingexpression/importer/vespa/VespaImportTestCase.java
+++ b/model-integration/src/test/java/ai/vespa/rankingexpression/importer/vespa/VespaImportTestCase.java
@@ -25,7 +25,16 @@ public class VespaImportTestCase {
@Test
public void testExample() {
ImportedModel model = importModel("example");
+ assertModel(model);
+ }
+
+ @Test
+ public void testLegacySyntax() {
+ ImportedModel model = importModel("legacy_syntax");
+ assertModel(model);
+ }
+ private void assertModel(ImportedModel model) {
assertEquals(2, model.inputs().size());
assertEquals("tensor(name{},x[3])", model.inputs().get("input1").toString());
assertEquals("tensor(x[3])", model.inputs().get("input2").toString());
diff --git a/model-integration/src/test/models/vespa/example.model b/model-integration/src/test/models/vespa/example.model
index 269ed83b695..25d27033cfd 100644
--- a/model-integration/src/test/models/vespa/example.model
+++ b/model-integration/src/test/models/vespa/example.model
@@ -1,17 +1,15 @@
model example {
# All inputs that are not scalar (aka 0-dimensional tensor) must be declared
- input1: tensor(name{}, x[3])
- input2: tensor(x[3])
+ inputs {
+ input1: tensor(name{}, x[3])
+ input2: tensor(x[3])
+ }
constants {
constant1: tensor(x[3]):{{x:0}:0.5, {x:1}:1.5, {x:2}:2.5}
constant2: 3.0
- }
-
- constant constant1asLarge {
- type: tensor(x[3])
- file: constant1asLarge.json
+ constant1asLarge tensor(x[3]): file:constant1asLarge.json
}
function foo1() {
diff --git a/model-integration/src/test/models/vespa/legacy_syntax.model b/model-integration/src/test/models/vespa/legacy_syntax.model
new file mode 100644
index 00000000000..2a5031a5ff9
--- /dev/null
+++ b/model-integration/src/test/models/vespa/legacy_syntax.model
@@ -0,0 +1,26 @@
+model legacy_syntax {
+
+ # Syntax not supported in rank profiles which probably should be removed on Vespa 9
+ input1: tensor(name{}, x[3])
+ input2: tensor(x[3])
+
+ constants {
+ constant1: tensor(x[3]):{{x:0}:0.5, {x:1}:1.5, {x:2}:2.5}
+ constant2: 3.0
+ }
+
+ # Syntax to be removed on Vespa 9
+ constant constant1asLarge {
+ type: tensor(x[3])
+ file: constant1asLarge.json
+ }
+
+ function foo1() {
+ expression: file:test.expression
+ }
+
+ function foo2() {
+ expression: reduce(sum(input1 * input2, name) * constant(constant1asLarge), max, x) * constant2
+ }
+
+}
\ No newline at end of file