author    Arne Juul <arnej@yahooinc.com>  2023-06-23 10:01:11 +0000
committer Arne Juul <arnej@yahooinc.com>  2023-06-23 10:01:15 +0000
commit    83ba3d8b6226b8b109a07b470111a9c7581bcdb8 (patch)
tree      6f7e26ea754d04a50ec934feb9813ee1f01841fe /model-integration
parent    97b15016085dd6f2b515b7051803f92e34b29ab9 (diff)
update onnx.proto
* use latest version from https://github.com/onnx/onnx/blob/main/onnx/onnx.proto
* track API changes (enum -> int32)
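
The enum -> int32 change means the protobuf-generated accessors for data_type/elem_type now return a plain int rather than the DataType enum, so callers convert explicitly with DataType.forNumber(...), as the TensorConverter and TypeConverter hunks in the diff below do. A minimal sketch of that pattern, using a hypothetical helper rather than code from the repository (the generated class name onnx.Onnx is assumed from the importer code):

    import onnx.Onnx;

    class OnnxElementTypes {

        // Hypothetical helper (not from the repository) showing the adaptation:
        // getDataType() used to return the generated DataType enum directly; with
        // the updated proto it returns a plain int, so the enum is recovered with
        // DataType.forNumber(...) before switching on it.
        static boolean isFloatingPoint(Onnx.TensorProto tensorProto) {
            // forNumber(...) returns null for numbers this proto version does not know about
            Onnx.TensorProto.DataType elemType =
                    Onnx.TensorProto.DataType.forNumber(tensorProto.getDataType());
            if (elemType == null) return false;
            switch (elemType) {
                case FLOAT: case DOUBLE: case FLOAT16: case BFLOAT16:
                    return true;
                default:
                    return false;
            }
        }
    }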
Diffstat (limited to 'model-integration')
-rw-r--r--  model-integration/src/main/java/ai/vespa/rankingexpression/importer/onnx/TensorConverter.java          5
-rw-r--r--  model-integration/src/main/java/ai/vespa/rankingexpression/importer/onnx/TypeConverter.java            7
-rw-r--r--  model-integration/src/main/protobuf/onnx.proto                                                       517
-rw-r--r--  model-integration/src/test/java/ai/vespa/rankingexpression/importer/onnx/OnnxOperationsTestCase.java    4
4 files changed, 453 insertions, 80 deletions
diff --git a/model-integration/src/main/java/ai/vespa/rankingexpression/importer/onnx/TensorConverter.java b/model-integration/src/main/java/ai/vespa/rankingexpression/importer/onnx/TensorConverter.java
index f12f60dcc8e..f690b8e8c8a 100644
--- a/model-integration/src/main/java/ai/vespa/rankingexpression/importer/onnx/TensorConverter.java
+++ b/model-integration/src/main/java/ai/vespa/rankingexpression/importer/onnx/TensorConverter.java
@@ -32,8 +32,9 @@ class TensorConverter {
}
private static Values readValuesOf(Onnx.TensorProto tensorProto) {
+ var elemType = Onnx.TensorProto.DataType.forNumber(tensorProto.getDataType());
if (tensorProto.hasRawData()) {
- switch (tensorProto.getDataType()) {
+ switch (elemType) {
case BOOL: return new RawBoolValues(tensorProto);
case FLOAT: return new RawFloatValues(tensorProto);
case DOUBLE: return new RawDoubleValues(tensorProto);
@@ -41,7 +42,7 @@ class TensorConverter {
case INT64: return new RawLongValues(tensorProto);
}
} else {
- switch (tensorProto.getDataType()) {
+ switch (elemType) {
case FLOAT: return new FloatValues(tensorProto);
case DOUBLE: return new DoubleValues(tensorProto);
case INT32: return new IntValues(tensorProto);
diff --git a/model-integration/src/main/java/ai/vespa/rankingexpression/importer/onnx/TypeConverter.java b/model-integration/src/main/java/ai/vespa/rankingexpression/importer/onnx/TypeConverter.java
index 35ec1d8c54a..deac950d324 100644
--- a/model-integration/src/main/java/ai/vespa/rankingexpression/importer/onnx/TypeConverter.java
+++ b/model-integration/src/main/java/ai/vespa/rankingexpression/importer/onnx/TypeConverter.java
@@ -37,7 +37,8 @@ class TypeConverter {
static OrderedTensorType typeFrom(Onnx.TypeProto type) {
String dimensionPrefix = "d"; // standard naming convention: d0, d1, ...
Onnx.TensorShapeProto shape = type.getTensorType().getShape();
- OrderedTensorType.Builder builder = new OrderedTensorType.Builder(toValueType(type.getTensorType().getElemType()));
+ var elemType = Onnx.TensorProto.DataType.forNumber(type.getTensorType().getElemType());
+ OrderedTensorType.Builder builder = new OrderedTensorType.Builder(toValueType(elemType));
for (int i = 0; i < shape.getDimCount(); ++ i) {
String dimensionName = dimensionPrefix + i;
Onnx.TensorShapeProto.Dimension onnxDimension = shape.getDim(i);
@@ -52,8 +53,8 @@ class TypeConverter {
}
static OrderedTensorType typeFrom(Onnx.TensorProto tensor) {
- return OrderedTensorType.fromDimensionList(toValueType(tensor.getDataType()),
- tensor.getDimsList());
+ var elemType = Onnx.TensorProto.DataType.forNumber(tensor.getDataType());
+ return OrderedTensorType.fromDimensionList(toValueType(elemType), tensor.getDimsList());
}
private static TensorType.Value toValueType(Onnx.TensorProto.DataType dataType) {
diff --git a/model-integration/src/main/protobuf/onnx.proto b/model-integration/src/main/protobuf/onnx.proto
index dc6542867e0..1d265ae9f28 100644
--- a/model-integration/src/main/protobuf/onnx.proto
+++ b/model-integration/src/main/protobuf/onnx.proto
@@ -3,8 +3,8 @@
//
-// Copyright (c) Facebook Inc. and Microsoft Corporation.
-// Licensed under the MIT license.
+// SPDX-License-Identifier: Apache-2.0
+
syntax = "proto2";
@@ -20,23 +20,16 @@ package onnx;
//
// This document describes the syntax of models and their computation graphs,
// as well as the standard data types. Together, they are referred to as the ONNX
-// Intermediate Representation, or 'IR' for short.
+// Intermediate Representation, or 'IR' for short.
//
// The normative semantic specification of the ONNX IR is found in docs/IR.md.
// Definitions of the built-in neural network operators may be found in docs/Operators.md.
// Notes
//
-// Release
-//
-// We are still in the very early stage of defining ONNX. The current
-// version of ONNX is a starting point. While we are actively working
-// towards a complete spec, we would like to get the community involved
-// by sharing our working version of ONNX.
-//
// Protobuf compatibility
-//
-// To simplify framework compatibility, ONNX is defined using the subset of protobuf
+//
+// To simplify framework compatibility, ONNX is defined using the subset of protobuf
// that is compatible with both protobuf v2 and v3. This means that we do not use any
// protobuf features that are only available in one of the two versions.
//
@@ -60,22 +53,60 @@ enum Version {
_START_VERSION = 0;
// The version field is always serialized and we will use it to store the
// version that the graph is generated from. This helps us set up version
- // control. We should use version as
- // xx(major) - xx(minor) - xxxx(bugfix)
- // and we are starting with 0x00000001 (0.0.1), which was the
- // version we published on Oct 10, 2017.
- IR_VERSION_2017_10_10 = 0x00000001;
+ // control.
+ // For the IR, we are using simple numbers starting with 0x00000001,
+ // which was the version we published on Oct 10, 2017.
+ IR_VERSION_2017_10_10 = 0x0000000000000001;
- // IR_VERSION 0.0.2 published on Oct 30, 2017
+ // IR_VERSION 2 published on Oct 30, 2017
// - Added type discriminator to AttributeProto to support proto3 users
- IR_VERSION_2017_10_30 = 0x00000002;
+ IR_VERSION_2017_10_30 = 0x0000000000000002;
- // IR VERSION 0.0.3 published on Nov 3, 2017
+ // IR VERSION 3 published on Nov 3, 2017
// - For operator versioning:
// - Added new message OperatorSetIdProto
// - Added opset_import in ModelProto
// - For vendor extensions, added domain in NodeProto
- IR_VERSION = 0x00000003;
+ IR_VERSION_2017_11_3 = 0x0000000000000003;
+
+ // IR VERSION 4 published on Jan 22, 2019
+ // - Relax constraint that initializers should be a subset of graph inputs
+ // - Add type BFLOAT16
+ IR_VERSION_2019_1_22 = 0x0000000000000004;
+
+ // IR VERSION 5 published on March 18, 2019
+ // - Add message TensorAnnotation.
+ // - Add quantization annotation in GraphProto to map tensor with its scale and zero point quantization parameters.
+ IR_VERSION_2019_3_18 = 0x0000000000000005;
+
+ // IR VERSION 6 published on Sep 19, 2019
+ // - Add support for sparse tensor constants stored in model.
+ // - Add message SparseTensorProto
+ // - Add sparse initializers
+ IR_VERSION_2019_9_19 = 0x0000000000000006;
+
+ // IR VERSION 7 published on May 8, 2020
+ // - Add support to allow function body graph to rely on multiple external opreator sets.
+ // - Add a list to promote inference graph's initializers to global and
+ // mutable variables. Global variables are visible in all graphs of the
+ // stored models.
+ // - Add message TrainingInfoProto to store initialization
+ // method and training algorithm. The execution of TrainingInfoProto
+ // can modify the values of mutable variables.
+ // - Implicitly add inference graph into each TrainingInfoProto's algorithm.
+ IR_VERSION_2020_5_8 = 0x0000000000000007;
+
+ // IR VERSION 8 published on July 30, 2021
+ // Introduce TypeProto.SparseTensor
+ // Introduce TypeProto.Optional
+ // Added a list of FunctionProtos local to the model
+ // Deprecated since_version and operator status from FunctionProto
+ IR_VERSION_2021_7_30 = 0x0000000000000008;
+
+ // IR VERSION 9 published on May 5, 2023
+ // Added AttributeProto to FunctionProto so that default attribute values can be set.
+ // Added FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ.
+ IR_VERSION = 0x0000000000000009;
}
// Attributes
@@ -95,17 +126,21 @@ message AttributeProto {
STRING = 3;
TENSOR = 4;
GRAPH = 5;
+ SPARSE_TENSOR = 11;
+ TYPE_PROTO = 13;
FLOATS = 6;
INTS = 7;
STRINGS = 8;
TENSORS = 9;
GRAPHS = 10;
+ SPARSE_TENSORS = 12;
+ TYPE_PROTOS = 14;
}
// The name field MUST be present for this version of the IR.
optional string name = 1; // namespace Attribute
-
+
// if ref_attr_name is not empty, ref_attr_name is the attribute name in parent function.
// In this case, this AttributeProto does not contain data, and it's a reference of attribute
// in parent scope.
@@ -117,10 +152,10 @@ message AttributeProto {
// The type field MUST be present for this version of the IR.
// For 0.0.1 versions of the IR, this field was not defined, and
- // implementations needed to use has_field hueristics to determine
+ // implementations needed to use has_field heuristics to determine
// which value field was in use. For IR_VERSION 0.0.2 or later, this
// field MUST be set and match the f|i|s|t|... field in use. This
- // change was made to accomodate proto3 implementations.
+ // change was made to accommodate proto3 implementations.
optional AttributeType type = 20; // discriminator that indicates which field below is in use
// Exactly ONE of the following fields must be present for this version of the IR
@@ -129,14 +164,18 @@ message AttributeProto {
optional bytes s = 4; // UTF-8 string
optional TensorProto t = 5; // tensor value
optional GraphProto g = 6; // graph
+ optional SparseTensorProto sparse_tensor = 22; // sparse tensor value
// Do not use field below, it's deprecated.
// optional ValueProto v = 12; // value - subsumes everything but graph
+ optional TypeProto tp = 14; // type proto
repeated float floats = 7; // list of floats
repeated int64 ints = 8; // list of ints
repeated bytes strings = 9; // list of UTF-8 strings
repeated TensorProto tensors = 10; // list of tensors
repeated GraphProto graphs = 11; // list of graph
+ repeated SparseTensorProto sparse_tensors = 23; // list of sparse tensors
+ repeated TypeProto type_protos = 15;// list of type protos
}
// Defines information on value, including the name, the type, and
@@ -144,7 +183,8 @@ message AttributeProto {
message ValueInfoProto {
// This field MUST be present in this version of the IR.
optional string name = 1; // namespace Value
- // This field MUST be present in this version of the IR.
+ // This field MUST be present in this version of the IR for
+ // inputs and outputs of the top-level graph.
optional TypeProto type = 2;
// A human-readable documentation for this value. Markdown is allowed.
optional string doc_string = 3;
@@ -155,7 +195,7 @@ message ValueInfoProto {
// Computation graphs are made up of a DAG of nodes, which represent what is
// commonly called a "layer" or "pipeline stage" in machine learning frameworks.
//
-// For example, it can be a node of type "Conv" that takes in an image, a filter
+// For example, it can be a node of type "Conv" that takes in an image, a filter
// tensor and a bias tensor, and produces the convolved output.
message NodeProto {
repeated string input = 1; // namespace Value
@@ -177,12 +217,130 @@ message NodeProto {
optional string doc_string = 6;
}
+// Training information
+// TrainingInfoProto stores information for training a model.
+// In particular, this defines two functionalities: an initialization-step
+// and a training-algorithm-step. Initialization resets the model
+// back to its original state as if no training has been performed.
+// Training algorithm improves the model based on input data.
+//
+// The semantics of the initialization-step is that the initializers
+// in ModelProto.graph and in TrainingInfoProto.algorithm are first
+// initialized as specified by the initializers in the graph, and then
+// updated by the "initialization_binding" in every instance in
+// ModelProto.training_info.
+//
+// The field "algorithm" defines a computation graph which represents a
+// training algorithm's step. After the execution of a
+// TrainingInfoProto.algorithm, the initializers specified by "update_binding"
+// may be immediately updated. If the targeted training algorithm contains
+// consecutive update steps (such as block coordinate descent methods),
+// the user needs to create a TrainingInfoProto for each step.
+message TrainingInfoProto {
+ // This field describes a graph to compute the initial tensors
+ // upon starting the training process. Initialization graph has no input
+ // and can have multiple outputs. Usually, trainable tensors in neural
+ // networks are randomly initialized. To achieve that, for each tensor,
+ // the user can put a random number operator such as RandomNormal or
+ // RandomUniform in TrainingInfoProto.initialization.node and assign its
+ // random output to the specific tensor using "initialization_binding".
+ // This graph can also set the initializers in "algorithm" in the same
+ // TrainingInfoProto; a use case is resetting the number of training
+ // iteration to zero.
+ //
+ // By default, this field is an empty graph and its evaluation does not
+ // produce any output. Thus, no initializer would be changed by default.
+ optional GraphProto initialization = 1;
+
+ // This field represents a training algorithm step. Given required inputs,
+ // it computes outputs to update initializers in its own or inference graph's
+ // initializer lists. In general, this field contains loss node, gradient node,
+ // optimizer node, increment of iteration count.
+ //
+ // An execution of the training algorithm step is performed by executing the
+ // graph obtained by combining the inference graph (namely "ModelProto.graph")
+ // and the "algorithm" graph. That is, the actual
+ // input/initializer/output/node/value_info/sparse_initializer list of
+ // the training graph is the concatenation of
+ // "ModelProto.graph.input/initializer/output/node/value_info/sparse_initializer"
+ // and "algorithm.input/initializer/output/node/value_info/sparse_initializer"
+ // in that order. This combined graph must satisfy the normal ONNX conditions.
+ // Now, let's provide a visualization of graph combination for clarity.
+ // Let the inference graph (i.e., "ModelProto.graph") be
+ // tensor_a, tensor_b -> MatMul -> tensor_c -> Sigmoid -> tensor_d
+ // and the "algorithm" graph be
+ // tensor_d -> Add -> tensor_e
+ // The combination process results
+ // tensor_a, tensor_b -> MatMul -> tensor_c -> Sigmoid -> tensor_d -> Add -> tensor_e
+ //
+ // Notice that an input of a node in the "algorithm" graph may reference the
+ // output of a node in the inference graph (but not the other way round). Also, inference
+ // node cannot reference inputs of "algorithm". With these restrictions, inference graph
+ // can always be run independently without training information.
+ //
+ // By default, this field is an empty graph and its evaluation does not
+ // produce any output. Evaluating the default training step never
+ // update any initializers.
+ optional GraphProto algorithm = 2;
+
+ // This field specifies the bindings from the outputs of "initialization" to
+ // some initializers in "ModelProto.graph.initializer" and
+ // the "algorithm.initializer" in the same TrainingInfoProto.
+ // See "update_binding" below for details.
+ //
+ // By default, this field is empty and no initializer would be changed
+ // by the execution of "initialization".
+ repeated StringStringEntryProto initialization_binding = 3;
+
+ // Gradient-based training is usually an iterative procedure. In one gradient
+ // descent iteration, we apply
+ //
+ // x = x - r * g
+ //
+ // where "x" is the optimized tensor, "r" stands for learning rate, and "g" is
+ // gradient of "x" with respect to a chosen loss. To avoid adding assignments
+ // into the training graph, we split the update equation into
+ //
+ // y = x - r * g
+ // x = y
+ //
+ // The user needs to save "y = x - r * g" into TrainingInfoProto.algorithm. To
+ // tell that "y" should be assigned to "x", the field "update_binding" may
+ // contain a key-value pair of strings, "x" (key of StringStringEntryProto)
+ // and "y" (value of StringStringEntryProto).
+ // For a neural network with multiple trainable (mutable) tensors, there can
+ // be multiple key-value pairs in "update_binding".
+ //
+ // The initializers appears as keys in "update_binding" are considered
+ // mutable variables. This implies some behaviors
+ // as described below.
+ //
+ // 1. We have only unique keys in all "update_binding"s so that two
+ // variables may not have the same name. This ensures that one
+ // variable is assigned up to once.
+ // 2. The keys must appear in names of "ModelProto.graph.initializer" or
+ // "TrainingInfoProto.algorithm.initializer".
+ // 3. The values must be output names of "algorithm" or "ModelProto.graph.output".
+ // 4. Mutable variables are initialized to the value specified by the
+ // corresponding initializer, and then potentially updated by
+ // "initializer_binding"s and "update_binding"s in "TrainingInfoProto"s.
+ //
+ // This field usually contains names of trainable tensors
+ // (in ModelProto.graph), optimizer states such as momentums in advanced
+ // stochastic gradient methods (in TrainingInfoProto.graph),
+ // and number of training iterations (in TrainingInfoProto.graph).
+ //
+ // By default, this field is empty and no initializer would be changed
+ // by the execution of "algorithm".
+ repeated StringStringEntryProto update_binding = 4;
+}
+
// Models
//
// ModelProto is a top-level file/container format for bundling a ML model and
// associating its computation graph with metadata.
//
-// The semantics of the model are described by the associated GraphProto.
+// The semantics of the model are described by the associated GraphProto's.
message ModelProto {
// The version of the IR this model targets. See Version enum above.
// This field MUST be present.
@@ -227,18 +385,58 @@ message ModelProto {
// Named metadata values; keys should be distinct.
repeated StringStringEntryProto metadata_props = 14;
+
+ // Training-specific information. Sequentially executing all stored
+ // `TrainingInfoProto.algorithm`s and assigning their outputs following
+ // the corresponding `TrainingInfoProto.update_binding`s is one training
+ // iteration. Similarly, to initialize the model
+ // (as if training hasn't happened), the user should sequentially execute
+ // all stored `TrainingInfoProto.initialization`s and assigns their outputs
+ // using `TrainingInfoProto.initialization_binding`s.
+ //
+ // If this field is empty, the training behavior of the model is undefined.
+ repeated TrainingInfoProto training_info = 20;
+
+ // A list of function protos local to the model.
+ //
+ // Name of the function "FunctionProto.name" should be unique within the domain "FunctionProto.domain".
+ // In case of any conflicts the behavior (whether the model local functions are given higher priority,
+ // or standard operator sets are given higher priotity or this is treated as error) is defined by
+ // the runtimes.
+ //
+ // The operator sets imported by FunctionProto should be compatible with the ones
+ // imported by ModelProto and other model local FunctionProtos.
+ // Example, if same operator set say 'A' is imported by a FunctionProto and ModelProto
+ // or by 2 FunctionProtos then versions for the operator set may be different but,
+ // the operator schema returned for op_type, domain, version combination
+ // for both the versions should be same for every node in the function body.
+ //
+ // One FunctionProto can reference other FunctionProto in the model, however, recursive reference
+ // is not allowed.
+ repeated FunctionProto functions = 25;
};
// StringStringEntryProto follows the pattern for cross-proto-version maps.
// See https://developers.google.com/protocol-buffers/docs/proto3#maps
message StringStringEntryProto {
optional string key = 1;
- optional string value= 2;
+ optional string value = 2;
};
+message TensorAnnotation {
+ optional string tensor_name = 1;
+ // <key, value> pairs to annotate tensor specified by <tensor_name> above.
+ // The keys used in the mapping below must be pre-defined in ONNX spec.
+ // For example, for 8-bit linear quantization case, 'SCALE_TENSOR', 'ZERO_POINT_TENSOR' will be pre-defined as
+ // quantization parameter keys.
+ repeated StringStringEntryProto quant_parameter_tensor_names = 2;
+}
+
+
+
// Graphs
//
-// A graph defines the computational logic of a model and is comprised of a parameterized
+// A graph defines the computational logic of a model and is comprised of a parameterized
// list of nodes that form a directed acyclic graph based on their inputs and outputs.
// This is the equivalent of the "network" or "graph" in many deep learning
// frameworks.
@@ -250,10 +448,14 @@ message GraphProto {
optional string name = 2; // namespace Graph
// A list of named tensor values, used to specify constant inputs of the graph.
- // Each TensorProto entry must have a distinct name (within the list) that
- // also appears in the input list.
+ // Each initializer (both TensorProto as well SparseTensorProto) MUST have a name.
+ // The name MUST be unique across both initializer and sparse_initializer,
+ // but the name MAY also appear in the input list.
repeated TensorProto initializer = 5;
+ // Initializers (see above) stored in sparse format.
+ repeated SparseTensorProto sparse_initializer = 15;
+
// A human-readable documentation for this graph. Markdown is allowed.
optional string doc_string = 10;
@@ -265,13 +467,14 @@ message GraphProto {
// must be distinct. It is optional for a value to appear in value_info list.
repeated ValueInfoProto value_info = 13;
- // DO NOT USE the following fields, they were deprecated from earlier versions.
- // repeated string input = 3;
- // repeated string output = 4;
- // optional int64 ir_version = 6;
- // optional int64 producer_version = 7;
- // optional string producer_tag = 8;
- // optional string domain = 9;
+ // This field carries information to indicate the mapping among a tensor and its
+ // quantization parameter tensors. For example:
+ // For tensor 'a', it may have {'SCALE_TENSOR', 'a_scale'} and {'ZERO_POINT_TENSOR', 'a_zero_point'} annotated,
+ // which means, tensor 'a_scale' and tensor 'a_zero_point' are scale and zero point of tensor 'a' in the model.
+ repeated TensorAnnotation quantization_annotation = 14;
+
+ reserved 3, 4, 6 to 9;
+ reserved "ir_version", "producer_version", "producer_tag", "domain";
}
// Tensors
@@ -291,13 +494,32 @@ message TensorProto {
STRING = 8; // string
BOOL = 9; // bool
- // Advanced types
+ // IEEE754 half-precision floating-point format (16 bits wide).
+ // This format has 1 sign bit, 5 exponent bits, and 10 mantissa bits.
FLOAT16 = 10;
+
DOUBLE = 11;
UINT32 = 12;
UINT64 = 13;
COMPLEX64 = 14; // complex with float32 real and imaginary components
COMPLEX128 = 15; // complex with float64 real and imaginary components
+
+ // Non-IEEE floating-point format based on IEEE754 single-precision
+ // floating-point number truncated to 16 bits.
+ // This format has 1 sign bit, 8 exponent bits, and 7 mantissa bits.
+ BFLOAT16 = 16;
+
+ // Non-IEEE floating-point format based on papers
+ // FP8 Formats for Deep Learning, https://arxiv.org/abs/2209.05433,
+ // 8-bit Numerical Formats For Deep Neural Networks, https://arxiv.org/pdf/2206.02915.pdf.
+ // Operators supported FP8 are Cast, CastLike, QuantizeLinear, DequantizeLinear.
+ // The computation usually happens inside a block quantize / dequantize
+ // fused by the runtime.
+ FLOAT8E4M3FN = 17; // float 8, mostly used for coefficients, supports nan, not inf
+ FLOAT8E4M3FNUZ = 18; // float 8, mostly used for coefficients, supports nan, not inf, no negative zero
+ FLOAT8E5M2 = 19; // follows IEEE 754, supports nan, inf, mostly used for gradients
+ FLOAT8E5M2FNUZ = 20; // follows IEEE 754, supports nan, inf, mostly used for gradients, no negative zero
+
// Future extensions go here.
}
@@ -305,7 +527,8 @@ message TensorProto {
repeated int64 dims = 1;
// The data type of the tensor.
- optional DataType data_type = 2;
+ // This field MUST have a valid TensorProto.DataType value
+ optional int32 data_type = 2;
// For very large tensors, we may want to store them in chunks, in which
// case the following fields will specify the segment that is stored in
@@ -324,17 +547,17 @@ message TensorProto {
// For float and complex64 values
// Complex64 tensors are encoded as a single array of floats,
// with the real components appearing in odd numbered positions,
- // and the corresponding imaginary component apparing in the
+ // and the corresponding imaginary component appearing in the
// subsequent even numbered position. (e.g., [1.0 + 2.0i, 3.0 + 4.0i]
// is encoded as [1.0, 2.0 ,3.0 ,4.0]
// When this field is present, the data_type field MUST be FLOAT or COMPLEX64.
repeated float float_data = 4 [packed = true];
- // For int32, uint8, int8, uint16, int16, bool, and float16 values
- // float16 values must be bit-wise converted to an uint16_t prior
+ // For int32, uint8, int8, uint16, int16, bool, float8, and float16 values
+ // float16 and float8 values must be bit-wise converted to an uint16_t prior
// to writing to the buffer.
// When this field is present, the data_type field MUST be
- // INT32, INT16, INT8, UINT16, INT8, BOOL, or FLOAT32
+ // INT32, INT16, INT8, UINT16, UINT8, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ
repeated int32 int32_data = 5 [packed = true];
// For strings.
@@ -371,10 +594,32 @@ message TensorProto {
// When this field is present, the data_type field MUST NOT be STRING or UNDEFINED
optional bytes raw_data = 9;
+ // Data can be stored inside the protobuf file using type-specific fields or raw_data.
+ // Alternatively, raw bytes data can be stored in an external file, using the external_data field.
+ // external_data stores key-value pairs describing data location. Recognized keys are:
+ // - "location" (required) - POSIX filesystem path relative to the directory where the ONNX
+ // protobuf model was stored
+ // - "offset" (optional) - position of byte at which stored data begins. Integer stored as string.
+ // Offset values SHOULD be multiples 4096 (page size) to enable mmap support.
+ // - "length" (optional) - number of bytes containing data. Integer stored as string.
+ // - "checksum" (optional) - SHA1 digest of file specified in under 'location' key.
+ repeated StringStringEntryProto external_data = 13;
+
+ // Location of the data for this tensor. MUST be one of:
+ // - DEFAULT - data stored inside the protobuf message. Data is stored in raw_data (if set) otherwise in type-specified field.
+ // - EXTERNAL - data stored in an external location as described by external_data field.
+ enum DataLocation {
+ DEFAULT = 0;
+ EXTERNAL = 1;
+ }
+
+ // If value not set, data is stored in raw_data (if set) otherwise in type-specified field.
+ optional DataLocation data_location = 14;
+
// For double
- // Complex64 tensors are encoded as a single array of doubles,
+ // Complex128 tensors are encoded as a single array of doubles,
// with the real components appearing in odd numbered positions,
- // and the corresponding imaginary component apparing in the
+ // and the corresponding imaginary component appearing in the
// subsequent even numbered position. (e.g., [1.0 + 2.0i, 3.0 + 4.0i]
// is encoded as [1.0, 2.0 ,3.0 ,4.0]
// When this field is present, the data_type field MUST be DOUBLE or COMPLEX128
@@ -386,6 +631,30 @@ message TensorProto {
repeated uint64 uint64_data = 11 [packed = true];
}
+// A serialized sparse-tensor value
+message SparseTensorProto {
+ // The sequence of non-default values are encoded as a tensor of shape [NNZ].
+ // The default-value is zero for numeric tensors, and empty-string for string tensors.
+ // values must have a non-empty name present which serves as a name for SparseTensorProto
+ // when used in sparse_initializer list.
+ optional TensorProto values = 1;
+
+ // The indices of the non-default values, which may be stored in one of two formats.
+ // (a) Indices can be a tensor of shape [NNZ, rank] with the [i,j]-th value
+ // corresponding to the j-th index of the i-th value (in the values tensor).
+ // (b) Indices can be a tensor of shape [NNZ], in which case the i-th value
+ // must be the linearized-index of the i-th value (in the values tensor).
+ // The linearized-index can be converted into an index tuple (k_1,...,k_rank)
+ // using the shape provided below.
+ // The indices must appear in ascending order without duplication.
+ // In the first format, the ordering is lexicographic-ordering:
+ // e.g., index-value [1,4] must appear before [2,1]
+ optional TensorProto indices = 2;
+
+ // The shape of the underlying dense-tensor: [dim_1, dim_2, ... dim_rank]
+ repeated int64 dims = 3;
+}
+
// Defines a tensor shape. A dimension can be either an integer value
// or a symbolic variable. A symbolic variable represents an unknown
// dimension.
@@ -398,36 +667,13 @@ message TensorShapeProto {
// Standard denotation can optionally be used to denote tensor
// dimensions with standard semantic descriptions to ensure
// that operations are applied to the correct axis of a tensor.
+ // Refer to https://github.com/onnx/onnx/blob/main/docs/DimensionDenotation.md#denotation-definition
+ // for pre-defined dimension denotations.
optional string denotation = 3;
};
repeated Dimension dim = 1;
}
-// A set of pre-defined constants to be used as values for
-// the standard denotation field in TensorShapeProto.Dimension
-// for semantic description of the tensor dimension.
-message DenotationConstProto {
- // Describe a batch number dimension.
- optional string DATA_BATCH = 1 [default = "DATA_BATCH"];
- // Describe a channel dimension.
- optional string DATA_CHANNEL = 2 [default = "DATA_CHANNEL"];
- // Describe a time dimension.
- optional string DATA_TIME = 3 [default = "DATA_TIME"];
- // Describe a feature dimension. This is typically a feature
- // dimension in RNN and/or spatial dimension in CNN.
- optional string DATA_FEATURE = 4 [default = "DATA_FEATURE"];
- // Describe a filter in-channel dimension. This is the dimension
- // that is identical (in size) to the channel dimension of the input
- // image feature maps.
- optional string FILTER_IN_CHANNEL = 5 [default = "FILTER_IN_CHANNEL"];
- // Describe a filter out channel dimension. This is the dimension
- // that is identical (int size) to the channel dimension of the output
- // image feature maps.
- optional string FILTER_OUT_CHANNEL = 6 [default = "FILTER_OUT_CHANNEL"];
- // Describe a filter spatial dimension.
- optional string FILTER_SPATIAL = 7 [default = "FILTER_SPATIAL"];
-}
-
// Types
//
// The standard ONNX data types.
@@ -435,8 +681,43 @@ message TypeProto {
message Tensor {
// This field MUST NOT have the value of UNDEFINED
+ // This field MUST have a valid TensorProto.DataType value
+ // This field MUST be present for this version of the IR.
+ optional int32 elem_type = 1;
+ optional TensorShapeProto shape = 2;
+ }
+
+ // repeated T
+ message Sequence {
+ // The type and optional shape of each element of the sequence.
+ // This field MUST be present for this version of the IR.
+ optional TypeProto elem_type = 1;
+ };
+
+ // map<K,V>
+ message Map {
+ // This field MUST have a valid TensorProto.DataType value
+ // This field MUST be present for this version of the IR.
+ // This field MUST refer to an integral type ([U]INT{8|16|32|64}) or STRING
+ optional int32 key_type = 1;
+ // This field MUST be present for this version of the IR.
+ optional TypeProto value_type = 2;
+ };
+
+ // wrapper for Tensor, Sequence, or Map
+ message Optional {
+ // The type and optional shape of the element wrapped.
+ // This field MUST be present for this version of the IR.
+ // Possible values correspond to OptionalProto.DataType enum
+ optional TypeProto elem_type = 1;
+ };
+
+
+ message SparseTensor {
+ // This field MUST NOT have the value of UNDEFINED
+ // This field MUST have a valid TensorProto.DataType value
// This field MUST be present for this version of the IR.
- optional TensorProto.DataType elem_type = 1;
+ optional int32 elem_type = 1;
optional TensorShapeProto shape = 2;
}
@@ -445,7 +726,31 @@ message TypeProto {
// The type of a tensor.
Tensor tensor_type = 1;
+ // NOTE: DNN-only implementations of ONNX MAY elect to not support non-tensor values
+ // as input and output to graphs and nodes. These types are needed to naturally
+ // support classical ML operators. DNN operators SHOULD restrict their input
+ // and output types to tensors.
+
+ // The type of a sequence.
+ Sequence sequence_type = 4;
+
+ // The type of a map.
+ Map map_type = 5;
+
+ // The type of an optional.
+ Optional optional_type = 9;
+
+
+ // Type of the sparse tensor
+ SparseTensor sparse_tensor_type = 8;
+
}
+
+ // An optional denotation can be used to denote the whole
+ // type with a standard semantic description as to what is
+ // stored inside. Refer to https://github.com/onnx/onnx/blob/main/docs/TypeDenotation.md#type-denotation-definition
+ // for pre-defined type denotations.
+ optional string denotation = 6;
}
// Operator Sets
@@ -461,4 +766,70 @@ message OperatorSetIdProto {
// The version of the operator set being identified.
// This field MUST be present in this version of the IR.
optional int64 version = 2;
-} \ No newline at end of file
+}
+
+// Operator/function status.
+enum OperatorStatus {
+ EXPERIMENTAL = 0;
+ STABLE = 1;
+}
+
+message FunctionProto {
+ // The name of the function, similar usage of op_type in OperatorProto.
+ // Combined with FunctionProto.domain, this forms the unique identity of
+ // the FunctionProto.
+ optional string name = 1;
+
+ // Deprecated since IR Version 8
+ // optional int64 since_version = 2;
+ reserved 2;
+ reserved "since_version";
+
+ // Deprecated since IR Version 8
+ // optional OperatorStatus status = 3;
+ reserved 3;
+ reserved "status";
+
+ // The inputs and outputs of the function.
+ repeated string input = 4;
+ repeated string output = 5;
+
+ // The attribute parameters of the function.
+ // It is for function parameters without default values.
+ repeated string attribute = 6;
+
+ // The attribute protos of the function.
+ // It is for function attributes with default values.
+ // A function attribute shall be represented either as
+ // a string attribute or an AttributeProto, not both.
+ repeated AttributeProto attribute_proto = 11;
+
+ // The nodes in the function.
+ repeated NodeProto node = 7;
+ // A human-readable documentation for this function. Markdown is allowed.
+ optional string doc_string = 8;
+
+ // The OperatorSets this function body (graph) relies on.
+ //
+ // All nodes in the function body (graph) will bind against the operator
+ // with the same-domain/same-op_type operator with the HIGHEST version
+ // in the referenced operator sets. This means at most one version can be relied
+ // for one domain.
+ //
+ // The operator sets imported by FunctionProto should be compatible with the ones
+ // imported by ModelProto. Example, if same operator set say 'A' is imported by FunctionProto
+ // and ModelProto then versions for the operator set may be different but,
+ // the operator schema returned for op_type, domain, version combination
+ // for both the versions should be same.
+
+ repeated OperatorSetIdProto opset_import = 9;
+
+ // The domain which this function belongs to. Combined with FunctionProto.name, this forms the unique identity of
+ // the FunctionProto.
+ optional string domain = 10;
+}
+
+
+// For using protobuf-lite
+option optimize_for = LITE_RUNTIME;
+
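
One of the larger additions in the updated onnx.proto is external tensor storage (the external_data field plus the DataLocation enum). A hedged sketch of how the protobuf-generated builders could populate those fields; the tensor name, file name, offset and length are invented for illustration, while the keys "location", "offset" and "length" come from the proto comments above:

    import onnx.Onnx;

    class ExternalDataExample {

        // Hedged sketch of the new fields: tensor name, file name, offset and length
        // are invented for illustration; the keys "location", "offset" and "length"
        // are the ones documented in the proto comments above.
        static Onnx.TensorProto externallyStoredWeights() {
            return Onnx.TensorProto.newBuilder()
                    .setName("fc1.weight")
                    .setDataType(Onnx.TensorProto.DataType.FLOAT_VALUE)          // data_type is now a plain int32
                    .addDims(1024).addDims(256)
                    .setDataLocation(Onnx.TensorProto.DataLocation.EXTERNAL)
                    .addExternalData(Onnx.StringStringEntryProto.newBuilder()
                            .setKey("location").setValue("weights.bin").build()) // path relative to the model file
                    .addExternalData(Onnx.StringStringEntryProto.newBuilder()
                            .setKey("offset").setValue("0").build())
                    .addExternalData(Onnx.StringStringEntryProto.newBuilder()
                            .setKey("length").setValue("1048576").build())
                    .build();
        }
    }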
diff --git a/model-integration/src/test/java/ai/vespa/rankingexpression/importer/onnx/OnnxOperationsTestCase.java b/model-integration/src/test/java/ai/vespa/rankingexpression/importer/onnx/OnnxOperationsTestCase.java
index 3ef96cdf166..2b707c3beb3 100644
--- a/model-integration/src/test/java/ai/vespa/rankingexpression/importer/onnx/OnnxOperationsTestCase.java
+++ b/model-integration/src/test/java/ai/vespa/rankingexpression/importer/onnx/OnnxOperationsTestCase.java
@@ -775,10 +775,10 @@ public class OnnxOperationsTestCase {
Onnx.TensorProto.Builder builder = Onnx.TensorProto.newBuilder();
tensor.type().dimensions().forEach(d -> builder.addDims(d.size().get()));
if (tensor.type().valueType() == TensorType.Value.FLOAT) {
- builder.setDataType(Onnx.TensorProto.DataType.FLOAT);
+ builder.setDataType(Onnx.TensorProto.DataType.FLOAT_VALUE);
tensor.valueIterator().forEachRemaining(d -> builder.addFloatData(d.floatValue()));
} else {
- builder.setDataType(Onnx.TensorProto.DataType.DOUBLE);
+ builder.setDataType(Onnx.TensorProto.DataType.DOUBLE_VALUE);
tensor.valueIterator().forEachRemaining(builder::addDoubleData);
}
Onnx.TensorProto val = builder.build();
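
Because data_type is now declared as int32, the generated setter takes an int; that is why the test above passes DataType.FLOAT_VALUE instead of the enum constant. A small sketch of the two equivalent spellings (class and method names are illustrative, not from the repository):

    import onnx.Onnx;

    class SetDataTypeExample {

        // Illustrative only: the two calls are equivalent ways to set the int32 field.
        static void tagAsFloat(Onnx.TensorProto.Builder builder) {
            builder.setDataType(Onnx.TensorProto.DataType.FLOAT_VALUE);       // generated int constant
            builder.setDataType(Onnx.TensorProto.DataType.FLOAT.getNumber()); // same value via the enum
        }
    }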