Diffstat (limited to 'model-integration')
4 files changed, 453 insertions, 80 deletions
diff --git a/model-integration/src/main/java/ai/vespa/rankingexpression/importer/onnx/TensorConverter.java b/model-integration/src/main/java/ai/vespa/rankingexpression/importer/onnx/TensorConverter.java
index f12f60dcc8e..f690b8e8c8a 100644
--- a/model-integration/src/main/java/ai/vespa/rankingexpression/importer/onnx/TensorConverter.java
+++ b/model-integration/src/main/java/ai/vespa/rankingexpression/importer/onnx/TensorConverter.java
@@ -32,8 +32,9 @@ class TensorConverter {
     }
 
     private static Values readValuesOf(Onnx.TensorProto tensorProto) {
+        var elemType = Onnx.TensorProto.DataType.forNumber(tensorProto.getDataType());
         if (tensorProto.hasRawData()) {
-            switch (tensorProto.getDataType()) {
+            switch (elemType) {
                 case BOOL: return new RawBoolValues(tensorProto);
                 case FLOAT: return new RawFloatValues(tensorProto);
                 case DOUBLE: return new RawDoubleValues(tensorProto);
@@ -41,7 +42,7 @@ class TensorConverter {
                 case INT64: return new RawLongValues(tensorProto);
             }
         } else {
-            switch (tensorProto.getDataType()) {
+            switch (elemType) {
                 case FLOAT: return new FloatValues(tensorProto);
                 case DOUBLE: return new DoubleValues(tensorProto);
                 case INT32: return new IntValues(tensorProto);
diff --git a/model-integration/src/main/java/ai/vespa/rankingexpression/importer/onnx/TypeConverter.java b/model-integration/src/main/java/ai/vespa/rankingexpression/importer/onnx/TypeConverter.java
index 35ec1d8c54a..deac950d324 100644
--- a/model-integration/src/main/java/ai/vespa/rankingexpression/importer/onnx/TypeConverter.java
+++ b/model-integration/src/main/java/ai/vespa/rankingexpression/importer/onnx/TypeConverter.java
@@ -37,7 +37,8 @@ class TypeConverter {
     static OrderedTensorType typeFrom(Onnx.TypeProto type) {
         String dimensionPrefix = "d"; // standard naming convention: d0, d1, ...
         Onnx.TensorShapeProto shape = type.getTensorType().getShape();
-        OrderedTensorType.Builder builder = new OrderedTensorType.Builder(toValueType(type.getTensorType().getElemType()));
+        var elemType = Onnx.TensorProto.DataType.forNumber(type.getTensorType().getElemType());
+        OrderedTensorType.Builder builder = new OrderedTensorType.Builder(toValueType(elemType));
         for (int i = 0; i < shape.getDimCount(); ++ i) {
             String dimensionName = dimensionPrefix + i;
             Onnx.TensorShapeProto.Dimension onnxDimension = shape.getDim(i);
@@ -52,8 +53,8 @@ class TypeConverter {
     }
 
     static OrderedTensorType typeFrom(Onnx.TensorProto tensor) {
-        return OrderedTensorType.fromDimensionList(toValueType(tensor.getDataType()),
-                                                   tensor.getDimsList());
+        var elemType = Onnx.TensorProto.DataType.forNumber(tensor.getDataType());
+        return OrderedTensorType.fromDimensionList(toValueType(elemType), tensor.getDimsList());
     }
 
     private static TensorType.Value toValueType(Onnx.TensorProto.DataType dataType) {
diff --git a/model-integration/src/main/protobuf/onnx.proto b/model-integration/src/main/protobuf/onnx.proto
index dc6542867e0..1d265ae9f28 100644
--- a/model-integration/src/main/protobuf/onnx.proto
+++ b/model-integration/src/main/protobuf/onnx.proto
@@ -3,8 +3,8 @@
 //
 
 
-// Copyright (c) Facebook Inc. and Microsoft Corporation.
-// Licensed under the MIT license.
+// SPDX-License-Identifier: Apache-2.0
+
 
 syntax = "proto2";
 
@@ -20,23 +20,16 @@ package onnx;
 //
 // This document describes the syntax of models and their computation graphs,
 // as well as the standard data types. Together, they are referred to as the ONNX
-// Intermediate Representation, or 'IR' for short. 
+// Intermediate Representation, or 'IR' for short.
 //
 // The normative semantic specification of the ONNX IR is found in docs/IR.md.
 // Definitions of the built-in neural network operators may be found in docs/Operators.md.
 // Notes
 //
-// Release
-//
-// We are still in the very early stage of defining ONNX. The current
-// version of ONNX is a starting point. While we are actively working
-// towards a complete spec, we would like to get the community involved
-// by sharing our working version of ONNX.
-//
 // Protobuf compatibility
-// 
-// To simplify framework compatibility, ONNX is defined using the subset of protobuf 
+//
+// To simplify framework compatibility, ONNX is defined using the subset of protobuf
 // that is compatible with both protobuf v2 and v3. This means that we do not use any
 // protobuf features that are only available in one of the two versions.
 //
@@ -60,22 +53,60 @@ enum Version {
   _START_VERSION = 0;
   // The version field is always serialized and we will use it to store the
   // version that the graph is generated from. This helps us set up version
-  // control. We should use version as
-  //     xx(major) - xx(minor) - xxxx(bugfix)
-  // and we are starting with 0x00000001 (0.0.1), which was the
-  // version we published on Oct 10, 2017.
-  IR_VERSION_2017_10_10 = 0x00000001;
+  // control.
+  // For the IR, we are using simple numbers starting with 0x00000001,
+  // which was the version we published on Oct 10, 2017.
+  IR_VERSION_2017_10_10 = 0x0000000000000001;
 
-  // IR_VERSION 0.0.2 published on Oct 30, 2017
+  // IR_VERSION 2 published on Oct 30, 2017
   // - Added type discriminator to AttributeProto to support proto3 users
-  IR_VERSION_2017_10_30 = 0x00000002;
+  IR_VERSION_2017_10_30 = 0x0000000000000002;
 
-  // IR VERSION 0.0.3 published on Nov 3, 2017
+  // IR VERSION 3 published on Nov 3, 2017
   // - For operator versioning:
   //    - Added new message OperatorSetIdProto
   //    - Added opset_import in ModelProto
   // - For vendor extensions, added domain in NodeProto
-  IR_VERSION = 0x00000003;
+  IR_VERSION_2017_11_3 = 0x0000000000000003;
+
+  // IR VERSION 4 published on Jan 22, 2019
+  // - Relax constraint that initializers should be a subset of graph inputs
+  // - Add type BFLOAT16
+  IR_VERSION_2019_1_22 = 0x0000000000000004;
+
+  // IR VERSION 5 published on March 18, 2019
+  // - Add message TensorAnnotation.
+  // - Add quantization annotation in GraphProto to map tensor with its scale and zero point quantization parameters.
+  IR_VERSION_2019_3_18 = 0x0000000000000005;
+
+  // IR VERSION 6 published on Sep 19, 2019
+  // - Add support for sparse tensor constants stored in model.
+  //   - Add message SparseTensorProto
+  //   - Add sparse initializers
+  IR_VERSION_2019_9_19 = 0x0000000000000006;
+
+  // IR VERSION 7 published on May 8, 2020
+  // - Add support to allow function body graph to rely on multiple external opreator sets.
+  // - Add a list to promote inference graph's initializers to global and
+  //   mutable variables. Global variables are visible in all graphs of the
+  //   stored models.
+  // - Add message TrainingInfoProto to store initialization
+  //   method and training algorithm. The execution of TrainingInfoProto
+  //   can modify the values of mutable variables.
+  // - Implicitly add inference graph into each TrainingInfoProto's algorithm.
+  IR_VERSION_2020_5_8 = 0x0000000000000007;
+
+  // IR VERSION 8 published on July 30, 2021
+  // Introduce TypeProto.SparseTensor
+  // Introduce TypeProto.Optional
+  // Added a list of FunctionProtos local to the model
+  // Deprecated since_version and operator status from FunctionProto
+  IR_VERSION_2021_7_30 = 0x0000000000000008;
+
+  // IR VERSION 9 published on May 5, 2023
+  // Added AttributeProto to FunctionProto so that default attribute values can be set.
+  // Added FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ.
+  IR_VERSION = 0x0000000000000009;
 }
 
 // Attributes
@@ -95,17 +126,21 @@ message AttributeProto {
     STRING = 3;
     TENSOR = 4;
     GRAPH = 5;
+    SPARSE_TENSOR = 11;
+    TYPE_PROTO = 13;
 
     FLOATS = 6;
     INTS = 7;
     STRINGS = 8;
     TENSORS = 9;
     GRAPHS = 10;
+    SPARSE_TENSORS = 12;
+    TYPE_PROTOS = 14;
   }
 
   // The name field MUST be present for this version of the IR.
   optional string name = 1;           // namespace Attribute
-  
+
   // if ref_attr_name is not empty, ref_attr_name is the attribute name in parent function.
   // In this case, this AttributeProto does not contain data, and it's a reference of attribute
   // in parent scope.
@@ -117,10 +152,10 @@ message AttributeProto {
 
   // The type field MUST be present for this version of the IR.
   // For 0.0.1 versions of the IR, this field was not defined, and
-  // implementations needed to use has_field hueristics to determine
+  // implementations needed to use has_field heuristics to determine
   // which value field was in use. For IR_VERSION 0.0.2 or later, this
   // field MUST be set and match the f|i|s|t|... field in use. This
-  // change was made to accomodate proto3 implementations.
+  // change was made to accommodate proto3 implementations.
   optional AttributeType type = 20;   // discriminator that indicates which field below is in use
 
   // Exactly ONE of the following fields must be present for this version of the IR
@@ -129,14 +164,18 @@ message AttributeProto {
   optional bytes s = 4;          // UTF-8 string
   optional TensorProto t = 5;    // tensor value
   optional GraphProto g = 6;     // graph
+  optional SparseTensorProto sparse_tensor = 22;  // sparse tensor value
   // Do not use field below, it's deprecated.
   // optional ValueProto v = 12;         // value - subsumes everything but graph
+  optional TypeProto tp = 14;          // type proto
 
   repeated float floats = 7;          // list of floats
   repeated int64 ints = 8;            // list of ints
   repeated bytes strings = 9;         // list of UTF-8 strings
   repeated TensorProto tensors = 10;  // list of tensors
   repeated GraphProto graphs = 11;    // list of graph
+  repeated SparseTensorProto sparse_tensors = 23; // list of sparse tensors
+  repeated TypeProto type_protos = 15;// list of type protos
 }
 
 // Defines information on value, including the name, the type, and
@@ -144,7 +183,8 @@ message AttributeProto {
 message ValueInfoProto {
   // This field MUST be present in this version of the IR.
   optional string name = 1;     // namespace Value
-  // This field MUST be present in this version of the IR.
+  // This field MUST be present in this version of the IR for
+  // inputs and outputs of the top-level graph.
   optional TypeProto type = 2;
   // A human-readable documentation for this value. Markdown is allowed.
   optional string doc_string = 3;
@@ -155,7 +195,7 @@ message ValueInfoProto {
 
 // Computation graphs are made up of a DAG of nodes, which represent what is
 // commonly called a "layer" or "pipeline stage" in machine learning frameworks.
 //
-// For example, it can be a node of type "Conv" that takes in an image, a filter 
+// For example, it can be a node of type "Conv" that takes in an image, a filter
 // tensor and a bias tensor, and produces the convolved output.
 message NodeProto {
   repeated string input = 1;    // namespace Value
@@ -177,12 +217,130 @@ message NodeProto {
   optional string doc_string = 6;
 }
 
+// Training information
+// TrainingInfoProto stores information for training a model.
+// In particular, this defines two functionalities: an initialization-step
+// and a training-algorithm-step. Initialization resets the model
+// back to its original state as if no training has been performed.
+// Training algorithm improves the model based on input data.
+//
+// The semantics of the initialization-step is that the initializers
+// in ModelProto.graph and in TrainingInfoProto.algorithm are first
+// initialized as specified by the initializers in the graph, and then
+// updated by the "initialization_binding" in every instance in
+// ModelProto.training_info.
+//
+// The field "algorithm" defines a computation graph which represents a
+// training algorithm's step. After the execution of a
+// TrainingInfoProto.algorithm, the initializers specified by "update_binding"
+// may be immediately updated. If the targeted training algorithm contains
+// consecutive update steps (such as block coordinate descent methods),
+// the user needs to create a TrainingInfoProto for each step.
+message TrainingInfoProto {
+  // This field describes a graph to compute the initial tensors
+  // upon starting the training process. Initialization graph has no input
+  // and can have multiple outputs. Usually, trainable tensors in neural
+  // networks are randomly initialized. To achieve that, for each tensor,
+  // the user can put a random number operator such as RandomNormal or
+  // RandomUniform in TrainingInfoProto.initialization.node and assign its
+  // random output to the specific tensor using "initialization_binding".
+  // This graph can also set the initializers in "algorithm" in the same
+  // TrainingInfoProto; a use case is resetting the number of training
+  // iteration to zero.
+  //
+  // By default, this field is an empty graph and its evaluation does not
+  // produce any output. Thus, no initializer would be changed by default.
+  optional GraphProto initialization = 1;
+
+  // This field represents a training algorithm step. Given required inputs,
+  // it computes outputs to update initializers in its own or inference graph's
+  // initializer lists. In general, this field contains loss node, gradient node,
+  // optimizer node, increment of iteration count.
+  //
+  // An execution of the training algorithm step is performed by executing the
+  // graph obtained by combining the inference graph (namely "ModelProto.graph")
+  // and the "algorithm" graph. That is, the actual
+  // input/initializer/output/node/value_info/sparse_initializer list of
+  // the training graph is the concatenation of
+  // "ModelProto.graph.input/initializer/output/node/value_info/sparse_initializer"
+  // and "algorithm.input/initializer/output/node/value_info/sparse_initializer"
+  // in that order. This combined graph must satisfy the normal ONNX conditions.
+  // Now, let's provide a visualization of graph combination for clarity.
+  // Let the inference graph (i.e., "ModelProto.graph") be
+  //   tensor_a, tensor_b -> MatMul -> tensor_c -> Sigmoid -> tensor_d
+  // and the "algorithm" graph be
+  //   tensor_d -> Add -> tensor_e
+  // The combination process results
+  //   tensor_a, tensor_b -> MatMul -> tensor_c -> Sigmoid -> tensor_d -> Add -> tensor_e
+  //
+  // Notice that an input of a node in the "algorithm" graph may reference the
+  // output of a node in the inference graph (but not the other way round). Also, inference
+  // node cannot reference inputs of "algorithm". With these restrictions, inference graph
+  // can always be run independently without training information.
+  //
+  // By default, this field is an empty graph and its evaluation does not
+  // produce any output. Evaluating the default training step never
+  // update any initializers.
+  optional GraphProto algorithm = 2;
+
+  // This field specifies the bindings from the outputs of "initialization" to
+  // some initializers in "ModelProto.graph.initializer" and
+  // the "algorithm.initializer" in the same TrainingInfoProto.
+  // See "update_binding" below for details.
+  //
+  // By default, this field is empty and no initializer would be changed
+  // by the execution of "initialization".
+  repeated StringStringEntryProto initialization_binding = 3;
+
+  // Gradient-based training is usually an iterative procedure. In one gradient
+  // descent iteration, we apply
+  //
+  //   x = x - r * g
+  //
+  // where "x" is the optimized tensor, "r" stands for learning rate, and "g" is
+  // gradient of "x" with respect to a chosen loss. To avoid adding assignments
+  // into the training graph, we split the update equation into
+  //
+  //   y = x - r * g
+  //   x = y
+  //
+  // The user needs to save "y = x - r * g" into TrainingInfoProto.algorithm. To
+  // tell that "y" should be assigned to "x", the field "update_binding" may
+  // contain a key-value pair of strings, "x" (key of StringStringEntryProto)
+  // and "y" (value of StringStringEntryProto).
+  // For a neural network with multiple trainable (mutable) tensors, there can
+  // be multiple key-value pairs in "update_binding".
+  //
+  // The initializers appears as keys in "update_binding" are considered
+  // mutable variables. This implies some behaviors
+  // as described below.
+  //
+  // 1. We have only unique keys in all "update_binding"s so that two
+  //    variables may not have the same name. This ensures that one
+  //    variable is assigned up to once.
+  // 2. The keys must appear in names of "ModelProto.graph.initializer" or
+  //    "TrainingInfoProto.algorithm.initializer".
+  // 3. The values must be output names of "algorithm" or "ModelProto.graph.output".
+  // 4. Mutable variables are initialized to the value specified by the
+  //    corresponding initializer, and then potentially updated by
+  //    "initializer_binding"s and "update_binding"s in "TrainingInfoProto"s.
+  //
+  // This field usually contains names of trainable tensors
+  // (in ModelProto.graph), optimizer states such as momentums in advanced
+  // stochastic gradient methods (in TrainingInfoProto.graph),
+  // and number of training iterations (in TrainingInfoProto.graph).
+  //
+  // By default, this field is empty and no initializer would be changed
+  // by the execution of "algorithm".
+  repeated StringStringEntryProto update_binding = 4;
+}
+
 // Models
 //
 // ModelProto is a top-level file/container format for bundling a ML model and
 // associating its computation graph with metadata.
 //
-// The semantics of the model are described by the associated GraphProto.
+// The semantics of the model are described by the associated GraphProto's.
 message ModelProto {
   // The version of the IR this model targets. See Version enum above.
   // This field MUST be present.
@@ -227,18 +385,58 @@ message ModelProto {
 
   // Named metadata values; keys should be distinct.
   repeated StringStringEntryProto metadata_props = 14;
+
+  // Training-specific information. Sequentially executing all stored
+  // `TrainingInfoProto.algorithm`s and assigning their outputs following
+  // the corresponding `TrainingInfoProto.update_binding`s is one training
+  // iteration. Similarly, to initialize the model
+  // (as if training hasn't happened), the user should sequentially execute
+  // all stored `TrainingInfoProto.initialization`s and assigns their outputs
+  // using `TrainingInfoProto.initialization_binding`s.
+  //
+  // If this field is empty, the training behavior of the model is undefined.
+  repeated TrainingInfoProto training_info = 20;
+
+  // A list of function protos local to the model.
+  //
+  // Name of the function "FunctionProto.name" should be unique within the domain "FunctionProto.domain".
+  // In case of any conflicts the behavior (whether the model local functions are given higher priority,
+  // or standard operator sets are given higher priotity or this is treated as error) is defined by
+  // the runtimes.
+  //
+  // The operator sets imported by FunctionProto should be compatible with the ones
+  // imported by ModelProto and other model local FunctionProtos.
+  // Example, if same operator set say 'A' is imported by a FunctionProto and ModelProto
+  // or by 2 FunctionProtos then versions for the operator set may be different but,
+  // the operator schema returned for op_type, domain, version combination
+  // for both the versions should be same for every node in the function body.
+  //
+  // One FunctionProto can reference other FunctionProto in the model, however, recursive reference
+  // is not allowed.
+  repeated FunctionProto functions = 25;
 };
 
 // StringStringEntryProto follows the pattern for cross-proto-version maps.
 // See https://developers.google.com/protocol-buffers/docs/proto3#maps
 message StringStringEntryProto {
   optional string key = 1;
-  optional string value= 2;
+  optional string value = 2;
 };
 
+message TensorAnnotation {
+  optional string tensor_name = 1;
+  // <key, value> pairs to annotate tensor specified by <tensor_name> above.
+  // The keys used in the mapping below must be pre-defined in ONNX spec.
+  // For example, for 8-bit linear quantization case, 'SCALE_TENSOR', 'ZERO_POINT_TENSOR' will be pre-defined as
+  // quantization parameter keys.
+  repeated StringStringEntryProto quant_parameter_tensor_names = 2;
+}
+
+
+
 // Graphs
 //
-// A graph defines the computational logic of a model and is comprised of a parameterized 
+// A graph defines the computational logic of a model and is comprised of a parameterized
 // list of nodes that form a directed acyclic graph based on their inputs and outputs.
 // This is the equivalent of the "network" or "graph" in many deep learning
 // frameworks.
@@ -250,10 +448,14 @@ message GraphProto {
   optional string name = 2;   // namespace Graph
 
   // A list of named tensor values, used to specify constant inputs of the graph.
-  // Each TensorProto entry must have a distinct name (within the list) that
-  // also appears in the input list.
+  // Each initializer (both TensorProto as well SparseTensorProto) MUST have a name.
+  // The name MUST be unique across both initializer and sparse_initializer,
+  // but the name MAY also appear in the input list.
   repeated TensorProto initializer = 5;
 
+  // Initializers (see above) stored in sparse format.
+  repeated SparseTensorProto sparse_initializer = 15;
+
   // A human-readable documentation for this graph. Markdown is allowed.
   optional string doc_string = 10;
 
@@ -265,13 +467,14 @@ message GraphProto {
   // must be distinct. It is optional for a value to appear in value_info list.
   repeated ValueInfoProto value_info = 13;
 
-  // DO NOT USE the following fields, they were deprecated from earlier versions.
-  // repeated string input = 3;
-  // repeated string output = 4;
-  // optional int64 ir_version = 6;
-  // optional int64 producer_version = 7;
-  // optional string producer_tag = 8;
-  // optional string domain = 9;
+  // This field carries information to indicate the mapping among a tensor and its
+  // quantization parameter tensors. For example:
+  // For tensor 'a', it may have {'SCALE_TENSOR', 'a_scale'} and {'ZERO_POINT_TENSOR', 'a_zero_point'} annotated,
+  // which means, tensor 'a_scale' and tensor 'a_zero_point' are scale and zero point of tensor 'a' in the model.
+  repeated TensorAnnotation quantization_annotation = 14;
+
+  reserved 3, 4, 6 to 9;
+  reserved "ir_version", "producer_version", "producer_tag", "domain";
 }
 
 // Tensors
@@ -291,13 +494,32 @@ message TensorProto {
     STRING = 8;   // string
     BOOL = 9;     // bool
 
-    // Advanced types
+    // IEEE754 half-precision floating-point format (16 bits wide).
+    // This format has 1 sign bit, 5 exponent bits, and 10 mantissa bits.
     FLOAT16 = 10;
+
     DOUBLE = 11;
     UINT32 = 12;
     UINT64 = 13;
     COMPLEX64 = 14;     // complex with float32 real and imaginary components
     COMPLEX128 = 15;    // complex with float64 real and imaginary components
+
+    // Non-IEEE floating-point format based on IEEE754 single-precision
+    // floating-point number truncated to 16 bits.
+    // This format has 1 sign bit, 8 exponent bits, and 7 mantissa bits.
+    BFLOAT16 = 16;
+
+    // Non-IEEE floating-point format based on papers
+    // FP8 Formats for Deep Learning, https://arxiv.org/abs/2209.05433,
+    // 8-bit Numerical Formats For Deep Neural Networks, https://arxiv.org/pdf/2206.02915.pdf.
+    // Operators supported FP8 are Cast, CastLike, QuantizeLinear, DequantizeLinear.
+    // The computation usually happens inside a block quantize / dequantize
+    // fused by the runtime.
+    FLOAT8E4M3FN = 17;    // float 8, mostly used for coefficients, supports nan, not inf
+    FLOAT8E4M3FNUZ = 18;  // float 8, mostly used for coefficients, supports nan, not inf, no negative zero
+    FLOAT8E5M2 = 19;      // follows IEEE 754, supports nan, inf, mostly used for gradients
+    FLOAT8E5M2FNUZ = 20;  // follows IEEE 754, supports nan, inf, mostly used for gradients, no negative zero
+
     // Future extensions go here.
   }
 
@@ -305,7 +527,8 @@ message TensorProto {
   repeated int64 dims = 1;
 
   // The data type of the tensor.
-  optional DataType data_type = 2;
+  // This field MUST have a valid TensorProto.DataType value
+  optional int32 data_type = 2;
 
   // For very large tensors, we may want to store them in chunks, in which
   // case the following fields will specify the segment that is stored in
@@ -324,17 +547,17 @@
   // For float and complex64 values
   //   Complex64 tensors are encoded as a single array of floats,
   //   with the real components appearing in odd numbered positions,
-  //   and the corresponding imaginary component apparing in the
+  //   and the corresponding imaginary component appearing in the
   //   subsequent even numbered position. (e.g., [1.0 + 2.0i, 3.0 + 4.0i]
   //   is encoded as [1.0, 2.0 ,3.0 ,4.0]
   //   When this field is present, the data_type field MUST be FLOAT or COMPLEX64.
   repeated float float_data = 4 [packed = true];
 
-  // For int32, uint8, int8, uint16, int16, bool, and float16 values
-  // float16 values must be bit-wise converted to an uint16_t prior
+  // For int32, uint8, int8, uint16, int16, bool, float8, and float16 values
+  // float16 and float8 values must be bit-wise converted to an uint16_t prior
   // to writing to the buffer.
   // When this field is present, the data_type field MUST be
-  // INT32, INT16, INT8, UINT16, INT8, BOOL, or FLOAT32
+  // INT32, INT16, INT8, UINT16, UINT8, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ
   repeated int32 int32_data = 5 [packed = true];
 
   // For strings.
@@ -371,10 +594,32 @@
   // When this field is present, the data_type field MUST NOT be STRING or UNDEFINED
   optional bytes raw_data = 9;
 
+  // Data can be stored inside the protobuf file using type-specific fields or raw_data.
+  // Alternatively, raw bytes data can be stored in an external file, using the external_data field.
+  // external_data stores key-value pairs describing data location. Recognized keys are:
+  // - "location" (required) - POSIX filesystem path relative to the directory where the ONNX
+  //                           protobuf model was stored
+  // - "offset" (optional) - position of byte at which stored data begins. Integer stored as string.
+  //                         Offset values SHOULD be multiples 4096 (page size) to enable mmap support.
+  // - "length" (optional) - number of bytes containing data. Integer stored as string.
+  // - "checksum" (optional) - SHA1 digest of file specified in under 'location' key.
+  repeated StringStringEntryProto external_data = 13;
+
+  // Location of the data for this tensor. MUST be one of:
+  // - DEFAULT - data stored inside the protobuf message. Data is stored in raw_data (if set) otherwise in type-specified field.
+  // - EXTERNAL - data stored in an external location as described by external_data field.
+  enum DataLocation {
+    DEFAULT = 0;
+    EXTERNAL = 1;
+  }
+
+  // If value not set, data is stored in raw_data (if set) otherwise in type-specified field.
+  optional DataLocation data_location = 14;
+
   // For double
-  //   Complex64 tensors are encoded as a single array of doubles,
+  //   Complex128 tensors are encoded as a single array of doubles,
   //   with the real components appearing in odd numbered positions,
-  //   and the corresponding imaginary component apparing in the
+  //   and the corresponding imaginary component appearing in the
   //   subsequent even numbered position. (e.g., [1.0 + 2.0i, 3.0 + 4.0i]
   //   is encoded as [1.0, 2.0 ,3.0 ,4.0]
   //   When this field is present, the data_type field MUST be DOUBLE or COMPLEX128
@@ -386,6 +631,30 @@ message TensorProto {
   repeated uint64 uint64_data = 11 [packed = true];
 }
 
+// A serialized sparse-tensor value
+message SparseTensorProto {
+  // The sequence of non-default values are encoded as a tensor of shape [NNZ].
+  // The default-value is zero for numeric tensors, and empty-string for string tensors.
+  // values must have a non-empty name present which serves as a name for SparseTensorProto
+  // when used in sparse_initializer list.
+  optional TensorProto values = 1;
+
+  // The indices of the non-default values, which may be stored in one of two formats.
+  // (a) Indices can be a tensor of shape [NNZ, rank] with the [i,j]-th value
+  //     corresponding to the j-th index of the i-th value (in the values tensor).
+  // (b) Indices can be a tensor of shape [NNZ], in which case the i-th value
+  //     must be the linearized-index of the i-th value (in the values tensor).
+  //     The linearized-index can be converted into an index tuple (k_1,...,k_rank)
+  //     using the shape provided below.
+  // The indices must appear in ascending order without duplication.
+  // In the first format, the ordering is lexicographic-ordering:
+  // e.g., index-value [1,4] must appear before [2,1]
+  optional TensorProto indices = 2;
+
+  // The shape of the underlying dense-tensor: [dim_1, dim_2, ... dim_rank]
+  repeated int64 dims = 3;
+}
+
 // Defines a tensor shape. A dimension can be either an integer value
 // or a symbolic variable. A symbolic variable represents an unknown
 // dimension.
@@ -398,36 +667,13 @@ message TensorShapeProto {
     // Standard denotation can optionally be used to denote tensor
     // dimensions with standard semantic descriptions to ensure
     // that operations are applied to the correct axis of a tensor.
+    // Refer to https://github.com/onnx/onnx/blob/main/docs/DimensionDenotation.md#denotation-definition
+    // for pre-defined dimension denotations.
     optional string denotation = 3;
   };
   repeated Dimension dim = 1;
 }
 
-// A set of pre-defined constants to be used as values for
-// the standard denotation field in TensorShapeProto.Dimension
-// for semantic description of the tensor dimension.
-message DenotationConstProto {
-  // Describe a batch number dimension.
-  optional string DATA_BATCH = 1 [default = "DATA_BATCH"];
-  // Describe a channel dimension.
-  optional string DATA_CHANNEL = 2 [default = "DATA_CHANNEL"];
-  // Describe a time dimension.
-  optional string DATA_TIME = 3 [default = "DATA_TIME"];
-  // Describe a feature dimension. This is typically a feature
-  // dimension in RNN and/or spatial dimension in CNN.
-  optional string DATA_FEATURE = 4 [default = "DATA_FEATURE"];
-  // Describe a filter in-channel dimension. This is the dimension
-  // that is identical (in size) to the channel dimension of the input
-  // image feature maps.
-  optional string FILTER_IN_CHANNEL = 5 [default = "FILTER_IN_CHANNEL"];
-  // Describe a filter out channel dimension. This is the dimension
-  // that is identical (int size) to the channel dimension of the output
-  // image feature maps.
-  optional string FILTER_OUT_CHANNEL = 6 [default = "FILTER_OUT_CHANNEL"];
-  // Describe a filter spatial dimension.
-  optional string FILTER_SPATIAL = 7 [default = "FILTER_SPATIAL"];
-}
-
 // Types
 //
 // The standard ONNX data types.
@@ -435,8 +681,43 @@ message TypeProto {
 
   message Tensor {
     // This field MUST NOT have the value of UNDEFINED
+    // This field MUST have a valid TensorProto.DataType value
+    // This field MUST be present for this version of the IR.
+    optional int32 elem_type = 1;
+    optional TensorShapeProto shape = 2;
+  }
+
+  // repeated T
+  message Sequence {
+    // The type and optional shape of each element of the sequence.
+    // This field MUST be present for this version of the IR.
+    optional TypeProto elem_type = 1;
+  };
+
+  // map<K,V>
+  message Map {
+    // This field MUST have a valid TensorProto.DataType value
+    // This field MUST be present for this version of the IR.
+    // This field MUST refer to an integral type ([U]INT{8|16|32|64}) or STRING
+    optional int32 key_type = 1;
+    // This field MUST be present for this version of the IR.
+    optional TypeProto value_type = 2;
+  };
+
+  // wrapper for Tensor, Sequence, or Map
+  message Optional {
+    // The type and optional shape of the element wrapped.
+    // This field MUST be present for this version of the IR.
+    // Possible values correspond to OptionalProto.DataType enum
+    optional TypeProto elem_type = 1;
+  };
+
+
+  message SparseTensor {
+    // This field MUST NOT have the value of UNDEFINED
+    // This field MUST have a valid TensorProto.DataType value
     // This field MUST be present for this version of the IR.
-    optional TensorProto.DataType elem_type = 1;
+    optional int32 elem_type = 1;
     optional TensorShapeProto shape = 2;
   }
 
@@ -445,7 +726,31 @@ message TypeProto {
     // The type of a tensor.
     Tensor tensor_type = 1;
+    // NOTE: DNN-only implementations of ONNX MAY elect to not support non-tensor values
+    //       as input and output to graphs and nodes. These types are needed to naturally
+    //       support classical ML operators. DNN operators SHOULD restrict their input
+    //       and output types to tensors.
+
+    // The type of a sequence.
+    Sequence sequence_type = 4;
+
+    // The type of a map.
+    Map map_type = 5;
+
+    // The type of an optional.
+    Optional optional_type = 9;
+
+
+    // Type of the sparse tensor
+    SparseTensor sparse_tensor_type = 8;
+  }
+
+  // An optional denotation can be used to denote the whole
+  // type with a standard semantic description as to what is
+  // stored inside. Refer to https://github.com/onnx/onnx/blob/main/docs/TypeDenotation.md#type-denotation-definition
+  // for pre-defined type denotations.
+  optional string denotation = 6;
 }
 
 // Operator Sets
@@ -461,4 +766,70 @@ message OperatorSetIdProto {
   // The version of the operator set being identified.
   // This field MUST be present in this version of the IR.
   optional int64 version = 2;
-}
\ No newline at end of file
+}
+
+// Operator/function status.
+enum OperatorStatus {
+  EXPERIMENTAL = 0;
+  STABLE = 1;
+}
+
+message FunctionProto {
+  // The name of the function, similar usage of op_type in OperatorProto.
+  // Combined with FunctionProto.domain, this forms the unique identity of
+  // the FunctionProto.
+  optional string name = 1;
+
+  // Deprecated since IR Version 8
+  // optional int64 since_version = 2;
+  reserved 2;
+  reserved "since_version";
+
+  // Deprecated since IR Version 8
+  // optional OperatorStatus status = 3;
+  reserved 3;
+  reserved "status";
+
+  // The inputs and outputs of the function.
+  repeated string input = 4;
+  repeated string output = 5;
+
+  // The attribute parameters of the function.
+  // It is for function parameters without default values.
+  repeated string attribute = 6;
+
+  // The attribute protos of the function.
+  // It is for function attributes with default values.
+  // A function attribute shall be represented either as
+  // a string attribute or an AttributeProto, not both.
+  repeated AttributeProto attribute_proto = 11;
+
+  // The nodes in the function.
+  repeated NodeProto node = 7;
+  // A human-readable documentation for this function. Markdown is allowed.
+  optional string doc_string = 8;
+
+  // The OperatorSets this function body (graph) relies on.
+  //
+  // All nodes in the function body (graph) will bind against the operator
+  // with the same-domain/same-op_type operator with the HIGHEST version
+  // in the referenced operator sets. This means at most one version can be relied
+  // for one domain.
+  //
+  // The operator sets imported by FunctionProto should be compatible with the ones
+  // imported by ModelProto. Example, if same operator set say 'A' is imported by FunctionProto
+  // and ModelProto then versions for the operator set may be different but,
+  // the operator schema returned for op_type, domain, version combination
+  // for both the versions should be same.
+
+  repeated OperatorSetIdProto opset_import = 9;
+
+  // The domain which this function belongs to. Combined with FunctionProto.name, this forms the unique identity of
+  // the FunctionProto.
+  optional string domain = 10;
+}
+
+
+// For using protobuf-lite
+option optimize_for = LITE_RUNTIME;
+
diff --git a/model-integration/src/test/java/ai/vespa/rankingexpression/importer/onnx/OnnxOperationsTestCase.java b/model-integration/src/test/java/ai/vespa/rankingexpression/importer/onnx/OnnxOperationsTestCase.java
index 3ef96cdf166..2b707c3beb3 100644
--- a/model-integration/src/test/java/ai/vespa/rankingexpression/importer/onnx/OnnxOperationsTestCase.java
+++ b/model-integration/src/test/java/ai/vespa/rankingexpression/importer/onnx/OnnxOperationsTestCase.java
@@ -775,10 +775,10 @@ public class OnnxOperationsTestCase {
         Onnx.TensorProto.Builder builder = Onnx.TensorProto.newBuilder();
         tensor.type().dimensions().forEach(d -> builder.addDims(d.size().get()));
         if (tensor.type().valueType() == TensorType.Value.FLOAT) {
-            builder.setDataType(Onnx.TensorProto.DataType.FLOAT);
+            builder.setDataType(Onnx.TensorProto.DataType.FLOAT_VALUE);
            tensor.valueIterator().forEachRemaining(d -> builder.addFloatData(d.floatValue()));
         } else {
-            builder.setDataType(Onnx.TensorProto.DataType.DOUBLE);
+            builder.setDataType(Onnx.TensorProto.DataType.DOUBLE_VALUE);
             tensor.valueIterator().forEachRemaining(builder::addDoubleData);
         }
         Onnx.TensorProto val = builder.build();
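Note on the generated Java API (not part of the patch above, just a sketch for orientation): because the updated onnx.proto declares TensorProto.data_type and TypeProto.Tensor.elem_type as plain int32 fields rather than enum-typed fields, the generated getters return an int. Readers recover the enum with Onnx.TensorProto.DataType.forNumber(...), as the updated TensorConverter and TypeConverter do, and writers pass the generated *_VALUE constants, as the updated test does. The class and method names below (DataTypeExample, valueTypeOf, floatScalar) are illustrative only; the real type mapping in this module lives in TypeConverter.toValueType.

import onnx.Onnx;                    // Java classes generated from onnx.proto
import com.yahoo.tensor.TensorType;  // Vespa tensor type, as used by the importer

class DataTypeExample {

    // Reading: data_type is an int32 in the proto, so the getter returns an int
    // and the enum must be recovered explicitly before switching on it.
    static TensorType.Value valueTypeOf(Onnx.TensorProto tensorProto) {
        var elemType = Onnx.TensorProto.DataType.forNumber(tensorProto.getDataType());
        if (elemType == null)  // forNumber returns null for unknown numeric values
            throw new IllegalArgumentException("Unknown data_type: " + tensorProto.getDataType());
        switch (elemType) {
            case FLOAT:  return TensorType.Value.FLOAT;
            case DOUBLE: return TensorType.Value.DOUBLE;
            default: throw new IllegalArgumentException("Unsupported element type: " + elemType);
        }
    }

    // Writing: the generated setter takes an int, so use the *_VALUE constants
    // (or DataType.FLOAT.getNumber()) instead of the enum itself.
    static Onnx.TensorProto floatScalar(float value) {
        return Onnx.TensorProto.newBuilder()
                               .setDataType(Onnx.TensorProto.DataType.FLOAT_VALUE)
                               .addFloatData(value)
                               .build();
    }

}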