summaryrefslogtreecommitdiffstats
path: root/config-model/src
diff options
context:
space:
mode:
Diffstat (limited to 'config-model/src')
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RedundancyIncreaseValidator.java2
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/application/validation/first/RedundancyValidator.java4
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomSearchTuningBuilder.java2
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/container/Container.java1
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/content/ContentSearchCluster.java19
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/content/DistributorCluster.java15
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/content/StorageGroup.java18
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java5
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/content/cluster/RedundancyBuilder.java34
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/content/storagecluster/StorageCluster.java25
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/ml/OnnxModelInfo.java3
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/search/Tuning.java4
-rw-r--r--config-model/src/main/protobuf/onnx.proto517
-rw-r--r--config-model/src/main/resources/schema/content.rnc3
-rw-r--r--config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java36
-rw-r--r--config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/DomSchemaTuningBuilderTest.java6
-rw-r--r--config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java119
17 files changed, 648 insertions, 165 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RedundancyIncreaseValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RedundancyIncreaseValidator.java
index 82ad8e5d6e8..47024c1171c 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RedundancyIncreaseValidator.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/RedundancyIncreaseValidator.java
@@ -35,7 +35,7 @@ public class RedundancyIncreaseValidator implements ChangeValidator {
}
private int redundancyOf(ContentCluster cluster) {
- return cluster.redundancy().finalRedundancy();
+ return cluster.getRedundancy().finalRedundancy();
}
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/first/RedundancyValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/first/RedundancyValidator.java
index 5228610537f..2be0f0b8422 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/first/RedundancyValidator.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/first/RedundancyValidator.java
@@ -2,7 +2,6 @@
package com.yahoo.vespa.model.application.validation.first;
import com.yahoo.config.application.api.ValidationId;
-import com.yahoo.config.application.api.ValidationOverrides;
import com.yahoo.config.model.api.ConfigChangeAction;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.vespa.model.VespaModel;
@@ -10,7 +9,6 @@ import com.yahoo.vespa.model.application.validation.Validator;
import com.yahoo.vespa.model.application.validation.change.ChangeValidator;
import com.yahoo.vespa.model.content.cluster.ContentCluster;
-import java.time.Instant;
import java.util.List;
import java.util.stream.Stream;
@@ -48,7 +46,7 @@ public class RedundancyValidator extends Validator implements ChangeValidator {
}
private boolean hasRedundancyOne(ContentCluster cluster) {
- return cluster != null && cluster.redundancy().finalRedundancy() == 1 && cluster.redundancy().groups() == 1;
+ return cluster != null && cluster.getRedundancy().finalRedundancy() == 1 && cluster.getRedundancy().groups() == 1;
}
private void invalidRedundancy(ContentCluster cluster, DeployState deployState) {
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomSearchTuningBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomSearchTuningBuilder.java
index 64592e75c41..a0a4151daf5 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomSearchTuningBuilder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomSearchTuningBuilder.java
@@ -298,6 +298,8 @@ public class DomSearchTuningBuilder extends VespaDomBuilder.DomConfigProducerBui
for (Element e : XML.getChildren(spec)) {
if (equals("concurrency", e)) {
sn.feeding.concurrency = asDouble(e);
+ } else if (equals("niceness", e)) {
+ sn.feeding.niceness = asDouble(e);
}
}
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/Container.java b/config-model/src/main/java/com/yahoo/vespa/model/container/Container.java
index 2ca6d5d7155..f7d4fe28c6e 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/Container.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/Container.java
@@ -109,6 +109,7 @@ public abstract class Container extends AbstractService implements
addChild(new SimpleComponent("com.yahoo.container.jdisc.ConfiguredApplication$ApplicationContext"));
appendJvmOptions(jvmOmitStackTraceInFastThrowOption(deployState.featureFlags()));
+ addEnvironmentVariable("VESPA_MALLOC_MMAP_THRESHOLD","0x200000");
}
protected String jvmOmitStackTraceInFastThrowOption(ModelContext.FeatureFlags featureFlags) {
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/ContentSearchCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/content/ContentSearchCluster.java
index ec7acaf819f..34ea41384bc 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/content/ContentSearchCluster.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/content/ContentSearchCluster.java
@@ -270,6 +270,25 @@ public class ContentSearchCluster extends TreeConfigProducer<AnyConfigProducer>
clusters.put(sc.getClusterName(), sc);
}
+ /**
+ * Returns whether the schemas in this cluster use streaming mode.
+ *
+ * @return True if this cluster only has schemas with streaming mode, False if it only has schemas
+ * with indexing, null if it has both or none.
+ */
+ public Boolean isStreaming() {
+ boolean hasStreaming = false;
+ boolean hasIndexed = false;
+ for (var cluster : clusters.values()) {
+ if (cluster.isStreaming())
+ hasStreaming = true;
+ else
+ hasIndexed = true;
+ }
+ if (hasIndexed == hasStreaming) return null;
+ return hasStreaming;
+ }
+
public List<SearchNode> getSearchNodes() {
return hasIndexedCluster() ? getIndexed().getSearchNodes() : nonIndexed;
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/DistributorCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/content/DistributorCluster.java
index 4aac8bfb647..6f0a03bab60 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/content/DistributorCluster.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/content/DistributorCluster.java
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.model.content;
+import ai.vespa.metrics.DistributorMetrics;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.vespa.config.content.core.StorDistributormanagerConfig;
import com.yahoo.vespa.config.content.core.StorServerConfig;
@@ -135,13 +136,13 @@ public class DistributorCluster extends TreeConfigProducer<Distributor> implemen
@Override
public void getConfig(MetricsmanagerConfig.Builder builder) {
ContentCluster.getMetricBuilder("log", builder).
- addedmetrics("vds.distributor.docsstored").
- addedmetrics("vds.distributor.bytesstored").
- addedmetrics("vds.idealstate.delete_bucket.done_ok").
- addedmetrics("vds.idealstate.merge_bucket.done_ok").
- addedmetrics("vds.idealstate.split_bucket.done_ok").
- addedmetrics("vds.idealstate.join_bucket.done_ok").
- addedmetrics("vds.idealstate.buckets_rechecking");
+ addedmetrics(DistributorMetrics.VDS_DISTRIBUTOR_DOCSSTORED.baseName()).
+ addedmetrics(DistributorMetrics.VDS_DISTRIBUTOR_BYTESSTORED.baseName()).
+ addedmetrics(DistributorMetrics.VDS_IDEALSTATE_DELETE_BUCKET_DONE_OK.baseName()).
+ addedmetrics(DistributorMetrics.VDS_IDEALSTATE_MERGE_BUCKET_DONE_OK.baseName()).
+ addedmetrics(DistributorMetrics.VDS_IDEALSTATE_SPLIT_BUCKET_DONE_OK.baseName()).
+ addedmetrics(DistributorMetrics.VDS_IDEALSTATE_JOIN_BUCKET_DONE_OK.baseName()).
+ addedmetrics(DistributorMetrics.VDS_IDEALSTATE_BUCKETS_RECHECKING.baseName());
}
@Override
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/StorageGroup.java b/config-model/src/main/java/com/yahoo/vespa/model/content/StorageGroup.java
index 52b2ce06dfe..6078215f9b6 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/content/StorageGroup.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/content/StorageGroup.java
@@ -171,12 +171,11 @@ public class StorageGroup {
}
@Override
- public boolean equals(Object obj) {
- if (obj instanceof StorageGroup) {
- StorageGroup rhs = (StorageGroup)obj;
- return this.index.equals(rhs.index) &&
- this.name.equals(rhs.name) &&
- this.partitions.equals(rhs.partitions);
+ public boolean equals(Object o) {
+ if (o instanceof StorageGroup other) {
+ return this.index.equals(other.index) &&
+ this.name.equals(other.name) &&
+ this.partitions.equals(other.partitions);
}
return false;
}
@@ -208,9 +207,7 @@ public class StorageGroup {
this.context = context;
}
- public StorageGroup buildRootGroup(DeployState deployState,
- RedundancyBuilder redundancyBuilder,
- ContentCluster owner) {
+ public StorageGroup buildRootGroup(DeployState deployState, ContentCluster owner, Boolean isStreaming) {
try {
if (owner.isHosted())
validateRedundancyAndGroups(deployState.zone().environment());
@@ -229,7 +226,8 @@ public class StorageGroup {
? groupBuilder.buildHosted(deployState, owner, Optional.empty(), context)
: groupBuilder.buildNonHosted(deployState, owner, Optional.empty());
- Redundancy redundancy = redundancyBuilder.build(owner.isHosted(), storageGroup.subgroups.size(),
+ RedundancyBuilder redundancyBuilder = new RedundancyBuilder(clusterElement);
+ Redundancy redundancy = redundancyBuilder.build(owner.isHosted(), isStreaming, storageGroup.subgroups.size(),
storageGroup.getNumberOfLeafGroups(), storageGroup.countNodes(false));
owner.setRedundancy(redundancy);
if (storageGroup.partitions.isEmpty() && (redundancy.groups() > 1)) {
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java
index 2592beca6c6..f792ac3a591 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java
@@ -114,7 +114,6 @@ public class ContentCluster extends TreeConfigProducer<AnyConfigProducer> implem
new SearchDefinitionBuilder().build(deployState.getDocumentModel().getDocumentManager(), documentsElement);
String routingSelection = new DocumentSelectionBuilder().build(documentsElement);
- RedundancyBuilder redundancyBuilder = new RedundancyBuilder(contentElement);
Set<NewDocumentType> globallyDistributedDocuments = new GlobalDistributionBuilder(documentDefinitions).build(documentsElement);
String clusterId = getClusterId(contentElement);
@@ -133,7 +132,7 @@ public class ContentCluster extends TreeConfigProducer<AnyConfigProducer> implem
c.persistenceFactory = new EngineFactoryBuilder().build(contentElement, c);
c.storageNodes = new StorageCluster.Builder().build(deployState, c, w3cContentElement);
c.distributorNodes = new DistributorCluster.Builder(c).build(deployState, c, w3cContentElement);
- c.rootGroup = new StorageGroup.Builder(contentElement, context).buildRootGroup(deployState, redundancyBuilder, c);
+ c.rootGroup = new StorageGroup.Builder(contentElement, context).buildRootGroup(deployState, c, c.search.isStreaming());
c.clusterControllerConfig = createClusterControllerConfig(contentElement, deployState, c, resourceLimits);
validateThatGroupSiblingsAreUnique(c.clusterId, c.rootGroup);
c.search.handleRedundancy(c.redundancy);
@@ -447,7 +446,7 @@ public class ContentCluster extends TreeConfigProducer<AnyConfigProducer> implem
public final ContentSearchCluster getSearch() { return search; }
- public Redundancy redundancy() { return redundancy; }
+ public Redundancy getRedundancy() { return redundancy; }
public ContentCluster setRedundancy(Redundancy redundancy) {
this.redundancy = redundancy;
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/RedundancyBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/RedundancyBuilder.java
index e7bafdf52e4..d310db067a6 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/RedundancyBuilder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/RedundancyBuilder.java
@@ -20,7 +20,7 @@ public class RedundancyBuilder {
// Always global (across groups)
private Integer globalMinRedundancy = null;
- RedundancyBuilder(ModelElement clusterXml) {
+ public RedundancyBuilder(ModelElement clusterXml) {
ModelElement redundancyElement = clusterXml.child("redundancy");
if (redundancyElement != null) {
initialRedundancy = redundancyElement.integerAttribute("reply-after");
@@ -47,22 +47,40 @@ public class RedundancyBuilder {
throw new IllegalArgumentException("Either <redundancy> or <min-redundancy> must be set");
}
- public Redundancy build(boolean isHosted, int subGroups, int leafGroups, int totalNodes) {
+ /**
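+ * @param isHosted true if the cluster is hosted; selects the hosted or the non-hosted redundancy rules
+ * @param isStreaming true if this cluster only has schemas with streaming mode, false if it only has schemas
+ * without streaming, null if it has both or none
+ * @param subGroups the number of subgroups of the root storage group
+ * @param leafGroups the number of leaf groups of the root storage group
+ * @param totalNodes the node count of the root storage group
+ * @return the redundancy settings resolved from the configured values and the arguments above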
+ */
+ public Redundancy build(boolean isHosted, Boolean isStreaming, int subGroups, int leafGroups, int totalNodes) {
if (isHosted) {
if (globalMinRedundancy != null && ( finalRedundancy == null || finalRedundancy * leafGroups < globalMinRedundancy ))
initialRedundancy = finalRedundancy = (int)Math.ceil((double)globalMinRedundancy / leafGroups);
if (readyCopies == null) {
- if (leafGroups > 1)
- readyCopies = 1;
- else
- readyCopies = finalRedundancy > 1 ? 2 : 1;
+ if (isStreaming == Boolean.TRUE) {
+ readyCopies = finalRedundancy;
+ }
+ else { // If isStreaming is null (mixed mode cluster) there are no good options ...
+ if (leafGroups > 1)
+ readyCopies = 1;
+ else
+ readyCopies = finalRedundancy > 1 ? 2 : 1;
+ }
}
return new Redundancy(initialRedundancy, finalRedundancy, readyCopies, leafGroups, totalNodes);
} else {
if (globalMinRedundancy != null && ( finalRedundancy == null || finalRedundancy < globalMinRedundancy))
initialRedundancy = finalRedundancy = globalMinRedundancy;
- if (readyCopies == null)
- readyCopies = finalRedundancy > 1 ? Math.max(subGroups, 2) : 1;
+ if (readyCopies == null) {
+ if (isStreaming == Boolean.TRUE)
+ readyCopies = finalRedundancy;
+ else // If isStreaming is null (mixed mode cluster) there are no good options ...
+ readyCopies = finalRedundancy > 1 ? Math.max(subGroups, 2) : 1;
+ }
subGroups = Math.max(1, subGroups);
IndexedHierarchicDistributionValidator.validateThatLeafGroupsCountIsAFactorOfRedundancy(finalRedundancy, subGroups);
IndexedHierarchicDistributionValidator.validateThatReadyCopiesIsCompatibleWithRedundancy(finalRedundancy, readyCopies, subGroups);
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/storagecluster/StorageCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/content/storagecluster/StorageCluster.java
index 2d67a344a17..a1e809098f2 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/content/storagecluster/StorageCluster.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/content/storagecluster/StorageCluster.java
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.model.content.storagecluster;
+import ai.vespa.metrics.StorageMetrics;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.vespa.config.content.core.StorIntegritycheckerConfig;
import com.yahoo.vespa.config.content.core.StorBucketmoverConfig;
@@ -75,24 +76,24 @@ public class StorageCluster extends TreeConfigProducer<StorageNode>
@Override
public void getConfig(MetricsmanagerConfig.Builder builder) {
ContentCluster.getMetricBuilder("fleetcontroller", builder).
- addedmetrics("vds.datastored.alldisks.docs").
- addedmetrics("vds.datastored.alldisks.bytes").
- addedmetrics("vds.datastored.alldisks.buckets").
- addedmetrics("vds.datastored.bucket_space.buckets_total");
+ addedmetrics(StorageMetrics.VDS_DATASTORED_ALLDISKS_DOCS.baseName()).
+ addedmetrics(StorageMetrics.VDS_DATASTORED_ALLDISKS_BYTES.baseName()).
+ addedmetrics(StorageMetrics.VDS_DATASTORED_ALLDISKS_BUCKETS.baseName()).
+ addedmetrics(StorageMetrics.VDS_DATASTORED_BUCKET_SPACE_BUCKETS_TOTAL.baseName());
ContentCluster.getMetricBuilder("log", builder).
addedmetrics("vds.filestor.allthreads.put").
addedmetrics("vds.filestor.allthreads.get").
addedmetrics("vds.filestor.allthreads.remove").
addedmetrics("vds.filestor.allthreads.update").
- addedmetrics("vds.datastored.alldisks.docs").
- addedmetrics("vds.datastored.alldisks.bytes").
- addedmetrics("vds.filestor.queuesize").
- addedmetrics("vds.filestor.averagequeuewait").
- addedmetrics("vds.visitor.cv_queuewaittime").
- addedmetrics("vds.visitor.allthreads.averagequeuewait").
- addedmetrics("vds.visitor.allthreads.averagevisitorlifetime").
- addedmetrics("vds.visitor.allthreads.created");
+ addedmetrics(StorageMetrics.VDS_DATASTORED_ALLDISKS_DOCS.baseName()).
+ addedmetrics(StorageMetrics.VDS_DATASTORED_ALLDISKS_BYTES.baseName()).
+ addedmetrics(StorageMetrics.VDS_FILESTOR_QUEUESIZE.baseName()).
+ addedmetrics(StorageMetrics.VDS_FILESTOR_AVERAGEQUEUEWAIT.baseName()).
+ addedmetrics(StorageMetrics.VDS_VISITOR_CV_QUEUEWAITTIME.baseName()).
+ addedmetrics(StorageMetrics.VDS_VISITOR_ALLTHREADS_AVERAGEQUEUEWAIT.baseName()).
+ addedmetrics(StorageMetrics.VDS_VISITOR_ALLTHREADS_AVERAGEVISITORLIFETIME.baseName()).
+ addedmetrics(StorageMetrics.VDS_VISITOR_ALLTHREADS_CREATED.baseName());
}
public String getClusterName() {
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/ml/OnnxModelInfo.java b/config-model/src/main/java/com/yahoo/vespa/model/ml/OnnxModelInfo.java
index 1984ceadac6..8edd446b209 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/ml/OnnxModelInfo.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/ml/OnnxModelInfo.java
@@ -274,7 +274,8 @@ public class OnnxModelInfo {
static private void onnxTypeToJson(JsonGenerator g, Onnx.ValueInfoProto valueInfo) throws IOException {
g.writeStartObject();
g.writeStringField("name", valueInfo.getName());
- g.writeStringField("type", onnxValueTypeToString(valueInfo.getType().getTensorType().getElemType()));
+ var elemType = Onnx.TensorProto.DataType.forNumber(valueInfo.getType().getTensorType().getElemType());
+ g.writeStringField("type", onnxValueTypeToString(elemType));
g.writeArrayFieldStart("dim");
for (Onnx.TensorShapeProto.Dimension dim : valueInfo.getType().getTensorType().getShape().getDimList()) {
g.writeStartObject();
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/search/Tuning.java b/config-model/src/main/java/com/yahoo/vespa/model/search/Tuning.java
index 93e3a6e7a19..83eccc8697c 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/search/Tuning.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/search/Tuning.java
@@ -371,12 +371,16 @@ public class Tuning extends AnyConfigProducer implements ProtonConfig.Producer {
public static class Feeding implements ProtonConfig.Producer {
public Double concurrency = null;
+ public Double niceness = null;
@Override
public void getConfig(ProtonConfig.Builder builder) {
if (concurrency != null) {
builder.feeding.concurrency(concurrency);
}
+ if (niceness != null) {
+ builder.feeding.niceness(niceness);
+ }
}
}
diff --git a/config-model/src/main/protobuf/onnx.proto b/config-model/src/main/protobuf/onnx.proto
index dc6542867e0..1d265ae9f28 100644
--- a/config-model/src/main/protobuf/onnx.proto
+++ b/config-model/src/main/protobuf/onnx.proto
@@ -3,8 +3,8 @@
//
-// Copyright (c) Facebook Inc. and Microsoft Corporation.
-// Licensed under the MIT license.
+// SPDX-License-Identifier: Apache-2.0
+
syntax = "proto2";
@@ -20,23 +20,16 @@ package onnx;
//
// This document describes the syntax of models and their computation graphs,
// as well as the standard data types. Together, they are referred to as the ONNX
-// Intermediate Representation, or 'IR' for short.
+// Intermediate Representation, or 'IR' for short.
//
// The normative semantic specification of the ONNX IR is found in docs/IR.md.
// Definitions of the built-in neural network operators may be found in docs/Operators.md.
// Notes
//
-// Release
-//
-// We are still in the very early stage of defining ONNX. The current
-// version of ONNX is a starting point. While we are actively working
-// towards a complete spec, we would like to get the community involved
-// by sharing our working version of ONNX.
-//
// Protobuf compatibility
-//
-// To simplify framework compatibility, ONNX is defined using the subset of protobuf
+//
+// To simplify framework compatibility, ONNX is defined using the subset of protobuf
// that is compatible with both protobuf v2 and v3. This means that we do not use any
// protobuf features that are only available in one of the two versions.
//
@@ -60,22 +53,60 @@ enum Version {
_START_VERSION = 0;
// The version field is always serialized and we will use it to store the
// version that the graph is generated from. This helps us set up version
- // control. We should use version as
- // xx(major) - xx(minor) - xxxx(bugfix)
- // and we are starting with 0x00000001 (0.0.1), which was the
- // version we published on Oct 10, 2017.
- IR_VERSION_2017_10_10 = 0x00000001;
+ // control.
+ // For the IR, we are using simple numbers starting with 0x00000001,
+ // which was the version we published on Oct 10, 2017.
+ IR_VERSION_2017_10_10 = 0x0000000000000001;
- // IR_VERSION 0.0.2 published on Oct 30, 2017
+ // IR_VERSION 2 published on Oct 30, 2017
// - Added type discriminator to AttributeProto to support proto3 users
- IR_VERSION_2017_10_30 = 0x00000002;
+ IR_VERSION_2017_10_30 = 0x0000000000000002;
- // IR VERSION 0.0.3 published on Nov 3, 2017
+ // IR VERSION 3 published on Nov 3, 2017
// - For operator versioning:
// - Added new message OperatorSetIdProto
// - Added opset_import in ModelProto
// - For vendor extensions, added domain in NodeProto
- IR_VERSION = 0x00000003;
+ IR_VERSION_2017_11_3 = 0x0000000000000003;
+
+ // IR VERSION 4 published on Jan 22, 2019
+ // - Relax constraint that initializers should be a subset of graph inputs
+ // - Add type BFLOAT16
+ IR_VERSION_2019_1_22 = 0x0000000000000004;
+
+ // IR VERSION 5 published on March 18, 2019
+ // - Add message TensorAnnotation.
+ // - Add quantization annotation in GraphProto to map tensor with its scale and zero point quantization parameters.
+ IR_VERSION_2019_3_18 = 0x0000000000000005;
+
+ // IR VERSION 6 published on Sep 19, 2019
+ // - Add support for sparse tensor constants stored in model.
+ // - Add message SparseTensorProto
+ // - Add sparse initializers
+ IR_VERSION_2019_9_19 = 0x0000000000000006;
+
+ // IR VERSION 7 published on May 8, 2020
+ // - Add support to allow function body graph to rely on multiple external operator sets.
+ // - Add a list to promote inference graph's initializers to global and
+ // mutable variables. Global variables are visible in all graphs of the
+ // stored models.
+ // - Add message TrainingInfoProto to store initialization
+ // method and training algorithm. The execution of TrainingInfoProto
+ // can modify the values of mutable variables.
+ // - Implicitly add inference graph into each TrainingInfoProto's algorithm.
+ IR_VERSION_2020_5_8 = 0x0000000000000007;
+
+ // IR VERSION 8 published on July 30, 2021
+ // Introduce TypeProto.SparseTensor
+ // Introduce TypeProto.Optional
+ // Added a list of FunctionProtos local to the model
+ // Deprecated since_version and operator status from FunctionProto
+ IR_VERSION_2021_7_30 = 0x0000000000000008;
+
+ // IR VERSION 9 published on May 5, 2023
+ // Added AttributeProto to FunctionProto so that default attribute values can be set.
+ // Added FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ.
+ IR_VERSION = 0x0000000000000009;
}
// Attributes
@@ -95,17 +126,21 @@ message AttributeProto {
STRING = 3;
TENSOR = 4;
GRAPH = 5;
+ SPARSE_TENSOR = 11;
+ TYPE_PROTO = 13;
FLOATS = 6;
INTS = 7;
STRINGS = 8;
TENSORS = 9;
GRAPHS = 10;
+ SPARSE_TENSORS = 12;
+ TYPE_PROTOS = 14;
}
// The name field MUST be present for this version of the IR.
optional string name = 1; // namespace Attribute
-
+
// if ref_attr_name is not empty, ref_attr_name is the attribute name in parent function.
// In this case, this AttributeProto does not contain data, and it's a reference of attribute
// in parent scope.
@@ -117,10 +152,10 @@ message AttributeProto {
// The type field MUST be present for this version of the IR.
// For 0.0.1 versions of the IR, this field was not defined, and
- // implementations needed to use has_field hueristics to determine
+ // implementations needed to use has_field heuristics to determine
// which value field was in use. For IR_VERSION 0.0.2 or later, this
// field MUST be set and match the f|i|s|t|... field in use. This
- // change was made to accomodate proto3 implementations.
+ // change was made to accommodate proto3 implementations.
optional AttributeType type = 20; // discriminator that indicates which field below is in use
// Exactly ONE of the following fields must be present for this version of the IR
@@ -129,14 +164,18 @@ message AttributeProto {
optional bytes s = 4; // UTF-8 string
optional TensorProto t = 5; // tensor value
optional GraphProto g = 6; // graph
+ optional SparseTensorProto sparse_tensor = 22; // sparse tensor value
// Do not use field below, it's deprecated.
// optional ValueProto v = 12; // value - subsumes everything but graph
+ optional TypeProto tp = 14; // type proto
repeated float floats = 7; // list of floats
repeated int64 ints = 8; // list of ints
repeated bytes strings = 9; // list of UTF-8 strings
repeated TensorProto tensors = 10; // list of tensors
repeated GraphProto graphs = 11; // list of graph
+ repeated SparseTensorProto sparse_tensors = 23; // list of sparse tensors
+ repeated TypeProto type_protos = 15;// list of type protos
}
// Defines information on value, including the name, the type, and
@@ -144,7 +183,8 @@ message AttributeProto {
message ValueInfoProto {
// This field MUST be present in this version of the IR.
optional string name = 1; // namespace Value
- // This field MUST be present in this version of the IR.
+ // This field MUST be present in this version of the IR for
+ // inputs and outputs of the top-level graph.
optional TypeProto type = 2;
// A human-readable documentation for this value. Markdown is allowed.
optional string doc_string = 3;
@@ -155,7 +195,7 @@ message ValueInfoProto {
// Computation graphs are made up of a DAG of nodes, which represent what is
// commonly called a "layer" or "pipeline stage" in machine learning frameworks.
//
-// For example, it can be a node of type "Conv" that takes in an image, a filter
+// For example, it can be a node of type "Conv" that takes in an image, a filter
// tensor and a bias tensor, and produces the convolved output.
message NodeProto {
repeated string input = 1; // namespace Value
@@ -177,12 +217,130 @@ message NodeProto {
optional string doc_string = 6;
}
+// Training information
+// TrainingInfoProto stores information for training a model.
+// In particular, this defines two functionalities: an initialization-step
+// and a training-algorithm-step. Initialization resets the model
+// back to its original state as if no training has been performed.
+// Training algorithm improves the model based on input data.
+//
+// The semantics of the initialization-step is that the initializers
+// in ModelProto.graph and in TrainingInfoProto.algorithm are first
+// initialized as specified by the initializers in the graph, and then
+// updated by the "initialization_binding" in every instance in
+// ModelProto.training_info.
+//
+// The field "algorithm" defines a computation graph which represents a
+// training algorithm's step. After the execution of a
+// TrainingInfoProto.algorithm, the initializers specified by "update_binding"
+// may be immediately updated. If the targeted training algorithm contains
+// consecutive update steps (such as block coordinate descent methods),
+// the user needs to create a TrainingInfoProto for each step.
+message TrainingInfoProto {
+ // This field describes a graph to compute the initial tensors
+ // upon starting the training process. Initialization graph has no input
+ // and can have multiple outputs. Usually, trainable tensors in neural
+ // networks are randomly initialized. To achieve that, for each tensor,
+ // the user can put a random number operator such as RandomNormal or
+ // RandomUniform in TrainingInfoProto.initialization.node and assign its
+ // random output to the specific tensor using "initialization_binding".
+ // This graph can also set the initializers in "algorithm" in the same
+ // TrainingInfoProto; a use case is resetting the number of training
+ // iterations to zero.
+ //
+ // By default, this field is an empty graph and its evaluation does not
+ // produce any output. Thus, no initializer would be changed by default.
+ optional GraphProto initialization = 1;
+
+ // This field represents a training algorithm step. Given required inputs,
+ // it computes outputs to update initializers in its own or inference graph's
+ // initializer lists. In general, this field contains loss node, gradient node,
+ // optimizer node, increment of iteration count.
+ //
+ // An execution of the training algorithm step is performed by executing the
+ // graph obtained by combining the inference graph (namely "ModelProto.graph")
+ // and the "algorithm" graph. That is, the actual
+ // input/initializer/output/node/value_info/sparse_initializer list of
+ // the training graph is the concatenation of
+ // "ModelProto.graph.input/initializer/output/node/value_info/sparse_initializer"
+ // and "algorithm.input/initializer/output/node/value_info/sparse_initializer"
+ // in that order. This combined graph must satisfy the normal ONNX conditions.
+ // Now, let's provide a visualization of graph combination for clarity.
+ // Let the inference graph (i.e., "ModelProto.graph") be
+ // tensor_a, tensor_b -> MatMul -> tensor_c -> Sigmoid -> tensor_d
+ // and the "algorithm" graph be
+ // tensor_d -> Add -> tensor_e
+ // The combination process results in
+ // tensor_a, tensor_b -> MatMul -> tensor_c -> Sigmoid -> tensor_d -> Add -> tensor_e
+ //
+ // Notice that an input of a node in the "algorithm" graph may reference the
+ // output of a node in the inference graph (but not the other way round). Also, inference
+ // node cannot reference inputs of "algorithm". With these restrictions, inference graph
+ // can always be run independently without training information.
+ //
+ // By default, this field is an empty graph and its evaluation does not
+ // produce any output. Evaluating the default training step never
+ // updates any initializers.
+ optional GraphProto algorithm = 2;
+
+ // This field specifies the bindings from the outputs of "initialization" to
+ // some initializers in "ModelProto.graph.initializer" and
+ // the "algorithm.initializer" in the same TrainingInfoProto.
+ // See "update_binding" below for details.
+ //
+ // By default, this field is empty and no initializer would be changed
+ // by the execution of "initialization".
+ repeated StringStringEntryProto initialization_binding = 3;
+
+ // Gradient-based training is usually an iterative procedure. In one gradient
+ // descent iteration, we apply
+ //
+ // x = x - r * g
+ //
+ // where "x" is the optimized tensor, "r" stands for learning rate, and "g" is
+ // gradient of "x" with respect to a chosen loss. To avoid adding assignments
+ // into the training graph, we split the update equation into
+ //
+ // y = x - r * g
+ // x = y
+ //
+ // The user needs to save "y = x - r * g" into TrainingInfoProto.algorithm. To
+ // tell that "y" should be assigned to "x", the field "update_binding" may
+ // contain a key-value pair of strings, "x" (key of StringStringEntryProto)
+ // and "y" (value of StringStringEntryProto).
+ // For a neural network with multiple trainable (mutable) tensors, there can
+ // be multiple key-value pairs in "update_binding".
+ //
+ // The initializers that appear as keys in "update_binding" are considered
+ // mutable variables. This implies some behaviors
+ // as described below.
+ //
+ // 1. We have only unique keys in all "update_binding"s so that two
+ // variables may not have the same name. This ensures that one
+ // variable is assigned up to once.
+ // 2. The keys must appear in names of "ModelProto.graph.initializer" or
+ // "TrainingInfoProto.algorithm.initializer".
+ // 3. The values must be output names of "algorithm" or "ModelProto.graph.output".
+ // 4. Mutable variables are initialized to the value specified by the
+ // corresponding initializer, and then potentially updated by
+ // "initialization_binding"s and "update_binding"s in "TrainingInfoProto"s.
+ //
+ // This field usually contains names of trainable tensors
+ // (in ModelProto.graph), optimizer states such as momentums in advanced
+ // stochastic gradient methods (in TrainingInfoProto.algorithm),
+ // and number of training iterations (in TrainingInfoProto.algorithm).
+ //
+ // By default, this field is empty and no initializer would be changed
+ // by the execution of "algorithm".
+ repeated StringStringEntryProto update_binding = 4;
+}
+
// Models
//
// ModelProto is a top-level file/container format for bundling a ML model and
// associating its computation graph with metadata.
//
-// The semantics of the model are described by the associated GraphProto.
+// The semantics of the model are described by the associated GraphProto's.
message ModelProto {
// The version of the IR this model targets. See Version enum above.
// This field MUST be present.
@@ -227,18 +385,58 @@ message ModelProto {
// Named metadata values; keys should be distinct.
repeated StringStringEntryProto metadata_props = 14;
+
+ // Training-specific information. Sequentially executing all stored
+ // `TrainingInfoProto.algorithm`s and assigning their outputs following
+ // the corresponding `TrainingInfoProto.update_binding`s is one training
+ // iteration. Similarly, to initialize the model
+ // (as if training hasn't happened), the user should sequentially execute
+ // all stored `TrainingInfoProto.initialization`s and assign their outputs
+ // using `TrainingInfoProto.initialization_binding`s.
+ //
+ // If this field is empty, the training behavior of the model is undefined.
+ repeated TrainingInfoProto training_info = 20;
+
+ // A list of function protos local to the model.
+ //
+ // Name of the function "FunctionProto.name" should be unique within the domain "FunctionProto.domain".
+ // In case of any conflicts the behavior (whether the model local functions are given higher priority,
+ // or standard operator sets are given higher priority, or this is treated as an error) is defined by
+ // the runtimes.
+ //
+ // The operator sets imported by FunctionProto should be compatible with the ones
+ // imported by ModelProto and other model local FunctionProtos.
+ // Example, if same operator set say 'A' is imported by a FunctionProto and ModelProto
+ // or by 2 FunctionProtos then versions for the operator set may be different but,
+ // the operator schema returned for op_type, domain, version combination
+ // for both the versions should be same for every node in the function body.
+ //
+ // One FunctionProto can reference other FunctionProto in the model, however, recursive reference
+ // is not allowed.
+ repeated FunctionProto functions = 25;
};
// StringStringEntryProto follows the pattern for cross-proto-version maps.
// See https://developers.google.com/protocol-buffers/docs/proto3#maps
message StringStringEntryProto {
optional string key = 1;
- optional string value= 2;
+ optional string value = 2;
};
+message TensorAnnotation {
+ optional string tensor_name = 1;
+ // <key, value> pairs to annotate tensor specified by <tensor_name> above.
+ // The keys used in the mapping below must be pre-defined in ONNX spec.
+ // For example, for 8-bit linear quantization case, 'SCALE_TENSOR', 'ZERO_POINT_TENSOR' will be pre-defined as
+ // quantization parameter keys.
+ repeated StringStringEntryProto quant_parameter_tensor_names = 2;
+}
+
+
+
// Graphs
//
-// A graph defines the computational logic of a model and is comprised of a parameterized
+// A graph defines the computational logic of a model and is comprised of a parameterized
// list of nodes that form a directed acyclic graph based on their inputs and outputs.
// This is the equivalent of the "network" or "graph" in many deep learning
// frameworks.
@@ -250,10 +448,14 @@ message GraphProto {
optional string name = 2; // namespace Graph
// A list of named tensor values, used to specify constant inputs of the graph.
- // Each TensorProto entry must have a distinct name (within the list) that
- // also appears in the input list.
+ // Each initializer (both TensorProto as well as SparseTensorProto) MUST have a name.
+ // The name MUST be unique across both initializer and sparse_initializer,
+ // but the name MAY also appear in the input list.
repeated TensorProto initializer = 5;
+ // Initializers (see above) stored in sparse format.
+ repeated SparseTensorProto sparse_initializer = 15;
+
// A human-readable documentation for this graph. Markdown is allowed.
optional string doc_string = 10;
@@ -265,13 +467,14 @@ message GraphProto {
// must be distinct. It is optional for a value to appear in value_info list.
repeated ValueInfoProto value_info = 13;
- // DO NOT USE the following fields, they were deprecated from earlier versions.
- // repeated string input = 3;
- // repeated string output = 4;
- // optional int64 ir_version = 6;
- // optional int64 producer_version = 7;
- // optional string producer_tag = 8;
- // optional string domain = 9;
+ // This field carries information to indicate the mapping among a tensor and its
+ // quantization parameter tensors. For example:
+ // For tensor 'a', it may have {'SCALE_TENSOR', 'a_scale'} and {'ZERO_POINT_TENSOR', 'a_zero_point'} annotated,
+ // which means, tensor 'a_scale' and tensor 'a_zero_point' are scale and zero point of tensor 'a' in the model.
+ repeated TensorAnnotation quantization_annotation = 14;
+
+ reserved 3, 4, 6 to 9;
+ reserved "ir_version", "producer_version", "producer_tag", "domain";
}
// Tensors
@@ -291,13 +494,32 @@ message TensorProto {
STRING = 8; // string
BOOL = 9; // bool
- // Advanced types
+ // IEEE754 half-precision floating-point format (16 bits wide).
+ // This format has 1 sign bit, 5 exponent bits, and 10 mantissa bits.
FLOAT16 = 10;
+
DOUBLE = 11;
UINT32 = 12;
UINT64 = 13;
COMPLEX64 = 14; // complex with float32 real and imaginary components
COMPLEX128 = 15; // complex with float64 real and imaginary components
+
+ // Non-IEEE floating-point format based on IEEE754 single-precision
+ // floating-point number truncated to 16 bits.
+ // This format has 1 sign bit, 8 exponent bits, and 7 mantissa bits.
+ BFLOAT16 = 16;
+
+ // Non-IEEE floating-point format based on papers
+ // FP8 Formats for Deep Learning, https://arxiv.org/abs/2209.05433,
+ // 8-bit Numerical Formats For Deep Neural Networks, https://arxiv.org/pdf/2206.02915.pdf.
+ // Operators supported FP8 are Cast, CastLike, QuantizeLinear, DequantizeLinear.
+ // The computation usually happens inside a block quantize / dequantize
+ // fused by the runtime.
+ FLOAT8E4M3FN = 17; // float 8, mostly used for coefficients, supports nan, not inf
+ FLOAT8E4M3FNUZ = 18; // float 8, mostly used for coefficients, supports nan, not inf, no negative zero
+ FLOAT8E5M2 = 19; // follows IEEE 754, supports nan, inf, mostly used for gradients
+ FLOAT8E5M2FNUZ = 20; // follows IEEE 754, supports nan, inf, mostly used for gradients, no negative zero
+
// Future extensions go here.
}
@@ -305,7 +527,8 @@ message TensorProto {
repeated int64 dims = 1;
// The data type of the tensor.
- optional DataType data_type = 2;
+ // This field MUST have a valid TensorProto.DataType value
+ optional int32 data_type = 2;
// For very large tensors, we may want to store them in chunks, in which
// case the following fields will specify the segment that is stored in
@@ -324,17 +547,17 @@ message TensorProto {
// For float and complex64 values
// Complex64 tensors are encoded as a single array of floats,
// with the real components appearing in odd numbered positions,
- // and the corresponding imaginary component apparing in the
+ // and the corresponding imaginary component appearing in the
// subsequent even numbered position. (e.g., [1.0 + 2.0i, 3.0 + 4.0i]
// is encoded as [1.0, 2.0 ,3.0 ,4.0]
// When this field is present, the data_type field MUST be FLOAT or COMPLEX64.
repeated float float_data = 4 [packed = true];
- // For int32, uint8, int8, uint16, int16, bool, and float16 values
- // float16 values must be bit-wise converted to an uint16_t prior
+ // For int32, uint8, int8, uint16, int16, bool, float8, and float16 values
+ // float16 and float8 values must be bit-wise converted to an uint16_t prior
// to writing to the buffer.
// When this field is present, the data_type field MUST be
- // INT32, INT16, INT8, UINT16, INT8, BOOL, or FLOAT32
+ // INT32, INT16, INT8, UINT16, UINT8, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ
repeated int32 int32_data = 5 [packed = true];
// For strings.
@@ -371,10 +594,32 @@ message TensorProto {
// When this field is present, the data_type field MUST NOT be STRING or UNDEFINED
optional bytes raw_data = 9;
+ // Data can be stored inside the protobuf file using type-specific fields or raw_data.
+ // Alternatively, raw bytes data can be stored in an external file, using the external_data field.
+ // external_data stores key-value pairs describing data location. Recognized keys are:
+ // - "location" (required) - POSIX filesystem path relative to the directory where the ONNX
+ // protobuf model was stored
+ // - "offset" (optional) - position of byte at which stored data begins. Integer stored as string.
+ // Offset values SHOULD be multiples of 4096 (page size) to enable mmap support.
+ // - "length" (optional) - number of bytes containing data. Integer stored as string.
+ // - "checksum" (optional) - SHA1 digest of file specified under 'location' key.
+ repeated StringStringEntryProto external_data = 13;
+
+ // Location of the data for this tensor. MUST be one of:
+ // - DEFAULT - data stored inside the protobuf message. Data is stored in raw_data (if set) otherwise in type-specified field.
+ // - EXTERNAL - data stored in an external location as described by external_data field.
+ enum DataLocation {
+ DEFAULT = 0;
+ EXTERNAL = 1;
+ }
+
+ // If value not set, data is stored in raw_data (if set) otherwise in type-specified field.
+ optional DataLocation data_location = 14;
+
// For double
- // Complex64 tensors are encoded as a single array of doubles,
+ // Complex128 tensors are encoded as a single array of doubles,
// with the real components appearing in odd numbered positions,
- // and the corresponding imaginary component apparing in the
+ // and the corresponding imaginary component appearing in the
// subsequent even numbered position. (e.g., [1.0 + 2.0i, 3.0 + 4.0i]
// is encoded as [1.0, 2.0 ,3.0 ,4.0]
// When this field is present, the data_type field MUST be DOUBLE or COMPLEX128
@@ -386,6 +631,30 @@ message TensorProto {
repeated uint64 uint64_data = 11 [packed = true];
}
+// A serialized sparse-tensor value
+message SparseTensorProto {
+ // The sequence of non-default values are encoded as a tensor of shape [NNZ].
+ // The default-value is zero for numeric tensors, and empty-string for string tensors.
+ // values must have a non-empty name present which serves as a name for SparseTensorProto
+ // when used in sparse_initializer list.
+ optional TensorProto values = 1;
+
+ // The indices of the non-default values, which may be stored in one of two formats.
+ // (a) Indices can be a tensor of shape [NNZ, rank] with the [i,j]-th value
+ // corresponding to the j-th index of the i-th value (in the values tensor).
+ // (b) Indices can be a tensor of shape [NNZ], in which case the i-th value
+ // must be the linearized-index of the i-th value (in the values tensor).
+ // The linearized-index can be converted into an index tuple (k_1,...,k_rank)
+ // using the shape provided below.
+ // The indices must appear in ascending order without duplication.
+ // In the first format, the ordering is lexicographic-ordering:
+ // e.g., index-value [1,4] must appear before [2,1]
+ optional TensorProto indices = 2;
+
+ // The shape of the underlying dense-tensor: [dim_1, dim_2, ... dim_rank]
+ repeated int64 dims = 3;
+}
+
// Defines a tensor shape. A dimension can be either an integer value
// or a symbolic variable. A symbolic variable represents an unknown
// dimension.
@@ -398,36 +667,13 @@ message TensorShapeProto {
// Standard denotation can optionally be used to denote tensor
// dimensions with standard semantic descriptions to ensure
// that operations are applied to the correct axis of a tensor.
+ // Refer to https://github.com/onnx/onnx/blob/main/docs/DimensionDenotation.md#denotation-definition
+ // for pre-defined dimension denotations.
optional string denotation = 3;
};
repeated Dimension dim = 1;
}
-// A set of pre-defined constants to be used as values for
-// the standard denotation field in TensorShapeProto.Dimension
-// for semantic description of the tensor dimension.
-message DenotationConstProto {
- // Describe a batch number dimension.
- optional string DATA_BATCH = 1 [default = "DATA_BATCH"];
- // Describe a channel dimension.
- optional string DATA_CHANNEL = 2 [default = "DATA_CHANNEL"];
- // Describe a time dimension.
- optional string DATA_TIME = 3 [default = "DATA_TIME"];
- // Describe a feature dimension. This is typically a feature
- // dimension in RNN and/or spatial dimension in CNN.
- optional string DATA_FEATURE = 4 [default = "DATA_FEATURE"];
- // Describe a filter in-channel dimension. This is the dimension
- // that is identical (in size) to the channel dimension of the input
- // image feature maps.
- optional string FILTER_IN_CHANNEL = 5 [default = "FILTER_IN_CHANNEL"];
- // Describe a filter out channel dimension. This is the dimension
- // that is identical (int size) to the channel dimension of the output
- // image feature maps.
- optional string FILTER_OUT_CHANNEL = 6 [default = "FILTER_OUT_CHANNEL"];
- // Describe a filter spatial dimension.
- optional string FILTER_SPATIAL = 7 [default = "FILTER_SPATIAL"];
-}
-
// Types
//
// The standard ONNX data types.
@@ -435,8 +681,43 @@ message TypeProto {
message Tensor {
// This field MUST NOT have the value of UNDEFINED
+ // This field MUST have a valid TensorProto.DataType value
+ // This field MUST be present for this version of the IR.
+ optional int32 elem_type = 1;
+ optional TensorShapeProto shape = 2;
+ }
+
+ // repeated T
+ message Sequence {
+ // The type and optional shape of each element of the sequence.
+ // This field MUST be present for this version of the IR.
+ optional TypeProto elem_type = 1;
+ };
+
+ // map<K,V>
+ message Map {
+ // This field MUST have a valid TensorProto.DataType value
+ // This field MUST be present for this version of the IR.
+ // This field MUST refer to an integral type ([U]INT{8|16|32|64}) or STRING
+ optional int32 key_type = 1;
+ // This field MUST be present for this version of the IR.
+ optional TypeProto value_type = 2;
+ };
+
+ // wrapper for Tensor, Sequence, or Map
+ message Optional {
+ // The type and optional shape of the element wrapped.
+ // This field MUST be present for this version of the IR.
+ // Possible values correspond to OptionalProto.DataType enum
+ optional TypeProto elem_type = 1;
+ };
+
+
+ message SparseTensor {
+ // This field MUST NOT have the value of UNDEFINED
+ // This field MUST have a valid TensorProto.DataType value
// This field MUST be present for this version of the IR.
- optional TensorProto.DataType elem_type = 1;
+ optional int32 elem_type = 1;
optional TensorShapeProto shape = 2;
}
@@ -445,7 +726,31 @@ message TypeProto {
// The type of a tensor.
Tensor tensor_type = 1;
+ // NOTE: DNN-only implementations of ONNX MAY elect to not support non-tensor values
+ // as input and output to graphs and nodes. These types are needed to naturally
+ // support classical ML operators. DNN operators SHOULD restrict their input
+ // and output types to tensors.
+
+ // The type of a sequence.
+ Sequence sequence_type = 4;
+
+ // The type of a map.
+ Map map_type = 5;
+
+ // The type of an optional.
+ Optional optional_type = 9;
+
+
+ // Type of the sparse tensor
+ SparseTensor sparse_tensor_type = 8;
+
}
+
+ // An optional denotation can be used to denote the whole
+ // type with a standard semantic description as to what is
+ // stored inside. Refer to https://github.com/onnx/onnx/blob/main/docs/TypeDenotation.md#type-denotation-definition
+ // for pre-defined type denotations.
+ optional string denotation = 6;
}
// Operator Sets
@@ -461,4 +766,70 @@ message OperatorSetIdProto {
// The version of the operator set being identified.
// This field MUST be present in this version of the IR.
optional int64 version = 2;
-} \ No newline at end of file
+}
+
+// Operator/function status.
+enum OperatorStatus {
+ EXPERIMENTAL = 0;
+ STABLE = 1;
+}
+
+message FunctionProto {
+ // The name of the function, similar usage of op_type in OperatorProto.
+ // Combined with FunctionProto.domain, this forms the unique identity of
+ // the FunctionProto.
+ optional string name = 1;
+
+ // Deprecated since IR Version 8
+ // optional int64 since_version = 2;
+ reserved 2;
+ reserved "since_version";
+
+ // Deprecated since IR Version 8
+ // optional OperatorStatus status = 3;
+ reserved 3;
+ reserved "status";
+
+ // The inputs and outputs of the function.
+ repeated string input = 4;
+ repeated string output = 5;
+
+ // The attribute parameters of the function.
+ // It is for function parameters without default values.
+ repeated string attribute = 6;
+
+ // The attribute protos of the function.
+ // It is for function attributes with default values.
+ // A function attribute shall be represented either as
+ // a string attribute or an AttributeProto, not both.
+ repeated AttributeProto attribute_proto = 11;
+
+ // The nodes in the function.
+ repeated NodeProto node = 7;
+ // A human-readable documentation for this function. Markdown is allowed.
+ optional string doc_string = 8;
+
+ // The OperatorSets this function body (graph) relies on.
+ //
+ // All nodes in the function body (graph) will bind against the operator
+ // with the same-domain/same-op_type operator with the HIGHEST version
+ // in the referenced operator sets. This means at most one version can be relied
+ // upon for one domain.
+ //
+ // The operator sets imported by FunctionProto should be compatible with the ones
+ // imported by ModelProto. Example, if same operator set say 'A' is imported by FunctionProto
+ // and ModelProto then versions for the operator set may be different but,
+ // the operator schema returned for op_type, domain, version combination
+ // for both the versions should be same.
+
+ repeated OperatorSetIdProto opset_import = 9;
+
+ // The domain which this function belongs to. Combined with FunctionProto.name, this forms the unique identity of
+ // the FunctionProto.
+ optional string domain = 10;
+}
+
+
+// For using protobuf-lite
+option optimize_for = LITE_RUNTIME;
+
diff --git a/config-model/src/main/resources/schema/content.rnc b/config-model/src/main/resources/schema/content.rnc
index 5833b575a74..a73236454c6 100644
--- a/config-model/src/main/resources/schema/content.rnc
+++ b/config-model/src/main/resources/schema/content.rnc
@@ -370,7 +370,8 @@ Tuning = element tuning {
element threads { xsd:nonNegativeInteger }?
}? &
element feeding {
- element concurrency { xsd:double { minInclusive = "0.0" maxInclusive = "1.0" } }?
+ element concurrency { xsd:double { minInclusive = "0.0" maxInclusive = "1.0" } }? &
+ element niceness { xsd:double { minInclusive = "0.0" maxInclusive = "1.0" } }?
}? &
element removed-db {
element prune {
diff --git a/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java b/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java
index 84804bc48fa..19fe9e0038d 100644
--- a/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java
+++ b/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java
@@ -1183,9 +1183,9 @@ public class ModelProvisioningTest {
ContentCluster cluster = model.getContentClusters().get("bar");
List<StorageGroup> subGroups = cluster.getRootGroup().getSubgroups();
- assertEquals(2*3, cluster.redundancy().effectiveInitialRedundancy()); // Reduced from 3*3
- assertEquals(2*3, cluster.redundancy().effectiveFinalRedundancy()); // Reduced from 3*4
- assertEquals(2*3, cluster.redundancy().effectiveReadyCopies()); // Reduced from 3*3
+ assertEquals(2*3, cluster.getRedundancy().effectiveInitialRedundancy()); // Reduced from 3*3
+ assertEquals(2*3, cluster.getRedundancy().effectiveFinalRedundancy()); // Reduced from 3*4
+ assertEquals(2*3, cluster.getRedundancy().effectiveReadyCopies()); // Reduced from 3*3
assertEquals("2|2|*", cluster.getRootGroup().getPartitions().get()); // Reduced from 4|4|*
assertEquals(0, cluster.getRootGroup().getNodes().size());
assertEquals(3, subGroups.size());
@@ -1257,9 +1257,9 @@ public class ModelProvisioningTest {
assertEquals(numberOfHosts, model.getRoot().hostSystem().getHosts().size());
ContentCluster cluster = model.getContentClusters().get("bar");
- assertEquals(2, cluster.redundancy().effectiveInitialRedundancy());
- assertEquals(2, cluster.redundancy().effectiveFinalRedundancy());
- assertEquals(2, cluster.redundancy().effectiveReadyCopies());
+ assertEquals(2, cluster.getRedundancy().effectiveInitialRedundancy());
+ assertEquals(2, cluster.getRedundancy().effectiveFinalRedundancy());
+ assertEquals(2, cluster.getRedundancy().effectiveReadyCopies());
assertEquals("1|*", cluster.getRootGroup().getPartitions().get());
assertEquals(0, cluster.getRootGroup().getNodes().size());
assertEquals(2, cluster.getRootGroup().getSubgroups().size());
@@ -1287,9 +1287,9 @@ public class ModelProvisioningTest {
ContentCluster cluster = model.getContentClusters().get("bar");
assertEquals(2, cluster.getStorageCluster().getChildren().size());
- assertEquals(1, cluster.redundancy().effectiveInitialRedundancy());
- assertEquals(1, cluster.redundancy().effectiveFinalRedundancy());
- assertEquals(1, cluster.redundancy().effectiveReadyCopies());
+ assertEquals(1, cluster.getRedundancy().effectiveInitialRedundancy());
+ assertEquals(1, cluster.getRedundancy().effectiveFinalRedundancy());
+ assertEquals(1, cluster.getRedundancy().effectiveReadyCopies());
assertEquals(2, cluster.getRootGroup().getNodes().size());
assertEquals(0, cluster.getRootGroup().getSubgroups().size());
}
@@ -1324,9 +1324,9 @@ public class ModelProvisioningTest {
assertEquals(numberOfHosts, model.getRoot().hostSystem().getHosts().size());
ContentCluster cluster = model.getContentClusters().get("bar");
- assertEquals(4, cluster.redundancy().effectiveInitialRedundancy());
- assertEquals(4, cluster.redundancy().effectiveFinalRedundancy());
- assertEquals(4, cluster.redundancy().effectiveReadyCopies());
+ assertEquals(4, cluster.getRedundancy().effectiveInitialRedundancy());
+ assertEquals(4, cluster.getRedundancy().effectiveFinalRedundancy());
+ assertEquals(4, cluster.getRedundancy().effectiveReadyCopies());
assertEquals(4, cluster.getSearch().getIndexed().getDispatchSpec().getGroups().size());
assertEquals(4, cluster.getSearch().getIndexed().getSearchableCopies());
assertFalse(cluster.getRootGroup().getPartitions().isPresent());
@@ -1368,9 +1368,9 @@ public class ModelProvisioningTest {
assertEquals(numberOfHosts, model.getRoot().hostSystem().getHosts().size());
ContentCluster cluster = model.getContentClusters().get("bar");
- assertEquals(1, cluster.redundancy().effectiveInitialRedundancy()); // Reduced from 3*3
- assertEquals(1, cluster.redundancy().effectiveFinalRedundancy()); // Reduced from 3*4
- assertEquals(1, cluster.redundancy().effectiveReadyCopies()); // Reduced from 3*3
+ assertEquals(1, cluster.getRedundancy().effectiveInitialRedundancy()); // Reduced from 3*3
+ assertEquals(1, cluster.getRedundancy().effectiveFinalRedundancy()); // Reduced from 3*4
+ assertEquals(1, cluster.getRedundancy().effectiveReadyCopies()); // Reduced from 3*3
assertFalse(cluster.getRootGroup().getPartitions().isPresent()); // 1 group - > flattened -> no distribution
assertEquals(1, cluster.getRootGroup().getNodes().size());
assertEquals(0, cluster.getRootGroup().getSubgroups().size());
@@ -1473,9 +1473,9 @@ public class ModelProvisioningTest {
assertEquals(numberOfHosts, model.getRoot().hostSystem().getHosts().size());
ContentCluster cluster = model.getContentClusters().get("bar");
- assertEquals(1, cluster.redundancy().effectiveInitialRedundancy());
- assertEquals(1, cluster.redundancy().effectiveFinalRedundancy());
- assertEquals(1, cluster.redundancy().effectiveReadyCopies());
+ assertEquals(1, cluster.getRedundancy().effectiveInitialRedundancy());
+ assertEquals(1, cluster.getRedundancy().effectiveFinalRedundancy());
+ assertEquals(1, cluster.getRedundancy().effectiveReadyCopies());
assertEquals(1, cluster.getSearch().getIndexed().getDispatchSpec().getGroups().size());
assertFalse(cluster.getRootGroup().getPartitions().isPresent());
diff --git a/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/DomSchemaTuningBuilderTest.java b/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/DomSchemaTuningBuilderTest.java
index e3e9fc1a232..db15d7e0a78 100644
--- a/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/DomSchemaTuningBuilderTest.java
+++ b/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/DomSchemaTuningBuilderTest.java
@@ -285,9 +285,13 @@ public class DomSchemaTuningBuilderTest extends DomBuilderTest {
void requireThatWeCanParseFeedingTag() {
Tuning t = createTuning(parseXml("<feeding>",
"<concurrency>0.7</concurrency>",
+ "<niceness>0.3</niceness>",
"</feeding>"));
assertEquals(0.7, t.searchNode.feeding.concurrency, DELTA);
- assertEquals(getProtonCfg(t).feeding().concurrency(), 0.7, DELTA);
+ assertEquals(0.3, t.searchNode.feeding.niceness, DELTA);
+ var cfg = getProtonCfg(t);
+ assertEquals(cfg.feeding().concurrency(), 0.7, DELTA);
+ assertEquals(cfg.feeding().niceness(), 0.3, DELTA);
}
}
diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java
index 4ce7119f5f7..73bbd6ee464 100644
--- a/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java
+++ b/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java
@@ -38,6 +38,7 @@ import com.yahoo.vespa.model.routing.DocumentProtocol;
import com.yahoo.vespa.model.routing.Routing;
import com.yahoo.vespa.model.test.utils.ApplicationPackageUtils;
import com.yahoo.vespa.model.test.utils.VespaModelCreatorWithMockPkg;
+import com.yahoo.yolean.Exceptions;
import org.junit.jupiter.api.Test;
import java.util.Arrays;
@@ -470,7 +471,8 @@ public class ContentClusterTest extends ContentBaseTest {
new VespaModelCreatorWithMockPkg(getHosts(), xml, sds).create();
fail("Deploying without redundancy should fail");
} catch (IllegalArgumentException e) {
- assertTrue(e.getMessage().contains("Either <redundancy> or <min-redundancy> must be set"), e.getMessage());
+ assertEquals("In content cluster 'bar': Either <redundancy> or <min-redundancy> must be set",
+ Exceptions.toMessageString(e));
}
}
@@ -478,12 +480,13 @@ public class ContentClusterTest extends ContentBaseTest {
void testRedundancyFinalLessThanInitial() {
try {
parse(
- "<content version=\"1.0\" id=\"storage\">\n" +
- " <redundancy reply-after=\"4\">2</redundancy>\n" +
- " <group>" +
- " <node hostalias='node0' distribution-key='0' />" +
- " </group>" +
- "</content>"
+ """
+ <content version="1.0" id="storage">
+ <redundancy reply-after="4">2</redundancy>
+ <group>
+ <node hostalias='node0' distribution-key='0' />
+ </group>
+ </content>"""
);
fail("no exception thrown");
} catch (Exception e) { /* ignore */
@@ -494,17 +497,18 @@ public class ContentClusterTest extends ContentBaseTest {
void testReadyTooHigh() {
try {
parse(
- "<content version=\"1.0\" id=\"storage\">\n" +
- " <engine>" +
- " <proton>" +
- " <searchable-copies>3</searchable-copies>" +
- " </proton>" +
- " </engine>" +
- " <redundancy>2</redundancy>\n" +
- " <group>" +
- " <node hostalias='node0' distribution-key='0' />" +
- " </group>" +
- "</content>"
+ """
+ <content version="1.0" id="storage">
+ <engine>
+ <proton>
+ <searchable-copies>3</searchable-copies>
+ </proton>
+ </engine>
+ <redundancy>2</redundancy>
+ <group>
+ <node hostalias='node0' distribution-key='0' />
+ </group>
+ </content>"""
);
fail("no exception thrown");
} catch (Exception e) { /* ignore */
@@ -972,15 +976,17 @@ public class ContentClusterTest extends ContentBaseTest {
@Test
void reserved_document_name_throws_exception() {
- String xml = "<content version=\"1.0\" id=\"storage\">" +
- " <redundancy>1</redundancy>" +
- " <documents>" +
- " <document type=\"true\" mode=\"index\"/>" +
- " </documents>" +
- " <group>" +
- " <node distribution-key=\"0\" hostalias=\"mockhost\"/>" +
- " </group>" +
- "</content>";
+ String xml = """
+ <content version="1.0" id="storage">
+ <redundancy>1</redundancy>
+ <documents>
+ <document type="true" mode="index"/>
+ </documents>
+ <group>
+ <node distribution-key="0" hostalias="mockhost"/>
+ </group>
+ </content>
+ """;
List<String> sds = ApplicationPackageUtils.generateSchemas("true");
try {
@@ -991,6 +997,65 @@ public class ContentClusterTest extends ContentBaseTest {
}
}
+ @Test
+ void default_searchable_copies_indexing() {
+ String services = """
+ <content version="1.0" id="storage">
+ <redundancy>3</redundancy>
+ <documents>
+ <document type="music" mode="index"/>
+ </documents>
+ <group>
+ <node distribution-key="0" hostalias="mockhost"/>
+ <node distribution-key="1" hostalias="mockhost"/>
+ <node distribution-key="2" hostalias="mockhost"/>
+ </group>
+ </content>
+ """;
+ var model = new VespaModelCreatorWithMockPkg(null, services, ApplicationPackageUtils.generateSchemas("music")).create();
+ assertEquals(2, model.getContentClusters().get("storage").getRedundancy().readyCopies());
+ }
+
+ @Test
+ void default_searchable_copies_streaming() {
+ String services = """
+ <content version="1.0" id="storage">
+ <redundancy>3</redundancy>
+ <documents>
+ <document type="mail" mode="streaming"/>
+ </documents>
+ <group>
+ <node distribution-key="0" hostalias="mockhost"/>
+ <node distribution-key="1" hostalias="mockhost"/>
+ <node distribution-key="2" hostalias="mockhost"/>
+ </group>
+ </content>
+ """;
+ var model = new VespaModelCreatorWithMockPkg(null, services, ApplicationPackageUtils.generateSchemas("mail")).create();
+ assertEquals(3, model.getContentClusters().get("storage").getRedundancy().readyCopies());
+ }
+
+ /** Here there is no good choice. */
+ @Test
+ void default_searchable_copies_mixed() {
+ String services = """
+ <content version="1.0" id="storage">
+ <redundancy>3</redundancy>
+ <documents>
+ <document type="music" mode="index"/>
+ <document type="mail" mode="streaming"/>
+ </documents>
+ <group>
+ <node distribution-key="0" hostalias="mockhost"/>
+ <node distribution-key="1" hostalias="mockhost"/>
+ <node distribution-key="2" hostalias="mockhost"/>
+ </group>
+ </content>
+ """;
+ var model = new VespaModelCreatorWithMockPkg(null, services, ApplicationPackageUtils.generateSchemas("music", "mail")).create();
+ assertEquals(2, model.getContentClusters().get("storage").getRedundancy().readyCopies());
+ }
+
private void assertClusterHasBucketSpaceMappings(AllClustersBucketSpacesConfig config, String clusterId,
List<String> defaultSpaceTypes, List<String> globalSpaceTypes) {
AllClustersBucketSpacesConfig.Cluster cluster = config.cluster(clusterId);