aboutsummaryrefslogtreecommitdiffstats
path: root/config-model/src/main/java/com/yahoo
diff options
context:
space:
mode:
Diffstat (limited to 'config-model/src/main/java/com/yahoo')
-rw-r--r--config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java14
-rw-r--r--config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java4
-rw-r--r--config-model/src/main/java/com/yahoo/config/model/test/MockApplicationPackage.java2
-rw-r--r--config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java11
-rw-r--r--config-model/src/main/java/com/yahoo/schema/document/Matching.java18
-rw-r--r--config-model/src/main/java/com/yahoo/schema/processing/NGramMatch.java6
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryField.java28
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/DefaultOnnxModelCost.java120
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java4
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java51
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/application/validation/UrlConfigValidator.java50
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java2
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/search/IndexingScriptChangeMessageBuilder.java3
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java16
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java119
-rwxr-xr-xconfig-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java10
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java4
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java4
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java6
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java4
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java6
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java1
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java2
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/filedistribution/UserConfiguredFiles.java12
-rw-r--r--config-model/src/main/java/com/yahoo/vespa/model/ml/OnnxModelProbe.java40
25 files changed, 423 insertions, 114 deletions
diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java b/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java
index 4df7a76031a..5ab258ecce8 100644
--- a/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java
+++ b/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java
@@ -18,6 +18,7 @@ import com.yahoo.config.model.api.EndpointCertificateSecrets;
import com.yahoo.config.model.api.HostProvisioner;
import com.yahoo.config.model.api.Model;
import com.yahoo.config.model.api.ModelContext;
+import com.yahoo.config.model.api.OnnxModelCost;
import com.yahoo.config.model.api.Provisioned;
import com.yahoo.config.model.api.Reindexing;
import com.yahoo.config.model.api.ValidationParameters;
@@ -90,6 +91,7 @@ public class DeployState implements ConfigDefinitionStore {
private final Provisioned provisioned;
private final Reindexing reindexing;
private final ExecutorService executor;
+ private final OnnxModelCost onnxModelCost;
public static DeployState createTestState() {
return new Builder().build();
@@ -124,7 +126,8 @@ public class DeployState implements ConfigDefinitionStore {
boolean accessLoggingEnabledByDefault,
Optional<DockerImage> wantedDockerImageRepo,
Reindexing reindexing,
- Optional<ValidationOverrides> validationOverrides) {
+ Optional<ValidationOverrides> validationOverrides,
+ OnnxModelCost onnxModelCost) {
this.logger = deployLogger;
this.fileRegistry = fileRegistry;
this.executor = executor;
@@ -152,6 +155,7 @@ public class DeployState implements ConfigDefinitionStore {
this.now = now;
this.wantedDockerImageRepo = wantedDockerImageRepo;
this.reindexing = reindexing;
+ this.onnxModelCost = onnxModelCost;
}
public static HostProvisioner getDefaultModelHostProvisioner(ApplicationPackage applicationPackage) {
@@ -305,6 +309,8 @@ public class DeployState implements ConfigDefinitionStore {
public Optional<Reindexing> reindexing() { return Optional.ofNullable(reindexing); }
+ public OnnxModelCost onnxModelCost() { return onnxModelCost; }
+
public boolean isHostedTenantApplication(ApplicationType type) {
boolean isTesterApplication = getProperties().applicationId().instance().isTester();
return isHosted() && type == ApplicationType.DEFAULT && !isTesterApplication;
@@ -333,6 +339,7 @@ public class DeployState implements ConfigDefinitionStore {
private QueryProfiles queryProfiles = null;
private Reindexing reindexing = null;
private Optional<ValidationOverrides> validationOverrides = Optional.empty();
+ private OnnxModelCost onnxModelCost = OnnxModelCost.disabled();
public Builder() {}
@@ -450,6 +457,8 @@ public class DeployState implements ConfigDefinitionStore {
return this;
}
+ public Builder onnxModelCost(OnnxModelCost instance) { this.onnxModelCost = instance; return this; }
+
public DeployState build() {
return build(new ValidationParameters());
}
@@ -482,7 +491,8 @@ public class DeployState implements ConfigDefinitionStore {
accessLoggingEnabledByDefault,
wantedDockerImageRepo,
reindexing,
- validationOverrides);
+ validationOverrides,
+ onnxModelCost);
}
}
diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java
index 815c32e3c8f..77356292f9a 100644
--- a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java
+++ b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java
@@ -86,6 +86,7 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea
private boolean allowUserFilters = true;
private List<DataplaneToken> dataplaneTokens;
private int contentLayerMetadataFeatureLevel = 0;
+ private boolean dynamicHeapSize = false;
@Override public ModelContext.FeatureFlags featureFlags() { return this; }
@Override public boolean multitenant() { return multitenant; }
@@ -144,6 +145,7 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea
@Override public boolean enableGlobalPhase() { return true; } // Enable global-phase by default for unit tests only
@Override public List<DataplaneToken> dataplaneTokens() { return dataplaneTokens; }
@Override public int contentLayerMetadataFeatureLevel() { return contentLayerMetadataFeatureLevel; }
+ @Override public boolean dynamicHeapSize() { return dynamicHeapSize; }
public TestProperties sharedStringRepoNoReclaim(boolean sharedStringRepoNoReclaim) {
this.sharedStringRepoNoReclaim = sharedStringRepoNoReclaim;
@@ -379,6 +381,8 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea
return this;
}
+ public TestProperties setDynamicHeapSize(boolean b) { this.dynamicHeapSize = b; return this; }
+
public static class Spec implements ConfigServerSpec {
private final String hostName;
diff --git a/config-model/src/main/java/com/yahoo/config/model/test/MockApplicationPackage.java b/config-model/src/main/java/com/yahoo/config/model/test/MockApplicationPackage.java
index dbcd1cea2fa..342b5f243e7 100644
--- a/config-model/src/main/java/com/yahoo/config/model/test/MockApplicationPackage.java
+++ b/config-model/src/main/java/com/yahoo/config/model/test/MockApplicationPackage.java
@@ -488,6 +488,8 @@ public class MockApplicationPackage implements ApplicationPackage {
throw new UnsupportedOperationException();
}
+ @Override public long getSize() { return file.length(); }
+
@Override
public int compareTo(ApplicationFile other) {
return this.getPath().getName().compareTo((other).getPath().getName());
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java b/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java
index 7d88985b2d5..f6a022e9930 100644
--- a/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java
+++ b/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java
@@ -82,7 +82,7 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer {
}
// Commands for summary fields
- // TODO: Move to fieldinfo and implement differently. This is not right
+ // TODO: Move to schemainfo and implement differently
for (SummaryField summaryField : schema.getUniqueNamedSummaryFields().values()) {
if (summaryField.getTransform().isTeaser()) {
addIndexCommand(summaryField.getName(), CMD_DYNTEASER);
@@ -90,6 +90,13 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer {
if (summaryField.getTransform().isBolded()) {
addIndexCommand(summaryField.getName(), CMD_HIGHLIGHT);
}
+
+ var sourceField = schema.getField(summaryField.getSourceField()); // Take the first as they should all be consistent
+ if (sourceField != null && sourceField.getMatching().getType().equals(MatchType.GRAM)) {
+ addIndexCommand(summaryField.getName(),
+ "ngram " + (sourceField.getMatching().getGramSize().orElse(NGramMatch.DEFAULT_GRAM_SIZE)));
+
+ }
}
}
@@ -452,7 +459,7 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer {
iiB.command(
new IndexInfoConfig.Indexinfo.Command.Builder()
.indexname(fieldSet.getName())
- .command("ngram "+(fieldSetMatching.getGramSize()>0 ? fieldSetMatching.getGramSize() : NGramMatch.DEFAULT_GRAM_SIZE)));
+ .command("ngram " + fieldSetMatching.getGramSize().orElse(NGramMatch.DEFAULT_GRAM_SIZE)));
} else if (fieldSetMatching.getType().equals(MatchType.TEXT)) {
}
diff --git a/config-model/src/main/java/com/yahoo/schema/document/Matching.java b/config-model/src/main/java/com/yahoo/schema/document/Matching.java
index 1fe947d672b..264fd0ff3b9 100644
--- a/config-model/src/main/java/com/yahoo/schema/document/Matching.java
+++ b/config-model/src/main/java/com/yahoo/schema/document/Matching.java
@@ -1,7 +1,10 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.document;
+import com.yahoo.schema.processing.NGramMatch;
+
import java.io.Serializable;
+import java.util.OptionalInt;
/**
* Defines how a field should be matched.
@@ -23,8 +26,8 @@ public class Matching implements Cloneable, Serializable {
private boolean algorithmUserSet = false;
- /** The gram size is the n in n-gram, or -1 if not set. Should only be set with gram matching. */
- private int gramSize = -1;
+ /** The gram size is the n in n-gram, or empty if not set. Should only be set with gram matching. */
+ private OptionalInt gramSize = OptionalInt.empty();
/** Maximum number of characters to consider when searching in this field. Used for limiting resources, especially in streaming search. */
private Integer maxLength;
@@ -67,10 +70,10 @@ public class Matching implements Cloneable, Serializable {
public boolean isSuffix() { return algorithm == MatchAlgorithm.SUFFIX; }
- /** Returns the gram size, or -1 if not set. Should only be set with gram matching. */
- public int getGramSize() { return gramSize; }
+ /** Returns the gram size, or empty if not set. Should only be set with gram matching. */
+ public OptionalInt getGramSize() { return gramSize; }
- public void setGramSize(int gramSize) { this.gramSize=gramSize; }
+ public void setGramSize(int gramSize) { this.gramSize = OptionalInt.of(gramSize); }
/**
* Merge data from another matching object
@@ -107,10 +110,11 @@ public class Matching implements Cloneable, Serializable {
@Override
public String toString() {
- return type + " matching [" + (type==MatchType.GRAM ? "gram size " + gramSize : "supports " + algorithm) +
- "], [exact-terminator "+exactMatchTerminator+"]";
+ return type + " matching [" + (type == MatchType.GRAM ? "gram size " + gramSize.orElse(NGramMatch.DEFAULT_GRAM_SIZE) : "supports " + algorithm) +
+ "], [exact-terminator " + exactMatchTerminator + "]";
}
+ @Override
public Matching clone() {
try {
return (Matching)super.clone();
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/NGramMatch.java b/config-model/src/main/java/com/yahoo/schema/processing/NGramMatch.java
index f1ff910be43..6ec5428156f 100644
--- a/config-model/src/main/java/com/yahoo/schema/processing/NGramMatch.java
+++ b/config-model/src/main/java/com/yahoo/schema/processing/NGramMatch.java
@@ -31,7 +31,7 @@ public class NGramMatch extends Processor {
for (SDField field : schema.allConcreteFields()) {
if (field.getMatching().getType().equals(MatchType.GRAM))
implementGramMatch(schema, field, validate);
- else if (validate && field.getMatching().getGramSize() >= 0)
+ else if (validate && field.getMatching().getGramSize().isPresent())
throw new IllegalArgumentException("gram-size can only be set when the matching mode is 'gram'");
}
}
@@ -40,9 +40,7 @@ public class NGramMatch extends Processor {
if (validate && field.doesAttributing() && ! field.doesIndexing())
throw new IllegalArgumentException("gram matching is not supported with attributes, use 'index' in indexing");
- int n = field.getMatching().getGramSize();
- if (n < 0)
- n = DEFAULT_GRAM_SIZE; // not set - use default gram size
+ int n = field.getMatching().getGramSize().orElse(DEFAULT_GRAM_SIZE);
if (validate && n == 0)
throw new IllegalArgumentException("Illegal gram size in " + field + ": Must be at least 1");
field.getNormalizing().inferCodepoint();
diff --git a/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryField.java b/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryField.java
index 7439e65dee6..49cd36e4bc2 100644
--- a/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryField.java
+++ b/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryField.java
@@ -17,9 +17,7 @@ import static com.yahoo.text.Lowercase.toLowerCase;
*/
public class SummaryField extends Field implements Cloneable, TypedKey {
- /**
- * A source (field name).
- */
+ /** A source (field name). */
public static class Source implements Serializable {
private final String name;
@@ -38,12 +36,8 @@ public class SummaryField extends Field implements Cloneable, TypedKey {
@Override
public boolean equals(Object obj) {
- if (!(obj instanceof Source)) {
- return false;
- }
- Source other = (Source)obj;
- return name.equals(other.name) &&
- override == other.override;
+ if (!(obj instanceof Source other)) return false;
+ return name.equals(other.name) && override == other.override;
}
@Override
@@ -67,14 +61,14 @@ public class SummaryField extends Field implements Cloneable, TypedKey {
*/
private Set<Source> sources = new java.util.LinkedHashSet<>();
- private Set<String> destinations=new java.util.LinkedHashSet<>();
+ private Set<String> destinations =new java.util.LinkedHashSet<>();
/** True if this field was defined implicitly */
- private boolean implicit=false;
+ private boolean implicit = false;
/** Creates a summary field with NONE as transform */
public SummaryField(String name, DataType type) {
- this(name,type, SummaryTransform.NONE);
+ this(name, type, SummaryTransform.NONE);
}
/** Creates a summary field with NONE as transform */
@@ -97,7 +91,7 @@ public class SummaryField extends Field implements Cloneable, TypedKey {
public boolean isImplicit() { return implicit; }
public void setTransform(SummaryTransform transform) {
- this.transform=transform;
+ this.transform = transform;
if (SummaryTransform.DYNAMICTEASER.equals(transform) || SummaryTransform.BOLDED.equals(transform)) {
// This is the kind of logic we want to have in processing,
// but can't because of deriveDocuments mode, which doesn't run
@@ -110,9 +104,9 @@ public class SummaryField extends Field implements Cloneable, TypedKey {
/** Returns the first source field of this, or null if the source field is not present */
public String getSourceField() {
- String sourceName=getName();
- if (sources.size()>0)
- sourceName=sources.iterator().next().getName();
+ String sourceName = getName();
+ if ( ! sources.isEmpty())
+ sourceName = sources.iterator().next().getName();
return sourceName;
}
@@ -137,7 +131,7 @@ public class SummaryField extends Field implements Cloneable, TypedKey {
/** Returns the first source name of this, or the field name if no source has been set */
public String getSingleSource() {
- if (sources.size()==0) return getName();
+ if (sources.isEmpty()) return getName();
return sources.iterator().next().getName();
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/DefaultOnnxModelCost.java b/config-model/src/main/java/com/yahoo/vespa/model/DefaultOnnxModelCost.java
new file mode 100644
index 00000000000..fddf8409376
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/vespa/model/DefaultOnnxModelCost.java
@@ -0,0 +1,120 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+package com.yahoo.vespa.model;
+
+import com.yahoo.config.ModelReference;
+import com.yahoo.config.application.api.ApplicationFile;
+import com.yahoo.config.application.api.ApplicationPackage;
+import com.yahoo.config.application.api.DeployLogger;
+import com.yahoo.config.model.api.OnnxModelCost;
+import com.yahoo.vespa.model.ml.OnnxModelProbe;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.time.Duration;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.logging.Level;
+
+import static com.yahoo.yolean.Exceptions.uncheck;
+
+/**
+ * Aggregates estimated footprint of configured ONNX models.
+ *
+ * @author bjorncs
+ */
+public class DefaultOnnxModelCost implements OnnxModelCost {
+
+ @Override
+ public Calculator newCalculator(ApplicationPackage appPkg, DeployLogger logger) {
+ return new CalculatorImpl(appPkg, logger);
+ }
+
+ private static class CalculatorImpl implements Calculator {
+ private final DeployLogger log;
+ private final ApplicationPackage appPkg;
+
+ private final ConcurrentMap<String, Long> modelCost = new ConcurrentHashMap<>();
+
+ private CalculatorImpl(ApplicationPackage appPkg, DeployLogger log) {
+ this.appPkg = appPkg;
+ this.log = log;
+ }
+
+ @Override
+ public long aggregatedModelCostInBytes() {
+ return modelCost.values().stream().mapToLong(Long::longValue).sum();
+ }
+
+ @Override
+ public void registerModel(ApplicationFile f) {
+ String path = f.getPath().getRelative();
+ if (alreadyAnalyzed(path)) return;
+ log.log(Level.FINE, () -> "Register model '%s'".formatted(path));
+ if (f.exists() && appPkg != null) {
+ var memoryStats = OnnxModelProbe.probeMemoryStats(appPkg, f.getPath()).orElse(null);
+ if (memoryStats != null) {
+ log.log(Level.FINE, () -> "Register model '%s' with memory stats: %s".formatted(path, memoryStats));
+ deductJvmHeapSizeWithModelCost(f.getSize(), memoryStats, path);
+ } else {
+ deductJvmHeapSizeWithModelCost(f.getSize(), path);
+ }
+ } else {
+ deductJvmHeapSizeWithModelCost(0, path);
+ }
+ }
+
+ @Override
+ public void registerModel(ModelReference ref) {
+ log.log(Level.FINE, () -> "Register model '%s'".formatted(ref.toString()));
+ if (ref.path().isPresent()) {
+ var path = Paths.get(ref.path().get().value());
+ var source = path.getFileName().toString();
+ if (alreadyAnalyzed(source)) return;
+ deductJvmHeapSizeWithModelCost(uncheck(() -> Files.exists(path) ? Files.size(path) : 0), source);
+ } else if (ref.url().isPresent()) deductJvmHeapSizeWithModelCost(URI.create(ref.url().get().value()));
+ else throw new IllegalStateException(ref.toString());
+ }
+
+ private void deductJvmHeapSizeWithModelCost(URI uri) {
+ if (alreadyAnalyzed(uri.toString())) return;
+ if (uri.getScheme().equals("http") || uri.getScheme().equals("https")) {
+ try {
+ var timeout = Duration.ofSeconds(3);
+ var httpClient = HttpClient.newBuilder().connectTimeout(timeout).build();
+ var request = HttpRequest.newBuilder(uri).timeout(timeout).method("HEAD", HttpRequest.BodyPublishers.noBody()).build();
+ var response = httpClient.send(request, HttpResponse.BodyHandlers.discarding());
+ var contentLength = response.headers().firstValue("Content-Length").orElse("0");
+ log.log(Level.FINE, () -> "Got content length '%s' for '%s'".formatted(contentLength, uri));
+ deductJvmHeapSizeWithModelCost(Long.parseLong(contentLength), uri.toString());
+ } catch (IllegalArgumentException | InterruptedException | IOException e) {
+ log.log(Level.INFO, () -> "Failed to get model size for '%s': %s".formatted(uri, e.getMessage()), e);
+ }
+ }
+ }
+
+ private void deductJvmHeapSizeWithModelCost(long size, String source) {
+ long fallbackModelSize = 1024*1024*1024;
+ long estimatedCost = Math.max(300*1024*1024, (long) (1.4D * (size > 0 ? size : fallbackModelSize) + 100*1024*1024));
+ log.log(Level.FINE, () ->
+ "Estimated %s footprint for model of size %s ('%s')".formatted(mb(estimatedCost), mb(size), source));
+ modelCost.put(source, estimatedCost);
+ }
+
+ private void deductJvmHeapSizeWithModelCost(long size, OnnxModelProbe.MemoryStats stats, String source) {
+ long estimatedCost = (long)(1.1D * stats.vmSize());
+ log.log(Level.FINE, () ->
+ "Estimated %s footprint for model of size %s ('%s')".formatted(mb(estimatedCost), mb(size), source));
+ modelCost.put(source, estimatedCost);
+ }
+
+ private boolean alreadyAnalyzed(String source) { return modelCost.containsKey(source); }
+
+ private static String mb(long bytes) { return "%dMB".formatted(bytes / (1024*1024)); }
+ }
+}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java b/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java
index 28ff8dff620..727a18aee2c 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java
@@ -21,6 +21,7 @@ import com.yahoo.config.model.api.Model;
import com.yahoo.config.model.api.ModelContext;
import com.yahoo.config.model.api.ModelCreateResult;
import com.yahoo.config.model.api.ModelFactory;
+import com.yahoo.config.model.api.OnnxModelCost;
import com.yahoo.config.model.api.ValidationParameters;
import com.yahoo.config.model.application.provider.ApplicationPackageXmlFilesValidator;
import com.yahoo.config.model.builder.xml.ConfigModelBuilder;
@@ -197,7 +198,8 @@ public class VespaModelFactory implements ModelFactory {
.zone(zone)
.now(clock.instant())
.wantedNodeVespaVersion(modelContext.wantedNodeVespaVersion())
- .wantedDockerImageRepo(modelContext.wantedDockerImageRepo());
+ .wantedDockerImageRepo(modelContext.wantedDockerImageRepo())
+ .onnxModelCost(modelContext.properties().hostedVespa() ? new DefaultOnnxModelCost() : OnnxModelCost.disabled());
modelContext.previousModel().ifPresent(builder::previousModel);
modelContext.reindexing().ifPresent(builder::reindexing);
return builder.build(validationParameters);
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java
new file mode 100644
index 00000000000..9e231239521
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java
@@ -0,0 +1,51 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+package com.yahoo.vespa.model.application.validation;
+
+import com.yahoo.config.model.deploy.DeployState;
+import com.yahoo.vespa.model.VespaModel;
+
+import java.util.logging.Level;
+
+/**
+ * Validates that the container node flavour has enough resources to run configured ONNX models.
+ *
+ * @author bjorncs
+ */
+public class JvmHeapSizeValidator extends Validator {
+
+ @Override
+ public void validate(VespaModel model, DeployState ds) {
+ if (!ds.featureFlags().dynamicHeapSize()) return;
+ if (!ds.isHostedTenantApplication(model.getAdmin().getApplicationType())) return;
+
+ model.getContainerClusters().forEach((clusterId, appCluster) -> {
+ var mp = appCluster.getMemoryPercentage().orElse(null);
+ if (mp == null) return;
+ if (mp.availableMemoryGb().isEmpty()) {
+ ds.getDeployLogger().log(Level.FINE, "Host resources unknown or percentage overridden with 'allocated-memory'");
+ return;
+ }
+ long jvmModelCost = appCluster.onnxModelCost().aggregatedModelCostInBytes();
+ if (jvmModelCost > 0) {
+ int percentLimit = 15;
+ if (mp.percentage() < percentLimit) {
+ throw new IllegalArgumentException(
+ ("Allocated percentage of memory of JVM in cluster '%s' is too low (%d%% < %d%%). " +
+ "Estimated cost of ONNX models is %.2fGB. Either use a node flavor with more memory or use less expensive models. " +
+ "You may override this validation by specifying 'allocated-memory' (https://docs.vespa.ai/en/performance/container-tuning.html#jvm-heap-size).")
+ .formatted(clusterId, mp.percentage(), percentLimit, jvmModelCost / (1024D * 1024 * 1024)));
+ }
+ double gbLimit = 0.6;
+ double availableMemoryGb = mp.availableMemoryGb().getAsDouble();
+ if (availableMemoryGb < gbLimit) {
+ throw new IllegalArgumentException(
+ ("Allocated memory to JVM in cluster '%s' is too low (%.2fGB < %.2fGB). " +
+ "Estimated cost of ONNX models is %.2fGB. Either use a node flavor with more memory or use less expensive models. " +
+ "You may override this validation by specifying 'allocated-memory' (https://docs.vespa.ai/en/performance/container-tuning.html#jvm-heap-size).")
+ .formatted(clusterId, availableMemoryGb, gbLimit, jvmModelCost / (1024D * 1024 * 1024)));
+ }
+ }
+ });
+ }
+}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/UrlConfigValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/UrlConfigValidator.java
new file mode 100644
index 00000000000..d9dd3729bd3
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/UrlConfigValidator.java
@@ -0,0 +1,50 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.model.application.validation;
+
+import com.yahoo.config.model.deploy.DeployState;
+import com.yahoo.vespa.model.VespaModel;
+import com.yahoo.vespa.model.container.ApplicationContainerCluster;
+
+/**
+ * Validates that config using s3:// urls is used in public system and with nodes that are exclusive.
+ *
+ * @author hmusum
+ */
+public class UrlConfigValidator extends Validator {
+
+ @Override
+ public void validate(VespaModel model, DeployState state) {
+ if (! state.isHostedTenantApplication(model.getAdmin().getApplicationType())) return;
+
+ model.getContainerClusters().forEach((__, cluster) -> {
+ var isExclusive = hasExclusiveNodes(model, cluster);
+ validateS3UlsInConfig(state, cluster, isExclusive);
+ });
+ }
+
+ private static boolean hasExclusiveNodes(VespaModel model, ApplicationContainerCluster cluster) {
+ return model.hostSystem().getHosts()
+ .stream()
+ .flatMap(hostResource -> hostResource.spec().membership().stream())
+ .filter(membership -> membership.cluster().id().equals(cluster.id()))
+ .anyMatch(membership -> membership.cluster().isExclusive());
+ }
+
+ private static void validateS3UlsInConfig(DeployState state, ApplicationContainerCluster cluster, boolean isExclusive) {
+ if (hasS3UrlInConfig(cluster)) {
+ // TODO: Would be even better if we could add which config/field the url is set for in the error message
+ String message = "Found s3:// urls in config for container cluster " + cluster.getName();
+ if ( ! state.zone().system().isPublic())
+ throw new IllegalArgumentException(message + ". This is only supported in public systems");
+ else if ( ! isExclusive)
+ throw new IllegalArgumentException(message + ". Nodes in the cluster need to be 'exclusive'," +
+ " see https://cloud.vespa.ai/en/reference/services#nodes");
+ }
+ }
+
+ private static boolean hasS3UrlInConfig(ApplicationContainerCluster cluster) {
+ return cluster.userConfiguredUrls().all().stream()
+ .anyMatch(url -> url.startsWith("s3://"));
+ }
+
+}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java
index 53a553ee624..30aafe67be7 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java
@@ -87,6 +87,8 @@ public class Validation {
new AccessControlFilterExcludeValidator().validate(model, deployState);
new CloudUserFilterValidator().validate(model, deployState);
new CloudHttpConnectorValidator().validate(model, deployState);
+ new UrlConfigValidator().validate(model, deployState);
+ new JvmHeapSizeValidator().validate(model, deployState);
additionalValidators.forEach(v -> v.validate(model, deployState));
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/search/IndexingScriptChangeMessageBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/search/IndexingScriptChangeMessageBuilder.java
index bbfa939f8a3..f265f2d09a0 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/search/IndexingScriptChangeMessageBuilder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/search/IndexingScriptChangeMessageBuilder.java
@@ -7,6 +7,7 @@ import com.yahoo.schema.document.Matching;
import com.yahoo.schema.document.MatchType;
import com.yahoo.schema.document.NormalizeLevel;
import com.yahoo.schema.document.Stemming;
+import com.yahoo.schema.processing.NGramMatch;
import com.yahoo.vespa.documentmodel.SummaryField;
import com.yahoo.vespa.documentmodel.SummaryTransform;
@@ -89,7 +90,7 @@ public class IndexingScriptChangeMessageBuilder {
MatchType type = matching.getType();
String retval = type.getName();
if (type == MatchType.GRAM) {
- retval += " (size " + matching.getGramSize() + ")";
+ retval += " (size " + matching.getGramSize().orElse(NGramMatch.DEFAULT_GRAM_SIZE) + ")";
}
return retval;
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java
index 7501f6162c7..9ecd359f90d 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java
@@ -7,11 +7,12 @@ import com.yahoo.config.model.producer.AnyConfigProducer;
import com.yahoo.config.model.producer.TreeConfigProducer;
import com.yahoo.osgi.provider.model.ComponentModel;
import com.yahoo.text.XML;
-import com.yahoo.vespa.model.container.component.HuggingFaceEmbedder;
-import com.yahoo.vespa.model.container.component.HuggingFaceTokenizer;
+import com.yahoo.vespa.model.container.ApplicationContainerCluster;
import com.yahoo.vespa.model.container.component.BertEmbedder;
import com.yahoo.vespa.model.container.component.ColBertEmbedder;
import com.yahoo.vespa.model.container.component.Component;
+import com.yahoo.vespa.model.container.component.HuggingFaceEmbedder;
+import com.yahoo.vespa.model.container.component.HuggingFaceTokenizer;
import com.yahoo.vespa.model.container.xml.BundleInstantiationSpecificationBuilder;
import org.w3c.dom.Element;
@@ -35,19 +36,20 @@ public class DomComponentBuilder extends VespaDomBuilder.DomConfigProducerBuilde
@Override
protected Component<? super Component<?, ?>, ?> doBuild(DeployState deployState, TreeConfigProducer<AnyConfigProducer> ancestor, Element spec) {
- var component = buildComponent(spec, deployState);
+ var component = buildComponent(spec, deployState, ancestor);
addChildren(deployState, ancestor, spec, component);
return component;
}
- private Component<? super Component<?, ?>, ?> buildComponent(Element spec, DeployState state) {
+ private Component<? super Component<?, ?>, ?> buildComponent(
+ Element spec, DeployState state, TreeConfigProducer<AnyConfigProducer> ancestor) {
if (spec.hasAttribute("type")) {
var type = spec.getAttribute("type");
return switch (type) {
- case "hugging-face-embedder" -> new HuggingFaceEmbedder(spec, state);
+ case "hugging-face-embedder" -> new HuggingFaceEmbedder((ApplicationContainerCluster)ancestor, spec, state);
case "hugging-face-tokenizer" -> new HuggingFaceTokenizer(spec, state);
- case "bert-embedder" -> new BertEmbedder(spec, state);
- case "colbert-embedder" -> new ColBertEmbedder(spec, state);
+ case "colbert-embedder" -> new ColBertEmbedder((ApplicationContainerCluster)ancestor, spec, state);
+ case "bert-embedder" -> new BertEmbedder((ApplicationContainerCluster)ancestor, spec, state);
default -> throw new IllegalArgumentException("Unknown component type '%s'".formatted(type));
};
} else {
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java
index b9021912244..4e97b20a3a9 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java
@@ -8,14 +8,14 @@ import com.yahoo.component.ComponentId;
import com.yahoo.component.ComponentSpecification;
import com.yahoo.config.FileReference;
import com.yahoo.config.application.api.ComponentInfo;
+import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.config.model.api.ApplicationClusterEndpoint;
import com.yahoo.config.model.api.ApplicationClusterInfo;
-import com.yahoo.config.model.api.ContainerEndpoint;
import com.yahoo.config.model.api.Model;
+import com.yahoo.config.model.api.OnnxModelCost;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.config.model.producer.TreeConfigProducer;
import com.yahoo.config.provision.AllocatedHosts;
-import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.HostSpec;
import com.yahoo.container.bundle.BundleInstantiationSpecification;
import com.yahoo.container.di.config.ApplicationBundlesConfig;
@@ -40,13 +40,16 @@ import com.yahoo.vespa.model.container.component.SystemBindingPattern;
import com.yahoo.vespa.model.container.configserver.ConfigserverCluster;
import com.yahoo.vespa.model.filedistribution.UserConfiguredFiles;
+import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
+import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
+import java.util.logging.Level;
import java.util.stream.Collectors;
import static com.yahoo.vespa.model.container.docproc.DocprocChains.DOCUMENT_TYPE_MANAGER_CLASS;
@@ -82,6 +85,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
private final Set<FileReference> applicationBundles = new LinkedHashSet<>();
private final Set<String> previousHosts;
+ private final OnnxModelCost.Calculator onnxModelCost;
+ private final DeployLogger logger;
private ContainerModelEvaluation modelEvaluation;
@@ -92,6 +97,7 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
private int zookeeperSessionTimeoutSeconds = 30;
private final int transport_events_before_wakeup;
private final int transport_connections_per_target;
+ private final boolean dynamicHeapSize;
/** The heap size % of total memory available to the JVM process. */
private final int heapSizePercentageOfAvailableMemory;
@@ -100,9 +106,12 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
private List<ApplicationClusterEndpoint> endpoints = List.of();
+ private UserConfiguredUrls userConfiguredUrls = new UserConfiguredUrls();
+
public ApplicationContainerCluster(TreeConfigProducer<?> parent, String configSubId, String clusterId, DeployState deployState) {
super(parent, configSubId, clusterId, deployState, true, 10);
this.tlsClientAuthority = deployState.tlsClientAuthority();
+ dynamicHeapSize = deployState.featureFlags().dynamicHeapSize();
previousHosts = Collections.unmodifiableSet(deployState.getPreviousModel().stream()
.map(Model::allocatedHosts)
.map(AllocatedHosts::getHosts)
@@ -125,8 +134,13 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
heapSizePercentageOfAvailableMemory = deployState.featureFlags().heapSizePercentage() > 0
? Math.min(99, deployState.featureFlags().heapSizePercentage())
: defaultHeapSizePercentageOfAvailableMemory;
+ onnxModelCost = deployState.onnxModelCost().newCalculator(
+ deployState.getApplicationPackage(), deployState.getDeployLogger());
+ logger = deployState.getDeployLogger();
}
+ public UserConfiguredUrls userConfiguredUrls() { return userConfiguredUrls; }
+
@Override
protected void doPrepare(DeployState deployState) {
super.doPrepare(deployState);
@@ -147,7 +161,9 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
if (containers.isEmpty()) return;
// Files referenced from user configs to all components.
- UserConfiguredFiles files = new UserConfiguredFiles(deployState.getFileRegistry(), deployState.getDeployLogger());
+ UserConfiguredFiles files = new UserConfiguredFiles(deployState.getFileRegistry(),
+ deployState.getDeployLogger(),
+ userConfiguredUrls);
for (Component<?, ?> component : getAllComponents()) {
files.register(component);
}
@@ -182,19 +198,25 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
public void setMemoryPercentage(Integer memoryPercentage) { this.memoryPercentage = memoryPercentage; }
@Override
- public Optional<Integer> getMemoryPercentage() {
- if (memoryPercentage != null) return Optional.of(memoryPercentage);
+ public Optional<JvmMemoryPercentage> getMemoryPercentage() {
+ if (memoryPercentage != null) return Optional.of(JvmMemoryPercentage.of(memoryPercentage));
if (isHostedVespa()) {
int availableMemoryPercentage = getHostClusterId().isPresent() ?
heapSizePercentageOfTotalAvailableMemoryWhenCombinedCluster :
heapSizePercentageOfAvailableMemory;
- if (getContainers().isEmpty()) return Optional.of(availableMemoryPercentage); // Node memory is not known
+ if (getContainers().isEmpty()) return Optional.of(JvmMemoryPercentage.of(availableMemoryPercentage)); // Node memory is not known
// Node memory is known so convert available memory percentage to node memory percentage
- double totalMemory = getContainers().get(0).getHostResource().realResources().memoryGb();
- double availableMemory = totalMemory - Host.memoryOverheadGb;
- return Optional.of((int) (availableMemory / totalMemory * availableMemoryPercentage));
+ double totalMemory = dynamicHeapSize
+ ? getContainers().stream().mapToDouble(c -> c.getHostResource().realResources().memoryGb()).min().orElseThrow()
+ : getContainers().get(0).getHostResource().realResources().memoryGb();
+ double jvmHeapDeductionGb = dynamicHeapSize ? onnxModelCost.aggregatedModelCostInBytes() / (1024D * 1024 * 1024) : 0;
+ double availableMemory = Math.max(0, totalMemory - Host.memoryOverheadGb - jvmHeapDeductionGb);
+ int memoryPercentage = (int) (availableMemory / totalMemory * availableMemoryPercentage);
+ logger.log(Level.FINE, () -> "memoryPercentage=%d, availableMemory=%f, totalMemory=%f, availableMemoryPercentage=%d, jvmHeapDeductionGb=%f"
+ .formatted(memoryPercentage, availableMemory, totalMemory, availableMemoryPercentage, jvmHeapDeductionGb));
+ return Optional.of(JvmMemoryPercentage.of(memoryPercentage, availableMemory));
}
return Optional.empty();
}
@@ -203,49 +225,23 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
private void createEndpoints(DeployState deployState) {
if (!deployState.isHosted()) return;
if (deployState.getProperties().applicationId().instance().isTester()) return;
+ // Add endpoints provided by the controller
+ List<String> hosts = getContainers().stream().map(AbstractService::getHostName).sorted().toList();
List<ApplicationClusterEndpoint> endpoints = new ArrayList<>();
-
- List<String> hosts = getContainers().stream()
- .map(AbstractService::getHostName)
- .sorted()
- .toList();
-
- Set<ContainerEndpoint> endpointsFromController = deployState.getEndpoints();
- // Add zone-scoped endpoints if not provided by the controller
- // TODO(mpolden): Remove this when controller always includes zone-scope endpoints, and config models < 8.230 are gone
- if (endpointsFromController.stream().noneMatch(endpoint -> endpoint.scope() == ApplicationClusterEndpoint.Scope.zone)) {
- for (String suffix : deployState.getProperties().zoneDnsSuffixes()) {
- ApplicationClusterEndpoint.DnsName l4Name = ApplicationClusterEndpoint.DnsName.sharedL4NameFrom(
- deployState.zone().system(),
- ClusterSpec.Id.from(getName()),
- deployState.getProperties().applicationId(),
- suffix);
- endpoints.add(ApplicationClusterEndpoint.builder()
- .zoneScope()
- .sharedL4Routing()
- .dnsName(l4Name)
- .hosts(hosts)
- .clusterId(getName())
- .authMethod(ApplicationClusterEndpoint.AuthMethod.mtls)
- .build());
- }
- }
-
- // Include all endpoints provided by controller
- endpointsFromController.stream()
- .filter(ce -> ce.clusterId().equals(getName()))
- .forEach(ce -> ce.names().forEach(
- name -> endpoints.add(ApplicationClusterEndpoint.builder()
- .scope(ce.scope())
- .weight(ce.weight().orElse(1)) // Default to weight=1 if not set
- .routingMethod(ce.routingMethod())
- .dnsName(ApplicationClusterEndpoint.DnsName.from(name))
- .hosts(hosts)
- .clusterId(getName())
- .authMethod(ce.authMethod())
- .build())
- ));
- this.endpoints = List.copyOf(endpoints);
+ deployState.getEndpoints().stream()
+ .filter(ce -> ce.clusterId().equals(getName()))
+ .forEach(ce -> ce.names().forEach(
+ name -> endpoints.add(ApplicationClusterEndpoint.builder()
+ .scope(ce.scope())
+ .weight(ce.weight().orElse(1))
+ .routingMethod(ce.routingMethod())
+ .dnsName(ApplicationClusterEndpoint.DnsName.from(name))
+ .hosts(hosts)
+ .clusterId(getName())
+ .authMethod(ce.authMethod())
+ .build())
+ ));
+ this.endpoints = Collections.unmodifiableList(endpoints);
}
@Override
@@ -299,12 +295,15 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
@Override
public void getConfig(QrStartConfig.Builder builder) {
super.getConfig(builder);
+ var memoryPct = getMemoryPercentage().orElse(null);
+ int heapsize = memoryPct != null && memoryPct.availableMemoryGb().isPresent()
+ ? (int) (memoryPct.availableMemoryGb().getAsDouble() * 1024) : 1536;
builder.jvm.verbosegc(true)
.availableProcessors(0)
.compressedClassSpaceSize(0)
- .minHeapsize(1536)
- .heapsize(1536);
- getMemoryPercentage().ifPresent(percentage -> builder.jvm.heapSizeAsPercentageOfPhysicalMemory(percentage));
+ .minHeapsize(heapsize)
+ .heapsize(heapsize);
+ if (memoryPct != null) builder.jvm.heapSizeAsPercentageOfPhysicalMemory(memoryPct.percentage());
}
@Override
@@ -373,6 +372,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
@Override
public String name() { return getName(); }
+ public OnnxModelCost.Calculator onnxModelCost() { return onnxModelCost; }
+
public static class MbusParams {
// the amount of the maxpendingbytes to process concurrently, typically 0.2 (20%)
final Double maxConcurrentFactor;
@@ -390,4 +391,14 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
}
}
+ public static class UserConfiguredUrls {
+
+ private final Set<String> urls = new HashSet<>();
+
+ public void add(String url) { urls.add(url); }
+
+ public Set<String> all() { return urls; }
+
+ }
+
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java
index 6bbc24e8739..fa13e7ec9d6 100755
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java
@@ -62,6 +62,7 @@ import com.yahoo.vespa.model.container.search.ContainerSearch;
import com.yahoo.vespa.model.container.search.searchchain.SearchChains;
import com.yahoo.vespa.model.content.Content;
import com.yahoo.vespa.model.search.SearchCluster;
+
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
@@ -71,6 +72,7 @@ import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
+import java.util.OptionalDouble;
import java.util.Set;
import java.util.TreeSet;
@@ -718,5 +720,11 @@ public abstract class ContainerCluster<CONTAINER extends Container>
* Returns the percentage of host physical memory this application has specified for nodes in this cluster,
* or empty if this is not specified by the application.
*/
- public Optional<Integer> getMemoryPercentage() { return Optional.empty(); }
+ public record JvmMemoryPercentage(int percentage, OptionalDouble availableMemoryGb) {
+ static JvmMemoryPercentage of(int percentage) { return new JvmMemoryPercentage(percentage, OptionalDouble.empty()); }
+ static JvmMemoryPercentage of(int percentage, double availableMemoryGb) {
+ return new JvmMemoryPercentage(percentage, OptionalDouble.of(availableMemoryGb));
+ }
+ }
+ public Optional<JvmMemoryPercentage> getMemoryPercentage() { return Optional.empty(); }
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java
index 906ef739ef1..1b47f59653e 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java
@@ -45,10 +45,6 @@ public class ContainerModelEvaluation implements
private final RankProfileList rankProfileList;
private final FileDistributedOnnxModels onnxModels; // For cluster specific ONNX model settings
- public ContainerModelEvaluation(ApplicationContainerCluster cluster, RankProfileList rankProfileList) {
- this(cluster, rankProfileList, null);
- }
-
public ContainerModelEvaluation(ApplicationContainerCluster cluster,
RankProfileList rankProfileList, FileDistributedOnnxModels onnxModels) {
this.rankProfileList = Objects.requireNonNull(rankProfileList, "rankProfileList cannot be null");
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java
index 205848e1b67..76bb1a9e02a 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java
@@ -5,6 +5,7 @@ package com.yahoo.vespa.model.container.component;
import com.yahoo.config.ModelReference;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.embedding.BertBaseEmbedderConfig;
+import com.yahoo.vespa.model.container.ApplicationContainerCluster;
import com.yahoo.vespa.model.container.xml.ModelIdResolver;
import org.w3c.dom.Element;
@@ -33,7 +34,7 @@ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConf
private final Integer onnxGpuDevice;
- public BertEmbedder(Element xml, DeployState state) {
+ public BertEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) {
super("ai.vespa.embedding.BertBaseEmbedder", INTEGRATION_BUNDLE_NAME, xml);
model = ModelIdResolver.resolveToModelReference(getChild(xml, "transformer-model"), state);
vocab = ModelIdResolver.resolveToModelReference(getChild(xml, "tokenizer-vocab"), state);
@@ -49,6 +50,7 @@ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConf
onnxInteropThreads = getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null);
onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null);
onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null);
+ cluster.onnxModelCost().registerModel(model);
}
@Override
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java
index c0fdfe3dc64..63096ebcbe2 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java
@@ -5,7 +5,7 @@ package com.yahoo.vespa.model.container.component;
import com.yahoo.config.ModelReference;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.embedding.ColBertEmbedderConfig;
-import com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig;
+import com.yahoo.vespa.model.container.ApplicationContainerCluster;
import com.yahoo.vespa.model.container.xml.ModelIdResolver;
import org.w3c.dom.Element;
@@ -40,7 +40,7 @@ public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderCo
private final Integer onnxIntraopThreads;
private final Integer onnxGpuDevice;
- public ColBertEmbedder(Element xml, DeployState state) {
+ public ColBertEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) {
super("ai.vespa.embedding.ColBertEmbedder", INTEGRATION_BUNDLE_NAME, xml);
var transformerModelElem = getOptionalChild(xml, "transformer-model").orElseThrow();
model = ModelIdResolver.resolveToModelReference(transformerModelElem, state);
@@ -60,7 +60,7 @@ public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderCo
onnxInteropThreads = getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null);
onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null);
onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null);
-
+ cluster.onnxModelCost().registerModel(model);
}
private static ModelReference resolveDefaultVocab(Element model, DeployState state) {
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java
index f4017339699..41b80bf1cb2 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java
@@ -5,6 +5,7 @@ package com.yahoo.vespa.model.container.component;
import com.yahoo.config.ModelReference;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig;
+import com.yahoo.vespa.model.container.ApplicationContainerCluster;
import com.yahoo.vespa.model.container.xml.ModelIdResolver;
import org.w3c.dom.Element;
@@ -33,7 +34,7 @@ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEm
private final Integer onnxGpuDevice;
private final String poolingStrategy;
- public HuggingFaceEmbedder(Element xml, DeployState state) {
+ public HuggingFaceEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) {
super("ai.vespa.embedding.huggingface.HuggingFaceEmbedder", INTEGRATION_BUNDLE_NAME, xml);
var transformerModelElem = getOptionalChild(xml, "transformer-model").orElseThrow();
model = ModelIdResolver.resolveToModelReference(transformerModelElem, state);
@@ -51,6 +52,7 @@ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEm
onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null);
onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null);
poolingStrategy = getChildValue(xml, "pooling-strategy").orElse(null);
+ cluster.onnxModelCost().registerModel(model);
}
private static ModelReference resolveDefaultVocab(Element model, DeployState state) {
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java
index f0296d49472..3261d454b4f 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.model.container.search;
+import com.yahoo.config.application.api.ApplicationPackage;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.container.QrSearchersConfig;
import com.yahoo.prelude.semantics.SemanticRulesConfig;
@@ -56,12 +57,14 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains>
private QueryProfiles queryProfiles;
private SemanticRules semanticRules;
private PageTemplates pageTemplates;
+ private ApplicationPackage app;
public ContainerSearch(DeployState deployState, ApplicationContainerCluster cluster, SearchChains chains) {
super(chains);
this.globalPhase = deployState.featureFlags().enableGlobalPhase();
this.useReconfigurableDispatcher = deployState.featureFlags().useReconfigurableDispatcher();
this.schemasWithGlobalPhase = getSchemasWithGlobalPhase(deployState);
+ this.app = deployState.getApplicationPackage();
this.owningCluster = cluster;
owningCluster.addComponent(Component.fromClassAndBundle(CompiledQueryProfileRegistry.class, SEARCH_AND_DOCPROC_BUNDLE));
@@ -96,6 +99,9 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains>
if ( ! schemasWithGlobalPhase.contains(documentDb.getSchemaName())) continue;
var factory = new RankProfilesEvaluatorComponent(documentDb);
if ( ! owningCluster.getComponentsMap().containsKey(factory.getComponentId())) {
+ var onnxModels = documentDb.getDerivedConfiguration().getRankProfileList().getOnnxModels();
+ onnxModels.asMap().forEach(
+ (__, model) -> owningCluster.onnxModelCost().registerModel(app.getFile(model.getFilePath())));
owningCluster.addComponent(factory);
}
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
index 35b0213bf59..d9c4dea478c 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
@@ -778,6 +778,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
!container.getHostResource().realResources().gpuResources().isZero());
onnxModel.setGpuDevice(gpuDevice, hasGpu);
}
+ cluster.onnxModelCost().registerModel(context.getApplicationPackage().getFile(onnxModel.getFilePath()));
}
cluster.setModelEvaluation(new ContainerModelEvaluation(cluster, profiles, models));
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java
index bb72eda7d04..d18309ef0af 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java
@@ -256,7 +256,7 @@ public class ContentCluster extends TreeConfigProducer<AnyConfigProducer> implem
for (ContainerModel containerModel : containers) {
Optional<String> hostClusterId = containerModel.getCluster().getHostClusterId();
if (hostClusterId.isPresent() && hostClusterId.get().equals(clusterId) && containerModel.getCluster().getMemoryPercentage().isPresent()) {
- return containerModel.getCluster().getMemoryPercentage().get() * 0.01;
+ return containerModel.getCluster().getMemoryPercentage().get().percentage() * 0.01;
}
}
return 0.0;
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/filedistribution/UserConfiguredFiles.java b/config-model/src/main/java/com/yahoo/vespa/model/filedistribution/UserConfiguredFiles.java
index 8bed5e64bf5..03541ecadf3 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/filedistribution/UserConfiguredFiles.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/filedistribution/UserConfiguredFiles.java
@@ -11,6 +11,7 @@ import com.yahoo.path.Path;
import com.yahoo.vespa.config.ConfigDefinition;
import com.yahoo.vespa.config.ConfigDefinitionKey;
import com.yahoo.vespa.config.ConfigPayloadBuilder;
+
import com.yahoo.yolean.Exceptions;
import java.io.File;
@@ -21,6 +22,8 @@ import java.util.Map;
import java.util.Optional;
import java.util.logging.Level;
+import static com.yahoo.vespa.model.container.ApplicationContainerCluster.UserConfiguredUrls;
+
/**
* Utility methods for registering file distribution of files/paths/urls/models defined by the user.
*
@@ -30,10 +33,12 @@ public class UserConfiguredFiles implements Serializable {
private final FileRegistry fileRegistry;
private final DeployLogger logger;
+ private final UserConfiguredUrls userConfiguredUrls;
- public UserConfiguredFiles(FileRegistry fileRegistry, DeployLogger logger) {
+ public UserConfiguredFiles(FileRegistry fileRegistry, DeployLogger logger, UserConfiguredUrls userConfiguredUrls) {
this.fileRegistry = fileRegistry;
this.logger = logger;
+ this.userConfiguredUrls = userConfiguredUrls;
}
/**
@@ -133,7 +138,10 @@ public class UserConfiguredFiles implements Serializable {
Path path;
if (isModelType) {
var modelReference = ModelReference.valueOf(builder.getValue());
- if (modelReference.path().isEmpty()) return;
+ if (modelReference.path().isEmpty()) {
+ modelReference.url().ifPresent(url -> userConfiguredUrls.add(url.value()));
+ return;
+ }
path = Path.fromString(modelReference.path().get().value());
}
else {
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/ml/OnnxModelProbe.java b/config-model/src/main/java/com/yahoo/vespa/model/ml/OnnxModelProbe.java
index 7c86267c1b6..38dda3e29ff 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/ml/OnnxModelProbe.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/ml/OnnxModelProbe.java
@@ -18,6 +18,9 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.Map;
+import java.util.Optional;
+
+import static com.yahoo.yolean.Exceptions.uncheck;
/**
* Defers to 'vespa-analyze-onnx-model' to determine the output type given
@@ -29,6 +32,7 @@ import java.util.Map;
public class OnnxModelProbe {
private static final String binary = "vespa-analyze-onnx-model";
+ private static final ObjectMapper jsonParser = new ObjectMapper();
static TensorType probeModel(ApplicationPackage app, Path modelPath, String outputName, Map<String, TensorType> inputTypes) {
TensorType outputType = TensorType.empty;
@@ -41,8 +45,9 @@ public class OnnxModelProbe {
// Otherwise, run vespa-analyze-onnx-model if the model is available
if (outputType.equals(TensorType.empty) && app.getFile(modelPath).exists()) {
String jsonInput = createJsonInput(app.getFileReference(modelPath).getAbsolutePath(), inputTypes);
- String jsonOutput = callVespaAnalyzeOnnxModel(jsonInput);
+ var jsonOutput = callVespaAnalyzeOnnxModel(jsonInput);
outputType = outputTypeFromJson(jsonOutput, outputName);
+ writeMemoryStats(app, modelPath, MemoryStats.fromJson(jsonOutput));
if ( ! outputType.equals(TensorType.empty)) {
writeProbedOutputType(app, modelPath, contextKey, outputType);
}
@@ -53,6 +58,22 @@ public class OnnxModelProbe {
return outputType;
}
+ public static Optional<MemoryStats> probeMemoryStats(ApplicationPackage app, Path modelPath) {
+ return Optional.of(app.getFile(memoryStatsPath(modelPath)))
+ .filter(ApplicationFile::exists)
+ .map(file -> MemoryStats.fromJson(uncheck(() -> jsonParser.readTree(file.createReader()))));
+ }
+
+ private static void writeMemoryStats(ApplicationPackage app, Path modelPath, MemoryStats memoryStats) throws IOException {
+ String path = app.getFileReference(memoryStatsPath(modelPath)).getAbsolutePath();
+ IOUtils.writeFile(path, memoryStats.toJson().toPrettyString(), false);
+ }
+
+ private static Path memoryStatsPath(Path modelPath) {
+ var fileName = OnnxModelInfo.asValidIdentifier(modelPath.getRelative()) + ".memory_stats";
+ return ApplicationPackage.MODELS_GENERATED_REPLICATED_DIR.append(fileName);
+ }
+
private static String createContextKey(String onnxName, Map<String, TensorType> inputTypes) {
StringBuilder key = new StringBuilder().append(onnxName).append(":");
inputTypes.entrySet().stream().sorted(Map.Entry.comparingByKey())
@@ -95,9 +116,7 @@ public class OnnxModelProbe {
return TensorType.empty;
}
- private static TensorType outputTypeFromJson(String json, String outputName) throws IOException {
- ObjectMapper m = new ObjectMapper();
- JsonNode root = m.readTree(json);
+ private static TensorType outputTypeFromJson(JsonNode root, String outputName) throws IOException {
if ( ! root.isObject() || ! root.has("outputs")) {
return TensorType.empty;
}
@@ -123,7 +142,7 @@ public class OnnxModelProbe {
return out.toString();
}
- private static String callVespaAnalyzeOnnxModel(String jsonInput) throws IOException, InterruptedException {
+ private static JsonNode callVespaAnalyzeOnnxModel(String jsonInput) throws IOException, InterruptedException {
StringBuilder output = new StringBuilder();
ProcessBuilder processBuilder = new ProcessBuilder(binary, "--probe-types");
@@ -148,7 +167,16 @@ public class OnnxModelProbe {
throw new IllegalArgumentException("Error from '" + binary + "'. Return code: " + returnCode + ". " +
"Output: '" + output + "'");
}
- return output.toString();
+ return jsonParser.readTree(output.toString());
+ }
+
+ public record MemoryStats(long vmSize, long vmRss) {
+ static MemoryStats fromJson(JsonNode json) {
+ return new MemoryStats(json.get("vm_size").asLong(), json.get("vm_rss").asLong());
+ }
+ JsonNode toJson() {
+ return jsonParser.createObjectNode().put("vm_size", vmSize).put("vm_rss", vmRss);
+ }
}
}