about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- config-application-package/src/main/java/com/yahoo/config/model/application/provider/FilesApplicationFile.java 15
-rw-r--r-- config-model-api/src/main/java/com/yahoo/config/application/api/ApplicationFile.java 2
-rw-r--r-- config-model-api/src/main/java/com/yahoo/config/application/api/DeployLogger.java 5
-rw-r--r-- config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java 1
-rw-r--r-- config-model-api/src/main/java/com/yahoo/config/model/api/OnnxModelCost.java 29
-rw-r--r-- config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java 14
-rw-r--r-- config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java 4
-rw-r--r-- config-model/src/main/java/com/yahoo/config/model/test/MockApplicationPackage.java 2
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/DefaultOnnxModelCost.java 99
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java 4
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java 51
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java 1
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java 16
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java 38
-rwxr-xr-x config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java 10
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java 4
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java 4
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java 6
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java 4
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java 6
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java 1
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java 2
-rw-r--r-- config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java 7
-rw-r--r-- config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java 126
-rw-r--r-- configserver/src/main/java/com/yahoo/vespa/config/server/deploy/DeployHandlerLogger.java 15
-rw-r--r-- configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java 3
-rw-r--r-- configserver/src/main/java/com/yahoo/vespa/config/server/zookeeper/ZKApplication.java 6
-rw-r--r-- configserver/src/main/java/com/yahoo/vespa/config/server/zookeeper/ZKApplicationFile.java 9
-rw-r--r-- flags/src/main/java/com/yahoo/vespa/flags/Flags.java 7
29 files changed, 445 insertions, 46 deletions
diff --git a/config-application-package/src/main/java/com/yahoo/config/model/application/provider/FilesApplicationFile.java b/config-application-package/src/main/java/com/yahoo/config/model/application/provider/FilesApplicationFile.java
index 6b0adb5d079..2dbbc8a5820 100644
--- a/config-application-package/src/main/java/com/yahoo/config/model/application/provider/FilesApplicationFile.java
+++ b/config-application-package/src/main/java/com/yahoo/config/model/application/provider/FilesApplicationFile.java
@@ -5,13 +5,20 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import com.yahoo.config.application.api.ApplicationFile;
import com.yahoo.io.IOUtils;
import com.yahoo.path.Path;
-import java.util.logging.Level;
-import com.yahoo.yolean.Exceptions;
import com.yahoo.vespa.config.util.ConfigUtils;
+import com.yahoo.yolean.Exceptions;
-import java.io.*;
+import java.io.File;
+import java.io.FileFilter;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
+import java.util.logging.Level;
import java.util.logging.Logger;
/**
@@ -208,6 +215,8 @@ public class FilesApplicationFile extends ApplicationFile {
}
}
+ @Override public long getSize() { return file.length(); }
+
@Override
public int compareTo(ApplicationFile other) {
if (other == this) return 0;
diff --git a/config-model-api/src/main/java/com/yahoo/config/application/api/ApplicationFile.java b/config-model-api/src/main/java/com/yahoo/config/application/api/ApplicationFile.java
index a55ae795d28..97336b2bca0 100644
--- a/config-model-api/src/main/java/com/yahoo/config/application/api/ApplicationFile.java
+++ b/config-model-api/src/main/java/com/yahoo/config/application/api/ApplicationFile.java
@@ -160,6 +160,8 @@ public abstract class ApplicationFile implements Comparable<ApplicationFile> {
public abstract MetaData getMetaData();
+ public abstract long getSize();
+
public static class MetaData {
public String status = "unknown";
diff --git a/config-model-api/src/main/java/com/yahoo/config/application/api/DeployLogger.java b/config-model-api/src/main/java/com/yahoo/config/application/api/DeployLogger.java
index d9ebd902e3e..65e6bc2803a 100644
--- a/config-model-api/src/main/java/com/yahoo/config/application/api/DeployLogger.java
+++ b/config-model-api/src/main/java/com/yahoo/config/application/api/DeployLogger.java
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.config.application.api;
+import java.util.function.Supplier;
import java.util.logging.Level;
/**
@@ -13,6 +14,10 @@ public interface DeployLogger {
/** Log a message unrelated to the application package, e.g. internal error/status. */
void log(Level level, String message);
+ default void log(Level level, Supplier<String> message) { log(level, message.get()); }
+
+ default void log(Level level, Supplier<String> message, Throwable throwable) { log(level, message); }
+
/**
* Log a message related to the application package. These messages should be actionable by the user, f.ex. to
* signal usage of invalid/deprecated syntax
diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java
index 37b24f0ac1d..446c32801e0 100644
--- a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java
+++ b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java
@@ -117,6 +117,7 @@ public interface ModelContext {
@ModelFeatureFlag(owners = {"baldersheim"}) default boolean enableNestedMultivalueGrouping() { return false; }
@ModelFeatureFlag(owners = {"jonmv"}) default boolean useReconfigurableDispatcher() { return false; }
@ModelFeatureFlag(owners = {"vekterli"}) default int contentLayerMetadataFeatureLevel() { return 0; }
+ @ModelFeatureFlag(owners = {"bjorncs"}) default boolean dynamicHeapSize() { return false; }
}
/** Warning: As elsewhere in this package, do not make backwards incompatible changes that will break old config models! */
diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/OnnxModelCost.java b/config-model-api/src/main/java/com/yahoo/config/model/api/OnnxModelCost.java
new file mode 100644
index 00000000000..422ceba8074
--- /dev/null
+++ b/config-model-api/src/main/java/com/yahoo/config/model/api/OnnxModelCost.java
@@ -0,0 +1,58 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+package com.yahoo.config.model.api;
+
+import com.yahoo.config.ModelReference;
+import com.yahoo.config.application.api.ApplicationFile;
+import com.yahoo.config.application.api.DeployLogger;
+
+/**
+ * Factory of {@link Calculator}s, which add up the cost in bytes of the models registered with them.
+ *
+ * @author bjorncs
+ */
+public interface OnnxModelCost {
+
+    /** Creates a calculator; the given logger is handed to the implementation. */
+    Calculator newCalculator(DeployLogger logger);
+
+    /** Receives models and reports their aggregated cost. */
+    interface Calculator {
+
+        /** Returns the cost in bytes aggregated over the registered models. */
+        long aggregatedModelCostInBytes();
+
+        /** Registers a model given as an {@link ApplicationFile}. */
+        void registerModel(ApplicationFile path);
+
+        /** Registers a model given as a {@link ModelReference}. */
+        void registerModel(ModelReference ref);
+
+    }
+
+    /** Returns an instance whose calculators ignore every registered model and always report a cost of 0. */
+    static OnnxModelCost disabled() {
+        return new OnnxModelCost() {
+            @Override
+            public Calculator newCalculator(DeployLogger ignored) {
+                return new Calculator() {
+                    @Override
+                    public long aggregatedModelCostInBytes() {
+                        return 0;
+                    }
+
+                    @Override
+                    public void registerModel(ApplicationFile path) {
+                        // Nothing is tracked when disabled
+                    }
+
+                    @Override
+                    public void registerModel(ModelReference ref) {
+                        // Nothing is tracked when disabled
+                    }
+                };
+            }
+        };
+    }
+
+}
diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java b/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java
index 4df7a76031a..5ab258ecce8 100644
--- a/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java
+++ b/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java
@@ -18,6 +18,7 @@ import com.yahoo.config.model.api.EndpointCertificateSecrets;
import com.yahoo.config.model.api.HostProvisioner;
import com.yahoo.config.model.api.Model;
import com.yahoo.config.model.api.ModelContext;
+import com.yahoo.config.model.api.OnnxModelCost;
import com.yahoo.config.model.api.Provisioned;
import com.yahoo.config.model.api.Reindexing;
import com.yahoo.config.model.api.ValidationParameters;
@@ -90,6 +91,7 @@ public class DeployState implements ConfigDefinitionStore {
private final Provisioned provisioned;
private final Reindexing reindexing;
private final ExecutorService executor;
+ private final OnnxModelCost onnxModelCost;
public static DeployState createTestState() {
return new Builder().build();
@@ -124,7 +126,8 @@ public class DeployState implements ConfigDefinitionStore {
boolean accessLoggingEnabledByDefault,
Optional<DockerImage> wantedDockerImageRepo,
Reindexing reindexing,
- Optional<ValidationOverrides> validationOverrides) {
+ Optional<ValidationOverrides> validationOverrides,
+ OnnxModelCost onnxModelCost) {
this.logger = deployLogger;
this.fileRegistry = fileRegistry;
this.executor = executor;
@@ -152,6 +155,7 @@ public class DeployState implements ConfigDefinitionStore {
this.now = now;
this.wantedDockerImageRepo = wantedDockerImageRepo;
this.reindexing = reindexing;
+ this.onnxModelCost = onnxModelCost;
}
public static HostProvisioner getDefaultModelHostProvisioner(ApplicationPackage applicationPackage) {
@@ -305,6 +309,8 @@ public class DeployState implements ConfigDefinitionStore {
public Optional<Reindexing> reindexing() { return Optional.ofNullable(reindexing); }
+ public OnnxModelCost onnxModelCost() { return onnxModelCost; }
+
public boolean isHostedTenantApplication(ApplicationType type) {
boolean isTesterApplication = getProperties().applicationId().instance().isTester();
return isHosted() && type == ApplicationType.DEFAULT && !isTesterApplication;
@@ -333,6 +339,7 @@ public class DeployState implements ConfigDefinitionStore {
private QueryProfiles queryProfiles = null;
private Reindexing reindexing = null;
private Optional<ValidationOverrides> validationOverrides = Optional.empty();
+ private OnnxModelCost onnxModelCost = OnnxModelCost.disabled();
public Builder() {}
@@ -450,6 +457,8 @@ public class DeployState implements ConfigDefinitionStore {
return this;
}
+ public Builder onnxModelCost(OnnxModelCost instance) { this.onnxModelCost = instance; return this; }
+
public DeployState build() {
return build(new ValidationParameters());
}
@@ -482,7 +491,8 @@ public class DeployState implements ConfigDefinitionStore {
accessLoggingEnabledByDefault,
wantedDockerImageRepo,
reindexing,
- validationOverrides);
+ validationOverrides,
+ onnxModelCost);
}
}
diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java
index 815c32e3c8f..77356292f9a 100644
--- a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java
+++ b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java
@@ -86,6 +86,7 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea
private boolean allowUserFilters = true;
private List<DataplaneToken> dataplaneTokens;
private int contentLayerMetadataFeatureLevel = 0;
+ private boolean dynamicHeapSize = false;
@Override public ModelContext.FeatureFlags featureFlags() { return this; }
@Override public boolean multitenant() { return multitenant; }
@@ -144,6 +145,7 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea
@Override public boolean enableGlobalPhase() { return true; } // Enable global-phase by default for unit tests only
@Override public List<DataplaneToken> dataplaneTokens() { return dataplaneTokens; }
@Override public int contentLayerMetadataFeatureLevel() { return contentLayerMetadataFeatureLevel; }
+ @Override public boolean dynamicHeapSize() { return dynamicHeapSize; }
public TestProperties sharedStringRepoNoReclaim(boolean sharedStringRepoNoReclaim) {
this.sharedStringRepoNoReclaim = sharedStringRepoNoReclaim;
@@ -379,6 +381,8 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea
return this;
}
+ public TestProperties setDynamicHeapSize(boolean b) { this.dynamicHeapSize = b; return this; }
+
public static class Spec implements ConfigServerSpec {
private final String hostName;
diff --git a/config-model/src/main/java/com/yahoo/config/model/test/MockApplicationPackage.java b/config-model/src/main/java/com/yahoo/config/model/test/MockApplicationPackage.java
index dbcd1cea2fa..342b5f243e7 100644
--- a/config-model/src/main/java/com/yahoo/config/model/test/MockApplicationPackage.java
+++ b/config-model/src/main/java/com/yahoo/config/model/test/MockApplicationPackage.java
@@ -488,6 +488,8 @@ public class MockApplicationPackage implements ApplicationPackage {
throw new UnsupportedOperationException();
}
+ @Override public long getSize() { return file.length(); }
+
@Override
public int compareTo(ApplicationFile other) {
return this.getPath().getName().compareTo((other).getPath().getName());
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/DefaultOnnxModelCost.java b/config-model/src/main/java/com/yahoo/vespa/model/DefaultOnnxModelCost.java
new file mode 100644
index 00000000000..76733872882
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/vespa/model/DefaultOnnxModelCost.java
@@ -0,0 +1,139 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+package com.yahoo.vespa.model;
+
+import com.yahoo.config.ModelReference;
+import com.yahoo.config.application.api.ApplicationFile;
+import com.yahoo.config.application.api.DeployLogger;
+import com.yahoo.config.model.api.OnnxModelCost;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.time.Duration;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.logging.Level;
+
+import static com.yahoo.yolean.Exceptions.uncheck;
+
+/**
+ * Aggregates estimated footprint of configured ONNX models.
+ *
+ * The size of each registered model is taken from the file length for local files, or from the
+ * {@code Content-Length} of a HEAD request for http(s) URLs, and turned into an estimated cost in bytes.
+ *
+ * @author bjorncs
+ */
+public class DefaultOnnxModelCost implements OnnxModelCost {
+
+    /** Returns a new, empty calculator which logs through the given deploy logger. */
+    @Override
+    public Calculator newCalculator(DeployLogger logger) {
+        return new CalculatorImpl(logger);
+    }
+
+    private static class CalculatorImpl implements Calculator {
+
+        private static final long MB = 1024L * 1024;
+
+        /** Size assumed for a model whose actual size could not be determined. */
+        private static final long FALLBACK_MODEL_SIZE = 1024 * MB;
+
+        /** Lower bound for the estimated cost of a single model. */
+        private static final long MIN_MODEL_COST = 300 * MB;
+
+        /** Constant added to the scaled model size when estimating cost. */
+        private static final long FIXED_MODEL_OVERHEAD = 100 * MB;
+
+        /** Connect and request timeout of the HEAD request used to size remote models. */
+        private static final Duration HTTP_TIMEOUT = Duration.ofSeconds(3);
+
+        private final DeployLogger log;
+
+        /** Estimated cost in bytes per model source (relative path, file name or URL). */
+        private final ConcurrentMap<String, Long> modelCost = new ConcurrentHashMap<>();
+
+        private CalculatorImpl(DeployLogger log) {
+            this.log = log;
+        }
+
+        /** Returns the sum of the estimated costs, in bytes, of all models registered so far. */
+        @Override
+        public long aggregatedModelCostInBytes() {
+            return modelCost.values().stream().mapToLong(Long::longValue).sum();
+        }
+
+        /** Registers a model file, keyed by its relative path; a non-existing file is sized as 0 (unknown). */
+        @Override
+        public void registerModel(ApplicationFile f) {
+            String path = f.getPath().getRelative();
+            if (alreadyAnalyzed(path)) return;
+            log.log(Level.FINE, () -> "Register model '%s'".formatted(path));
+            deductJvmHeapSizeWithModelCost(f.exists() ? f.getSize() : 0, path);
+        }
+
+        /**
+         * Registers a model reference: a path is keyed by its file name, a URL by its full string.
+         *
+         * @throws IllegalStateException if the reference has neither a path nor a URL
+         */
+        @Override
+        public void registerModel(ModelReference ref) {
+            log.log(Level.FINE, () -> "Register model '%s'".formatted(ref.toString()));
+            if (ref.path().isPresent()) {
+                var path = Paths.get(ref.path().get().value());
+                var source = path.getFileName().toString();
+                if (alreadyAnalyzed(source)) return;
+                deductJvmHeapSizeWithModelCost(uncheck(() -> Files.exists(path) ? Files.size(path) : 0), source);
+            } else if (ref.url().isPresent()) {
+                deductJvmHeapSizeWithModelCost(URI.create(ref.url().get().value()));
+            } else {
+                throw new IllegalStateException(ref.toString());
+            }
+        }
+
+        /**
+         * Sizes a remote model with a HEAD request and registers its estimated cost.
+         * URIs with other schemes than http/https (or no scheme) are ignored. If the request fails,
+         * the failure is logged and no cost is registered for the URI.
+         */
+        private void deductJvmHeapSizeWithModelCost(URI uri) {
+            if (alreadyAnalyzed(uri.toString())) return;
+            String scheme = uri.getScheme(); // null when the URI has no scheme
+            if (!"http".equals(scheme) && !"https".equals(scheme)) return;
+            try {
+                var httpClient = HttpClient.newBuilder().connectTimeout(HTTP_TIMEOUT).build();
+                var request = HttpRequest.newBuilder(uri).timeout(HTTP_TIMEOUT).method("HEAD", HttpRequest.BodyPublishers.noBody()).build();
+                var response = httpClient.send(request, HttpResponse.BodyHandlers.discarding());
+                var contentLength = response.headers().firstValue("Content-Length").orElse("0");
+                log.log(Level.FINE, () -> "Got content length '%s' for '%s'".formatted(contentLength, uri));
+                deductJvmHeapSizeWithModelCost(Long.parseLong(contentLength), uri.toString());
+            } catch (IllegalArgumentException | InterruptedException | IOException e) {
+                // Restore the interrupt status so callers further up can still observe the interruption
+                if (e instanceof InterruptedException) Thread.currentThread().interrupt();
+                log.log(Level.INFO, () -> "Failed to get model size for '%s': %s".formatted(uri, e.getMessage()), e);
+            }
+        }
+
+        /**
+         * Stores the estimated cost for a model: 1.4 times its size plus 100MB, but at least 300MB.
+         * A non-positive size means unknown, in which case a size of 1GB is assumed.
+         */
+        private void deductJvmHeapSizeWithModelCost(long size, String source) {
+            long effectiveSize = size > 0 ? size : FALLBACK_MODEL_SIZE;
+            long estimatedCost = Math.max(MIN_MODEL_COST, (long) (1.4D * effectiveSize + FIXED_MODEL_OVERHEAD));
+            log.log(Level.FINE, () ->
+                    "Estimated %s footprint for model of size %s ('%s')".formatted(mb(estimatedCost), mb(size), source));
+            modelCost.put(source, estimatedCost);
+        }
+
+        private boolean alreadyAnalyzed(String source) { return modelCost.containsKey(source); }
+
+        private static String mb(long bytes) { return "%dMB".formatted(bytes / MB); }
+    }
+}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java b/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java
index 28ff8dff620..727a18aee2c 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java
@@ -21,6 +21,7 @@ import com.yahoo.config.model.api.Model;
import com.yahoo.config.model.api.ModelContext;
import com.yahoo.config.model.api.ModelCreateResult;
import com.yahoo.config.model.api.ModelFactory;
+import com.yahoo.config.model.api.OnnxModelCost;
import com.yahoo.config.model.api.ValidationParameters;
import com.yahoo.config.model.application.provider.ApplicationPackageXmlFilesValidator;
import com.yahoo.config.model.builder.xml.ConfigModelBuilder;
@@ -197,7 +198,8 @@ public class VespaModelFactory implements ModelFactory {
.zone(zone)
.now(clock.instant())
.wantedNodeVespaVersion(modelContext.wantedNodeVespaVersion())
- .wantedDockerImageRepo(modelContext.wantedDockerImageRepo());
+ .wantedDockerImageRepo(modelContext.wantedDockerImageRepo())
+ .onnxModelCost(modelContext.properties().hostedVespa() ? new DefaultOnnxModelCost() : OnnxModelCost.disabled());
modelContext.previousModel().ifPresent(builder::previousModel);
modelContext.reindexing().ifPresent(builder::reindexing);
return builder.build(validationParameters);
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java
new file mode 100644
index 00000000000..2c5e0db14b9
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java
@@ -0,0 +1,65 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+package com.yahoo.vespa.model.application.validation;
+
+import com.yahoo.config.model.deploy.DeployState;
+import com.yahoo.vespa.model.VespaModel;
+
+import java.util.logging.Level;
+
+/**
+ * Fails deployment when a container cluster with ONNX models is left with too little JVM memory:
+ * a heap percentage below 10, or less than 0.4GB of available memory.
+ * Applies only to hosted tenant applications having the dynamic heap size feature flag set.
+ *
+ * @author bjorncs
+ */
+public class JvmHeapSizeValidator extends Validator {
+
+    private static final int PERCENT_LIMIT = 10;
+    private static final double GB_LIMIT = 0.4;
+    private static final double BYTES_PER_GB = 1024D * 1024 * 1024;
+
+    /** Second half of both failure messages; its single argument is the estimated model cost in GB. */
+    private static final String COST_AND_REMEDY =
+            "Estimated cost of ONNX models is %.2fGB. Either use a node flavor with more memory or use less expensive models. " +
+            "You may override this validation by specifying 'allocated-memory' (https://docs.vespa.ai/en/performance/container-tuning.html#jvm-heap-size).";
+
+    /**
+     * Checks each container cluster that has a known amount of available memory and a positive ONNX model cost.
+     *
+     * @throws IllegalArgumentException if the heap percentage or the available memory of a cluster is below its limit
+     */
+    @Override
+    public void validate(VespaModel model, DeployState ds) {
+        boolean enabled = ds.featureFlags().dynamicHeapSize()
+                && ds.isHostedTenantApplication(model.getAdmin().getApplicationType());
+        if (!enabled) return;
+
+        model.getContainerClusters().forEach((clusterId, appCluster) -> {
+            var memory = appCluster.getMemoryPercentage();
+            if (memory.isEmpty()) return; // No heap percentage for this cluster
+            var mp = memory.get();
+            if (mp.availableMemoryGb().isEmpty()) {
+                ds.getDeployLogger().log(Level.FINE, "Host resources unknown or percentage overridden with 'allocated-memory'");
+                return;
+            }
+            long modelCostBytes = appCluster.onnxModelCost().aggregatedModelCostInBytes();
+            if (modelCostBytes <= 0) return; // No model cost to account for
+
+            if (mp.percentage() < PERCENT_LIMIT) {
+                throw failure("Allocated percentage of memory of JVM in cluster '%s' is too low (%d%% < %d%%). ".formatted(clusterId, mp.percentage(), PERCENT_LIMIT), modelCostBytes);
+            }
+            double availableMemoryGb = mp.availableMemoryGb().getAsDouble();
+            if (availableMemoryGb < GB_LIMIT) {
+                throw failure("Allocated memory to JVM in cluster '%s' is too low (%.2fGB < %.2fGB). ".formatted(clusterId, availableMemoryGb, GB_LIMIT), modelCostBytes);
+            }
+        });
+    }
+
+    /** Creates the exception for the given problem description, appending the model cost and remedy text. */
+    private static IllegalArgumentException failure(String problem, long modelCostBytes) {
+        return new IllegalArgumentException(problem + COST_AND_REMEDY.formatted(modelCostBytes / BYTES_PER_GB));
+    }
+
+}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java
index 53a553ee624..b9ecf7c2d22 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java
@@ -87,6 +87,7 @@ public class Validation {
new AccessControlFilterExcludeValidator().validate(model, deployState);
new CloudUserFilterValidator().validate(model, deployState);
new CloudHttpConnectorValidator().validate(model, deployState);
+ new JvmHeapSizeValidator().validate(model, deployState);
additionalValidators.forEach(v -> v.validate(model, deployState));
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java
index 7501f6162c7..9ecd359f90d 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java
@@ -7,11 +7,12 @@ import com.yahoo.config.model.producer.AnyConfigProducer;
import com.yahoo.config.model.producer.TreeConfigProducer;
import com.yahoo.osgi.provider.model.ComponentModel;
import com.yahoo.text.XML;
-import com.yahoo.vespa.model.container.component.HuggingFaceEmbedder;
-import com.yahoo.vespa.model.container.component.HuggingFaceTokenizer;
+import com.yahoo.vespa.model.container.ApplicationContainerCluster;
import com.yahoo.vespa.model.container.component.BertEmbedder;
import com.yahoo.vespa.model.container.component.ColBertEmbedder;
import com.yahoo.vespa.model.container.component.Component;
+import com.yahoo.vespa.model.container.component.HuggingFaceEmbedder;
+import com.yahoo.vespa.model.container.component.HuggingFaceTokenizer;
import com.yahoo.vespa.model.container.xml.BundleInstantiationSpecificationBuilder;
import org.w3c.dom.Element;
@@ -35,19 +36,20 @@ public class DomComponentBuilder extends VespaDomBuilder.DomConfigProducerBuilde
@Override
protected Component<? super Component<?, ?>, ?> doBuild(DeployState deployState, TreeConfigProducer<AnyConfigProducer> ancestor, Element spec) {
- var component = buildComponent(spec, deployState);
+ var component = buildComponent(spec, deployState, ancestor);
addChildren(deployState, ancestor, spec, component);
return component;
}
- private Component<? super Component<?, ?>, ?> buildComponent(Element spec, DeployState state) {
+ private Component<? super Component<?, ?>, ?> buildComponent(
+ Element spec, DeployState state, TreeConfigProducer<AnyConfigProducer> ancestor) {
if (spec.hasAttribute("type")) {
var type = spec.getAttribute("type");
return switch (type) {
- case "hugging-face-embedder" -> new HuggingFaceEmbedder(spec, state);
+ case "hugging-face-embedder" -> new HuggingFaceEmbedder((ApplicationContainerCluster)ancestor, spec, state);
case "hugging-face-tokenizer" -> new HuggingFaceTokenizer(spec, state);
- case "bert-embedder" -> new BertEmbedder(spec, state);
- case "colbert-embedder" -> new ColBertEmbedder(spec, state);
+ case "colbert-embedder" -> new ColBertEmbedder((ApplicationContainerCluster)ancestor, spec, state);
+ case "bert-embedder" -> new BertEmbedder((ApplicationContainerCluster)ancestor, spec, state);
default -> throw new IllegalArgumentException("Unknown component type '%s'".formatted(type));
};
} else {
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java
index b9021912244..d6403c2e8e3 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java
@@ -8,10 +8,12 @@ import com.yahoo.component.ComponentId;
import com.yahoo.component.ComponentSpecification;
import com.yahoo.config.FileReference;
import com.yahoo.config.application.api.ComponentInfo;
+import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.config.model.api.ApplicationClusterEndpoint;
import com.yahoo.config.model.api.ApplicationClusterInfo;
import com.yahoo.config.model.api.ContainerEndpoint;
import com.yahoo.config.model.api.Model;
+import com.yahoo.config.model.api.OnnxModelCost;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.config.model.producer.TreeConfigProducer;
import com.yahoo.config.provision.AllocatedHosts;
@@ -47,6 +49,7 @@ import java.util.LinkedHashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
+import java.util.logging.Level;
import java.util.stream.Collectors;
import static com.yahoo.vespa.model.container.docproc.DocprocChains.DOCUMENT_TYPE_MANAGER_CLASS;
@@ -82,6 +85,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
private final Set<FileReference> applicationBundles = new LinkedHashSet<>();
private final Set<String> previousHosts;
+ private final OnnxModelCost.Calculator onnxModelCost;
+ private final DeployLogger logger;
private ContainerModelEvaluation modelEvaluation;
@@ -92,6 +97,7 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
private int zookeeperSessionTimeoutSeconds = 30;
private final int transport_events_before_wakeup;
private final int transport_connections_per_target;
+ private final boolean dynamicHeapSize;
/** The heap size % of total memory available to the JVM process. */
private final int heapSizePercentageOfAvailableMemory;
@@ -103,6 +109,7 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
public ApplicationContainerCluster(TreeConfigProducer<?> parent, String configSubId, String clusterId, DeployState deployState) {
super(parent, configSubId, clusterId, deployState, true, 10);
this.tlsClientAuthority = deployState.tlsClientAuthority();
+ dynamicHeapSize = deployState.featureFlags().dynamicHeapSize();
previousHosts = Collections.unmodifiableSet(deployState.getPreviousModel().stream()
.map(Model::allocatedHosts)
.map(AllocatedHosts::getHosts)
@@ -125,6 +132,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
heapSizePercentageOfAvailableMemory = deployState.featureFlags().heapSizePercentage() > 0
? Math.min(99, deployState.featureFlags().heapSizePercentage())
: defaultHeapSizePercentageOfAvailableMemory;
+ onnxModelCost = deployState.onnxModelCost().newCalculator(deployState.getDeployLogger());
+ logger = deployState.getDeployLogger();
}
@Override
@@ -182,19 +191,25 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
public void setMemoryPercentage(Integer memoryPercentage) { this.memoryPercentage = memoryPercentage; }
@Override
- public Optional<Integer> getMemoryPercentage() {
- if (memoryPercentage != null) return Optional.of(memoryPercentage);
+ public Optional<JvmMemoryPercentage> getMemoryPercentage() {
+ if (memoryPercentage != null) return Optional.of(JvmMemoryPercentage.of(memoryPercentage));
if (isHostedVespa()) {
int availableMemoryPercentage = getHostClusterId().isPresent() ?
heapSizePercentageOfTotalAvailableMemoryWhenCombinedCluster :
heapSizePercentageOfAvailableMemory;
- if (getContainers().isEmpty()) return Optional.of(availableMemoryPercentage); // Node memory is not known
+ if (getContainers().isEmpty()) return Optional.of(JvmMemoryPercentage.of(availableMemoryPercentage)); // Node memory is not known
// Node memory is known so convert available memory percentage to node memory percentage
- double totalMemory = getContainers().get(0).getHostResource().realResources().memoryGb();
- double availableMemory = totalMemory - Host.memoryOverheadGb;
- return Optional.of((int) (availableMemory / totalMemory * availableMemoryPercentage));
+ double totalMemory = dynamicHeapSize
+ ? getContainers().stream().mapToDouble(c -> c.getHostResource().realResources().memoryGb()).min().orElseThrow()
+ : getContainers().get(0).getHostResource().realResources().memoryGb();
+ double jvmHeapDeductionGb = dynamicHeapSize ? onnxModelCost.aggregatedModelCostInBytes() / (1024D * 1024 * 1024) : 0;
+ double availableMemory = Math.max(0, totalMemory - Host.memoryOverheadGb - jvmHeapDeductionGb);
+ int memoryPercentage = (int) (availableMemory / totalMemory * availableMemoryPercentage);
+ logger.log(Level.FINE, () -> "memoryPercentage=%d, availableMemory=%f, totalMemory=%f, availableMemoryPercentage=%d, jvmHeapDeductionGb=%f"
+ .formatted(memoryPercentage, availableMemory, totalMemory, availableMemoryPercentage, jvmHeapDeductionGb));
+ return Optional.of(JvmMemoryPercentage.of(memoryPercentage, availableMemory));
}
return Optional.empty();
}
@@ -299,12 +314,15 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
@Override
public void getConfig(QrStartConfig.Builder builder) {
super.getConfig(builder);
+ var memoryPct = getMemoryPercentage().orElse(null);
+ int heapsize = memoryPct != null && memoryPct.availableMemoryGb().isPresent()
+ ? (int) (memoryPct.availableMemoryGb().getAsDouble() * 1024) : 1536;
builder.jvm.verbosegc(true)
.availableProcessors(0)
.compressedClassSpaceSize(0)
- .minHeapsize(1536)
- .heapsize(1536);
- getMemoryPercentage().ifPresent(percentage -> builder.jvm.heapSizeAsPercentageOfPhysicalMemory(percentage));
+ .minHeapsize(heapsize)
+ .heapsize(heapsize);
+ if (memoryPct != null) builder.jvm.heapSizeAsPercentageOfPhysicalMemory(memoryPct.percentage());
}
@Override
@@ -373,6 +391,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
@Override
public String name() { return getName(); }
+ public OnnxModelCost.Calculator onnxModelCost() { return onnxModelCost; }
+
public static class MbusParams {
// the amount of the maxpendingbytes to process concurrently, typically 0.2 (20%)
final Double maxConcurrentFactor;
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java
index 6bbc24e8739..fa13e7ec9d6 100755
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java
@@ -62,6 +62,7 @@ import com.yahoo.vespa.model.container.search.ContainerSearch;
import com.yahoo.vespa.model.container.search.searchchain.SearchChains;
import com.yahoo.vespa.model.content.Content;
import com.yahoo.vespa.model.search.SearchCluster;
+
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
@@ -71,6 +72,7 @@ import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
+import java.util.OptionalDouble;
import java.util.Set;
import java.util.TreeSet;
@@ -718,5 +720,11 @@ public abstract class ContainerCluster<CONTAINER extends Container>
* Returns the percentage of host physical memory this application has specified for nodes in this cluster,
* or empty if this is not specified by the application.
*/
- public Optional<Integer> getMemoryPercentage() { return Optional.empty(); }
+ public Optional<JvmMemoryPercentage> getMemoryPercentage() { return Optional.empty(); }
+
+ /**
+ * A JVM heap size given as a percentage of host physical memory, optionally together with
+ * the available memory in Gb that the percentage was computed from.
+ *
+ * @param percentage the percentage of host physical memory
+ * @param availableMemoryGb the available memory in Gb, or empty when not known
+ */
+ public record JvmMemoryPercentage(int percentage, OptionalDouble availableMemoryGb) {
+ /** Creates an instance with no available memory set. */
+ static JvmMemoryPercentage of(int percentage) { return new JvmMemoryPercentage(percentage, OptionalDouble.empty()); }
+ /** Creates an instance carrying the given available memory in Gb. */
+ static JvmMemoryPercentage of(int percentage, double availableMemoryGb) {
+ return new JvmMemoryPercentage(percentage, OptionalDouble.of(availableMemoryGb));
+ }
+ }
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java
index 906ef739ef1..1b47f59653e 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java
@@ -45,10 +45,6 @@ public class ContainerModelEvaluation implements
private final RankProfileList rankProfileList;
private final FileDistributedOnnxModels onnxModels; // For cluster specific ONNX model settings
- public ContainerModelEvaluation(ApplicationContainerCluster cluster, RankProfileList rankProfileList) {
- this(cluster, rankProfileList, null);
- }
-
public ContainerModelEvaluation(ApplicationContainerCluster cluster,
RankProfileList rankProfileList, FileDistributedOnnxModels onnxModels) {
this.rankProfileList = Objects.requireNonNull(rankProfileList, "rankProfileList cannot be null");
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java
index 205848e1b67..76bb1a9e02a 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java
@@ -5,6 +5,7 @@ package com.yahoo.vespa.model.container.component;
import com.yahoo.config.ModelReference;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.embedding.BertBaseEmbedderConfig;
+import com.yahoo.vespa.model.container.ApplicationContainerCluster;
import com.yahoo.vespa.model.container.xml.ModelIdResolver;
import org.w3c.dom.Element;
@@ -33,7 +34,7 @@ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConf
private final Integer onnxGpuDevice;
- public BertEmbedder(Element xml, DeployState state) {
+ public BertEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) {
super("ai.vespa.embedding.BertBaseEmbedder", INTEGRATION_BUNDLE_NAME, xml);
model = ModelIdResolver.resolveToModelReference(getChild(xml, "transformer-model"), state);
vocab = ModelIdResolver.resolveToModelReference(getChild(xml, "tokenizer-vocab"), state);
@@ -49,6 +50,7 @@ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConf
onnxInteropThreads = getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null);
onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null);
onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null);
+ cluster.onnxModelCost().registerModel(model);
}
@Override
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java
index c0fdfe3dc64..63096ebcbe2 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java
@@ -5,7 +5,7 @@ package com.yahoo.vespa.model.container.component;
import com.yahoo.config.ModelReference;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.embedding.ColBertEmbedderConfig;
-import com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig;
+import com.yahoo.vespa.model.container.ApplicationContainerCluster;
import com.yahoo.vespa.model.container.xml.ModelIdResolver;
import org.w3c.dom.Element;
@@ -40,7 +40,7 @@ public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderCo
private final Integer onnxIntraopThreads;
private final Integer onnxGpuDevice;
- public ColBertEmbedder(Element xml, DeployState state) {
+ public ColBertEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) {
super("ai.vespa.embedding.ColBertEmbedder", INTEGRATION_BUNDLE_NAME, xml);
var transformerModelElem = getOptionalChild(xml, "transformer-model").orElseThrow();
model = ModelIdResolver.resolveToModelReference(transformerModelElem, state);
@@ -60,7 +60,7 @@ public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderCo
onnxInteropThreads = getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null);
onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null);
onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null);
-
+ cluster.onnxModelCost().registerModel(model);
}
private static ModelReference resolveDefaultVocab(Element model, DeployState state) {
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java
index f4017339699..41b80bf1cb2 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java
@@ -5,6 +5,7 @@ package com.yahoo.vespa.model.container.component;
import com.yahoo.config.ModelReference;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig;
+import com.yahoo.vespa.model.container.ApplicationContainerCluster;
import com.yahoo.vespa.model.container.xml.ModelIdResolver;
import org.w3c.dom.Element;
@@ -33,7 +34,7 @@ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEm
private final Integer onnxGpuDevice;
private final String poolingStrategy;
- public HuggingFaceEmbedder(Element xml, DeployState state) {
+ public HuggingFaceEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) {
super("ai.vespa.embedding.huggingface.HuggingFaceEmbedder", INTEGRATION_BUNDLE_NAME, xml);
var transformerModelElem = getOptionalChild(xml, "transformer-model").orElseThrow();
model = ModelIdResolver.resolveToModelReference(transformerModelElem, state);
@@ -51,6 +52,7 @@ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEm
onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null);
onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null);
poolingStrategy = getChildValue(xml, "pooling-strategy").orElse(null);
+ cluster.onnxModelCost().registerModel(model);
}
private static ModelReference resolveDefaultVocab(Element model, DeployState state) {
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java
index f0296d49472..3261d454b4f 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.model.container.search;
+import com.yahoo.config.application.api.ApplicationPackage;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.container.QrSearchersConfig;
import com.yahoo.prelude.semantics.SemanticRulesConfig;
@@ -56,12 +57,14 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains>
private QueryProfiles queryProfiles;
private SemanticRules semanticRules;
private PageTemplates pageTemplates;
+ private ApplicationPackage app;
public ContainerSearch(DeployState deployState, ApplicationContainerCluster cluster, SearchChains chains) {
super(chains);
this.globalPhase = deployState.featureFlags().enableGlobalPhase();
this.useReconfigurableDispatcher = deployState.featureFlags().useReconfigurableDispatcher();
this.schemasWithGlobalPhase = getSchemasWithGlobalPhase(deployState);
+ this.app = deployState.getApplicationPackage();
this.owningCluster = cluster;
owningCluster.addComponent(Component.fromClassAndBundle(CompiledQueryProfileRegistry.class, SEARCH_AND_DOCPROC_BUNDLE));
@@ -96,6 +99,9 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains>
if ( ! schemasWithGlobalPhase.contains(documentDb.getSchemaName())) continue;
var factory = new RankProfilesEvaluatorComponent(documentDb);
if ( ! owningCluster.getComponentsMap().containsKey(factory.getComponentId())) {
+ var onnxModels = documentDb.getDerivedConfiguration().getRankProfileList().getOnnxModels();
+ onnxModels.asMap().forEach(
+ (__, model) -> owningCluster.onnxModelCost().registerModel(app.getFile(model.getFilePath())));
owningCluster.addComponent(factory);
}
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
index 35b0213bf59..d9c4dea478c 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
@@ -778,6 +778,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
!container.getHostResource().realResources().gpuResources().isZero());
onnxModel.setGpuDevice(gpuDevice, hasGpu);
}
+ cluster.onnxModelCost().registerModel(context.getApplicationPackage().getFile(onnxModel.getFilePath()));
}
cluster.setModelEvaluation(new ContainerModelEvaluation(cluster, profiles, models));
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java
index bb72eda7d04..d18309ef0af 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java
@@ -256,7 +256,7 @@ public class ContentCluster extends TreeConfigProducer<AnyConfigProducer> implem
for (ContainerModel containerModel : containers) {
Optional<String> hostClusterId = containerModel.getCluster().getHostClusterId();
if (hostClusterId.isPresent() && hostClusterId.get().equals(clusterId) && containerModel.getCluster().getMemoryPercentage().isPresent()) {
- return containerModel.getCluster().getMemoryPercentage().get() * 0.01;
+ return containerModel.getCluster().getMemoryPercentage().get().percentage() * 0.01;
}
}
return 0.0;
diff --git a/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java b/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java
index 2f8a8bddf20..38f51323ee2 100644
--- a/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java
+++ b/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java
@@ -148,7 +148,7 @@ public class ModelProvisioningTest {
assertEquals("-Xlog:gc", mydisc2.getContainers().get(1).getJvmOptions());
assertEquals("lib/blablamalloc.so", mydisc2.getContainers().get(0).getPreLoad());
assertEquals("lib/blablamalloc.so", mydisc2.getContainers().get(1).getPreLoad());
- assertEquals(Optional.of(45), mydisc2.getMemoryPercentage());
+ assertEquals(45, mydisc2.getMemoryPercentage().get().percentage());
assertEquals(Optional.of("-XX:+UseParNewGC"), mydisc2.getJvmGCOptions());
QrStartConfig.Builder qrStartBuilder = new QrStartConfig.Builder();
mydisc2.getConfig(qrStartBuilder);
@@ -288,10 +288,11 @@ public class ModelProvisioningTest {
assertEquals(2025077080L, protonMemorySize(model.getContentClusters().get("content1")), "Memory for proton is lowered to account for the jvm heap");
assertProvisioned(0, ClusterSpec.Id.from("container1"), ClusterSpec.Type.container, model);
assertProvisioned(2, ClusterSpec.Id.from("content1"), ClusterSpec.Id.from("container1"), ClusterSpec.Type.combined, model);
- assertEquals(1, logger.msgs().size());
+ var msgs = logger.msgs().stream().filter(m -> m.level().equals(Level.WARNING)).toList();
+ assertEquals(1, msgs.size());
assertEquals("Declaring combined cluster with <nodes of=\"...\"> is deprecated without replacement, " +
"and the feature will be removed in Vespa 9. Use separate container and content clusters instead",
- logger.msgs().get(0).message);
+ msgs.get(0).message);
}
@Test
diff --git a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java
new file mode 100644
index 00000000000..086f2fe778f
--- /dev/null
+++ b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java
@@ -0,0 +1,126 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+package com.yahoo.vespa.model.application.validation;
+
+import com.yahoo.config.ModelReference;
+import com.yahoo.config.application.api.ApplicationFile;
+import com.yahoo.config.application.api.DeployLogger;
+import com.yahoo.config.model.NullConfigModelRegistry;
+import com.yahoo.config.model.api.OnnxModelCost;
+import com.yahoo.config.model.deploy.DeployState;
+import com.yahoo.config.model.deploy.TestProperties;
+import com.yahoo.config.model.provision.InMemoryProvisioner;
+import com.yahoo.config.model.test.MockApplicationPackage;
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.vespa.model.VespaModel;
+import org.junit.jupiter.api.Test;
+import org.xml.sax.SAXException;
+
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicLong;
+
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Tests {@link JvmHeapSizeValidator} against models built with a dummy ONNX model cost.
+ *
+ * @author bjorncs
+ */
+class JvmHeapSizeValidatorTest {
+
+ @Test
+ void fails_on_too_low_jvm_percentage() throws IOException, SAXException {
+ var deployState = createDeployState(8, 7L * 1024 * 1024 * 1024);
+ var model = new VespaModel(new NullConfigModelRegistry(), deployState);
+ var e = assertThrows(IllegalArgumentException.class, () -> new JvmHeapSizeValidator().validate(model, deployState));
+ String expectedMessage = "Allocated percentage of memory of JVM in cluster 'container' is too low (3% < 10%). Estimated cost of ONNX models is 7.00GB";
+ assertTrue(e.getMessage().contains(expectedMessage), e.getMessage());
+ }
+
+ @Test
+ void fails_on_too_low_heap_size() throws IOException, SAXException {
+ var deployState = createDeployState(2, 1024L * 1024 * 1024);
+ var model = new VespaModel(new NullConfigModelRegistry(), deployState);
+ var e = assertThrows(IllegalArgumentException.class, () -> new JvmHeapSizeValidator().validate(model, deployState));
+ String expectedMessage = "Allocated memory to JVM in cluster 'container' is too low (0.30GB < 0.40GB). Estimated cost of ONNX models is 1.00GB.";
+ assertTrue(e.getMessage().contains(expectedMessage), e.getMessage());
+ }
+
+ @Test
+ void accepts_adequate_heap_size() throws IOException, SAXException {
+ var deployState = createDeployState(8, 1024L * 1024 * 1024);
+ var model = new VespaModel(new NullConfigModelRegistry(), deployState);
+ assertDoesNotThrow(() -> new JvmHeapSizeValidator().validate(model, deployState));
+ }
+
+ @Test
+ void accepts_services_with_explicit_jvm_size() throws IOException, SAXException {
+ String servicesXml =
+ """
+ <?xml version="1.0" encoding="utf-8" ?>
+ <services version='1.0'>
+ <container version='1.0'>
+ <nodes count="2">
+ <jvm allocated-memory='5%'/>
+ <resources vcpu="4" memory="2Gb" disk="125Gb"/>
+ </nodes>
+ <component id="hf-embedder" type="hugging-face-embedder">
+ <transformer-model url="https://my/url/model.onnx"/>
+ <tokenizer-model path="app/tokenizer.json"/>
+ </component>
+ </container>
+ </services>""";
+ var deployState = createDeployState(servicesXml, 2, 1024L * 1024 * 1024);
+ var model = new VespaModel(new NullConfigModelRegistry(), deployState);
+ assertDoesNotThrow(() -> new JvmHeapSizeValidator().validate(model, deployState));
+ }
+
+ /**
+ * Builds a hosted deploy state with dynamic heap size enabled.
+ *
+ * @param servicesXml the services.xml content of the mock application package
+ * @param nodeGb memory in Gb of the nodes handed out by the in-memory provisioner
+ * @param modelCostBytes cost in bytes added for each model reference registered with the dummy model cost
+ */
+ private static DeployState createDeployState(String servicesXml, double nodeGb, long modelCostBytes) {
+ return new DeployState.Builder()
+ .applicationPackage(
+ new MockApplicationPackage.Builder()
+ .withServices(servicesXml)
+ .build())
+ .modelHostProvisioner(new InMemoryProvisioner(5, new NodeResources(4, nodeGb, 125, 0.3), true))
+ .properties(new TestProperties().setHostedVespa(true).setDynamicHeapSize(true))
+ .onnxModelCost(new ModelCostDummy(modelCostBytes))
+ .build();
+ }
+
+ /**
+ * Builds a deploy state for a two-node container cluster with a single hugging-face-embedder,
+ * requesting the given amount of node memory.
+ */
+ private static DeployState createDeployState(double nodeGb, long modelCostBytes) {
+ // The memory amount is inserted with %s (Double.toString, locale-independent) rather than %f,
+ // which follows the default locale and emits a decimal comma in some of them.
+ String servicesXml =
+ """
+ <?xml version="1.0" encoding="utf-8" ?>
+ <services version='1.0'>
+ <container version='1.0'>
+ <nodes count="2">
+ <resources vcpu="4" memory="%sGb" disk="125Gb"/>
+ </nodes>
+ <component id="hf-embedder" type="hugging-face-embedder">
+ <transformer-model url="https://my/url/model.onnx"/>
+ <tokenizer-model path="app/tokenizer.json"/>
+ </component>
+ </container>
+ </services>""".formatted(nodeGb);
+ return createDeployState(servicesXml, nodeGb, modelCostBytes);
+ }
+
+ /**
+ * Model cost stub acting as its own calculator: adds a fixed cost for each registered
+ * model reference and ignores registered application files.
+ */
+ private static class ModelCostDummy implements OnnxModelCost, OnnxModelCost.Calculator {
+ final AtomicLong totalCost = new AtomicLong();
+ final long modelCost;
+
+ ModelCostDummy(long modelCost) { this.modelCost = modelCost; }
+
+ @Override public Calculator newCalculator(DeployLogger logger) { return this; }
+ @Override public long aggregatedModelCostInBytes() { return totalCost.get(); }
+ @Override public void registerModel(ApplicationFile path) {}
+
+ @Override
+ public void registerModel(ModelReference ref) {
+ // Only the model url used by the services.xml in these tests is expected here
+ assertEquals("https://my/url/model.onnx", ref.url().orElseThrow().value().toString());
+ totalCost.addAndGet(modelCost);
+ }
+ }
+
+} \ No newline at end of file
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/DeployHandlerLogger.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/DeployHandlerLogger.java
index 154d2d0f2f0..042aa2423f3 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/DeployHandlerLogger.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/DeployHandlerLogger.java
@@ -11,6 +11,7 @@ import com.yahoo.slime.Slime;
import com.yahoo.vespa.config.server.session.PrepareParams;
import com.yahoo.vespa.config.server.tenant.TenantRepository;
+import java.util.function.Supplier;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -35,15 +36,17 @@ public class DeployHandlerLogger implements DeployLogger {
this.logroot = slime.setObject().setArray("log");
}
+ // The String and Supplier overloads both delegate to the three-argument variant below
+ @Override public void log(Level level, String message) { log(level, () -> message); }
+ @Override public void log(Level level, Supplier<String> message) { log(level, message, null); }
+
@Override
@SuppressWarnings("deprecation")
- public void log(Level level, String message) {
- if (level.intValue() <= LogLevel.DEBUG.intValue() && !verbose)
- return;
+ public void log(Level level, Supplier<String> supplier, Throwable throwable) {
+ // Also tee to a normal log, Vespa log for example, but use level fine
+ log.log(Level.FINE, throwable, () -> prefix + supplier.get());
- logJson(level, message);
- // Also tee to a normal log, Vespa log for example, but use level fine
- log.log(Level.FINE, () -> prefix + message);
+ // Messages at DEBUG level or below are only added to the JSON log when verbose is set
+ if (level.intValue() <= LogLevel.DEBUG.intValue() && !verbose) return;
+ // NOTE(review): supplier.get() may be invoked twice, once by the tee above and once here
+ logJson(level, supplier.get());
}
@Override
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java
index 142f98e13e3..3e33b345437 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java
@@ -201,6 +201,7 @@ public class ModelContextImpl implements ModelContext {
private final boolean enableNestedMultivalueGrouping;
private final boolean useReconfigurableDispatcher;
private final int contentLayerMetadataFeatureLevel;
+ private final boolean dynamicHeapSize;
public FeatureFlags(FlagSource source, ApplicationId appId, Version version) {
this.defaultTermwiseLimit = flagValue(source, appId, version, Flags.DEFAULT_TERM_WISE_LIMIT);
@@ -243,6 +244,7 @@ public class ModelContextImpl implements ModelContext {
this.enableNestedMultivalueGrouping = flagValue(source, appId, version, Flags.ENABLE_NESTED_MULTIVALUE_GROUPING);
this.useReconfigurableDispatcher = flagValue(source, appId, version, Flags.USE_RECONFIGURABLE_DISPATCHER);
this.contentLayerMetadataFeatureLevel = flagValue(source, appId, version, Flags.CONTENT_LAYER_METADATA_FEATURE_LEVEL);
+ this.dynamicHeapSize = flagValue(source, appId, version, Flags.DYNAMIC_HEAP_SIZE);
}
@Override public int heapSizePercentage() { return heapPercentage; }
@@ -293,6 +295,7 @@ public class ModelContextImpl implements ModelContext {
@Override public boolean enableNestedMultivalueGrouping() { return enableNestedMultivalueGrouping; }
@Override public boolean useReconfigurableDispatcher() { return useReconfigurableDispatcher; }
@Override public int contentLayerMetadataFeatureLevel() { return contentLayerMetadataFeatureLevel; }
+ @Override public boolean dynamicHeapSize() { return dynamicHeapSize; }
private static <V> V flagValue(FlagSource source, ApplicationId appId, Version vespaVersion, UnboundFlag<? extends V, ?, ?> flag) {
return flag.bindTo(source)
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/zookeeper/ZKApplication.java b/configserver/src/main/java/com/yahoo/vespa/config/server/zookeeper/ZKApplication.java
index 4c262379c35..1288b63cadd 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/zookeeper/ZKApplication.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/zookeeper/ZKApplication.java
@@ -111,6 +111,12 @@ public class ZKApplication {
return getBytesInternal(getFullPath(path));
}
+ /**
+ * Returns the size in bytes of the data stored at the given path.
+ *
+ * @param path the path to look up, resolved with getFullPath in the same way as when reading bytes
+ * @return the data length reported by the stat of the node
+ * @throws IllegalArgumentException if no stat is found for the path
+ */
+ public long getSize(Path path) {
+ return curator.getStat(getFullPath(path)).map(stat -> (long)stat.getDataLength())
+ .orElseThrow(() -> new IllegalArgumentException(
+ "Could not get size from '" + path + "' in zookeeper"));
+ }
+
void putData(Path path, String data) {
byte[] bytes = Utf8.toBytes(data);
ensureDataIsNotTooLarge(bytes, path);
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/zookeeper/ZKApplicationFile.java b/configserver/src/main/java/com/yahoo/vespa/config/server/zookeeper/ZKApplicationFile.java
index 6bc29331efb..e51f8627de2 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/zookeeper/ZKApplicationFile.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/zookeeper/ZKApplicationFile.java
@@ -3,8 +3,9 @@ package com.yahoo.vespa.config.server.zookeeper;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.yahoo.config.application.api.ApplicationFile;
-import com.yahoo.path.Path;
import com.yahoo.io.IOUtils;
+import com.yahoo.path.Path;
+import com.yahoo.vespa.config.util.ConfigUtils;
import java.io.ByteArrayInputStream;
import java.io.FileNotFoundException;
@@ -13,11 +14,9 @@ import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
-import java.util.logging.Level;
-import com.yahoo.vespa.config.util.ConfigUtils;
-
import java.util.ArrayList;
import java.util.List;
+import java.util.logging.Level;
import java.util.logging.Logger;
import static com.yahoo.vespa.config.server.zookeeper.ZKApplication.USERAPP_ZK_SUBPATH;
@@ -184,6 +183,8 @@ class ZKApplicationFile extends ApplicationFile {
}
}
+ /** Returns the size of this file's data, as reported by zkApp for this file's ZooKeeper path. */
+ @Override public long getSize() { return zkApp.getSize(getZKPath(path)); }
+
@Override
public int compareTo(ApplicationFile other) {
if (other == this) return 0;
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
index 2e158f0f3ef..e5b76bedecd 100644
--- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
+++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
@@ -406,6 +406,13 @@ public class Flags {
"Takes effect at redeployment",
INSTANCE_ID);
+ public static final UnboundBooleanFlag DYNAMIC_HEAP_SIZE = defineFeatureFlag(
+ "dynamic-heap-size", false,
+ List.of("bjorncs"), "2023-09-21", "2024-01-15",
+ "Whether to calculate JVM heap size based on predicted Onnx model memory requirements",
+ "Takes effect at redeployment",
+ INSTANCE_ID);
+
/** WARNING: public for testing: All flags should be defined in {@link Flags}. */
public static UnboundBooleanFlag defineFeatureFlag(String flagId, boolean defaultValue, List<String> owners,
String createdAt, String expiresAt, String description,