diff options
29 files changed, 445 insertions, 46 deletions
diff --git a/config-application-package/src/main/java/com/yahoo/config/model/application/provider/FilesApplicationFile.java b/config-application-package/src/main/java/com/yahoo/config/model/application/provider/FilesApplicationFile.java index 6b0adb5d079..2dbbc8a5820 100644 --- a/config-application-package/src/main/java/com/yahoo/config/model/application/provider/FilesApplicationFile.java +++ b/config-application-package/src/main/java/com/yahoo/config/model/application/provider/FilesApplicationFile.java @@ -5,13 +5,20 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.yahoo.config.application.api.ApplicationFile; import com.yahoo.io.IOUtils; import com.yahoo.path.Path; -import java.util.logging.Level; -import com.yahoo.yolean.Exceptions; import com.yahoo.vespa.config.util.ConfigUtils; +import com.yahoo.yolean.Exceptions; -import java.io.*; +import java.io.File; +import java.io.FileFilter; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.Reader; import java.util.ArrayList; import java.util.List; +import java.util.logging.Level; import java.util.logging.Logger; /** @@ -208,6 +215,8 @@ public class FilesApplicationFile extends ApplicationFile { } } + @Override public long getSize() { return file.length(); } + @Override public int compareTo(ApplicationFile other) { if (other == this) return 0; diff --git a/config-model-api/src/main/java/com/yahoo/config/application/api/ApplicationFile.java b/config-model-api/src/main/java/com/yahoo/config/application/api/ApplicationFile.java index a55ae795d28..97336b2bca0 100644 --- a/config-model-api/src/main/java/com/yahoo/config/application/api/ApplicationFile.java +++ b/config-model-api/src/main/java/com/yahoo/config/application/api/ApplicationFile.java @@ -160,6 +160,8 @@ public abstract class ApplicationFile implements Comparable<ApplicationFile> { public abstract MetaData getMetaData(); + public abstract long getSize(); + public static class MetaData { public String status = "unknown"; diff --git a/config-model-api/src/main/java/com/yahoo/config/application/api/DeployLogger.java b/config-model-api/src/main/java/com/yahoo/config/application/api/DeployLogger.java index d9ebd902e3e..65e6bc2803a 100644 --- a/config-model-api/src/main/java/com/yahoo/config/application/api/DeployLogger.java +++ b/config-model-api/src/main/java/com/yahoo/config/application/api/DeployLogger.java @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.config.application.api; +import java.util.function.Supplier; import java.util.logging.Level; /** @@ -13,6 +14,10 @@ public interface DeployLogger { /** Log a message unrelated to the application package, e.g. internal error/status. */ void log(Level level, String message); + default void log(Level level, Supplier<String> message) { log(level, message.get()); } + + default void log(Level level, Supplier<String> message, Throwable throwable) { log(level, message); } + /** * Log a message related to the application package. These messages should be actionable by the user, f.ex. to * signal usage of invalid/deprecated syntax diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java index 37b24f0ac1d..446c32801e0 100644 --- a/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java +++ b/config-model-api/src/main/java/com/yahoo/config/model/api/ModelContext.java @@ -117,6 +117,7 @@ public interface ModelContext { @ModelFeatureFlag(owners = {"baldersheim"}) default boolean enableNestedMultivalueGrouping() { return false; } @ModelFeatureFlag(owners = {"jonmv"}) default boolean useReconfigurableDispatcher() { return false; } @ModelFeatureFlag(owners = {"vekterli"}) default int contentLayerMetadataFeatureLevel() { return 0; } + @ModelFeatureFlag(owners = {"bjorncs"}) default boolean dynamicHeapSize() { return false; } } /** Warning: As elsewhere in this package, do not make backwards incompatible changes that will break old config models! */ diff --git a/config-model-api/src/main/java/com/yahoo/config/model/api/OnnxModelCost.java b/config-model-api/src/main/java/com/yahoo/config/model/api/OnnxModelCost.java new file mode 100644 index 00000000000..422ceba8074 --- /dev/null +++ b/config-model-api/src/main/java/com/yahoo/config/model/api/OnnxModelCost.java @@ -0,0 +1,29 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.config.model.api; + +import com.yahoo.config.ModelReference; +import com.yahoo.config.application.api.ApplicationFile; +import com.yahoo.config.application.api.DeployLogger; + +/** + * @author bjorncs + */ +public interface OnnxModelCost { + + Calculator newCalculator(DeployLogger logger); + + interface Calculator { + long aggregatedModelCostInBytes(); + void registerModel(ApplicationFile path); + void registerModel(ModelReference ref); + } + + static OnnxModelCost disabled() { + return (__) -> new Calculator() { + @Override public long aggregatedModelCostInBytes() { return 0; } + @Override public void registerModel(ApplicationFile path) {} + @Override public void registerModel(ModelReference ref) {} + }; + } +} diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java b/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java index 4df7a76031a..5ab258ecce8 100644 --- a/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java +++ b/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java @@ -18,6 +18,7 @@ import com.yahoo.config.model.api.EndpointCertificateSecrets; import com.yahoo.config.model.api.HostProvisioner; import com.yahoo.config.model.api.Model; import com.yahoo.config.model.api.ModelContext; +import com.yahoo.config.model.api.OnnxModelCost; import com.yahoo.config.model.api.Provisioned; import com.yahoo.config.model.api.Reindexing; import com.yahoo.config.model.api.ValidationParameters; @@ -90,6 +91,7 @@ public class DeployState implements ConfigDefinitionStore { private final Provisioned provisioned; private final Reindexing reindexing; private final ExecutorService executor; + private final OnnxModelCost onnxModelCost; public static DeployState createTestState() { return new Builder().build(); @@ -124,7 +126,8 @@ public class DeployState implements ConfigDefinitionStore { boolean accessLoggingEnabledByDefault, Optional<DockerImage> wantedDockerImageRepo, Reindexing reindexing, - Optional<ValidationOverrides> validationOverrides) { + Optional<ValidationOverrides> validationOverrides, + OnnxModelCost onnxModelCost) { this.logger = deployLogger; this.fileRegistry = fileRegistry; this.executor = executor; @@ -152,6 +155,7 @@ public class DeployState implements ConfigDefinitionStore { this.now = now; this.wantedDockerImageRepo = wantedDockerImageRepo; this.reindexing = reindexing; + this.onnxModelCost = onnxModelCost; } public static HostProvisioner getDefaultModelHostProvisioner(ApplicationPackage applicationPackage) { @@ -305,6 +309,8 @@ public class DeployState implements ConfigDefinitionStore { public Optional<Reindexing> reindexing() { return Optional.ofNullable(reindexing); } + public OnnxModelCost onnxModelCost() { return onnxModelCost; } + public boolean isHostedTenantApplication(ApplicationType type) { boolean isTesterApplication = getProperties().applicationId().instance().isTester(); return isHosted() && type == ApplicationType.DEFAULT && !isTesterApplication; @@ -333,6 +339,7 @@ public class DeployState implements ConfigDefinitionStore { private QueryProfiles queryProfiles = null; private Reindexing reindexing = null; private Optional<ValidationOverrides> validationOverrides = Optional.empty(); + private OnnxModelCost onnxModelCost = OnnxModelCost.disabled(); public Builder() {} @@ -450,6 +457,8 @@ public class DeployState implements ConfigDefinitionStore { return this; } + public Builder onnxModelCost(OnnxModelCost instance) { this.onnxModelCost = instance; return this; } + public DeployState build() { return build(new ValidationParameters()); } @@ -482,7 +491,8 @@ public class DeployState implements ConfigDefinitionStore { accessLoggingEnabledByDefault, wantedDockerImageRepo, reindexing, - validationOverrides); + validationOverrides, + onnxModelCost); } } diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java index 815c32e3c8f..77356292f9a 100644 --- a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java +++ b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java @@ -86,6 +86,7 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea private boolean allowUserFilters = true; private List<DataplaneToken> dataplaneTokens; private int contentLayerMetadataFeatureLevel = 0; + private boolean dynamicHeapSize = false; @Override public ModelContext.FeatureFlags featureFlags() { return this; } @Override public boolean multitenant() { return multitenant; } @@ -144,6 +145,7 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea @Override public boolean enableGlobalPhase() { return true; } // Enable global-phase by default for unit tests only @Override public List<DataplaneToken> dataplaneTokens() { return dataplaneTokens; } @Override public int contentLayerMetadataFeatureLevel() { return contentLayerMetadataFeatureLevel; } + @Override public boolean dynamicHeapSize() { return dynamicHeapSize; } public TestProperties sharedStringRepoNoReclaim(boolean sharedStringRepoNoReclaim) { this.sharedStringRepoNoReclaim = sharedStringRepoNoReclaim; @@ -379,6 +381,8 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea return this; } + public TestProperties setDynamicHeapSize(boolean b) { this.dynamicHeapSize = b; return this; } + public static class Spec implements ConfigServerSpec { private final String hostName; diff --git a/config-model/src/main/java/com/yahoo/config/model/test/MockApplicationPackage.java b/config-model/src/main/java/com/yahoo/config/model/test/MockApplicationPackage.java index dbcd1cea2fa..342b5f243e7 100644 --- a/config-model/src/main/java/com/yahoo/config/model/test/MockApplicationPackage.java +++ b/config-model/src/main/java/com/yahoo/config/model/test/MockApplicationPackage.java @@ -488,6 +488,8 @@ public class MockApplicationPackage implements ApplicationPackage { throw new UnsupportedOperationException(); } + @Override public long getSize() { return file.length(); } + @Override public int compareTo(ApplicationFile other) { return this.getPath().getName().compareTo((other).getPath().getName()); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/DefaultOnnxModelCost.java b/config-model/src/main/java/com/yahoo/vespa/model/DefaultOnnxModelCost.java new file mode 100644 index 00000000000..76733872882 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/DefaultOnnxModelCost.java @@ -0,0 +1,99 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.model; + +import com.yahoo.config.ModelReference; +import com.yahoo.config.application.api.ApplicationFile; +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.config.model.api.OnnxModelCost; + +import java.io.IOException; +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.time.Duration; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.logging.Level; + +import static com.yahoo.yolean.Exceptions.uncheck; + +/** + * Aggregates estimated footprint of configured ONNX models. + * + * @author bjorncs + */ +public class DefaultOnnxModelCost implements OnnxModelCost { + + @Override + public Calculator newCalculator(DeployLogger logger) { + return new CalculatorImpl(logger); + } + + private static class CalculatorImpl implements Calculator { + private final DeployLogger log; + + private final ConcurrentMap<String, Long> modelCost = new ConcurrentHashMap<>(); + + private CalculatorImpl(DeployLogger log) { + this.log = log; + } + + @Override + public long aggregatedModelCostInBytes() { + return modelCost.values().stream().mapToLong(Long::longValue).sum(); + } + + @Override + public void registerModel(ApplicationFile f) { + String path = f.getPath().getRelative(); + if (alreadyAnalyzed(path)) return; + log.log(Level.FINE, () -> "Register model '%s'".formatted(path)); + deductJvmHeapSizeWithModelCost(f.exists() ? f.getSize() : 0, path); + } + + @Override + public void registerModel(ModelReference ref) { + log.log(Level.FINE, () -> "Register model '%s'".formatted(ref.toString())); + if (ref.path().isPresent()) { + var path = Paths.get(ref.path().get().value()); + var source = path.getFileName().toString(); + if (alreadyAnalyzed(source)) return; + deductJvmHeapSizeWithModelCost(uncheck(() -> Files.exists(path) ? Files.size(path) : 0), source); + } else if (ref.url().isPresent()) deductJvmHeapSizeWithModelCost(URI.create(ref.url().get().value())); + else throw new IllegalStateException(ref.toString()); + } + + private void deductJvmHeapSizeWithModelCost(URI uri) { + if (alreadyAnalyzed(uri.toString())) return; + if (uri.getScheme().equals("http") || uri.getScheme().equals("https")) { + try { + var timeout = Duration.ofSeconds(3); + var httpClient = HttpClient.newBuilder().connectTimeout(timeout).build(); + var request = HttpRequest.newBuilder(uri).timeout(timeout).method("HEAD", HttpRequest.BodyPublishers.noBody()).build(); + var response = httpClient.send(request, HttpResponse.BodyHandlers.discarding()); + var contentLength = response.headers().firstValue("Content-Length").orElse("0"); + log.log(Level.FINE, () -> "Got content length '%s' for '%s'".formatted(contentLength, uri)); + deductJvmHeapSizeWithModelCost(Long.parseLong(contentLength), uri.toString()); + } catch (IllegalArgumentException | InterruptedException | IOException e) { + log.log(Level.INFO, () -> "Failed to get model size for '%s': %s".formatted(uri, e.getMessage()), e); + } + } + } + + private void deductJvmHeapSizeWithModelCost(long size, String source) { + long fallbackModelSize = 1024*1024*1024; + long estimatedCost = Math.max(300*1024*1024, (long) (1.4D * (size > 0 ? size : fallbackModelSize) + 100*1024*1024)); + log.log(Level.FINE, () -> + "Estimated %s footprint for model of size %s ('%s')".formatted(mb(estimatedCost), mb(size), source)); + modelCost.put(source, estimatedCost); + } + + private boolean alreadyAnalyzed(String source) { return modelCost.containsKey(source); } + + private static String mb(long bytes) { return "%dMB".formatted(bytes / (1024*1024)); } + } +} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java b/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java index 28ff8dff620..727a18aee2c 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java @@ -21,6 +21,7 @@ import com.yahoo.config.model.api.Model; import com.yahoo.config.model.api.ModelContext; import com.yahoo.config.model.api.ModelCreateResult; import com.yahoo.config.model.api.ModelFactory; +import com.yahoo.config.model.api.OnnxModelCost; import com.yahoo.config.model.api.ValidationParameters; import com.yahoo.config.model.application.provider.ApplicationPackageXmlFilesValidator; import com.yahoo.config.model.builder.xml.ConfigModelBuilder; @@ -197,7 +198,8 @@ public class VespaModelFactory implements ModelFactory { .zone(zone) .now(clock.instant()) .wantedNodeVespaVersion(modelContext.wantedNodeVespaVersion()) - .wantedDockerImageRepo(modelContext.wantedDockerImageRepo()); + .wantedDockerImageRepo(modelContext.wantedDockerImageRepo()) + .onnxModelCost(modelContext.properties().hostedVespa() ? new DefaultOnnxModelCost() : OnnxModelCost.disabled()); modelContext.previousModel().ifPresent(builder::previousModel); modelContext.reindexing().ifPresent(builder::reindexing); return builder.build(validationParameters); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java new file mode 100644 index 00000000000..2c5e0db14b9 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java @@ -0,0 +1,51 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.model.application.validation; + +import com.yahoo.config.model.deploy.DeployState; +import com.yahoo.vespa.model.VespaModel; + +import java.util.logging.Level; + +/** + * Validates that the container node flavour has enough resources to run configured ONNX models. + * + * @author bjorncs + */ +public class JvmHeapSizeValidator extends Validator { + + @Override + public void validate(VespaModel model, DeployState ds) { + if (!ds.featureFlags().dynamicHeapSize()) return; + if (!ds.isHostedTenantApplication(model.getAdmin().getApplicationType())) return; + + model.getContainerClusters().forEach((clusterId, appCluster) -> { + var mp = appCluster.getMemoryPercentage().orElse(null); + if (mp == null) return; + if (mp.availableMemoryGb().isEmpty()) { + ds.getDeployLogger().log(Level.FINE, "Host resources unknown or percentage overridden with 'allocated-memory'"); + return; + } + long jvmModelCost = appCluster.onnxModelCost().aggregatedModelCostInBytes(); + if (jvmModelCost > 0) { + int percentLimit = 10; + if (mp.percentage() < percentLimit) { + throw new IllegalArgumentException( + ("Allocated percentage of memory of JVM in cluster '%s' is too low (%d%% < %d%%). " + + "Estimated cost of ONNX models is %.2fGB. Either use a node flavor with more memory or use less expensive models. " + + "You may override this validation by specifying 'allocated-memory' (https://docs.vespa.ai/en/performance/container-tuning.html#jvm-heap-size).") + .formatted(clusterId, mp.percentage(), percentLimit, jvmModelCost / (1024D * 1024 * 1024))); + } + double gbLimit = 0.4; + double availableMemoryGb = mp.availableMemoryGb().getAsDouble(); + if (availableMemoryGb < gbLimit) { + throw new IllegalArgumentException( + ("Allocated memory to JVM in cluster '%s' is too low (%.2fGB < %.2fGB). " + + "Estimated cost of ONNX models is %.2fGB. Either use a node flavor with more memory or use less expensive models. " + + "You may override this validation by specifying 'allocated-memory' (https://docs.vespa.ai/en/performance/container-tuning.html#jvm-heap-size).") + .formatted(clusterId, availableMemoryGb, gbLimit, jvmModelCost / (1024D * 1024 * 1024))); + } + } + }); + } +} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java index 53a553ee624..b9ecf7c2d22 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java @@ -87,6 +87,7 @@ public class Validation { new AccessControlFilterExcludeValidator().validate(model, deployState); new CloudUserFilterValidator().validate(model, deployState); new CloudHttpConnectorValidator().validate(model, deployState); + new JvmHeapSizeValidator().validate(model, deployState); additionalValidators.forEach(v -> v.validate(model, deployState)); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java index 7501f6162c7..9ecd359f90d 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java @@ -7,11 +7,12 @@ import com.yahoo.config.model.producer.AnyConfigProducer; import com.yahoo.config.model.producer.TreeConfigProducer; import com.yahoo.osgi.provider.model.ComponentModel; import com.yahoo.text.XML; -import com.yahoo.vespa.model.container.component.HuggingFaceEmbedder; -import com.yahoo.vespa.model.container.component.HuggingFaceTokenizer; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; import com.yahoo.vespa.model.container.component.BertEmbedder; import com.yahoo.vespa.model.container.component.ColBertEmbedder; import com.yahoo.vespa.model.container.component.Component; +import com.yahoo.vespa.model.container.component.HuggingFaceEmbedder; +import com.yahoo.vespa.model.container.component.HuggingFaceTokenizer; import com.yahoo.vespa.model.container.xml.BundleInstantiationSpecificationBuilder; import org.w3c.dom.Element; @@ -35,19 +36,20 @@ public class DomComponentBuilder extends VespaDomBuilder.DomConfigProducerBuilde @Override protected Component<? super Component<?, ?>, ?> doBuild(DeployState deployState, TreeConfigProducer<AnyConfigProducer> ancestor, Element spec) { - var component = buildComponent(spec, deployState); + var component = buildComponent(spec, deployState, ancestor); addChildren(deployState, ancestor, spec, component); return component; } - private Component<? super Component<?, ?>, ?> buildComponent(Element spec, DeployState state) { + private Component<? super Component<?, ?>, ?> buildComponent( + Element spec, DeployState state, TreeConfigProducer<AnyConfigProducer> ancestor) { if (spec.hasAttribute("type")) { var type = spec.getAttribute("type"); return switch (type) { - case "hugging-face-embedder" -> new HuggingFaceEmbedder(spec, state); + case "hugging-face-embedder" -> new HuggingFaceEmbedder((ApplicationContainerCluster)ancestor, spec, state); case "hugging-face-tokenizer" -> new HuggingFaceTokenizer(spec, state); - case "bert-embedder" -> new BertEmbedder(spec, state); - case "colbert-embedder" -> new ColBertEmbedder(spec, state); + case "colbert-embedder" -> new ColBertEmbedder((ApplicationContainerCluster)ancestor, spec, state); + case "bert-embedder" -> new BertEmbedder((ApplicationContainerCluster)ancestor, spec, state); default -> throw new IllegalArgumentException("Unknown component type '%s'".formatted(type)); }; } else { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java index b9021912244..d6403c2e8e3 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java @@ -8,10 +8,12 @@ import com.yahoo.component.ComponentId; import com.yahoo.component.ComponentSpecification; import com.yahoo.config.FileReference; import com.yahoo.config.application.api.ComponentInfo; +import com.yahoo.config.application.api.DeployLogger; import com.yahoo.config.model.api.ApplicationClusterEndpoint; import com.yahoo.config.model.api.ApplicationClusterInfo; import com.yahoo.config.model.api.ContainerEndpoint; import com.yahoo.config.model.api.Model; +import com.yahoo.config.model.api.OnnxModelCost; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.config.model.producer.TreeConfigProducer; import com.yahoo.config.provision.AllocatedHosts; @@ -47,6 +49,7 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Optional; import java.util.Set; +import java.util.logging.Level; import java.util.stream.Collectors; import static com.yahoo.vespa.model.container.docproc.DocprocChains.DOCUMENT_TYPE_MANAGER_CLASS; @@ -82,6 +85,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat private final Set<FileReference> applicationBundles = new LinkedHashSet<>(); private final Set<String> previousHosts; + private final OnnxModelCost.Calculator onnxModelCost; + private final DeployLogger logger; private ContainerModelEvaluation modelEvaluation; @@ -92,6 +97,7 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat private int zookeeperSessionTimeoutSeconds = 30; private final int transport_events_before_wakeup; private final int transport_connections_per_target; + private final boolean dynamicHeapSize; /** The heap size % of total memory available to the JVM process. */ private final int heapSizePercentageOfAvailableMemory; @@ -103,6 +109,7 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat public ApplicationContainerCluster(TreeConfigProducer<?> parent, String configSubId, String clusterId, DeployState deployState) { super(parent, configSubId, clusterId, deployState, true, 10); this.tlsClientAuthority = deployState.tlsClientAuthority(); + dynamicHeapSize = deployState.featureFlags().dynamicHeapSize(); previousHosts = Collections.unmodifiableSet(deployState.getPreviousModel().stream() .map(Model::allocatedHosts) .map(AllocatedHosts::getHosts) @@ -125,6 +132,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat heapSizePercentageOfAvailableMemory = deployState.featureFlags().heapSizePercentage() > 0 ? Math.min(99, deployState.featureFlags().heapSizePercentage()) : defaultHeapSizePercentageOfAvailableMemory; + onnxModelCost = deployState.onnxModelCost().newCalculator(deployState.getDeployLogger()); + logger = deployState.getDeployLogger(); } @Override @@ -182,19 +191,25 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat public void setMemoryPercentage(Integer memoryPercentage) { this.memoryPercentage = memoryPercentage; } @Override - public Optional<Integer> getMemoryPercentage() { - if (memoryPercentage != null) return Optional.of(memoryPercentage); + public Optional<JvmMemoryPercentage> getMemoryPercentage() { + if (memoryPercentage != null) return Optional.of(JvmMemoryPercentage.of(memoryPercentage)); if (isHostedVespa()) { int availableMemoryPercentage = getHostClusterId().isPresent() ? heapSizePercentageOfTotalAvailableMemoryWhenCombinedCluster : heapSizePercentageOfAvailableMemory; - if (getContainers().isEmpty()) return Optional.of(availableMemoryPercentage); // Node memory is not known + if (getContainers().isEmpty()) return Optional.of(JvmMemoryPercentage.of(availableMemoryPercentage)); // Node memory is not known // Node memory is known so convert available memory percentage to node memory percentage - double totalMemory = getContainers().get(0).getHostResource().realResources().memoryGb(); - double availableMemory = totalMemory - Host.memoryOverheadGb; - return Optional.of((int) (availableMemory / totalMemory * availableMemoryPercentage)); + double totalMemory = dynamicHeapSize + ? getContainers().stream().mapToDouble(c -> c.getHostResource().realResources().memoryGb()).min().orElseThrow() + : getContainers().get(0).getHostResource().realResources().memoryGb(); + double jvmHeapDeductionGb = dynamicHeapSize ? onnxModelCost.aggregatedModelCostInBytes() / (1024D * 1024 * 1024) : 0; + double availableMemory = Math.max(0, totalMemory - Host.memoryOverheadGb - jvmHeapDeductionGb); + int memoryPercentage = (int) (availableMemory / totalMemory * availableMemoryPercentage); + logger.log(Level.FINE, () -> "memoryPercentage=%d, availableMemory=%f, totalMemory=%f, availableMemoryPercentage=%d, jvmHeapDeductionGb=%f" + .formatted(memoryPercentage, availableMemory, totalMemory, availableMemoryPercentage, jvmHeapDeductionGb)); + return Optional.of(JvmMemoryPercentage.of(memoryPercentage, availableMemory)); } return Optional.empty(); } @@ -299,12 +314,15 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat @Override public void getConfig(QrStartConfig.Builder builder) { super.getConfig(builder); + var memoryPct = getMemoryPercentage().orElse(null); + int heapsize = memoryPct != null && memoryPct.availableMemoryGb().isPresent() + ? (int) (memoryPct.availableMemoryGb().getAsDouble() * 1024) : 1536; builder.jvm.verbosegc(true) .availableProcessors(0) .compressedClassSpaceSize(0) - .minHeapsize(1536) - .heapsize(1536); - getMemoryPercentage().ifPresent(percentage -> builder.jvm.heapSizeAsPercentageOfPhysicalMemory(percentage)); + .minHeapsize(heapsize) + .heapsize(heapsize); + if (memoryPct != null) builder.jvm.heapSizeAsPercentageOfPhysicalMemory(memoryPct.percentage()); } @Override @@ -373,6 +391,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat @Override public String name() { return getName(); } + public OnnxModelCost.Calculator onnxModelCost() { return onnxModelCost; } + public static class MbusParams { // the amount of the maxpendingbytes to process concurrently, typically 0.2 (20%) final Double maxConcurrentFactor; diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java index 6bbc24e8739..fa13e7ec9d6 100755 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java @@ -62,6 +62,7 @@ import com.yahoo.vespa.model.container.search.ContainerSearch; import com.yahoo.vespa.model.container.search.searchchain.SearchChains; import com.yahoo.vespa.model.content.Content; import com.yahoo.vespa.model.search.SearchCluster; + import java.nio.file.Path; import java.util.ArrayList; import java.util.Collection; @@ -71,6 +72,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.OptionalDouble; import java.util.Set; import java.util.TreeSet; @@ -718,5 +720,11 @@ public abstract class ContainerCluster<CONTAINER extends Container> * Returns the percentage of host physical memory this application has specified for nodes in this cluster, * or empty if this is not specified by the application. */ - public Optional<Integer> getMemoryPercentage() { return Optional.empty(); } + public record JvmMemoryPercentage(int percentage, OptionalDouble availableMemoryGb) { + static JvmMemoryPercentage of(int percentage) { return new JvmMemoryPercentage(percentage, OptionalDouble.empty()); } + static JvmMemoryPercentage of(int percentage, double availableMemoryGb) { + return new JvmMemoryPercentage(percentage, OptionalDouble.of(availableMemoryGb)); + } + } + public Optional<JvmMemoryPercentage> getMemoryPercentage() { return Optional.empty(); } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java index 906ef739ef1..1b47f59653e 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java @@ -45,10 +45,6 @@ public class ContainerModelEvaluation implements private final RankProfileList rankProfileList; private final FileDistributedOnnxModels onnxModels; // For cluster specific ONNX model settings - public ContainerModelEvaluation(ApplicationContainerCluster cluster, RankProfileList rankProfileList) { - this(cluster, rankProfileList, null); - } - public ContainerModelEvaluation(ApplicationContainerCluster cluster, RankProfileList rankProfileList, FileDistributedOnnxModels onnxModels) { this.rankProfileList = Objects.requireNonNull(rankProfileList, "rankProfileList cannot be null"); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java index 205848e1b67..76bb1a9e02a 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java @@ -5,6 +5,7 @@ package com.yahoo.vespa.model.container.component; import com.yahoo.config.ModelReference; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.embedding.BertBaseEmbedderConfig; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; import com.yahoo.vespa.model.container.xml.ModelIdResolver; import org.w3c.dom.Element; @@ -33,7 +34,7 @@ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConf private final Integer onnxGpuDevice; - public BertEmbedder(Element xml, DeployState state) { + public BertEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) { super("ai.vespa.embedding.BertBaseEmbedder", INTEGRATION_BUNDLE_NAME, xml); model = ModelIdResolver.resolveToModelReference(getChild(xml, "transformer-model"), state); vocab = ModelIdResolver.resolveToModelReference(getChild(xml, "tokenizer-vocab"), state); @@ -49,6 +50,7 @@ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConf onnxInteropThreads = getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null); onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null); onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null); + cluster.onnxModelCost().registerModel(model); } @Override diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java index c0fdfe3dc64..63096ebcbe2 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java @@ -5,7 +5,7 @@ package com.yahoo.vespa.model.container.component; import com.yahoo.config.ModelReference; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.embedding.ColBertEmbedderConfig; -import com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; import com.yahoo.vespa.model.container.xml.ModelIdResolver; import org.w3c.dom.Element; @@ -40,7 +40,7 @@ public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderCo private final Integer onnxIntraopThreads; private final Integer onnxGpuDevice; - public ColBertEmbedder(Element xml, DeployState state) { + public ColBertEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) { super("ai.vespa.embedding.ColBertEmbedder", INTEGRATION_BUNDLE_NAME, xml); var transformerModelElem = getOptionalChild(xml, "transformer-model").orElseThrow(); model = ModelIdResolver.resolveToModelReference(transformerModelElem, state); @@ -60,7 +60,7 @@ public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderCo onnxInteropThreads = getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null); onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null); onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null); - + cluster.onnxModelCost().registerModel(model); } private static ModelReference resolveDefaultVocab(Element model, DeployState state) { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java index f4017339699..41b80bf1cb2 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java @@ -5,6 +5,7 @@ package com.yahoo.vespa.model.container.component; import com.yahoo.config.ModelReference; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; import com.yahoo.vespa.model.container.xml.ModelIdResolver; import org.w3c.dom.Element; @@ -33,7 +34,7 @@ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEm private final Integer onnxGpuDevice; private final String poolingStrategy; - public HuggingFaceEmbedder(Element xml, DeployState state) { + public HuggingFaceEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) { super("ai.vespa.embedding.huggingface.HuggingFaceEmbedder", INTEGRATION_BUNDLE_NAME, xml); var transformerModelElem = getOptionalChild(xml, "transformer-model").orElseThrow(); model = ModelIdResolver.resolveToModelReference(transformerModelElem, state); @@ -51,6 +52,7 @@ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEm onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null); onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null); poolingStrategy = getChildValue(xml, "pooling-strategy").orElse(null); + cluster.onnxModelCost().registerModel(model); } private static ModelReference resolveDefaultVocab(Element model, DeployState state) { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java index f0296d49472..3261d454b4f 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.model.container.search; +import com.yahoo.config.application.api.ApplicationPackage; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.container.QrSearchersConfig; import com.yahoo.prelude.semantics.SemanticRulesConfig; @@ -56,12 +57,14 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains> private QueryProfiles queryProfiles; private SemanticRules semanticRules; private PageTemplates pageTemplates; + private ApplicationPackage app; public ContainerSearch(DeployState deployState, ApplicationContainerCluster cluster, SearchChains chains) { super(chains); this.globalPhase = deployState.featureFlags().enableGlobalPhase(); this.useReconfigurableDispatcher = deployState.featureFlags().useReconfigurableDispatcher(); this.schemasWithGlobalPhase = getSchemasWithGlobalPhase(deployState); + this.app = deployState.getApplicationPackage(); this.owningCluster = cluster; owningCluster.addComponent(Component.fromClassAndBundle(CompiledQueryProfileRegistry.class, SEARCH_AND_DOCPROC_BUNDLE)); @@ -96,6 +99,9 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains> if ( ! schemasWithGlobalPhase.contains(documentDb.getSchemaName())) continue; var factory = new RankProfilesEvaluatorComponent(documentDb); if ( ! owningCluster.getComponentsMap().containsKey(factory.getComponentId())) { + var onnxModels = documentDb.getDerivedConfiguration().getRankProfileList().getOnnxModels(); + onnxModels.asMap().forEach( + (__, model) -> owningCluster.onnxModelCost().registerModel(app.getFile(model.getFilePath()))); owningCluster.addComponent(factory); } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java index 35b0213bf59..d9c4dea478c 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java @@ -778,6 +778,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { !container.getHostResource().realResources().gpuResources().isZero()); onnxModel.setGpuDevice(gpuDevice, hasGpu); } + cluster.onnxModelCost().registerModel(context.getApplicationPackage().getFile(onnxModel.getFilePath())); } cluster.setModelEvaluation(new ContainerModelEvaluation(cluster, profiles, models)); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java index bb72eda7d04..d18309ef0af 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java @@ -256,7 +256,7 @@ public class ContentCluster extends TreeConfigProducer<AnyConfigProducer> implem for (ContainerModel containerModel : containers) { Optional<String> hostClusterId = containerModel.getCluster().getHostClusterId(); if (hostClusterId.isPresent() && hostClusterId.get().equals(clusterId) && containerModel.getCluster().getMemoryPercentage().isPresent()) { - return containerModel.getCluster().getMemoryPercentage().get() * 0.01; + return containerModel.getCluster().getMemoryPercentage().get().percentage() * 0.01; } } return 0.0; diff --git a/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java b/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java index 2f8a8bddf20..38f51323ee2 100644 --- a/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java +++ b/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java @@ -148,7 +148,7 @@ public class ModelProvisioningTest { assertEquals("-Xlog:gc", mydisc2.getContainers().get(1).getJvmOptions()); assertEquals("lib/blablamalloc.so", mydisc2.getContainers().get(0).getPreLoad()); assertEquals("lib/blablamalloc.so", mydisc2.getContainers().get(1).getPreLoad()); - assertEquals(Optional.of(45), mydisc2.getMemoryPercentage()); + assertEquals(45, mydisc2.getMemoryPercentage().get().percentage()); assertEquals(Optional.of("-XX:+UseParNewGC"), mydisc2.getJvmGCOptions()); QrStartConfig.Builder qrStartBuilder = new QrStartConfig.Builder(); mydisc2.getConfig(qrStartBuilder); @@ -288,10 +288,11 @@ public class ModelProvisioningTest { assertEquals(2025077080L, protonMemorySize(model.getContentClusters().get("content1")), "Memory for proton is lowered to account for the jvm heap"); assertProvisioned(0, ClusterSpec.Id.from("container1"), ClusterSpec.Type.container, model); assertProvisioned(2, ClusterSpec.Id.from("content1"), ClusterSpec.Id.from("container1"), ClusterSpec.Type.combined, model); - assertEquals(1, logger.msgs().size()); + var msgs = logger.msgs().stream().filter(m -> m.level().equals(Level.WARNING)).toList(); + assertEquals(1, msgs.size()); assertEquals("Declaring combined cluster with <nodes of=\"...\"> is deprecated without replacement, " + "and the feature will be removed in Vespa 9. Use separate container and content clusters instead", - logger.msgs().get(0).message); + msgs.get(0).message); } @Test diff --git a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java new file mode 100644 index 00000000000..086f2fe778f --- /dev/null +++ b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidatorTest.java @@ -0,0 +1,126 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.model.application.validation; + +import com.yahoo.config.ModelReference; +import com.yahoo.config.application.api.ApplicationFile; +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.config.model.NullConfigModelRegistry; +import com.yahoo.config.model.api.OnnxModelCost; +import com.yahoo.config.model.deploy.DeployState; +import com.yahoo.config.model.deploy.TestProperties; +import com.yahoo.config.model.provision.InMemoryProvisioner; +import com.yahoo.config.model.test.MockApplicationPackage; +import com.yahoo.config.provision.NodeResources; +import com.yahoo.vespa.model.VespaModel; +import org.junit.jupiter.api.Test; +import org.xml.sax.SAXException; + +import java.io.IOException; +import java.util.concurrent.atomic.AtomicLong; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * @author bjorncs + */ +class JvmHeapSizeValidatorTest { + + @Test + void fails_on_too_low_jvm_percentage() throws IOException, SAXException { + var deployState = createDeployState(8, 7L * 1024 * 1024 * 1024); + var model = new VespaModel(new NullConfigModelRegistry(), deployState); + var e = assertThrows(IllegalArgumentException.class, () -> new JvmHeapSizeValidator().validate(model, deployState)); + String expectedMessage = "Allocated percentage of memory of JVM in cluster 'container' is too low (3% < 10%). Estimated cost of ONNX models is 7.00GB"; + assertTrue(e.getMessage().contains(expectedMessage), e.getMessage()); + } + + @Test + void fails_on_too_low_heap_size() throws IOException, SAXException { + var deployState = createDeployState(2, 1024L * 1024 * 1024); + var model = new VespaModel(new NullConfigModelRegistry(), deployState); + var e = assertThrows(IllegalArgumentException.class, () -> new JvmHeapSizeValidator().validate(model, deployState)); + String expectedMessage = "Allocated memory to JVM in cluster 'container' is too low (0.30GB < 0.40GB). Estimated cost of ONNX models is 1.00GB."; + assertTrue(e.getMessage().contains(expectedMessage), e.getMessage()); + } + + @Test + void accepts_adequate_heap_size() throws IOException, SAXException { + var deployState = createDeployState(8, 1024L * 1024 * 1024); + var model = new VespaModel(new NullConfigModelRegistry(), deployState); + assertDoesNotThrow(() -> new JvmHeapSizeValidator().validate(model, deployState)); + } + + @Test + void accepts_services_with_explicit_jvm_size() throws IOException, SAXException { + String servicesXml = + """ + <?xml version="1.0" encoding="utf-8" ?> + <services version='1.0'> + <container version='1.0'> + <nodes count="2"> + <jvm allocated-memory='5%'/> + <resources vcpu="4" memory="2Gb" disk="125Gb"/> + </nodes> + <component id="hf-embedder" type="hugging-face-embedder"> + <transformer-model url="https://my/url/model.onnx"/> + <tokenizer-model path="app/tokenizer.json"/> + </component> + </container> + </services>"""; + var deployState = createDeployState(servicesXml, 2, 1024L * 1024 * 1024); + var model = new VespaModel(new NullConfigModelRegistry(), deployState); + assertDoesNotThrow(() -> new JvmHeapSizeValidator().validate(model, deployState)); + } + + private static DeployState createDeployState(String servicesXml, double nodeGb, long modelCostBytes) { + return new DeployState.Builder() + .applicationPackage( + new MockApplicationPackage.Builder() + .withServices(servicesXml) + .build()) + .modelHostProvisioner(new InMemoryProvisioner(5, new NodeResources(4, nodeGb, 125, 0.3), true)) + .properties(new TestProperties().setHostedVespa(true).setDynamicHeapSize(true)) + .onnxModelCost(new ModelCostDummy(modelCostBytes)) + .build(); + } + + private static DeployState createDeployState(double nodeGb, long modelCostBytes) { + String servicesXml = + """ + <?xml version="1.0" encoding="utf-8" ?> + <services version='1.0'> + <container version='1.0'> + <nodes count="2"> + <resources vcpu="4" memory="%fGb" disk="125Gb"/> + </nodes> + <component id="hf-embedder" type="hugging-face-embedder"> + <transformer-model url="https://my/url/model.onnx"/> + <tokenizer-model path="app/tokenizer.json"/> + </component> + </container> + </services>""".formatted(nodeGb); + return createDeployState(servicesXml, nodeGb, modelCostBytes); + } + + private static class ModelCostDummy implements OnnxModelCost, OnnxModelCost.Calculator { + final AtomicLong totalCost = new AtomicLong(); + final long modelCost; + + ModelCostDummy(long modelCost) { this.modelCost = modelCost; } + + @Override public Calculator newCalculator(DeployLogger logger) { return this; } + @Override public long aggregatedModelCostInBytes() { return totalCost.get(); } + @Override public void registerModel(ApplicationFile path) {} + + @Override + public void registerModel(ModelReference ref) { + assertEquals("https://my/url/model.onnx", ref.url().orElseThrow().value().toString()); + totalCost.addAndGet(modelCost); + } + } + +}
\ No newline at end of file diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/DeployHandlerLogger.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/DeployHandlerLogger.java index 154d2d0f2f0..042aa2423f3 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/DeployHandlerLogger.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/DeployHandlerLogger.java @@ -11,6 +11,7 @@ import com.yahoo.slime.Slime; import com.yahoo.vespa.config.server.session.PrepareParams; import com.yahoo.vespa.config.server.tenant.TenantRepository; +import java.util.function.Supplier; import java.util.logging.Level; import java.util.logging.Logger; @@ -35,15 +36,17 @@ public class DeployHandlerLogger implements DeployLogger { this.logroot = slime.setObject().setArray("log"); } + @Override public void log(Level level, String message) { log(level, () -> message); } + @Override public void log(Level level, Supplier<String> message) { log(level, message, null); } + @Override @SuppressWarnings("deprecation") - public void log(Level level, String message) { - if (level.intValue() <= LogLevel.DEBUG.intValue() && !verbose) - return; + public void log(Level level, Supplier<String> supplier, Throwable throwable) { + // Also tee to a normal log, Vespa log for example, but use level fine + log.log(Level.FINE, throwable, () -> prefix + supplier.get()); - logJson(level, message); - // Also tee to a normal log, Vespa log for example, but use level fine - log.log(Level.FINE, () -> prefix + message); + if (level.intValue() <= LogLevel.DEBUG.intValue() && !verbose) return; + logJson(level, supplier.get()); } @Override diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java index 142f98e13e3..3e33b345437 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/deploy/ModelContextImpl.java @@ -201,6 +201,7 @@ public class ModelContextImpl implements ModelContext { private final boolean enableNestedMultivalueGrouping; private final boolean useReconfigurableDispatcher; private final int contentLayerMetadataFeatureLevel; + private final boolean dynamicHeapSize; public FeatureFlags(FlagSource source, ApplicationId appId, Version version) { this.defaultTermwiseLimit = flagValue(source, appId, version, Flags.DEFAULT_TERM_WISE_LIMIT); @@ -243,6 +244,7 @@ public class ModelContextImpl implements ModelContext { this.enableNestedMultivalueGrouping = flagValue(source, appId, version, Flags.ENABLE_NESTED_MULTIVALUE_GROUPING); this.useReconfigurableDispatcher = flagValue(source, appId, version, Flags.USE_RECONFIGURABLE_DISPATCHER); this.contentLayerMetadataFeatureLevel = flagValue(source, appId, version, Flags.CONTENT_LAYER_METADATA_FEATURE_LEVEL); + this.dynamicHeapSize = flagValue(source, appId, version, Flags.DYNAMIC_HEAP_SIZE); } @Override public int heapSizePercentage() { return heapPercentage; } @@ -293,6 +295,7 @@ public class ModelContextImpl implements ModelContext { @Override public boolean enableNestedMultivalueGrouping() { return enableNestedMultivalueGrouping; } @Override public boolean useReconfigurableDispatcher() { return useReconfigurableDispatcher; } @Override public int contentLayerMetadataFeatureLevel() { return contentLayerMetadataFeatureLevel; } + @Override public boolean dynamicHeapSize() { return dynamicHeapSize; } private static <V> V flagValue(FlagSource source, ApplicationId appId, Version vespaVersion, UnboundFlag<? extends V, ?, ?> flag) { return flag.bindTo(source) diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/zookeeper/ZKApplication.java b/configserver/src/main/java/com/yahoo/vespa/config/server/zookeeper/ZKApplication.java index 4c262379c35..1288b63cadd 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/zookeeper/ZKApplication.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/zookeeper/ZKApplication.java @@ -111,6 +111,12 @@ public class ZKApplication { return getBytesInternal(getFullPath(path)); } + public long getSize(Path path) { + return curator.getStat(path).map(stat -> (long)stat.getDataLength()) + .orElseThrow(() -> new IllegalArgumentException( + "Could not get size from '" + path + "' in zookeeper")); + } + void putData(Path path, String data) { byte[] bytes = Utf8.toBytes(data); ensureDataIsNotTooLarge(bytes, path); diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/zookeeper/ZKApplicationFile.java b/configserver/src/main/java/com/yahoo/vespa/config/server/zookeeper/ZKApplicationFile.java index 6bc29331efb..e51f8627de2 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/zookeeper/ZKApplicationFile.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/zookeeper/ZKApplicationFile.java @@ -3,8 +3,9 @@ package com.yahoo.vespa.config.server.zookeeper; import com.fasterxml.jackson.databind.ObjectMapper; import com.yahoo.config.application.api.ApplicationFile; -import com.yahoo.path.Path; import com.yahoo.io.IOUtils; +import com.yahoo.path.Path; +import com.yahoo.vespa.config.util.ConfigUtils; import java.io.ByteArrayInputStream; import java.io.FileNotFoundException; @@ -13,11 +14,9 @@ import java.io.InputStream; import java.io.Reader; import java.io.StringReader; import java.io.StringWriter; -import java.util.logging.Level; -import com.yahoo.vespa.config.util.ConfigUtils; - import java.util.ArrayList; import java.util.List; +import java.util.logging.Level; import java.util.logging.Logger; import static com.yahoo.vespa.config.server.zookeeper.ZKApplication.USERAPP_ZK_SUBPATH; @@ -184,6 +183,8 @@ class ZKApplicationFile extends ApplicationFile { } } + @Override public long getSize() { return zkApp.getSize(getZKPath(path)); } + @Override public int compareTo(ApplicationFile other) { if (other == this) return 0; diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index 2e158f0f3ef..e5b76bedecd 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -406,6 +406,13 @@ public class Flags { "Takes effect at redeployment", INSTANCE_ID); + public static final UnboundBooleanFlag DYNAMIC_HEAP_SIZE = defineFeatureFlag( + "dynamic-heap-size", false, + List.of("bjorncs"), "2023-09-21", "2024-01-15", + "Whether to calculate JVM heap size based on predicted Onnx model memory requirements", + "Takes effect at redeployment", + INSTANCE_ID); + /** WARNING: public for testing: All flags should be defined in {@link Flags}. */ public static UnboundBooleanFlag defineFeatureFlag(String flagId, boolean defaultValue, List<String> owners, String createdAt, String expiresAt, String description, |