diff options
Diffstat (limited to 'config-model/src/main/java/com/yahoo')
25 files changed, 423 insertions, 114 deletions
diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java b/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java index 4df7a76031a..5ab258ecce8 100644 --- a/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java +++ b/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java @@ -18,6 +18,7 @@ import com.yahoo.config.model.api.EndpointCertificateSecrets; import com.yahoo.config.model.api.HostProvisioner; import com.yahoo.config.model.api.Model; import com.yahoo.config.model.api.ModelContext; +import com.yahoo.config.model.api.OnnxModelCost; import com.yahoo.config.model.api.Provisioned; import com.yahoo.config.model.api.Reindexing; import com.yahoo.config.model.api.ValidationParameters; @@ -90,6 +91,7 @@ public class DeployState implements ConfigDefinitionStore { private final Provisioned provisioned; private final Reindexing reindexing; private final ExecutorService executor; + private final OnnxModelCost onnxModelCost; public static DeployState createTestState() { return new Builder().build(); @@ -124,7 +126,8 @@ public class DeployState implements ConfigDefinitionStore { boolean accessLoggingEnabledByDefault, Optional<DockerImage> wantedDockerImageRepo, Reindexing reindexing, - Optional<ValidationOverrides> validationOverrides) { + Optional<ValidationOverrides> validationOverrides, + OnnxModelCost onnxModelCost) { this.logger = deployLogger; this.fileRegistry = fileRegistry; this.executor = executor; @@ -152,6 +155,7 @@ public class DeployState implements ConfigDefinitionStore { this.now = now; this.wantedDockerImageRepo = wantedDockerImageRepo; this.reindexing = reindexing; + this.onnxModelCost = onnxModelCost; } public static HostProvisioner getDefaultModelHostProvisioner(ApplicationPackage applicationPackage) { @@ -305,6 +309,8 @@ public class DeployState implements ConfigDefinitionStore { public Optional<Reindexing> reindexing() { return Optional.ofNullable(reindexing); } + public OnnxModelCost onnxModelCost() { return onnxModelCost; } + public boolean isHostedTenantApplication(ApplicationType type) { boolean isTesterApplication = getProperties().applicationId().instance().isTester(); return isHosted() && type == ApplicationType.DEFAULT && !isTesterApplication; @@ -333,6 +339,7 @@ public class DeployState implements ConfigDefinitionStore { private QueryProfiles queryProfiles = null; private Reindexing reindexing = null; private Optional<ValidationOverrides> validationOverrides = Optional.empty(); + private OnnxModelCost onnxModelCost = OnnxModelCost.disabled(); public Builder() {} @@ -450,6 +457,8 @@ public class DeployState implements ConfigDefinitionStore { return this; } + public Builder onnxModelCost(OnnxModelCost instance) { this.onnxModelCost = instance; return this; } + public DeployState build() { return build(new ValidationParameters()); } @@ -482,7 +491,8 @@ public class DeployState implements ConfigDefinitionStore { accessLoggingEnabledByDefault, wantedDockerImageRepo, reindexing, - validationOverrides); + validationOverrides, + onnxModelCost); } } diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java index 815c32e3c8f..77356292f9a 100644 --- a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java +++ b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java @@ -86,6 +86,7 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea private boolean allowUserFilters = true; private List<DataplaneToken> dataplaneTokens; private int contentLayerMetadataFeatureLevel = 0; + private boolean dynamicHeapSize = false; @Override public ModelContext.FeatureFlags featureFlags() { return this; } @Override public boolean multitenant() { return multitenant; } @@ -144,6 +145,7 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea @Override public boolean enableGlobalPhase() { return true; } // Enable global-phase by default for unit tests only @Override public List<DataplaneToken> dataplaneTokens() { return dataplaneTokens; } @Override public int contentLayerMetadataFeatureLevel() { return contentLayerMetadataFeatureLevel; } + @Override public boolean dynamicHeapSize() { return dynamicHeapSize; } public TestProperties sharedStringRepoNoReclaim(boolean sharedStringRepoNoReclaim) { this.sharedStringRepoNoReclaim = sharedStringRepoNoReclaim; @@ -379,6 +381,8 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea return this; } + public TestProperties setDynamicHeapSize(boolean b) { this.dynamicHeapSize = b; return this; } + public static class Spec implements ConfigServerSpec { private final String hostName; diff --git a/config-model/src/main/java/com/yahoo/config/model/test/MockApplicationPackage.java b/config-model/src/main/java/com/yahoo/config/model/test/MockApplicationPackage.java index dbcd1cea2fa..342b5f243e7 100644 --- a/config-model/src/main/java/com/yahoo/config/model/test/MockApplicationPackage.java +++ b/config-model/src/main/java/com/yahoo/config/model/test/MockApplicationPackage.java @@ -488,6 +488,8 @@ public class MockApplicationPackage implements ApplicationPackage { throw new UnsupportedOperationException(); } + @Override public long getSize() { return file.length(); } + @Override public int compareTo(ApplicationFile other) { return this.getPath().getName().compareTo((other).getPath().getName()); diff --git a/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java b/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java index 7d88985b2d5..f6a022e9930 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java @@ -82,7 +82,7 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { } // Commands for summary fields - // TODO: Move to fieldinfo and implement differently. This is not right + // TODO: Move to schemainfo and implement differently for (SummaryField summaryField : schema.getUniqueNamedSummaryFields().values()) { if (summaryField.getTransform().isTeaser()) { addIndexCommand(summaryField.getName(), CMD_DYNTEASER); @@ -90,6 +90,13 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { if (summaryField.getTransform().isBolded()) { addIndexCommand(summaryField.getName(), CMD_HIGHLIGHT); } + + var sourceField = schema.getField(summaryField.getSourceField()); // Take the first as they should all be consistent + if (sourceField != null && sourceField.getMatching().getType().equals(MatchType.GRAM)) { + addIndexCommand(summaryField.getName(), + "ngram " + (sourceField.getMatching().getGramSize().orElse(NGramMatch.DEFAULT_GRAM_SIZE))); + + } } } @@ -452,7 +459,7 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { iiB.command( new IndexInfoConfig.Indexinfo.Command.Builder() .indexname(fieldSet.getName()) - .command("ngram "+(fieldSetMatching.getGramSize()>0 ? fieldSetMatching.getGramSize() : NGramMatch.DEFAULT_GRAM_SIZE))); + .command("ngram " + fieldSetMatching.getGramSize().orElse(NGramMatch.DEFAULT_GRAM_SIZE))); } else if (fieldSetMatching.getType().equals(MatchType.TEXT)) { } diff --git a/config-model/src/main/java/com/yahoo/schema/document/Matching.java b/config-model/src/main/java/com/yahoo/schema/document/Matching.java index 1fe947d672b..264fd0ff3b9 100644 --- a/config-model/src/main/java/com/yahoo/schema/document/Matching.java +++ b/config-model/src/main/java/com/yahoo/schema/document/Matching.java @@ -1,7 +1,10 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.schema.document; +import com.yahoo.schema.processing.NGramMatch; + import java.io.Serializable; +import java.util.OptionalInt; /** * Defines how a field should be matched. @@ -23,8 +26,8 @@ public class Matching implements Cloneable, Serializable { private boolean algorithmUserSet = false; - /** The gram size is the n in n-gram, or -1 if not set. Should only be set with gram matching. */ - private int gramSize = -1; + /** The gram size is the n in n-gram, or empty if not set. Should only be set with gram matching. */ + private OptionalInt gramSize = OptionalInt.empty(); /** Maximum number of characters to consider when searching in this field. Used for limiting resources, especially in streaming search. */ private Integer maxLength; @@ -67,10 +70,10 @@ public class Matching implements Cloneable, Serializable { public boolean isSuffix() { return algorithm == MatchAlgorithm.SUFFIX; } - /** Returns the gram size, or -1 if not set. Should only be set with gram matching. */ - public int getGramSize() { return gramSize; } + /** Returns the gram size, or empty if not set. Should only be set with gram matching. */ + public OptionalInt getGramSize() { return gramSize; } - public void setGramSize(int gramSize) { this.gramSize=gramSize; } + public void setGramSize(int gramSize) { this.gramSize = OptionalInt.of(gramSize); } /** * Merge data from another matching object @@ -107,10 +110,11 @@ public class Matching implements Cloneable, Serializable { @Override public String toString() { - return type + " matching [" + (type==MatchType.GRAM ? "gram size " + gramSize : "supports " + algorithm) + - "], [exact-terminator "+exactMatchTerminator+"]"; + return type + " matching [" + (type == MatchType.GRAM ? "gram size " + gramSize.orElse(NGramMatch.DEFAULT_GRAM_SIZE) : "supports " + algorithm) + + "], [exact-terminator " + exactMatchTerminator + "]"; } + @Override public Matching clone() { try { return (Matching)super.clone(); diff --git a/config-model/src/main/java/com/yahoo/schema/processing/NGramMatch.java b/config-model/src/main/java/com/yahoo/schema/processing/NGramMatch.java index f1ff910be43..6ec5428156f 100644 --- a/config-model/src/main/java/com/yahoo/schema/processing/NGramMatch.java +++ b/config-model/src/main/java/com/yahoo/schema/processing/NGramMatch.java @@ -31,7 +31,7 @@ public class NGramMatch extends Processor { for (SDField field : schema.allConcreteFields()) { if (field.getMatching().getType().equals(MatchType.GRAM)) implementGramMatch(schema, field, validate); - else if (validate && field.getMatching().getGramSize() >= 0) + else if (validate && field.getMatching().getGramSize().isPresent()) throw new IllegalArgumentException("gram-size can only be set when the matching mode is 'gram'"); } } @@ -40,9 +40,7 @@ public class NGramMatch extends Processor { if (validate && field.doesAttributing() && ! field.doesIndexing()) throw new IllegalArgumentException("gram matching is not supported with attributes, use 'index' in indexing"); - int n = field.getMatching().getGramSize(); - if (n < 0) - n = DEFAULT_GRAM_SIZE; // not set - use default gram size + int n = field.getMatching().getGramSize().orElse(DEFAULT_GRAM_SIZE); if (validate && n == 0) throw new IllegalArgumentException("Illegal gram size in " + field + ": Must be at least 1"); field.getNormalizing().inferCodepoint(); diff --git a/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryField.java b/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryField.java index 7439e65dee6..49cd36e4bc2 100644 --- a/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryField.java +++ b/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryField.java @@ -17,9 +17,7 @@ import static com.yahoo.text.Lowercase.toLowerCase; */ public class SummaryField extends Field implements Cloneable, TypedKey { - /** - * A source (field name). - */ + /** A source (field name). */ public static class Source implements Serializable { private final String name; @@ -38,12 +36,8 @@ public class SummaryField extends Field implements Cloneable, TypedKey { @Override public boolean equals(Object obj) { - if (!(obj instanceof Source)) { - return false; - } - Source other = (Source)obj; - return name.equals(other.name) && - override == other.override; + if (!(obj instanceof Source other)) return false; + return name.equals(other.name) && override == other.override; } @Override @@ -67,14 +61,14 @@ public class SummaryField extends Field implements Cloneable, TypedKey { */ private Set<Source> sources = new java.util.LinkedHashSet<>(); - private Set<String> destinations=new java.util.LinkedHashSet<>(); + private Set<String> destinations =new java.util.LinkedHashSet<>(); /** True if this field was defined implicitly */ - private boolean implicit=false; + private boolean implicit = false; /** Creates a summary field with NONE as transform */ public SummaryField(String name, DataType type) { - this(name,type, SummaryTransform.NONE); + this(name, type, SummaryTransform.NONE); } /** Creates a summary field with NONE as transform */ @@ -97,7 +91,7 @@ public class SummaryField extends Field implements Cloneable, TypedKey { public boolean isImplicit() { return implicit; } public void setTransform(SummaryTransform transform) { - this.transform=transform; + this.transform = transform; if (SummaryTransform.DYNAMICTEASER.equals(transform) || SummaryTransform.BOLDED.equals(transform)) { // This is the kind of logic we want to have in processing, // but can't because of deriveDocuments mode, which doesn't run @@ -110,9 +104,9 @@ public class SummaryField extends Field implements Cloneable, TypedKey { /** Returns the first source field of this, or null if the source field is not present */ public String getSourceField() { - String sourceName=getName(); - if (sources.size()>0) - sourceName=sources.iterator().next().getName(); + String sourceName = getName(); + if ( ! sources.isEmpty()) + sourceName = sources.iterator().next().getName(); return sourceName; } @@ -137,7 +131,7 @@ public class SummaryField extends Field implements Cloneable, TypedKey { /** Returns the first source name of this, or the field name if no source has been set */ public String getSingleSource() { - if (sources.size()==0) return getName(); + if (sources.isEmpty()) return getName(); return sources.iterator().next().getName(); } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/DefaultOnnxModelCost.java b/config-model/src/main/java/com/yahoo/vespa/model/DefaultOnnxModelCost.java new file mode 100644 index 00000000000..fddf8409376 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/DefaultOnnxModelCost.java @@ -0,0 +1,120 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.model; + +import com.yahoo.config.ModelReference; +import com.yahoo.config.application.api.ApplicationFile; +import com.yahoo.config.application.api.ApplicationPackage; +import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.config.model.api.OnnxModelCost; +import com.yahoo.vespa.model.ml.OnnxModelProbe; + +import java.io.IOException; +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.time.Duration; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.logging.Level; + +import static com.yahoo.yolean.Exceptions.uncheck; + +/** + * Aggregates estimated footprint of configured ONNX models. + * + * @author bjorncs + */ +public class DefaultOnnxModelCost implements OnnxModelCost { + + @Override + public Calculator newCalculator(ApplicationPackage appPkg, DeployLogger logger) { + return new CalculatorImpl(appPkg, logger); + } + + private static class CalculatorImpl implements Calculator { + private final DeployLogger log; + private final ApplicationPackage appPkg; + + private final ConcurrentMap<String, Long> modelCost = new ConcurrentHashMap<>(); + + private CalculatorImpl(ApplicationPackage appPkg, DeployLogger log) { + this.appPkg = appPkg; + this.log = log; + } + + @Override + public long aggregatedModelCostInBytes() { + return modelCost.values().stream().mapToLong(Long::longValue).sum(); + } + + @Override + public void registerModel(ApplicationFile f) { + String path = f.getPath().getRelative(); + if (alreadyAnalyzed(path)) return; + log.log(Level.FINE, () -> "Register model '%s'".formatted(path)); + if (f.exists() && appPkg != null) { + var memoryStats = OnnxModelProbe.probeMemoryStats(appPkg, f.getPath()).orElse(null); + if (memoryStats != null) { + log.log(Level.FINE, () -> "Register model '%s' with memory stats: %s".formatted(path, memoryStats)); + deductJvmHeapSizeWithModelCost(f.getSize(), memoryStats, path); + } else { + deductJvmHeapSizeWithModelCost(f.getSize(), path); + } + } else { + deductJvmHeapSizeWithModelCost(0, path); + } + } + + @Override + public void registerModel(ModelReference ref) { + log.log(Level.FINE, () -> "Register model '%s'".formatted(ref.toString())); + if (ref.path().isPresent()) { + var path = Paths.get(ref.path().get().value()); + var source = path.getFileName().toString(); + if (alreadyAnalyzed(source)) return; + deductJvmHeapSizeWithModelCost(uncheck(() -> Files.exists(path) ? Files.size(path) : 0), source); + } else if (ref.url().isPresent()) deductJvmHeapSizeWithModelCost(URI.create(ref.url().get().value())); + else throw new IllegalStateException(ref.toString()); + } + + private void deductJvmHeapSizeWithModelCost(URI uri) { + if (alreadyAnalyzed(uri.toString())) return; + if (uri.getScheme().equals("http") || uri.getScheme().equals("https")) { + try { + var timeout = Duration.ofSeconds(3); + var httpClient = HttpClient.newBuilder().connectTimeout(timeout).build(); + var request = HttpRequest.newBuilder(uri).timeout(timeout).method("HEAD", HttpRequest.BodyPublishers.noBody()).build(); + var response = httpClient.send(request, HttpResponse.BodyHandlers.discarding()); + var contentLength = response.headers().firstValue("Content-Length").orElse("0"); + log.log(Level.FINE, () -> "Got content length '%s' for '%s'".formatted(contentLength, uri)); + deductJvmHeapSizeWithModelCost(Long.parseLong(contentLength), uri.toString()); + } catch (IllegalArgumentException | InterruptedException | IOException e) { + log.log(Level.INFO, () -> "Failed to get model size for '%s': %s".formatted(uri, e.getMessage()), e); + } + } + } + + private void deductJvmHeapSizeWithModelCost(long size, String source) { + long fallbackModelSize = 1024*1024*1024; + long estimatedCost = Math.max(300*1024*1024, (long) (1.4D * (size > 0 ? size : fallbackModelSize) + 100*1024*1024)); + log.log(Level.FINE, () -> + "Estimated %s footprint for model of size %s ('%s')".formatted(mb(estimatedCost), mb(size), source)); + modelCost.put(source, estimatedCost); + } + + private void deductJvmHeapSizeWithModelCost(long size, OnnxModelProbe.MemoryStats stats, String source) { + long estimatedCost = (long)(1.1D * stats.vmSize()); + log.log(Level.FINE, () -> + "Estimated %s footprint for model of size %s ('%s')".formatted(mb(estimatedCost), mb(size), source)); + modelCost.put(source, estimatedCost); + } + + private boolean alreadyAnalyzed(String source) { return modelCost.containsKey(source); } + + private static String mb(long bytes) { return "%dMB".formatted(bytes / (1024*1024)); } + } +} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java b/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java index 28ff8dff620..727a18aee2c 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java @@ -21,6 +21,7 @@ import com.yahoo.config.model.api.Model; import com.yahoo.config.model.api.ModelContext; import com.yahoo.config.model.api.ModelCreateResult; import com.yahoo.config.model.api.ModelFactory; +import com.yahoo.config.model.api.OnnxModelCost; import com.yahoo.config.model.api.ValidationParameters; import com.yahoo.config.model.application.provider.ApplicationPackageXmlFilesValidator; import com.yahoo.config.model.builder.xml.ConfigModelBuilder; @@ -197,7 +198,8 @@ public class VespaModelFactory implements ModelFactory { .zone(zone) .now(clock.instant()) .wantedNodeVespaVersion(modelContext.wantedNodeVespaVersion()) - .wantedDockerImageRepo(modelContext.wantedDockerImageRepo()); + .wantedDockerImageRepo(modelContext.wantedDockerImageRepo()) + .onnxModelCost(modelContext.properties().hostedVespa() ? new DefaultOnnxModelCost() : OnnxModelCost.disabled()); modelContext.previousModel().ifPresent(builder::previousModel); modelContext.reindexing().ifPresent(builder::reindexing); return builder.build(validationParameters); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java new file mode 100644 index 00000000000..9e231239521 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java @@ -0,0 +1,51 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.model.application.validation; + +import com.yahoo.config.model.deploy.DeployState; +import com.yahoo.vespa.model.VespaModel; + +import java.util.logging.Level; + +/** + * Validates that the container node flavour has enough resources to run configured ONNX models. + * + * @author bjorncs + */ +public class JvmHeapSizeValidator extends Validator { + + @Override + public void validate(VespaModel model, DeployState ds) { + if (!ds.featureFlags().dynamicHeapSize()) return; + if (!ds.isHostedTenantApplication(model.getAdmin().getApplicationType())) return; + + model.getContainerClusters().forEach((clusterId, appCluster) -> { + var mp = appCluster.getMemoryPercentage().orElse(null); + if (mp == null) return; + if (mp.availableMemoryGb().isEmpty()) { + ds.getDeployLogger().log(Level.FINE, "Host resources unknown or percentage overridden with 'allocated-memory'"); + return; + } + long jvmModelCost = appCluster.onnxModelCost().aggregatedModelCostInBytes(); + if (jvmModelCost > 0) { + int percentLimit = 15; + if (mp.percentage() < percentLimit) { + throw new IllegalArgumentException( + ("Allocated percentage of memory of JVM in cluster '%s' is too low (%d%% < %d%%). " + + "Estimated cost of ONNX models is %.2fGB. Either use a node flavor with more memory or use less expensive models. " + + "You may override this validation by specifying 'allocated-memory' (https://docs.vespa.ai/en/performance/container-tuning.html#jvm-heap-size).") + .formatted(clusterId, mp.percentage(), percentLimit, jvmModelCost / (1024D * 1024 * 1024))); + } + double gbLimit = 0.6; + double availableMemoryGb = mp.availableMemoryGb().getAsDouble(); + if (availableMemoryGb < gbLimit) { + throw new IllegalArgumentException( + ("Allocated memory to JVM in cluster '%s' is too low (%.2fGB < %.2fGB). " + + "Estimated cost of ONNX models is %.2fGB. Either use a node flavor with more memory or use less expensive models. " + + "You may override this validation by specifying 'allocated-memory' (https://docs.vespa.ai/en/performance/container-tuning.html#jvm-heap-size).") + .formatted(clusterId, availableMemoryGb, gbLimit, jvmModelCost / (1024D * 1024 * 1024))); + } + } + }); + } +} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/UrlConfigValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/UrlConfigValidator.java new file mode 100644 index 00000000000..d9dd3729bd3 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/UrlConfigValidator.java @@ -0,0 +1,50 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.model.application.validation; + +import com.yahoo.config.model.deploy.DeployState; +import com.yahoo.vespa.model.VespaModel; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; + +/** + * Validates that config using s3:// urls is used in public system and with nodes that are exclusive. + * + * @author hmusum + */ +public class UrlConfigValidator extends Validator { + + @Override + public void validate(VespaModel model, DeployState state) { + if (! state.isHostedTenantApplication(model.getAdmin().getApplicationType())) return; + + model.getContainerClusters().forEach((__, cluster) -> { + var isExclusive = hasExclusiveNodes(model, cluster); + validateS3UlsInConfig(state, cluster, isExclusive); + }); + } + + private static boolean hasExclusiveNodes(VespaModel model, ApplicationContainerCluster cluster) { + return model.hostSystem().getHosts() + .stream() + .flatMap(hostResource -> hostResource.spec().membership().stream()) + .filter(membership -> membership.cluster().id().equals(cluster.id())) + .anyMatch(membership -> membership.cluster().isExclusive()); + } + + private static void validateS3UlsInConfig(DeployState state, ApplicationContainerCluster cluster, boolean isExclusive) { + if (hasS3UrlInConfig(cluster)) { + // TODO: Would be even better if we could add which config/field the url is set for in the error message + String message = "Found s3:// urls in config for container cluster " + cluster.getName(); + if ( ! state.zone().system().isPublic()) + throw new IllegalArgumentException(message + ". This is only supported in public systems"); + else if ( ! isExclusive) + throw new IllegalArgumentException(message + ". Nodes in the cluster need to be 'exclusive'," + + " see https://cloud.vespa.ai/en/reference/services#nodes"); + } + } + + private static boolean hasS3UrlInConfig(ApplicationContainerCluster cluster) { + return cluster.userConfiguredUrls().all().stream() + .anyMatch(url -> url.startsWith("s3://")); + } + +} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java index 53a553ee624..30aafe67be7 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java @@ -87,6 +87,8 @@ public class Validation { new AccessControlFilterExcludeValidator().validate(model, deployState); new CloudUserFilterValidator().validate(model, deployState); new CloudHttpConnectorValidator().validate(model, deployState); + new UrlConfigValidator().validate(model, deployState); + new JvmHeapSizeValidator().validate(model, deployState); additionalValidators.forEach(v -> v.validate(model, deployState)); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/search/IndexingScriptChangeMessageBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/search/IndexingScriptChangeMessageBuilder.java index bbfa939f8a3..f265f2d09a0 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/search/IndexingScriptChangeMessageBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/search/IndexingScriptChangeMessageBuilder.java @@ -7,6 +7,7 @@ import com.yahoo.schema.document.Matching; import com.yahoo.schema.document.MatchType; import com.yahoo.schema.document.NormalizeLevel; import com.yahoo.schema.document.Stemming; +import com.yahoo.schema.processing.NGramMatch; import com.yahoo.vespa.documentmodel.SummaryField; import com.yahoo.vespa.documentmodel.SummaryTransform; @@ -89,7 +90,7 @@ public class IndexingScriptChangeMessageBuilder { MatchType type = matching.getType(); String retval = type.getName(); if (type == MatchType.GRAM) { - retval += " (size " + matching.getGramSize() + ")"; + retval += " (size " + matching.getGramSize().orElse(NGramMatch.DEFAULT_GRAM_SIZE) + ")"; } return retval; } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java index 7501f6162c7..9ecd359f90d 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java @@ -7,11 +7,12 @@ import com.yahoo.config.model.producer.AnyConfigProducer; import com.yahoo.config.model.producer.TreeConfigProducer; import com.yahoo.osgi.provider.model.ComponentModel; import com.yahoo.text.XML; -import com.yahoo.vespa.model.container.component.HuggingFaceEmbedder; -import com.yahoo.vespa.model.container.component.HuggingFaceTokenizer; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; import com.yahoo.vespa.model.container.component.BertEmbedder; import com.yahoo.vespa.model.container.component.ColBertEmbedder; import com.yahoo.vespa.model.container.component.Component; +import com.yahoo.vespa.model.container.component.HuggingFaceEmbedder; +import com.yahoo.vespa.model.container.component.HuggingFaceTokenizer; import com.yahoo.vespa.model.container.xml.BundleInstantiationSpecificationBuilder; import org.w3c.dom.Element; @@ -35,19 +36,20 @@ public class DomComponentBuilder extends VespaDomBuilder.DomConfigProducerBuilde @Override protected Component<? super Component<?, ?>, ?> doBuild(DeployState deployState, TreeConfigProducer<AnyConfigProducer> ancestor, Element spec) { - var component = buildComponent(spec, deployState); + var component = buildComponent(spec, deployState, ancestor); addChildren(deployState, ancestor, spec, component); return component; } - private Component<? super Component<?, ?>, ?> buildComponent(Element spec, DeployState state) { + private Component<? super Component<?, ?>, ?> buildComponent( + Element spec, DeployState state, TreeConfigProducer<AnyConfigProducer> ancestor) { if (spec.hasAttribute("type")) { var type = spec.getAttribute("type"); return switch (type) { - case "hugging-face-embedder" -> new HuggingFaceEmbedder(spec, state); + case "hugging-face-embedder" -> new HuggingFaceEmbedder((ApplicationContainerCluster)ancestor, spec, state); case "hugging-face-tokenizer" -> new HuggingFaceTokenizer(spec, state); - case "bert-embedder" -> new BertEmbedder(spec, state); - case "colbert-embedder" -> new ColBertEmbedder(spec, state); + case "colbert-embedder" -> new ColBertEmbedder((ApplicationContainerCluster)ancestor, spec, state); + case "bert-embedder" -> new BertEmbedder((ApplicationContainerCluster)ancestor, spec, state); default -> throw new IllegalArgumentException("Unknown component type '%s'".formatted(type)); }; } else { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java index b9021912244..4e97b20a3a9 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java @@ -8,14 +8,14 @@ import com.yahoo.component.ComponentId; import com.yahoo.component.ComponentSpecification; import com.yahoo.config.FileReference; import com.yahoo.config.application.api.ComponentInfo; +import com.yahoo.config.application.api.DeployLogger; import com.yahoo.config.model.api.ApplicationClusterEndpoint; import com.yahoo.config.model.api.ApplicationClusterInfo; -import com.yahoo.config.model.api.ContainerEndpoint; import com.yahoo.config.model.api.Model; +import com.yahoo.config.model.api.OnnxModelCost; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.config.model.producer.TreeConfigProducer; import com.yahoo.config.provision.AllocatedHosts; -import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.HostSpec; import com.yahoo.container.bundle.BundleInstantiationSpecification; import com.yahoo.container.di.config.ApplicationBundlesConfig; @@ -40,13 +40,16 @@ import com.yahoo.vespa.model.container.component.SystemBindingPattern; import com.yahoo.vespa.model.container.configserver.ConfigserverCluster; import com.yahoo.vespa.model.filedistribution.UserConfiguredFiles; +import java.net.URL; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Optional; import java.util.Set; +import java.util.logging.Level; import java.util.stream.Collectors; import static com.yahoo.vespa.model.container.docproc.DocprocChains.DOCUMENT_TYPE_MANAGER_CLASS; @@ -82,6 +85,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat private final Set<FileReference> applicationBundles = new LinkedHashSet<>(); private final Set<String> previousHosts; + private final OnnxModelCost.Calculator onnxModelCost; + private final DeployLogger logger; private ContainerModelEvaluation modelEvaluation; @@ -92,6 +97,7 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat private int zookeeperSessionTimeoutSeconds = 30; private final int transport_events_before_wakeup; private final int transport_connections_per_target; + private final boolean dynamicHeapSize; /** The heap size % of total memory available to the JVM process. */ private final int heapSizePercentageOfAvailableMemory; @@ -100,9 +106,12 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat private List<ApplicationClusterEndpoint> endpoints = List.of(); + private UserConfiguredUrls userConfiguredUrls = new UserConfiguredUrls(); + public ApplicationContainerCluster(TreeConfigProducer<?> parent, String configSubId, String clusterId, DeployState deployState) { super(parent, configSubId, clusterId, deployState, true, 10); this.tlsClientAuthority = deployState.tlsClientAuthority(); + dynamicHeapSize = deployState.featureFlags().dynamicHeapSize(); previousHosts = Collections.unmodifiableSet(deployState.getPreviousModel().stream() .map(Model::allocatedHosts) .map(AllocatedHosts::getHosts) @@ -125,8 +134,13 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat heapSizePercentageOfAvailableMemory = deployState.featureFlags().heapSizePercentage() > 0 ? Math.min(99, deployState.featureFlags().heapSizePercentage()) : defaultHeapSizePercentageOfAvailableMemory; + onnxModelCost = deployState.onnxModelCost().newCalculator( + deployState.getApplicationPackage(), deployState.getDeployLogger()); + logger = deployState.getDeployLogger(); } + public UserConfiguredUrls userConfiguredUrls() { return userConfiguredUrls; } + @Override protected void doPrepare(DeployState deployState) { super.doPrepare(deployState); @@ -147,7 +161,9 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat if (containers.isEmpty()) return; // Files referenced from user configs to all components. - UserConfiguredFiles files = new UserConfiguredFiles(deployState.getFileRegistry(), deployState.getDeployLogger()); + UserConfiguredFiles files = new UserConfiguredFiles(deployState.getFileRegistry(), + deployState.getDeployLogger(), + userConfiguredUrls); for (Component<?, ?> component : getAllComponents()) { files.register(component); } @@ -182,19 +198,25 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat public void setMemoryPercentage(Integer memoryPercentage) { this.memoryPercentage = memoryPercentage; } @Override - public Optional<Integer> getMemoryPercentage() { - if (memoryPercentage != null) return Optional.of(memoryPercentage); + public Optional<JvmMemoryPercentage> getMemoryPercentage() { + if (memoryPercentage != null) return Optional.of(JvmMemoryPercentage.of(memoryPercentage)); if (isHostedVespa()) { int availableMemoryPercentage = getHostClusterId().isPresent() ? heapSizePercentageOfTotalAvailableMemoryWhenCombinedCluster : heapSizePercentageOfAvailableMemory; - if (getContainers().isEmpty()) return Optional.of(availableMemoryPercentage); // Node memory is not known + if (getContainers().isEmpty()) return Optional.of(JvmMemoryPercentage.of(availableMemoryPercentage)); // Node memory is not known // Node memory is known so convert available memory percentage to node memory percentage - double totalMemory = getContainers().get(0).getHostResource().realResources().memoryGb(); - double availableMemory = totalMemory - Host.memoryOverheadGb; - return Optional.of((int) (availableMemory / totalMemory * availableMemoryPercentage)); + double totalMemory = dynamicHeapSize + ? getContainers().stream().mapToDouble(c -> c.getHostResource().realResources().memoryGb()).min().orElseThrow() + : getContainers().get(0).getHostResource().realResources().memoryGb(); + double jvmHeapDeductionGb = dynamicHeapSize ? onnxModelCost.aggregatedModelCostInBytes() / (1024D * 1024 * 1024) : 0; + double availableMemory = Math.max(0, totalMemory - Host.memoryOverheadGb - jvmHeapDeductionGb); + int memoryPercentage = (int) (availableMemory / totalMemory * availableMemoryPercentage); + logger.log(Level.FINE, () -> "memoryPercentage=%d, availableMemory=%f, totalMemory=%f, availableMemoryPercentage=%d, jvmHeapDeductionGb=%f" + .formatted(memoryPercentage, availableMemory, totalMemory, availableMemoryPercentage, jvmHeapDeductionGb)); + return Optional.of(JvmMemoryPercentage.of(memoryPercentage, availableMemory)); } return Optional.empty(); } @@ -203,49 +225,23 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat private void createEndpoints(DeployState deployState) { if (!deployState.isHosted()) return; if (deployState.getProperties().applicationId().instance().isTester()) return; + // Add endpoints provided by the controller + List<String> hosts = getContainers().stream().map(AbstractService::getHostName).sorted().toList(); List<ApplicationClusterEndpoint> endpoints = new ArrayList<>(); - - List<String> hosts = getContainers().stream() - .map(AbstractService::getHostName) - .sorted() - .toList(); - - Set<ContainerEndpoint> endpointsFromController = deployState.getEndpoints(); - // Add zone-scoped endpoints if not provided by the controller - // TODO(mpolden): Remove this when controller always includes zone-scope endpoints, and config models < 8.230 are gone - if (endpointsFromController.stream().noneMatch(endpoint -> endpoint.scope() == ApplicationClusterEndpoint.Scope.zone)) { - for (String suffix : deployState.getProperties().zoneDnsSuffixes()) { - ApplicationClusterEndpoint.DnsName l4Name = ApplicationClusterEndpoint.DnsName.sharedL4NameFrom( - deployState.zone().system(), - ClusterSpec.Id.from(getName()), - deployState.getProperties().applicationId(), - suffix); - endpoints.add(ApplicationClusterEndpoint.builder() - .zoneScope() - .sharedL4Routing() - .dnsName(l4Name) - .hosts(hosts) - .clusterId(getName()) - .authMethod(ApplicationClusterEndpoint.AuthMethod.mtls) - .build()); - } - } - - // Include all endpoints provided by controller - endpointsFromController.stream() - .filter(ce -> ce.clusterId().equals(getName())) - .forEach(ce -> ce.names().forEach( - name -> endpoints.add(ApplicationClusterEndpoint.builder() - .scope(ce.scope()) - .weight(ce.weight().orElse(1)) // Default to weight=1 if not set - .routingMethod(ce.routingMethod()) - .dnsName(ApplicationClusterEndpoint.DnsName.from(name)) - .hosts(hosts) - .clusterId(getName()) - .authMethod(ce.authMethod()) - .build()) - )); - this.endpoints = List.copyOf(endpoints); + deployState.getEndpoints().stream() + .filter(ce -> ce.clusterId().equals(getName())) + .forEach(ce -> ce.names().forEach( + name -> endpoints.add(ApplicationClusterEndpoint.builder() + .scope(ce.scope()) + .weight(ce.weight().orElse(1)) + .routingMethod(ce.routingMethod()) + .dnsName(ApplicationClusterEndpoint.DnsName.from(name)) + .hosts(hosts) + .clusterId(getName()) + .authMethod(ce.authMethod()) + .build()) + )); + this.endpoints = Collections.unmodifiableList(endpoints); } @Override @@ -299,12 +295,15 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat @Override public void getConfig(QrStartConfig.Builder builder) { super.getConfig(builder); + var memoryPct = getMemoryPercentage().orElse(null); + int heapsize = memoryPct != null && memoryPct.availableMemoryGb().isPresent() + ? (int) (memoryPct.availableMemoryGb().getAsDouble() * 1024) : 1536; builder.jvm.verbosegc(true) .availableProcessors(0) .compressedClassSpaceSize(0) - .minHeapsize(1536) - .heapsize(1536); - getMemoryPercentage().ifPresent(percentage -> builder.jvm.heapSizeAsPercentageOfPhysicalMemory(percentage)); + .minHeapsize(heapsize) + .heapsize(heapsize); + if (memoryPct != null) builder.jvm.heapSizeAsPercentageOfPhysicalMemory(memoryPct.percentage()); } @Override @@ -373,6 +372,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat @Override public String name() { return getName(); } + public OnnxModelCost.Calculator onnxModelCost() { return onnxModelCost; } + public static class MbusParams { // the amount of the maxpendingbytes to process concurrently, typically 0.2 (20%) final Double maxConcurrentFactor; @@ -390,4 +391,14 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat } } + public static class UserConfiguredUrls { + + private final Set<String> urls = new HashSet<>(); + + public void add(String url) { urls.add(url); } + + public Set<String> all() { return urls; } + + } + } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java index 6bbc24e8739..fa13e7ec9d6 100755 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java @@ -62,6 +62,7 @@ import com.yahoo.vespa.model.container.search.ContainerSearch; import com.yahoo.vespa.model.container.search.searchchain.SearchChains; import com.yahoo.vespa.model.content.Content; import com.yahoo.vespa.model.search.SearchCluster; + import java.nio.file.Path; import java.util.ArrayList; import java.util.Collection; @@ -71,6 +72,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.OptionalDouble; import java.util.Set; import java.util.TreeSet; @@ -718,5 +720,11 @@ public abstract class ContainerCluster<CONTAINER extends Container> * Returns the percentage of host physical memory this application has specified for nodes in this cluster, * or empty if this is not specified by the application. */ - public Optional<Integer> getMemoryPercentage() { return Optional.empty(); } + public record JvmMemoryPercentage(int percentage, OptionalDouble availableMemoryGb) { + static JvmMemoryPercentage of(int percentage) { return new JvmMemoryPercentage(percentage, OptionalDouble.empty()); } + static JvmMemoryPercentage of(int percentage, double availableMemoryGb) { + return new JvmMemoryPercentage(percentage, OptionalDouble.of(availableMemoryGb)); + } + } + public Optional<JvmMemoryPercentage> getMemoryPercentage() { return Optional.empty(); } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java index 906ef739ef1..1b47f59653e 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java @@ -45,10 +45,6 @@ public class ContainerModelEvaluation implements private final RankProfileList rankProfileList; private final FileDistributedOnnxModels onnxModels; // For cluster specific ONNX model settings - public ContainerModelEvaluation(ApplicationContainerCluster cluster, RankProfileList rankProfileList) { - this(cluster, rankProfileList, null); - } - public ContainerModelEvaluation(ApplicationContainerCluster cluster, RankProfileList rankProfileList, FileDistributedOnnxModels onnxModels) { this.rankProfileList = Objects.requireNonNull(rankProfileList, "rankProfileList cannot be null"); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java index 205848e1b67..76bb1a9e02a 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java @@ -5,6 +5,7 @@ package com.yahoo.vespa.model.container.component; import com.yahoo.config.ModelReference; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.embedding.BertBaseEmbedderConfig; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; import com.yahoo.vespa.model.container.xml.ModelIdResolver; import org.w3c.dom.Element; @@ -33,7 +34,7 @@ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConf private final Integer onnxGpuDevice; - public BertEmbedder(Element xml, DeployState state) { + public BertEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) { super("ai.vespa.embedding.BertBaseEmbedder", INTEGRATION_BUNDLE_NAME, xml); model = ModelIdResolver.resolveToModelReference(getChild(xml, "transformer-model"), state); vocab = ModelIdResolver.resolveToModelReference(getChild(xml, "tokenizer-vocab"), state); @@ -49,6 +50,7 @@ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConf onnxInteropThreads = getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null); onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null); onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null); + cluster.onnxModelCost().registerModel(model); } @Override diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java index c0fdfe3dc64..63096ebcbe2 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java @@ -5,7 +5,7 @@ package com.yahoo.vespa.model.container.component; import com.yahoo.config.ModelReference; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.embedding.ColBertEmbedderConfig; -import com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; import com.yahoo.vespa.model.container.xml.ModelIdResolver; import org.w3c.dom.Element; @@ -40,7 +40,7 @@ public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderCo private final Integer onnxIntraopThreads; private final Integer onnxGpuDevice; - public ColBertEmbedder(Element xml, DeployState state) { + public ColBertEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) { super("ai.vespa.embedding.ColBertEmbedder", INTEGRATION_BUNDLE_NAME, xml); var transformerModelElem = getOptionalChild(xml, "transformer-model").orElseThrow(); model = ModelIdResolver.resolveToModelReference(transformerModelElem, state); @@ -60,7 +60,7 @@ public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderCo onnxInteropThreads = getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null); onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null); onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null); - + cluster.onnxModelCost().registerModel(model); } private static ModelReference resolveDefaultVocab(Element model, DeployState state) { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java index f4017339699..41b80bf1cb2 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java @@ -5,6 +5,7 @@ package com.yahoo.vespa.model.container.component; import com.yahoo.config.ModelReference; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; import com.yahoo.vespa.model.container.xml.ModelIdResolver; import org.w3c.dom.Element; @@ -33,7 +34,7 @@ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEm private final Integer onnxGpuDevice; private final String poolingStrategy; - public HuggingFaceEmbedder(Element xml, DeployState state) { + public HuggingFaceEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) { super("ai.vespa.embedding.huggingface.HuggingFaceEmbedder", INTEGRATION_BUNDLE_NAME, xml); var transformerModelElem = getOptionalChild(xml, "transformer-model").orElseThrow(); model = ModelIdResolver.resolveToModelReference(transformerModelElem, state); @@ -51,6 +52,7 @@ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEm onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null); onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null); poolingStrategy = getChildValue(xml, "pooling-strategy").orElse(null); + cluster.onnxModelCost().registerModel(model); } private static ModelReference resolveDefaultVocab(Element model, DeployState state) { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java index f0296d49472..3261d454b4f 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.model.container.search; +import com.yahoo.config.application.api.ApplicationPackage; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.container.QrSearchersConfig; import com.yahoo.prelude.semantics.SemanticRulesConfig; @@ -56,12 +57,14 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains> private QueryProfiles queryProfiles; private SemanticRules semanticRules; private PageTemplates pageTemplates; + private ApplicationPackage app; public ContainerSearch(DeployState deployState, ApplicationContainerCluster cluster, SearchChains chains) { super(chains); this.globalPhase = deployState.featureFlags().enableGlobalPhase(); this.useReconfigurableDispatcher = deployState.featureFlags().useReconfigurableDispatcher(); this.schemasWithGlobalPhase = getSchemasWithGlobalPhase(deployState); + this.app = deployState.getApplicationPackage(); this.owningCluster = cluster; owningCluster.addComponent(Component.fromClassAndBundle(CompiledQueryProfileRegistry.class, SEARCH_AND_DOCPROC_BUNDLE)); @@ -96,6 +99,9 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains> if ( ! schemasWithGlobalPhase.contains(documentDb.getSchemaName())) continue; var factory = new RankProfilesEvaluatorComponent(documentDb); if ( ! owningCluster.getComponentsMap().containsKey(factory.getComponentId())) { + var onnxModels = documentDb.getDerivedConfiguration().getRankProfileList().getOnnxModels(); + onnxModels.asMap().forEach( + (__, model) -> owningCluster.onnxModelCost().registerModel(app.getFile(model.getFilePath()))); owningCluster.addComponent(factory); } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java index 35b0213bf59..d9c4dea478c 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java @@ -778,6 +778,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { !container.getHostResource().realResources().gpuResources().isZero()); onnxModel.setGpuDevice(gpuDevice, hasGpu); } + cluster.onnxModelCost().registerModel(context.getApplicationPackage().getFile(onnxModel.getFilePath())); } cluster.setModelEvaluation(new ContainerModelEvaluation(cluster, profiles, models)); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java index bb72eda7d04..d18309ef0af 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java @@ -256,7 +256,7 @@ public class ContentCluster extends TreeConfigProducer<AnyConfigProducer> implem for (ContainerModel containerModel : containers) { Optional<String> hostClusterId = containerModel.getCluster().getHostClusterId(); if (hostClusterId.isPresent() && hostClusterId.get().equals(clusterId) && containerModel.getCluster().getMemoryPercentage().isPresent()) { - return containerModel.getCluster().getMemoryPercentage().get() * 0.01; + return containerModel.getCluster().getMemoryPercentage().get().percentage() * 0.01; } } return 0.0; diff --git a/config-model/src/main/java/com/yahoo/vespa/model/filedistribution/UserConfiguredFiles.java b/config-model/src/main/java/com/yahoo/vespa/model/filedistribution/UserConfiguredFiles.java index 8bed5e64bf5..03541ecadf3 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/filedistribution/UserConfiguredFiles.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/filedistribution/UserConfiguredFiles.java @@ -11,6 +11,7 @@ import com.yahoo.path.Path; import com.yahoo.vespa.config.ConfigDefinition; import com.yahoo.vespa.config.ConfigDefinitionKey; import com.yahoo.vespa.config.ConfigPayloadBuilder; + import com.yahoo.yolean.Exceptions; import java.io.File; @@ -21,6 +22,8 @@ import java.util.Map; import java.util.Optional; import java.util.logging.Level; +import static com.yahoo.vespa.model.container.ApplicationContainerCluster.UserConfiguredUrls; + /** * Utility methods for registering file distribution of files/paths/urls/models defined by the user. * @@ -30,10 +33,12 @@ public class UserConfiguredFiles implements Serializable { private final FileRegistry fileRegistry; private final DeployLogger logger; + private final UserConfiguredUrls userConfiguredUrls; - public UserConfiguredFiles(FileRegistry fileRegistry, DeployLogger logger) { + public UserConfiguredFiles(FileRegistry fileRegistry, DeployLogger logger, UserConfiguredUrls userConfiguredUrls) { this.fileRegistry = fileRegistry; this.logger = logger; + this.userConfiguredUrls = userConfiguredUrls; } /** @@ -133,7 +138,10 @@ public class UserConfiguredFiles implements Serializable { Path path; if (isModelType) { var modelReference = ModelReference.valueOf(builder.getValue()); - if (modelReference.path().isEmpty()) return; + if (modelReference.path().isEmpty()) { + modelReference.url().ifPresent(url -> userConfiguredUrls.add(url.value())); + return; + } path = Path.fromString(modelReference.path().get().value()); } else { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/ml/OnnxModelProbe.java b/config-model/src/main/java/com/yahoo/vespa/model/ml/OnnxModelProbe.java index 7c86267c1b6..38dda3e29ff 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/ml/OnnxModelProbe.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/ml/OnnxModelProbe.java @@ -18,6 +18,9 @@ import java.io.InputStream; import java.io.OutputStream; import java.nio.charset.StandardCharsets; import java.util.Map; +import java.util.Optional; + +import static com.yahoo.yolean.Exceptions.uncheck; /** * Defers to 'vespa-analyze-onnx-model' to determine the output type given @@ -29,6 +32,7 @@ import java.util.Map; public class OnnxModelProbe { private static final String binary = "vespa-analyze-onnx-model"; + private static final ObjectMapper jsonParser = new ObjectMapper(); static TensorType probeModel(ApplicationPackage app, Path modelPath, String outputName, Map<String, TensorType> inputTypes) { TensorType outputType = TensorType.empty; @@ -41,8 +45,9 @@ public class OnnxModelProbe { // Otherwise, run vespa-analyze-onnx-model if the model is available if (outputType.equals(TensorType.empty) && app.getFile(modelPath).exists()) { String jsonInput = createJsonInput(app.getFileReference(modelPath).getAbsolutePath(), inputTypes); - String jsonOutput = callVespaAnalyzeOnnxModel(jsonInput); + var jsonOutput = callVespaAnalyzeOnnxModel(jsonInput); outputType = outputTypeFromJson(jsonOutput, outputName); + writeMemoryStats(app, modelPath, MemoryStats.fromJson(jsonOutput)); if ( ! outputType.equals(TensorType.empty)) { writeProbedOutputType(app, modelPath, contextKey, outputType); } @@ -53,6 +58,22 @@ public class OnnxModelProbe { return outputType; } + public static Optional<MemoryStats> probeMemoryStats(ApplicationPackage app, Path modelPath) { + return Optional.of(app.getFile(memoryStatsPath(modelPath))) + .filter(ApplicationFile::exists) + .map(file -> MemoryStats.fromJson(uncheck(() -> jsonParser.readTree(file.createReader())))); + } + + private static void writeMemoryStats(ApplicationPackage app, Path modelPath, MemoryStats memoryStats) throws IOException { + String path = app.getFileReference(memoryStatsPath(modelPath)).getAbsolutePath(); + IOUtils.writeFile(path, memoryStats.toJson().toPrettyString(), false); + } + + private static Path memoryStatsPath(Path modelPath) { + var fileName = OnnxModelInfo.asValidIdentifier(modelPath.getRelative()) + ".memory_stats"; + return ApplicationPackage.MODELS_GENERATED_REPLICATED_DIR.append(fileName); + } + private static String createContextKey(String onnxName, Map<String, TensorType> inputTypes) { StringBuilder key = new StringBuilder().append(onnxName).append(":"); inputTypes.entrySet().stream().sorted(Map.Entry.comparingByKey()) @@ -95,9 +116,7 @@ public class OnnxModelProbe { return TensorType.empty; } - private static TensorType outputTypeFromJson(String json, String outputName) throws IOException { - ObjectMapper m = new ObjectMapper(); - JsonNode root = m.readTree(json); + private static TensorType outputTypeFromJson(JsonNode root, String outputName) throws IOException { if ( ! root.isObject() || ! root.has("outputs")) { return TensorType.empty; } @@ -123,7 +142,7 @@ public class OnnxModelProbe { return out.toString(); } - private static String callVespaAnalyzeOnnxModel(String jsonInput) throws IOException, InterruptedException { + private static JsonNode callVespaAnalyzeOnnxModel(String jsonInput) throws IOException, InterruptedException { StringBuilder output = new StringBuilder(); ProcessBuilder processBuilder = new ProcessBuilder(binary, "--probe-types"); @@ -148,7 +167,16 @@ public class OnnxModelProbe { throw new IllegalArgumentException("Error from '" + binary + "'. Return code: " + returnCode + ". " + "Output: '" + output + "'"); } - return output.toString(); + return jsonParser.readTree(output.toString()); + } + + public record MemoryStats(long vmSize, long vmRss) { + static MemoryStats fromJson(JsonNode json) { + return new MemoryStats(json.get("vm_size").asLong(), json.get("vm_rss").asLong()); + } + JsonNode toJson() { + return jsonParser.createObjectNode().put("vm_size", vmSize).put("vm_rss", vmRss); + } } } |