diff options
author | bjormel <bjormel@yahooinc.com> | 2023-10-01 12:23:12 +0000 |
---|---|---|
committer | bjormel <bjormel@yahooinc.com> | 2023-10-01 12:23:12 +0000 |
commit | e9058b555d4dfea2f6c872d9a677e8678b569569 (patch) | |
tree | fa1b67c6e39712c1e0d9f308b0dd55573b43f913 /config-model/src/main | |
parent | 0ad931fa86658904fe9212b014d810236b0e00e4 (diff) | |
parent | 16030193ec04ee41e98779a3d7ee6a6c1d0d0d6f (diff) |
Merge branch 'master' into bjormel/aws-main-controller
Diffstat (limited to 'config-model/src/main')
41 files changed, 570 insertions, 312 deletions
diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java b/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java index 4df7a76031a..a7e8cd52e01 100644 --- a/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java +++ b/config-model/src/main/java/com/yahoo/config/model/deploy/DeployState.java @@ -18,6 +18,7 @@ import com.yahoo.config.model.api.EndpointCertificateSecrets; import com.yahoo.config.model.api.HostProvisioner; import com.yahoo.config.model.api.Model; import com.yahoo.config.model.api.ModelContext; +import com.yahoo.config.model.api.OnnxModelCost; import com.yahoo.config.model.api.Provisioned; import com.yahoo.config.model.api.Reindexing; import com.yahoo.config.model.api.ValidationParameters; @@ -90,6 +91,7 @@ public class DeployState implements ConfigDefinitionStore { private final Provisioned provisioned; private final Reindexing reindexing; private final ExecutorService executor; + private final OnnxModelCost onnxModelCost; public static DeployState createTestState() { return new Builder().build(); @@ -124,7 +126,8 @@ public class DeployState implements ConfigDefinitionStore { boolean accessLoggingEnabledByDefault, Optional<DockerImage> wantedDockerImageRepo, Reindexing reindexing, - Optional<ValidationOverrides> validationOverrides) { + Optional<ValidationOverrides> validationOverrides, + OnnxModelCost onnxModelCost) { this.logger = deployLogger; this.fileRegistry = fileRegistry; this.executor = executor; @@ -152,6 +155,7 @@ public class DeployState implements ConfigDefinitionStore { this.now = now; this.wantedDockerImageRepo = wantedDockerImageRepo; this.reindexing = reindexing; + this.onnxModelCost = onnxModelCost; } public static HostProvisioner getDefaultModelHostProvisioner(ApplicationPackage applicationPackage) { @@ -224,7 +228,7 @@ public class DeployState implements ConfigDefinitionStore { // Mapping from key to something that can create a config definition. private Map<ConfigDefinitionKey, UnparsedConfigDefinition> existingConfigDefs = null; - // Cache of config defs for all [def,version] combinations looked up so far. + // Cache of config definitions looked up so far. private final Map<ConfigDefinitionKey, ConfigDefinition> defArchive = new LinkedHashMap<>(); public ApplicationPackage getApplicationPackage() { @@ -305,6 +309,8 @@ public class DeployState implements ConfigDefinitionStore { public Optional<Reindexing> reindexing() { return Optional.ofNullable(reindexing); } + public OnnxModelCost onnxModelCost() { return onnxModelCost; } + public boolean isHostedTenantApplication(ApplicationType type) { boolean isTesterApplication = getProperties().applicationId().instance().isTester(); return isHosted() && type == ApplicationType.DEFAULT && !isTesterApplication; @@ -333,6 +339,7 @@ public class DeployState implements ConfigDefinitionStore { private QueryProfiles queryProfiles = null; private Reindexing reindexing = null; private Optional<ValidationOverrides> validationOverrides = Optional.empty(); + private OnnxModelCost onnxModelCost = OnnxModelCost.disabled(); public Builder() {} @@ -450,6 +457,8 @@ public class DeployState implements ConfigDefinitionStore { return this; } + public Builder onnxModelCost(OnnxModelCost instance) { this.onnxModelCost = instance; return this; } + public DeployState build() { return build(new ValidationParameters()); } @@ -482,7 +491,8 @@ public class DeployState implements ConfigDefinitionStore { accessLoggingEnabledByDefault, wantedDockerImageRepo, reindexing, - validationOverrides); + validationOverrides, + onnxModelCost); } } diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java index 815c32e3c8f..77356292f9a 100644 --- a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java +++ b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java @@ -86,6 +86,7 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea private boolean allowUserFilters = true; private List<DataplaneToken> dataplaneTokens; private int contentLayerMetadataFeatureLevel = 0; + private boolean dynamicHeapSize = false; @Override public ModelContext.FeatureFlags featureFlags() { return this; } @Override public boolean multitenant() { return multitenant; } @@ -144,6 +145,7 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea @Override public boolean enableGlobalPhase() { return true; } // Enable global-phase by default for unit tests only @Override public List<DataplaneToken> dataplaneTokens() { return dataplaneTokens; } @Override public int contentLayerMetadataFeatureLevel() { return contentLayerMetadataFeatureLevel; } + @Override public boolean dynamicHeapSize() { return dynamicHeapSize; } public TestProperties sharedStringRepoNoReclaim(boolean sharedStringRepoNoReclaim) { this.sharedStringRepoNoReclaim = sharedStringRepoNoReclaim; @@ -379,6 +381,8 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea return this; } + public TestProperties setDynamicHeapSize(boolean b) { this.dynamicHeapSize = b; return this; } + public static class Spec implements ConfigServerSpec { private final String hostName; diff --git a/config-model/src/main/java/com/yahoo/config/model/producer/UserConfigRepo.java b/config-model/src/main/java/com/yahoo/config/model/producer/UserConfigRepo.java index 3d7eafe658f..b59293fbac1 100644 --- a/config-model/src/main/java/com/yahoo/config/model/producer/UserConfigRepo.java +++ b/config-model/src/main/java/com/yahoo/config/model/producer/UserConfigRepo.java @@ -2,7 +2,6 @@ package com.yahoo.config.model.producer; import com.yahoo.vespa.config.ConfigDefinitionKey; -import com.yahoo.vespa.config.ConfigPayload; import com.yahoo.vespa.config.ConfigPayloadBuilder; import java.util.LinkedHashMap; @@ -16,31 +15,13 @@ import java.util.Set; * @author Ulf Lilleengen */ public class UserConfigRepo { + private final Map<ConfigDefinitionKey, ConfigPayloadBuilder> userConfigsMap; public UserConfigRepo() { this.userConfigsMap = new LinkedHashMap<>(); } - @Override - public UserConfigRepo clone() { - return new UserConfigRepo(copyBuilders(userConfigsMap)); - } - - /** - * Must copy the builder, because the merge method on {@link TreeConfigProducer} might override the row's builders otherwise - */ - private Map<ConfigDefinitionKey, ConfigPayloadBuilder> copyBuilders(Map<ConfigDefinitionKey, ConfigPayloadBuilder> source) { - Map<ConfigDefinitionKey, ConfigPayloadBuilder> ret = new LinkedHashMap<>(); - for (Map.Entry<ConfigDefinitionKey, ConfigPayloadBuilder> e : source.entrySet()) { - ConfigDefinitionKey key = e.getKey(); - ConfigPayloadBuilder sourceVal = e.getValue(); - ConfigPayloadBuilder destVal = new ConfigPayloadBuilder(ConfigPayload.fromBuilder(sourceVal)); - ret.put(key, destVal); - } - return ret; - } - public UserConfigRepo(Map<ConfigDefinitionKey, ConfigPayloadBuilder> map) { this.userConfigsMap = map; } diff --git a/config-model/src/main/java/com/yahoo/config/model/test/MockApplicationPackage.java b/config-model/src/main/java/com/yahoo/config/model/test/MockApplicationPackage.java index dbcd1cea2fa..342b5f243e7 100644 --- a/config-model/src/main/java/com/yahoo/config/model/test/MockApplicationPackage.java +++ b/config-model/src/main/java/com/yahoo/config/model/test/MockApplicationPackage.java @@ -488,6 +488,8 @@ public class MockApplicationPackage implements ApplicationPackage { throw new UnsupportedOperationException(); } + @Override public long getSize() { return file.length(); } + @Override public int compareTo(ApplicationFile other) { return this.getPath().getName().compareTo((other).getPath().getName()); diff --git a/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java b/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java index 7d88985b2d5..f6a022e9930 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/IndexInfo.java @@ -82,7 +82,7 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { } // Commands for summary fields - // TODO: Move to fieldinfo and implement differently. This is not right + // TODO: Move to schemainfo and implement differently for (SummaryField summaryField : schema.getUniqueNamedSummaryFields().values()) { if (summaryField.getTransform().isTeaser()) { addIndexCommand(summaryField.getName(), CMD_DYNTEASER); @@ -90,6 +90,13 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { if (summaryField.getTransform().isBolded()) { addIndexCommand(summaryField.getName(), CMD_HIGHLIGHT); } + + var sourceField = schema.getField(summaryField.getSourceField()); // Take the first as they should all be consistent + if (sourceField != null && sourceField.getMatching().getType().equals(MatchType.GRAM)) { + addIndexCommand(summaryField.getName(), + "ngram " + (sourceField.getMatching().getGramSize().orElse(NGramMatch.DEFAULT_GRAM_SIZE))); + + } } } @@ -452,7 +459,7 @@ public class IndexInfo extends Derived implements IndexInfoConfig.Producer { iiB.command( new IndexInfoConfig.Indexinfo.Command.Builder() .indexname(fieldSet.getName()) - .command("ngram "+(fieldSetMatching.getGramSize()>0 ? fieldSetMatching.getGramSize() : NGramMatch.DEFAULT_GRAM_SIZE))); + .command("ngram " + fieldSetMatching.getGramSize().orElse(NGramMatch.DEFAULT_GRAM_SIZE))); } else if (fieldSetMatching.getType().equals(MatchType.TEXT)) { } diff --git a/config-model/src/main/java/com/yahoo/schema/document/Matching.java b/config-model/src/main/java/com/yahoo/schema/document/Matching.java index 1fe947d672b..e3f49cb834d 100644 --- a/config-model/src/main/java/com/yahoo/schema/document/Matching.java +++ b/config-model/src/main/java/com/yahoo/schema/document/Matching.java @@ -1,7 +1,10 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.schema.document; +import com.yahoo.schema.processing.NGramMatch; + import java.io.Serializable; +import java.util.OptionalInt; /** * Defines how a field should be matched. @@ -23,8 +26,8 @@ public class Matching implements Cloneable, Serializable { private boolean algorithmUserSet = false; - /** The gram size is the n in n-gram, or -1 if not set. Should only be set with gram matching. */ - private int gramSize = -1; + /** The gram size is the n in n-gram, or empty if not set. Should only be set with gram matching. */ + private OptionalInt gramSize = OptionalInt.empty(); /** Maximum number of characters to consider when searching in this field. Used for limiting resources, especially in streaming search. */ private Integer maxLength; @@ -67,10 +70,10 @@ public class Matching implements Cloneable, Serializable { public boolean isSuffix() { return algorithm == MatchAlgorithm.SUFFIX; } - /** Returns the gram size, or -1 if not set. Should only be set with gram matching. */ - public int getGramSize() { return gramSize; } + /** Returns the gram size, or empty if not set. Should only be set with gram matching. */ + public OptionalInt getGramSize() { return gramSize; } - public void setGramSize(int gramSize) { this.gramSize=gramSize; } + public void setGramSize(int gramSize) { this.gramSize = OptionalInt.of(gramSize); } /** * Merge data from another matching object @@ -107,10 +110,11 @@ public class Matching implements Cloneable, Serializable { @Override public String toString() { - return type + " matching [" + (type==MatchType.GRAM ? "gram size " + gramSize : "supports " + algorithm) + - "], [exact-terminator "+exactMatchTerminator+"]"; + return type + " matching [" + (type == MatchType.GRAM ? "gram size " + gramSize.orElse(NGramMatch.DEFAULT_GRAM_SIZE) : "supports " + algorithm) + + "], [exact-terminator " + exactMatchTerminator + "]"; } + @Override public Matching clone() { try { return (Matching)super.clone(); @@ -129,7 +133,7 @@ public class Matching implements Cloneable, Serializable { if ( this.exactMatchTerminator == null && other.exactMatchTerminator != null) return false; if ( this.exactMatchTerminator != null && ( ! this.exactMatchTerminator.equals(other.exactMatchTerminator)) ) return false; - if ( gramSize != other.gramSize) return false; + if ( ! gramSize.equals(other.gramSize)) return false; return true; } diff --git a/config-model/src/main/java/com/yahoo/schema/document/SDField.java b/config-model/src/main/java/com/yahoo/schema/document/SDField.java index 7821c101880..6cbdb38b9bc 100644 --- a/config-model/src/main/java/com/yahoo/schema/document/SDField.java +++ b/config-model/src/main/java/com/yahoo/schema/document/SDField.java @@ -196,6 +196,8 @@ public class SDField extends Field implements TypedKey, ImmutableSDField { return isExtraField; } + public boolean isDocumentField() { return ! isExtraField; } + @Override public boolean isImportedField() { return false; @@ -613,11 +615,8 @@ public class SDField extends Field implements TypedKey, ImmutableSDField { @Override public RankType getRankType() { return this.rankType; } - /** - * Returns the search-time attribute settings of this field or null if none is set. - * - * <p>TODO: Make unmodifiable.</p> - */ + /** Returns the search-time attribute settings of this field or null if none is set. */ + // TODO: Make unmodifiable @Override public Map<String, Attribute> getAttributes() { return attributes; } diff --git a/config-model/src/main/java/com/yahoo/schema/processing/IndexingInputs.java b/config-model/src/main/java/com/yahoo/schema/processing/IndexingInputs.java index 985ec8653c7..0537f1704ab 100644 --- a/config-model/src/main/java/com/yahoo/schema/processing/IndexingInputs.java +++ b/config-model/src/main/java/com/yahoo/schema/processing/IndexingInputs.java @@ -31,9 +31,8 @@ public class IndexingInputs extends Processor { ScriptExpression script = field.getIndexingScript(); if (script == null) continue; - String fieldName = field.getName(); - script = (ScriptExpression)new DefaultToCurrentField(fieldName).convert(script); - script = (ScriptExpression)new EnsureInputExpression(fieldName).convert(script); + script = (ScriptExpression)new DefaultToCurrentField(field).convert(script); + script = (ScriptExpression)new EnsureInputExpression(field).convert(script); if (validate) new VerifyInputExpression(schema, field).visit(script); @@ -43,10 +42,10 @@ public class IndexingInputs extends Processor { private static class DefaultToCurrentField extends ExpressionConverter { - final String fieldName; + final SDField field; - DefaultToCurrentField(String fieldName) { - this.fieldName = fieldName; + DefaultToCurrentField(SDField field) { + this.field = field; } @Override @@ -56,27 +55,28 @@ public class IndexingInputs extends Processor { @Override protected Expression doConvert(Expression exp) { - return new InputExpression(fieldName); + return new InputExpression(field.getName()); } } private static class EnsureInputExpression extends ExpressionConverter { - final String fieldName; + final SDField field; - EnsureInputExpression(String fieldName) { - this.fieldName = fieldName; + EnsureInputExpression(SDField field) { + this.field = field; } @Override protected boolean shouldConvert(Expression exp) { - return exp instanceof StatementExpression; + return exp instanceof StatementExpression + && ( field.isDocumentField() || ( field.getAttribute() != null && field.getAttribute().isMutable())); } @Override protected Expression doConvert(Expression exp) { if (exp.requiredInputType() != null) { - return new StatementExpression(new InputExpression(fieldName), exp); + return new StatementExpression(new InputExpression(field.getName()), exp); } else { return exp; } diff --git a/config-model/src/main/java/com/yahoo/schema/processing/IndexingValidation.java b/config-model/src/main/java/com/yahoo/schema/processing/IndexingValidation.java index 3c7e9b4066f..e17b1e46a6e 100644 --- a/config-model/src/main/java/com/yahoo/schema/processing/IndexingValidation.java +++ b/config-model/src/main/java/com/yahoo/schema/processing/IndexingValidation.java @@ -24,6 +24,7 @@ import com.yahoo.vespa.indexinglanguage.expressions.SummaryExpression; import com.yahoo.vespa.indexinglanguage.expressions.VerificationContext; import com.yahoo.vespa.indexinglanguage.expressions.VerificationException; import com.yahoo.vespa.model.container.search.QueryProfiles; +import com.yahoo.yolean.Exceptions; import java.util.HashSet; import java.util.Set; @@ -51,7 +52,7 @@ public class IndexingValidation extends Processor { converter.convert(exp); // TODO: stop doing this explicitly when visiting a script does not branch } } catch (VerificationException e) { - fail(schema, field, "For expression '" + e.getExpression() + "': " + e.getMessage()); + fail(schema, field, "For expression '" + e.getExpression() + "': " + Exceptions.toMessageString(e)); } } } diff --git a/config-model/src/main/java/com/yahoo/schema/processing/NGramMatch.java b/config-model/src/main/java/com/yahoo/schema/processing/NGramMatch.java index f1ff910be43..6ec5428156f 100644 --- a/config-model/src/main/java/com/yahoo/schema/processing/NGramMatch.java +++ b/config-model/src/main/java/com/yahoo/schema/processing/NGramMatch.java @@ -31,7 +31,7 @@ public class NGramMatch extends Processor { for (SDField field : schema.allConcreteFields()) { if (field.getMatching().getType().equals(MatchType.GRAM)) implementGramMatch(schema, field, validate); - else if (validate && field.getMatching().getGramSize() >= 0) + else if (validate && field.getMatching().getGramSize().isPresent()) throw new IllegalArgumentException("gram-size can only be set when the matching mode is 'gram'"); } } @@ -40,9 +40,7 @@ public class NGramMatch extends Processor { if (validate && field.doesAttributing() && ! field.doesIndexing()) throw new IllegalArgumentException("gram matching is not supported with attributes, use 'index' in indexing"); - int n = field.getMatching().getGramSize(); - if (n < 0) - n = DEFAULT_GRAM_SIZE; // not set - use default gram size + int n = field.getMatching().getGramSize().orElse(DEFAULT_GRAM_SIZE); if (validate && n == 0) throw new IllegalArgumentException("Illegal gram size in " + field + ": Must be at least 1"); field.getNormalizing().inferCodepoint(); diff --git a/config-model/src/main/java/com/yahoo/schema/processing/PredicateProcessor.java b/config-model/src/main/java/com/yahoo/schema/processing/PredicateProcessor.java index 0362dc39c4c..1627320dc54 100644 --- a/config-model/src/main/java/com/yahoo/schema/processing/PredicateProcessor.java +++ b/config-model/src/main/java/com/yahoo/schema/processing/PredicateProcessor.java @@ -15,11 +15,11 @@ import com.yahoo.vespa.documentmodel.DocumentSummary; import com.yahoo.vespa.documentmodel.SummaryField; import com.yahoo.vespa.documentmodel.SummaryTransform; import com.yahoo.vespa.indexinglanguage.ExpressionConverter; +import com.yahoo.vespa.indexinglanguage.expressions.ConstantExpression; import com.yahoo.vespa.indexinglanguage.expressions.Expression; import com.yahoo.vespa.indexinglanguage.expressions.OptimizePredicateExpression; import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression; import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression; -import com.yahoo.vespa.indexinglanguage.expressions.SetValueExpression; import com.yahoo.vespa.indexinglanguage.expressions.SetVarExpression; import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression; import com.yahoo.vespa.model.container.search.QueryProfiles; @@ -112,14 +112,14 @@ public class PredicateProcessor extends Processor { private Expression makeSetPredicateVariablesScript(BooleanIndexDefinition options) { List<Expression> expressions = new ArrayList<>(); - expressions.add(new SetValueExpression(new IntegerFieldValue(options.getArity()))); + expressions.add(new ConstantExpression(new IntegerFieldValue(options.getArity()))); expressions.add(new SetVarExpression("arity")); if (options.hasLowerBound()) { - expressions.add(new SetValueExpression(new LongFieldValue(options.getLowerBound()))); + expressions.add(new ConstantExpression(new LongFieldValue(options.getLowerBound()))); expressions.add(new SetVarExpression("lower_bound")); } if (options.hasUpperBound()) { - expressions.add(new SetValueExpression(new LongFieldValue(options.getUpperBound()))); + expressions.add(new ConstantExpression(new LongFieldValue(options.getUpperBound()))); expressions.add(new SetVarExpression("upper_bound")); } return new StatementExpression(expressions); diff --git a/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryField.java b/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryField.java index 7439e65dee6..49cd36e4bc2 100644 --- a/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryField.java +++ b/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryField.java @@ -17,9 +17,7 @@ import static com.yahoo.text.Lowercase.toLowerCase; */ public class SummaryField extends Field implements Cloneable, TypedKey { - /** - * A source (field name). - */ + /** A source (field name). */ public static class Source implements Serializable { private final String name; @@ -38,12 +36,8 @@ public class SummaryField extends Field implements Cloneable, TypedKey { @Override public boolean equals(Object obj) { - if (!(obj instanceof Source)) { - return false; - } - Source other = (Source)obj; - return name.equals(other.name) && - override == other.override; + if (!(obj instanceof Source other)) return false; + return name.equals(other.name) && override == other.override; } @Override @@ -67,14 +61,14 @@ public class SummaryField extends Field implements Cloneable, TypedKey { */ private Set<Source> sources = new java.util.LinkedHashSet<>(); - private Set<String> destinations=new java.util.LinkedHashSet<>(); + private Set<String> destinations =new java.util.LinkedHashSet<>(); /** True if this field was defined implicitly */ - private boolean implicit=false; + private boolean implicit = false; /** Creates a summary field with NONE as transform */ public SummaryField(String name, DataType type) { - this(name,type, SummaryTransform.NONE); + this(name, type, SummaryTransform.NONE); } /** Creates a summary field with NONE as transform */ @@ -97,7 +91,7 @@ public class SummaryField extends Field implements Cloneable, TypedKey { public boolean isImplicit() { return implicit; } public void setTransform(SummaryTransform transform) { - this.transform=transform; + this.transform = transform; if (SummaryTransform.DYNAMICTEASER.equals(transform) || SummaryTransform.BOLDED.equals(transform)) { // This is the kind of logic we want to have in processing, // but can't because of deriveDocuments mode, which doesn't run @@ -110,9 +104,9 @@ public class SummaryField extends Field implements Cloneable, TypedKey { /** Returns the first source field of this, or null if the source field is not present */ public String getSourceField() { - String sourceName=getName(); - if (sources.size()>0) - sourceName=sources.iterator().next().getName(); + String sourceName = getName(); + if ( ! sources.isEmpty()) + sourceName = sources.iterator().next().getName(); return sourceName; } @@ -137,7 +131,7 @@ public class SummaryField extends Field implements Cloneable, TypedKey { /** Returns the first source name of this, or the field name if no source has been set */ public String getSingleSource() { - if (sources.size()==0) return getName(); + if (sources.isEmpty()) return getName(); return sources.iterator().next().getName(); } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java b/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java index 28ff8dff620..269cb2dfa08 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java @@ -7,8 +7,8 @@ import ai.vespa.rankingexpression.importer.onnx.OnnxImporter; import ai.vespa.rankingexpression.importer.tensorflow.TensorFlowImporter; import ai.vespa.rankingexpression.importer.vespa.VespaImporter; import ai.vespa.rankingexpression.importer.xgboost.XGBoostImporter; -import com.yahoo.component.annotation.Inject; import com.yahoo.component.Version; +import com.yahoo.component.annotation.Inject; import com.yahoo.component.provider.ComponentRegistry; import com.yahoo.config.application.api.ApplicationPackage; import com.yahoo.config.application.api.ValidationOverrides; @@ -197,7 +197,8 @@ public class VespaModelFactory implements ModelFactory { .zone(zone) .now(clock.instant()) .wantedNodeVespaVersion(modelContext.wantedNodeVespaVersion()) - .wantedDockerImageRepo(modelContext.wantedDockerImageRepo()); + .wantedDockerImageRepo(modelContext.wantedDockerImageRepo()) + .onnxModelCost(modelContext.onnxModelCost()); modelContext.previousModel().ifPresent(builder::previousModel); modelContext.reindexing().ifPresent(builder::reindexing); return builder.build(validationParameters); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/metricsproxy/MetricsProxyContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/metricsproxy/MetricsProxyContainerCluster.java index aa3b8b3b821..d10a631fb90 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/admin/metricsproxy/MetricsProxyContainerCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/metricsproxy/MetricsProxyContainerCluster.java @@ -159,6 +159,7 @@ public class MetricsProxyContainerCluster extends ContainerCluster<MetricsProxyC builder.consumer.add(toConsumerBuilder(MetricsConsumer.defaultConsumer)); builder.consumer.add(toConsumerBuilder(newDefaultConsumer())); + if (isHostedVespa()) builder.consumer.add(toConsumerBuilder(MetricsConsumer.vespa9)); getAdmin() .map(Admin::getAmendedMetricsConsumers) .map(consumers -> consumers.stream().map(ConsumersConfigGenerator::toConsumerBuilder).toList()) diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/MetricsConsumer.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/MetricsConsumer.java index cfe3c01e03a..987812f11ad 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/MetricsConsumer.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/MetricsConsumer.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.model.admin.monitoring; import ai.vespa.metrics.set.Metric; import ai.vespa.metrics.set.MetricSet; +import ai.vespa.metrics.set.Vespa9VespaMetricSet; import ai.vespa.metricsproxy.core.VespaMetrics; import ai.vespa.metricsproxy.http.ValuesFetcher; @@ -41,6 +42,9 @@ public class MetricsConsumer { public static final MetricsConsumer vespaCloud = consumer("vespa-cloud", vespaMetricSet, systemMetricSet, networkMetricSet); + public static final MetricsConsumer vespa9 = + consumer("Vespa9", Vespa9VespaMetricSet.vespa9vespaMetricSet, systemMetricSet, networkMetricSet); + private final String id; private final MetricSet metricSet; diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java new file mode 100644 index 00000000000..f87cecb58fe --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/JvmHeapSizeValidator.java @@ -0,0 +1,54 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.model.application.validation; + +import com.yahoo.config.model.deploy.DeployState; +import com.yahoo.vespa.model.VespaModel; + +import java.util.logging.Level; + +/** + * Validates that the container node flavour has enough resources to run configured ONNX models. + * + * @author bjorncs + */ +public class JvmHeapSizeValidator extends Validator { + + @Override + public void validate(VespaModel model, DeployState ds) { + if (!ds.featureFlags().dynamicHeapSize()) return; + if (!ds.isHostedTenantApplication(model.getAdmin().getApplicationType())) return; + + model.getContainerClusters().forEach((clusterId, appCluster) -> { + var mp = appCluster.getMemoryPercentage().orElse(null); + if (mp == null) return; + if (mp.availableMemoryGb().isEmpty()) { + ds.getDeployLogger().log(Level.FINE, "Host resources unknown or percentage overridden with 'allocated-memory'"); + return; + } + long jvmModelCost = appCluster.onnxModelCost().aggregatedModelCostInBytes(); + if (jvmModelCost > 0) { + int percentLimit = 15; + double gbLimit = 0.6; + double availableMemoryGb = mp.availableMemoryGb().getAsDouble(); + double modelCostGb = jvmModelCost / (1024D * 1024 * 1024); + ds.getDeployLogger().log(Level.FINE, () -> "JVM: %d%% (limit: %d%%), %.2fGB (limit: %.2fGB), ONNX: %.2fGB" + .formatted(mp.percentage(), percentLimit, availableMemoryGb, gbLimit, modelCostGb)); + if (mp.percentage() < percentLimit) { + throw new IllegalArgumentException( + ("Allocated percentage of memory of JVM in cluster '%s' is too low (%d%% < %d%%). " + + "Estimated cost of ONNX models is %.2fGB. Either use a node flavor with more memory or use less expensive models. " + + "You may override this validation by specifying 'allocated-memory' (https://docs.vespa.ai/en/performance/container-tuning.html#jvm-heap-size).") + .formatted(clusterId, mp.percentage(), percentLimit, modelCostGb)); + } + if (availableMemoryGb < gbLimit) { + throw new IllegalArgumentException( + ("Allocated memory to JVM in cluster '%s' is too low (%.2fGB < %.2fGB). " + + "Estimated cost of ONNX models is %.2fGB. Either use a node flavor with more memory or use less expensive models. " + + "You may override this validation by specifying 'allocated-memory' (https://docs.vespa.ai/en/performance/container-tuning.html#jvm-heap-size).") + .formatted(clusterId, availableMemoryGb, gbLimit, modelCostGb)); + } + } + }); + } +} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/UrlConfigValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/UrlConfigValidator.java new file mode 100644 index 00000000000..d9dd3729bd3 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/UrlConfigValidator.java @@ -0,0 +1,50 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.model.application.validation; + +import com.yahoo.config.model.deploy.DeployState; +import com.yahoo.vespa.model.VespaModel; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; + +/** + * Validates that config using s3:// urls is used in public system and with nodes that are exclusive. + * + * @author hmusum + */ +public class UrlConfigValidator extends Validator { + + @Override + public void validate(VespaModel model, DeployState state) { + if (! state.isHostedTenantApplication(model.getAdmin().getApplicationType())) return; + + model.getContainerClusters().forEach((__, cluster) -> { + var isExclusive = hasExclusiveNodes(model, cluster); + validateS3UlsInConfig(state, cluster, isExclusive); + }); + } + + private static boolean hasExclusiveNodes(VespaModel model, ApplicationContainerCluster cluster) { + return model.hostSystem().getHosts() + .stream() + .flatMap(hostResource -> hostResource.spec().membership().stream()) + .filter(membership -> membership.cluster().id().equals(cluster.id())) + .anyMatch(membership -> membership.cluster().isExclusive()); + } + + private static void validateS3UlsInConfig(DeployState state, ApplicationContainerCluster cluster, boolean isExclusive) { + if (hasS3UrlInConfig(cluster)) { + // TODO: Would be even better if we could add which config/field the url is set for in the error message + String message = "Found s3:// urls in config for container cluster " + cluster.getName(); + if ( ! state.zone().system().isPublic()) + throw new IllegalArgumentException(message + ". This is only supported in public systems"); + else if ( ! isExclusive) + throw new IllegalArgumentException(message + ". Nodes in the cluster need to be 'exclusive'," + + " see https://cloud.vespa.ai/en/reference/services#nodes"); + } + } + + private static boolean hasS3UrlInConfig(ApplicationContainerCluster cluster) { + return cluster.userConfiguredUrls().all().stream() + .anyMatch(url -> url.startsWith("s3://")); + } + +} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java index 53a553ee624..30aafe67be7 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/Validation.java @@ -87,6 +87,8 @@ public class Validation { new AccessControlFilterExcludeValidator().validate(model, deployState); new CloudUserFilterValidator().validate(model, deployState); new CloudHttpConnectorValidator().validate(model, deployState); + new UrlConfigValidator().validate(model, deployState); + new JvmHeapSizeValidator().validate(model, deployState); additionalValidators.forEach(v -> v.validate(model, deployState)); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/search/IndexingScriptChangeMessageBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/search/IndexingScriptChangeMessageBuilder.java index bbfa939f8a3..f265f2d09a0 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/search/IndexingScriptChangeMessageBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/change/search/IndexingScriptChangeMessageBuilder.java @@ -7,6 +7,7 @@ import com.yahoo.schema.document.Matching; import com.yahoo.schema.document.MatchType; import com.yahoo.schema.document.NormalizeLevel; import com.yahoo.schema.document.Stemming; +import com.yahoo.schema.processing.NGramMatch; import com.yahoo.vespa.documentmodel.SummaryField; import com.yahoo.vespa.documentmodel.SummaryTransform; @@ -89,7 +90,7 @@ public class IndexingScriptChangeMessageBuilder { MatchType type = matching.getType(); String retval = type.getName(); if (type == MatchType.GRAM) { - retval += " (size " + matching.getGramSize() + ")"; + retval += " (size " + matching.getGramSize().orElse(NGramMatch.DEFAULT_GRAM_SIZE) + ")"; } return retval; } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java index 7501f6162c7..9ecd359f90d 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java @@ -7,11 +7,12 @@ import com.yahoo.config.model.producer.AnyConfigProducer; import com.yahoo.config.model.producer.TreeConfigProducer; import com.yahoo.osgi.provider.model.ComponentModel; import com.yahoo.text.XML; -import com.yahoo.vespa.model.container.component.HuggingFaceEmbedder; -import com.yahoo.vespa.model.container.component.HuggingFaceTokenizer; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; import com.yahoo.vespa.model.container.component.BertEmbedder; import com.yahoo.vespa.model.container.component.ColBertEmbedder; import com.yahoo.vespa.model.container.component.Component; +import com.yahoo.vespa.model.container.component.HuggingFaceEmbedder; +import com.yahoo.vespa.model.container.component.HuggingFaceTokenizer; import com.yahoo.vespa.model.container.xml.BundleInstantiationSpecificationBuilder; import org.w3c.dom.Element; @@ -35,19 +36,20 @@ public class DomComponentBuilder extends VespaDomBuilder.DomConfigProducerBuilde @Override protected Component<? super Component<?, ?>, ?> doBuild(DeployState deployState, TreeConfigProducer<AnyConfigProducer> ancestor, Element spec) { - var component = buildComponent(spec, deployState); + var component = buildComponent(spec, deployState, ancestor); addChildren(deployState, ancestor, spec, component); return component; } - private Component<? super Component<?, ?>, ?> buildComponent(Element spec, DeployState state) { + private Component<? super Component<?, ?>, ?> buildComponent( + Element spec, DeployState state, TreeConfigProducer<AnyConfigProducer> ancestor) { if (spec.hasAttribute("type")) { var type = spec.getAttribute("type"); return switch (type) { - case "hugging-face-embedder" -> new HuggingFaceEmbedder(spec, state); + case "hugging-face-embedder" -> new HuggingFaceEmbedder((ApplicationContainerCluster)ancestor, spec, state); case "hugging-face-tokenizer" -> new HuggingFaceTokenizer(spec, state); - case "bert-embedder" -> new BertEmbedder(spec, state); - case "colbert-embedder" -> new ColBertEmbedder(spec, state); + case "colbert-embedder" -> new ColBertEmbedder((ApplicationContainerCluster)ancestor, spec, state); + case "bert-embedder" -> new BertEmbedder((ApplicationContainerCluster)ancestor, spec, state); default -> throw new IllegalArgumentException("Unknown component type '%s'".formatted(type)); }; } else { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/clients/ContainerDocumentApi.java b/config-model/src/main/java/com/yahoo/vespa/model/clients/ContainerDocumentApi.java index 0795fdf41d6..762c670039c 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/clients/ContainerDocumentApi.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/clients/ContainerDocumentApi.java @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.model.clients; +import com.yahoo.config.model.deploy.DeployState; import com.yahoo.container.handler.threadpool.ContainerThreadpoolConfig; import com.yahoo.osgi.provider.model.ComponentModel; import com.yahoo.vespa.model.container.ContainerCluster; @@ -10,6 +11,7 @@ import com.yahoo.vespa.model.container.component.BindingPattern; import com.yahoo.vespa.model.container.component.Handler; import com.yahoo.vespa.model.container.component.SystemBindingPattern; import com.yahoo.vespa.model.container.component.UserBindingPattern; +import org.w3c.dom.Element; import java.nio.file.Path; import java.util.Collection; @@ -29,10 +31,11 @@ public class ContainerDocumentApi { private final boolean ignoreUndefinedFields; - public ContainerDocumentApi(ContainerCluster<?> cluster, HandlerOptions handlerOptions, boolean ignoreUndefinedFields, Set<Integer> portOverride) { + public ContainerDocumentApi(DeployState ds, ContainerCluster<?> cluster, HandlerOptions handlerOptions, + boolean ignoreUndefinedFields, Set<Integer> portOverride) { this.ignoreUndefinedFields = ignoreUndefinedFields; addRestApiHandler(cluster, handlerOptions, portOverride); - addFeedHandler(cluster, handlerOptions, portOverride); + addFeedHandler(ds, cluster, handlerOptions, portOverride); addVespaClientContainerBundle(cluster); } @@ -40,9 +43,9 @@ public class ContainerDocumentApi { c.addPlatformBundle(VESPACLIENT_CONTAINER_BUNDLE); } - private static void addFeedHandler(ContainerCluster<?> cluster, HandlerOptions handlerOptions, Set<Integer> portOverride) { + private static void addFeedHandler(DeployState ds, ContainerCluster<?> cluster, HandlerOptions handlerOptions, Set<Integer> portOverride) { String bindingSuffix = ContainerCluster.RESERVED_URI_PREFIX + "/feedapi"; - var executor = new Threadpool("feedapi-handler", handlerOptions.feedApiThreadpoolOptions); + var executor = new Threadpool(ds, "feedapi-handler", handlerOptions.feedApiThreadpoolOptions); var handler = newVespaClientHandler("com.yahoo.vespa.http.server.FeedHandler", bindingSuffix, handlerOptions, executor, portOverride); cluster.addComponent(handler); @@ -104,9 +107,9 @@ public class ContainerDocumentApi { public static final class HandlerOptions { private final Collection<String> bindings; - private final ContainerThreadpool.UserOptions feedApiThreadpoolOptions; + private final Element feedApiThreadpoolOptions; - public HandlerOptions(Collection<String> bindings, ContainerThreadpool.UserOptions feedApiThreadpoolOptions) { + public HandlerOptions(Collection<String> bindings, Element feedApiThreadpoolOptions) { this.bindings = Collections.unmodifiableCollection(bindings); this.feedApiThreadpoolOptions = feedApiThreadpoolOptions; } @@ -114,9 +117,7 @@ public class ContainerDocumentApi { private static class Threadpool extends ContainerThreadpool { - Threadpool(String name, ContainerThreadpool.UserOptions threadpoolOptions) { - super(name, threadpoolOptions); - } + Threadpool(DeployState ds, String name, Element xml) { super(ds, name, xml); } @Override protected void setDefaultConfigValues(ContainerThreadpoolConfig.Builder builder) { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java index b9021912244..ac679cc406c 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java @@ -8,14 +8,14 @@ import com.yahoo.component.ComponentId; import com.yahoo.component.ComponentSpecification; import com.yahoo.config.FileReference; import com.yahoo.config.application.api.ComponentInfo; +import com.yahoo.config.application.api.DeployLogger; import com.yahoo.config.model.api.ApplicationClusterEndpoint; import com.yahoo.config.model.api.ApplicationClusterInfo; -import com.yahoo.config.model.api.ContainerEndpoint; import com.yahoo.config.model.api.Model; +import com.yahoo.config.model.api.OnnxModelCost; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.config.model.producer.TreeConfigProducer; import com.yahoo.config.provision.AllocatedHosts; -import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.HostSpec; import com.yahoo.container.bundle.BundleInstantiationSpecification; import com.yahoo.container.di.config.ApplicationBundlesConfig; @@ -43,10 +43,12 @@ import com.yahoo.vespa.model.filedistribution.UserConfiguredFiles; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Optional; import java.util.Set; +import java.util.logging.Level; import java.util.stream.Collectors; import static com.yahoo.vespa.model.container.docproc.DocprocChains.DOCUMENT_TYPE_MANAGER_CLASS; @@ -82,6 +84,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat private final Set<FileReference> applicationBundles = new LinkedHashSet<>(); private final Set<String> previousHosts; + private final OnnxModelCost.Calculator onnxModelCost; + private final DeployLogger logger; private ContainerModelEvaluation modelEvaluation; @@ -92,6 +96,7 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat private int zookeeperSessionTimeoutSeconds = 30; private final int transport_events_before_wakeup; private final int transport_connections_per_target; + private final boolean dynamicHeapSize; /** The heap size % of total memory available to the JVM process. */ private final int heapSizePercentageOfAvailableMemory; @@ -100,9 +105,12 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat private List<ApplicationClusterEndpoint> endpoints = List.of(); + private final UserConfiguredUrls userConfiguredUrls = new UserConfiguredUrls(); + public ApplicationContainerCluster(TreeConfigProducer<?> parent, String configSubId, String clusterId, DeployState deployState) { super(parent, configSubId, clusterId, deployState, true, 10); this.tlsClientAuthority = deployState.tlsClientAuthority(); + dynamicHeapSize = deployState.featureFlags().dynamicHeapSize(); previousHosts = Collections.unmodifiableSet(deployState.getPreviousModel().stream() .map(Model::allocatedHosts) .map(AllocatedHosts::getHosts) @@ -125,8 +133,13 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat heapSizePercentageOfAvailableMemory = deployState.featureFlags().heapSizePercentage() > 0 ? Math.min(99, deployState.featureFlags().heapSizePercentage()) : defaultHeapSizePercentageOfAvailableMemory; + onnxModelCost = deployState.onnxModelCost().newCalculator( + deployState.getApplicationPackage(), deployState.getDeployLogger()); + logger = deployState.getDeployLogger(); } + public UserConfiguredUrls userConfiguredUrls() { return userConfiguredUrls; } + @Override protected void doPrepare(DeployState deployState) { super.doPrepare(deployState); @@ -147,7 +160,10 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat if (containers.isEmpty()) return; // Files referenced from user configs to all components. - UserConfiguredFiles files = new UserConfiguredFiles(deployState.getFileRegistry(), deployState.getDeployLogger()); + UserConfiguredFiles files = new UserConfiguredFiles(deployState.getFileRegistry(), + deployState.getDeployLogger(), + deployState.featureFlags(), + userConfiguredUrls); for (Component<?, ?> component : getAllComponents()) { files.register(component); } @@ -182,19 +198,25 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat public void setMemoryPercentage(Integer memoryPercentage) { this.memoryPercentage = memoryPercentage; } @Override - public Optional<Integer> getMemoryPercentage() { - if (memoryPercentage != null) return Optional.of(memoryPercentage); + public Optional<JvmMemoryPercentage> getMemoryPercentage() { + if (memoryPercentage != null) return Optional.of(JvmMemoryPercentage.of(memoryPercentage)); if (isHostedVespa()) { int availableMemoryPercentage = getHostClusterId().isPresent() ? heapSizePercentageOfTotalAvailableMemoryWhenCombinedCluster : heapSizePercentageOfAvailableMemory; - if (getContainers().isEmpty()) return Optional.of(availableMemoryPercentage); // Node memory is not known + if (getContainers().isEmpty()) return Optional.of(JvmMemoryPercentage.of(availableMemoryPercentage)); // Node memory is not known // Node memory is known so convert available memory percentage to node memory percentage - double totalMemory = getContainers().get(0).getHostResource().realResources().memoryGb(); - double availableMemory = totalMemory - Host.memoryOverheadGb; - return Optional.of((int) (availableMemory / totalMemory * availableMemoryPercentage)); + double totalMemory = dynamicHeapSize + ? getContainers().stream().mapToDouble(c -> c.getHostResource().realResources().memoryGb()).min().orElseThrow() + : getContainers().get(0).getHostResource().realResources().memoryGb(); + double jvmHeapDeductionGb = dynamicHeapSize ? onnxModelCost.aggregatedModelCostInBytes() / (1024D * 1024 * 1024) : 0; + double availableMemory = Math.max(0, totalMemory - Host.memoryOverheadGb - jvmHeapDeductionGb); + int memoryPercentage = (int) (availableMemory / totalMemory * availableMemoryPercentage); + logger.log(Level.FINE, () -> "memoryPercentage=%d, availableMemory=%f, totalMemory=%f, availableMemoryPercentage=%d, jvmHeapDeductionGb=%f" + .formatted(memoryPercentage, availableMemory, totalMemory, availableMemoryPercentage, jvmHeapDeductionGb)); + return Optional.of(JvmMemoryPercentage.of(memoryPercentage, availableMemory)); } return Optional.empty(); } @@ -203,49 +225,23 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat private void createEndpoints(DeployState deployState) { if (!deployState.isHosted()) return; if (deployState.getProperties().applicationId().instance().isTester()) return; + // Add endpoints provided by the controller + List<String> hosts = getContainers().stream().map(AbstractService::getHostName).sorted().toList(); List<ApplicationClusterEndpoint> endpoints = new ArrayList<>(); - - List<String> hosts = getContainers().stream() - .map(AbstractService::getHostName) - .sorted() - .toList(); - - Set<ContainerEndpoint> endpointsFromController = deployState.getEndpoints(); - // Add zone-scoped endpoints if not provided by the controller - // TODO(mpolden): Remove this when controller always includes zone-scope endpoints, and config models < 8.230 are gone - if (endpointsFromController.stream().noneMatch(endpoint -> endpoint.scope() == ApplicationClusterEndpoint.Scope.zone)) { - for (String suffix : deployState.getProperties().zoneDnsSuffixes()) { - ApplicationClusterEndpoint.DnsName l4Name = ApplicationClusterEndpoint.DnsName.sharedL4NameFrom( - deployState.zone().system(), - ClusterSpec.Id.from(getName()), - deployState.getProperties().applicationId(), - suffix); - endpoints.add(ApplicationClusterEndpoint.builder() - .zoneScope() - .sharedL4Routing() - .dnsName(l4Name) - .hosts(hosts) - .clusterId(getName()) - .authMethod(ApplicationClusterEndpoint.AuthMethod.mtls) - .build()); - } - } - - // Include all endpoints provided by controller - endpointsFromController.stream() - .filter(ce -> ce.clusterId().equals(getName())) - .forEach(ce -> ce.names().forEach( - name -> endpoints.add(ApplicationClusterEndpoint.builder() - .scope(ce.scope()) - .weight(ce.weight().orElse(1)) // Default to weight=1 if not set - .routingMethod(ce.routingMethod()) - .dnsName(ApplicationClusterEndpoint.DnsName.from(name)) - .hosts(hosts) - .clusterId(getName()) - .authMethod(ce.authMethod()) - .build()) - )); - this.endpoints = List.copyOf(endpoints); + deployState.getEndpoints().stream() + .filter(ce -> ce.clusterId().equals(getName())) + .forEach(ce -> ce.names().forEach( + name -> endpoints.add(ApplicationClusterEndpoint.builder() + .scope(ce.scope()) + .weight(ce.weight().orElse(1)) + .routingMethod(ce.routingMethod()) + .dnsName(ApplicationClusterEndpoint.DnsName.from(name)) + .hosts(hosts) + .clusterId(getName()) + .authMethod(ce.authMethod()) + .build()) + )); + this.endpoints = Collections.unmodifiableList(endpoints); } @Override @@ -299,12 +295,15 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat @Override public void getConfig(QrStartConfig.Builder builder) { super.getConfig(builder); + var memoryPct = getMemoryPercentage().orElse(null); + int heapsize = memoryPct != null && memoryPct.availableMemoryGb().isPresent() + ? (int) (memoryPct.availableMemoryGb().getAsDouble() * 1024) : 1536; builder.jvm.verbosegc(true) .availableProcessors(0) .compressedClassSpaceSize(0) - .minHeapsize(1536) - .heapsize(1536); - getMemoryPercentage().ifPresent(percentage -> builder.jvm.heapSizeAsPercentageOfPhysicalMemory(percentage)); + .minHeapsize(heapsize) + .heapsize(heapsize); + if (memoryPct != null) builder.jvm.heapSizeAsPercentageOfPhysicalMemory(memoryPct.percentage()); } @Override @@ -373,6 +372,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat @Override public String name() { return getName(); } + public OnnxModelCost.Calculator onnxModelCost() { return onnxModelCost; } + public static class MbusParams { // the amount of the maxpendingbytes to process concurrently, typically 0.2 (20%) final Double maxConcurrentFactor; @@ -390,4 +391,14 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat } } + public static class UserConfiguredUrls { + + private final Set<String> urls = new HashSet<>(); + + public void add(String url) { urls.add(url); } + + public Set<String> all() { return urls; } + + } + } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java index 6bbc24e8739..3d4ec51c8d2 100755 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java @@ -62,6 +62,7 @@ import com.yahoo.vespa.model.container.search.ContainerSearch; import com.yahoo.vespa.model.container.search.searchchain.SearchChains; import com.yahoo.vespa.model.content.Content; import com.yahoo.vespa.model.search.SearchCluster; + import java.nio.file.Path; import java.util.ArrayList; import java.util.Collection; @@ -71,6 +72,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.OptionalDouble; import java.util.Set; import java.util.TreeSet; @@ -142,7 +144,7 @@ public abstract class ContainerCluster<CONTAINER extends Container> private ContainerDocproc containerDocproc; private ContainerDocumentApi containerDocumentApi; private SecretStore secretStore; - private final ContainerThreadpool defaultHandlerThreadpool = new Handler.DefaultHandlerThreadpool(); + private final ContainerThreadpool defaultHandlerThreadpool; private boolean rpcServerEnabled = true; private boolean httpServerEnabled = true; @@ -185,6 +187,7 @@ public abstract class ContainerCluster<CONTAINER extends Container> addCommonVespaBundles(); addSimpleComponent(VoidRequestLog.class); addComponent(new DefaultThreadpoolProvider(this, defaultPoolNumThreads)); + defaultHandlerThreadpool = new Handler.DefaultHandlerThreadpool(deployState, null); addComponent(defaultHandlerThreadpool); addSimpleComponent(com.yahoo.concurrent.classlock.ClassLocking.class); addSimpleComponent("com.yahoo.container.jdisc.metric.MetricConsumerProviderProvider"); @@ -718,5 +721,11 @@ public abstract class ContainerCluster<CONTAINER extends Container> * Returns the percentage of host physical memory this application has specified for nodes in this cluster, * or empty if this is not specified by the application. */ - public Optional<Integer> getMemoryPercentage() { return Optional.empty(); } + public record JvmMemoryPercentage(int percentage, OptionalDouble availableMemoryGb) { + static JvmMemoryPercentage of(int percentage) { return new JvmMemoryPercentage(percentage, OptionalDouble.empty()); } + static JvmMemoryPercentage of(int percentage, double availableMemoryGb) { + return new JvmMemoryPercentage(percentage, OptionalDouble.of(availableMemoryGb)); + } + } + public Optional<JvmMemoryPercentage> getMemoryPercentage() { return Optional.empty(); } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java index 906ef739ef1..1b47f59653e 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java @@ -45,10 +45,6 @@ public class ContainerModelEvaluation implements private final RankProfileList rankProfileList; private final FileDistributedOnnxModels onnxModels; // For cluster specific ONNX model settings - public ContainerModelEvaluation(ApplicationContainerCluster cluster, RankProfileList rankProfileList) { - this(cluster, rankProfileList, null); - } - public ContainerModelEvaluation(ApplicationContainerCluster cluster, RankProfileList rankProfileList, FileDistributedOnnxModels onnxModels) { this.rankProfileList = Objects.requireNonNull(rankProfileList, "rankProfileList cannot be null"); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerThreadpool.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerThreadpool.java index fb4e62f5cd1..4b85c384951 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerThreadpool.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerThreadpool.java @@ -1,16 +1,17 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.model.container; +import com.yahoo.config.model.builder.xml.XmlHelper; +import com.yahoo.config.model.deploy.DeployState; import com.yahoo.container.bundle.BundleInstantiationSpecification; import com.yahoo.container.handler.threadpool.ContainerThreadPool; import com.yahoo.container.handler.threadpool.ContainerThreadpoolConfig; import com.yahoo.container.handler.threadpool.ContainerThreadpoolImpl; import com.yahoo.osgi.provider.model.ComponentModel; -import com.yahoo.text.XML; import com.yahoo.vespa.model.container.component.SimpleComponent; import org.w3c.dom.Element; -import java.util.Optional; +import java.util.logging.Level; /** * Component definition for a {@link java.util.concurrent.Executor} using {@link ContainerThreadPool}. @@ -20,18 +21,50 @@ import java.util.Optional; public abstract class ContainerThreadpool extends SimpleComponent implements ContainerThreadpoolConfig.Producer { private final String name; - private final UserOptions userOptions; + private final UserOptions options; - public ContainerThreadpool(String name, UserOptions userOptions) { + record UserOptions(Double max, Double min, Double queue){} + + protected ContainerThreadpool(DeployState ds, String name, Element parent) { super(new ComponentModel( BundleInstantiationSpecification.fromStrings( "threadpool@" + name, ContainerThreadpoolImpl.class.getName(), null))); this.name = name; - this.userOptions = userOptions; + var threadpoolElem = XmlHelper.getOptionalChild(parent, "threadpool").orElse(null); + if (threadpoolElem == null) options = new UserOptions(null, null, null); + else { + // TODO Vespa 9 Remove min-threads, max-threads and queue-size + Double max = null; + Double min = null; + Double queue = null; + var minElem = XmlHelper.getOptionalChild(threadpoolElem, "min-threads").orElse(null); + if (minElem != null) ds.getDeployLogger().logApplicationPackage(Level.WARNING, "For <threadpool>: <min-threads> is deprecated, use <threads> instead"); + var maxElem = XmlHelper.getOptionalChild(threadpoolElem, "max-threads").orElse(null); + if (maxElem != null) ds.getDeployLogger().logApplicationPackage(Level.WARNING, "For <threadpool>: <max-threads> is deprecated, use <threads> with 'boost' instead"); + var queueElem = XmlHelper.getOptionalChild(threadpoolElem, "queue").orElse(null); + var queueSizeElem = XmlHelper.getOptionalChild(threadpoolElem, "queue-size").orElse(null); + if (queueSizeElem != null) ds.getDeployLogger().logApplicationPackage(Level.WARNING, "For <threadpool>: <queue-size> is deprecated, use <queue> instead"); + var threadsElem = XmlHelper.getOptionalChild(threadpoolElem, "threads").orElse(null); + if (threadsElem != null) { + min = parseMultiplier(threadsElem.getTextContent()); + max = threadsElem.hasAttribute("boost") ? parseMultiplier(threadsElem.getAttribute("boost")) : min; + } else if (minElem != null) { + min = parseFixed(minElem.getTextContent()); + } + if (max == null && maxElem != null) { + max = parseFixed(maxElem.getTextContent()); + } + if (queueElem != null) queue = parseMultiplier(queueElem.getTextContent()); + else if (queueSizeElem != null) queue = parseFixed(queueSizeElem.getTextContent()); + options = new UserOptions(max, min, queue); + } } + private static Double parseMultiplier(String text) { return -parseFixed(text); } + private static Double parseFixed(String text) { return Double.parseDouble(text); } + // Must be implemented by subclasses to set values that may be overridden by user options. protected abstract void setDefaultConfigValues(ContainerThreadpoolConfig.Builder builder); @@ -40,35 +73,20 @@ public abstract class ContainerThreadpool extends SimpleComponent implements Con setDefaultConfigValues(builder); builder.name(this.name); - if (userOptions != null) { - builder.maxThreads(userOptions.maxThreads); - builder.minThreads(userOptions.minThreads); - builder.queueSize(userOptions.queueSize); + if (options.max() != null) { + int max = (int) Math.round(options.max()); + if (options.max() != 0 && max == 0) max = options.max() > 0 ? 1 : -1; + builder.maxThreads(max); } - } - - public static class UserOptions { - private final int maxThreads; - private final int minThreads; - private final int queueSize; - - private UserOptions(int maxThreads, int minThreads, int queueSize) { - this.maxThreads = maxThreads; - this.minThreads = minThreads; - this.queueSize = queueSize; - } - - public static Optional<UserOptions> fromXml(Element xml) { - Element element = XML.getChild(xml, "threadpool"); - if (element == null) return Optional.empty(); - return Optional.of(new UserOptions( - intOption(element, "max-threads"), - intOption(element, "min-threads"), - intOption(element, "queue-size"))); + if (options.min() != null) { + int min = (int) Math.round(options.min()); + if (options.min() != 0 && min == 0) min = options.min() > 0 ? 1 : -1; + builder.minThreads(min); } - - private static int intOption(Element element, String name) { - return Integer.parseInt(XML.getChild(element, name).getTextContent()); + if (options.queue() != null) { + int queue = (int) Math.round(options.queue()); + if (options.queue() != 0 && queue == 0) queue = options.queue() > 0 ? 1 : -1; + builder.queueSize(queue); } } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java index 205848e1b67..d02b7d0de5f 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java @@ -5,10 +5,9 @@ package com.yahoo.vespa.model.container.component; import com.yahoo.config.ModelReference; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.embedding.BertBaseEmbedderConfig; -import com.yahoo.vespa.model.container.xml.ModelIdResolver; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; import org.w3c.dom.Element; -import static com.yahoo.text.XML.getChild; import static com.yahoo.text.XML.getChildValue; import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME; @@ -17,8 +16,8 @@ import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATI */ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConfig.Producer { - private final ModelReference model; - private final ModelReference vocab; + private final ModelReference modelRef; + private final ModelReference vocabRef; private final Integer maxTokens; private final String transformerInputIds; private final String transformerAttentionMask; @@ -33,10 +32,11 @@ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConf private final Integer onnxGpuDevice; - public BertEmbedder(Element xml, DeployState state) { + public BertEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) { super("ai.vespa.embedding.BertBaseEmbedder", INTEGRATION_BUNDLE_NAME, xml); - model = ModelIdResolver.resolveToModelReference(getChild(xml, "transformer-model"), state); - vocab = ModelIdResolver.resolveToModelReference(getChild(xml, "tokenizer-vocab"), state); + var model = Model.fromXml(state, xml, "transformer-model").orElseThrow(); + modelRef = model.modelReference(); + vocabRef = Model.fromXml(state, xml, "tokenizer-vocab").orElseThrow().modelReference(); maxTokens = getChildValue(xml, "max-tokens").map(Integer::parseInt).orElse(null); transformerInputIds = getChildValue(xml, "transformer-input-ids").orElse(null); transformerAttentionMask = getChildValue(xml, "transformer-attention-mask").orElse(null); @@ -49,11 +49,12 @@ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConf onnxInteropThreads = getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null); onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null); onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null); + model.registerOnnxModelCost(cluster); } @Override public void getConfig(BertBaseEmbedderConfig.Builder b) { - b.transformerModel(model).tokenizerVocab(vocab); + b.transformerModel(modelRef).tokenizerVocab(vocabRef); if (maxTokens != null) b.transformerMaxTokens(maxTokens); if (transformerInputIds != null) b.transformerInputIds(transformerInputIds); if (transformerAttentionMask != null) b.transformerAttentionMask(transformerAttentionMask); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java index c0fdfe3dc64..66e3b1c9dfd 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java @@ -5,13 +5,9 @@ package com.yahoo.vespa.model.container.component; import com.yahoo.config.ModelReference; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.embedding.ColBertEmbedderConfig; -import com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig; -import com.yahoo.vespa.model.container.xml.ModelIdResolver; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; import org.w3c.dom.Element; -import java.util.Optional; - -import static com.yahoo.config.model.builder.xml.XmlHelper.getOptionalChild; import static com.yahoo.text.XML.getChildValue; import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME; @@ -20,8 +16,8 @@ import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATI * @author bergum */ public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderConfig.Producer { - private final ModelReference model; - private final ModelReference vocab; + private final ModelReference modelRef; + private final ModelReference vocabRef; private final Integer maxQueryTokens; @@ -40,13 +36,13 @@ public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderCo private final Integer onnxIntraopThreads; private final Integer onnxGpuDevice; - public ColBertEmbedder(Element xml, DeployState state) { + public ColBertEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) { super("ai.vespa.embedding.ColBertEmbedder", INTEGRATION_BUNDLE_NAME, xml); - var transformerModelElem = getOptionalChild(xml, "transformer-model").orElseThrow(); - model = ModelIdResolver.resolveToModelReference(transformerModelElem, state); - vocab = getOptionalChild(xml, "tokenizer-model") - .map(elem -> ModelIdResolver.resolveToModelReference(elem, state)) - .orElseGet(() -> resolveDefaultVocab(transformerModelElem, state)); + var model = Model.fromXml(state, xml, "transformer-model").orElseThrow(); + modelRef = model.modelReference(); + vocabRef = Model.fromXml(state, xml, "tokenizer-model") + .map(Model::modelReference) + .orElseGet(() -> resolveDefaultVocab(model, state)); maxTokens = getChildValue(xml, "max-tokens").map(Integer::parseInt).orElse(null); maxQueryTokens = getChildValue(xml, "max-query-tokens").map(Integer::parseInt).orElse(null); maxDocumentTokens = getChildValue(xml, "max-document-tokens").map(Integer::parseInt).orElse(null); @@ -60,21 +56,20 @@ public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderCo onnxInteropThreads = getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null); onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null); onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null); - + model.registerOnnxModelCost(cluster); } - private static ModelReference resolveDefaultVocab(Element model, DeployState state) { - if (state.isHosted() && model.hasAttribute("model-id")) { - var implicitVocabId = model.getAttribute("model-id") + "-vocab"; - return ModelIdResolver.resolveToModelReference( - "tokenizer-model", Optional.of(implicitVocabId), Optional.empty(), Optional.empty(), state); + private static ModelReference resolveDefaultVocab(Model model, DeployState state) { + var modelId = model.modelId().orElse(null); + if (state.isHosted() && modelId != null) { + return Model.fromParams(state, model.name(), modelId + "-vocab", null, null).modelReference(); } throw new IllegalArgumentException("'tokenizer-model' must be specified"); } @Override public void getConfig(ColBertEmbedderConfig.Builder b) { - b.transformerModel(model).tokenizerPath(vocab); + b.transformerModel(modelRef).tokenizerPath(vocabRef); if (maxTokens != null) b.transformerMaxTokens(maxTokens); if (transformerInputIds != null) b.transformerInputIds(transformerInputIds); if (transformerAttentionMask != null) b.transformerAttentionMask(transformerAttentionMask); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/Handler.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/Handler.java index 31031aa5bf2..969db6553e6 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/Handler.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/Handler.java @@ -1,9 +1,11 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.model.container.component; +import com.yahoo.config.model.deploy.DeployState; import com.yahoo.container.handler.threadpool.ContainerThreadpoolConfig; import com.yahoo.osgi.provider.model.ComponentModel; import com.yahoo.vespa.model.container.ContainerThreadpool; +import org.w3c.dom.Element; import java.util.ArrayList; import java.util.Arrays; @@ -76,8 +78,8 @@ public class Handler extends Component<Component<?, ?>, ComponentModel> { */ public static class DefaultHandlerThreadpool extends ContainerThreadpool { - public DefaultHandlerThreadpool() { - super("default-handler-common", null); + public DefaultHandlerThreadpool(DeployState ds, Element options) { + super(ds, "default-handler-common", options); } @Override diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java index f4017339699..af47bee137a 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java @@ -5,12 +5,9 @@ package com.yahoo.vespa.model.container.component; import com.yahoo.config.ModelReference; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig; -import com.yahoo.vespa.model.container.xml.ModelIdResolver; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; import org.w3c.dom.Element; -import java.util.Optional; - -import static com.yahoo.config.model.builder.xml.XmlHelper.getOptionalChild; import static com.yahoo.text.XML.getChildValue; import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME; @@ -19,8 +16,8 @@ import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATI * @author bjorncs */ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEmbedderConfig.Producer { - private final ModelReference model; - private final ModelReference vocab; + private final ModelReference modelRef; + private final ModelReference vocabRef; private final Integer maxTokens; private final String transformerInputIds; private final String transformerAttentionMask; @@ -33,13 +30,13 @@ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEm private final Integer onnxGpuDevice; private final String poolingStrategy; - public HuggingFaceEmbedder(Element xml, DeployState state) { + public HuggingFaceEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) { super("ai.vespa.embedding.huggingface.HuggingFaceEmbedder", INTEGRATION_BUNDLE_NAME, xml); - var transformerModelElem = getOptionalChild(xml, "transformer-model").orElseThrow(); - model = ModelIdResolver.resolveToModelReference(transformerModelElem, state); - vocab = getOptionalChild(xml, "tokenizer-model") - .map(elem -> ModelIdResolver.resolveToModelReference(elem, state)) - .orElseGet(() -> resolveDefaultVocab(transformerModelElem, state)); + var model = Model.fromXml(state, xml, "transformer-model").orElseThrow(); + modelRef = model.modelReference(); + vocabRef = Model.fromXml(state, xml, "tokenizer-model") + .map(Model::modelReference) + .orElseGet(() -> resolveDefaultVocab(model, state)); maxTokens = getChildValue(xml, "max-tokens").map(Integer::parseInt).orElse(null); transformerInputIds = getChildValue(xml, "transformer-input-ids").orElse(null); transformerAttentionMask = getChildValue(xml, "transformer-attention-mask").orElse(null); @@ -51,20 +48,20 @@ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEm onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null); onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null); poolingStrategy = getChildValue(xml, "pooling-strategy").orElse(null); + model.registerOnnxModelCost(cluster); } - private static ModelReference resolveDefaultVocab(Element model, DeployState state) { - if (state.isHosted() && model.hasAttribute("model-id")) { - var implicitVocabId = model.getAttribute("model-id") + "-vocab"; - return ModelIdResolver.resolveToModelReference( - "tokenizer-model", Optional.of(implicitVocabId), Optional.empty(), Optional.empty(), state); + private static ModelReference resolveDefaultVocab(Model model, DeployState state) { + var modelId = model.modelId().orElse(null); + if (state.isHosted() && modelId != null) { + return Model.fromParams(state, model.name(), modelId + "-vocab", null, null).modelReference(); } throw new IllegalArgumentException("'tokenizer-model' must be specified"); } @Override public void getConfig(HuggingFaceEmbedderConfig.Builder b) { - b.transformerModel(model).tokenizerPath(vocab); + b.transformerModel(modelRef).tokenizerPath(vocabRef); if (maxTokens != null) b.transformerMaxTokens(maxTokens); if (transformerInputIds != null) b.transformerInputIds(transformerInputIds); if (transformerAttentionMask != null) b.transformerAttentionMask(transformerAttentionMask); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java index 0bf5491e872..e9ac93caa68 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java @@ -7,7 +7,6 @@ import com.yahoo.language.huggingface.config.HuggingFaceTokenizerConfig; import com.yahoo.language.huggingface.config.HuggingFaceTokenizerConfig.Padding; import com.yahoo.language.huggingface.config.HuggingFaceTokenizerConfig.Truncation; import com.yahoo.text.XML; -import com.yahoo.vespa.model.container.xml.ModelIdResolver; import org.w3c.dom.Element; import java.util.Map; @@ -26,7 +25,7 @@ public class HuggingFaceTokenizer extends TypedComponent implements HuggingFaceT super("com.yahoo.language.huggingface.HuggingFaceTokenizer", LINGUISTICS_BUNDLE_NAME, xml); for (Element element : XML.getChildren(xml, "model")) { var lang = element.hasAttribute("language") ? element.getAttribute("language") : "unknown"; - langToModel.put(lang, ModelIdResolver.resolveToModelReference(element, state)); + langToModel.put(lang, Model.fromXml(state, element).modelReference()); } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/Model.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/Model.java new file mode 100644 index 00000000000..76d93c38aee --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/Model.java @@ -0,0 +1,69 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.model.container.component; + +import com.yahoo.config.ModelReference; +import com.yahoo.config.application.api.ApplicationFile; +import com.yahoo.config.model.builder.xml.XmlHelper; +import com.yahoo.config.model.deploy.DeployState; +import com.yahoo.path.Path; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; +import com.yahoo.vespa.model.container.xml.ModelIdResolver; +import org.w3c.dom.Element; + +import java.net.URI; +import java.util.Objects; +import java.util.Optional; + +/** + * Represents a model, e.g ONNX model for an embedder. + * + * @author bjorncs + */ +class Model { + private final String paramName; + private final String modelId; + private final URI url; + private final ApplicationFile file; + private final ModelReference ref; + + private Model(DeployState ds, String paramName, String modelId, URI url, Path file) { + this.paramName = Objects.requireNonNull(paramName); + if (modelId == null && url == null && file == null) + throw new IllegalArgumentException("At least one of 'model-id', 'url' or 'path' must be specified"); + this.modelId = modelId; + this.url = url; + this.file = file != null ? ds.getApplicationPackage().getFile(file) : null; + this.ref = ModelIdResolver.resolveToModelReference( + paramName, Optional.ofNullable(modelId), Optional.ofNullable(url).map(URI::toString), + Optional.ofNullable(file).map(Path::toString), ds); + } + + static Model fromParams(DeployState ds, String paramName, String modelId, URI url, Path file) { + return new Model(ds, paramName, modelId, url, file); + } + + static Optional<Model> fromXml(DeployState ds, Element parent, String name) { + return XmlHelper.getOptionalChild(parent, name).map(e -> fromXml(ds, e)); + } + + static Model fromXml(DeployState ds, Element model) { + var modelId = XmlHelper.getOptionalAttribute(model, "model-id").orElse(null); + var url = XmlHelper.getOptionalAttribute(model, "url").map(URI::create).orElse(null); + var path = XmlHelper.getOptionalAttribute(model, "path").map(Path::fromString).orElse(null); + return new Model(ds, model.getTagName(), modelId, url, path); + } + + void registerOnnxModelCost(ApplicationContainerCluster c) { + var resolvedUrl = resolvedUrl().orElse(null); + if (file != null) c.onnxModelCost().registerModel(file); + else if (resolvedUrl != null) c.onnxModelCost().registerModel(resolvedUrl); + } + + String name() { return paramName; } + Optional<String> modelId() { return Optional.ofNullable(modelId); } + Optional<URI> url() { return Optional.ofNullable(url); } + Optional<URI> resolvedUrl() { return ref.url().map(u -> URI.create(u.value())); } + Optional<ApplicationFile> file() { return Optional.ofNullable(file); } + ModelReference modelReference() { return ref; } +} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/chain/ChainedComponent.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/chain/ChainedComponent.java index c0431d01784..2354298779d 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/chain/ChainedComponent.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/chain/ChainedComponent.java @@ -29,8 +29,6 @@ public class ChainedComponent<T extends ChainedComponentModel> extends Component private ComponentId namespace() { var owner = getParent().getParent(); - return (owner instanceof Chain) ? - ((Chain) owner).getGlobalComponentId() : - null; + return (owner instanceof Chain<?> chain) ? chain.getGlobalComponentId() : null; } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java index f0296d49472..3261d454b4f 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.model.container.search; +import com.yahoo.config.application.api.ApplicationPackage; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.container.QrSearchersConfig; import com.yahoo.prelude.semantics.SemanticRulesConfig; @@ -56,12 +57,14 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains> private QueryProfiles queryProfiles; private SemanticRules semanticRules; private PageTemplates pageTemplates; + private ApplicationPackage app; public ContainerSearch(DeployState deployState, ApplicationContainerCluster cluster, SearchChains chains) { super(chains); this.globalPhase = deployState.featureFlags().enableGlobalPhase(); this.useReconfigurableDispatcher = deployState.featureFlags().useReconfigurableDispatcher(); this.schemasWithGlobalPhase = getSchemasWithGlobalPhase(deployState); + this.app = deployState.getApplicationPackage(); this.owningCluster = cluster; owningCluster.addComponent(Component.fromClassAndBundle(CompiledQueryProfileRegistry.class, SEARCH_AND_DOCPROC_BUNDLE)); @@ -96,6 +99,9 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains> if ( ! schemasWithGlobalPhase.contains(documentDb.getSchemaName())) continue; var factory = new RankProfilesEvaluatorComponent(documentDb); if ( ! owningCluster.getComponentsMap().containsKey(factory.getComponentId())) { + var onnxModels = documentDb.getDerivedConfiguration().getRankProfileList().getOnnxModels(); + onnxModels.asMap().forEach( + (__, model) -> owningCluster.onnxModelCost().registerModel(app.getFile(model.getFilePath()))); owningCluster.addComponent(factory); } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java index 35b0213bf59..1874b5fa19a 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java @@ -39,6 +39,8 @@ import com.yahoo.container.jdisc.DataplaneProxyService; import com.yahoo.container.logging.AccessLog; import com.yahoo.container.logging.FileConnectionLog; import com.yahoo.io.IOUtils; +import com.yahoo.jdisc.http.filter.security.cloud.config.CloudTokenDataPlaneFilterConfig; +import com.yahoo.jdisc.http.filter.security.cloud.config.CloudTokenDataPlaneFilterConfig.Builder; import com.yahoo.jdisc.http.server.jetty.DataplaneProxyCredentials; import com.yahoo.jdisc.http.server.jetty.VoidRequestLog; import com.yahoo.osgi.provider.model.ComponentModel; @@ -68,7 +70,6 @@ import com.yahoo.vespa.model.container.Container; import com.yahoo.vespa.model.container.ContainerCluster; import com.yahoo.vespa.model.container.ContainerModel; import com.yahoo.vespa.model.container.ContainerModelEvaluation; -import com.yahoo.vespa.model.container.ContainerThreadpool; import com.yahoo.vespa.model.container.DataplaneProxy; import com.yahoo.vespa.model.container.IdentityProvider; import com.yahoo.vespa.model.container.PlatformBundles; @@ -240,10 +241,9 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { private void addParameterStoreValidationHandler(ApplicationContainerCluster cluster, DeployState deployState) { + if ( ! deployState.isHosted()) return; // Always add platform bundle. Cannot be controlled by a feature flag as platform bundle cannot change. - if(deployState.isHosted()) { - cluster.addPlatformBundle(PlatformBundles.absoluteBundlePath("jdisc-cloud-aws")); - } + cluster.addPlatformBundle(PlatformBundles.absoluteBundlePath("jdisc-cloud-aws")); if (deployState.zone().system().isPublic()) { BindingPattern bindingPattern = SystemBindingPattern.fromHttpPath("/validate-secret-store"); Handler handler = new Handler( @@ -459,7 +459,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { private static void addCloudDataPlaneFilter(DeployState deployState, ApplicationContainerCluster cluster) { if (!deployState.isHosted() || !deployState.zone().system().isPublic()) return; - var dataplanePort = getMtlsDataplanePort(deployState, cluster); + var dataplanePort = getMtlsDataplanePort(deployState); // Setup secure filter chain var secureChain = new HttpFilterChain("cloud-data-plane-secure", HttpFilterChain.Type.SYSTEM); secureChain.addInnerComponent(new CloudDataPlaneFilter(cluster, deployState)); @@ -594,7 +594,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { String serverName = server.getComponentId().getName(); // If the deployment contains certificate/private key reference, setup TLS port - var builder = HostedSslConnectorFactory.builder(serverName, getMtlsDataplanePort(state, cluster)) + var builder = HostedSslConnectorFactory.builder(serverName, getMtlsDataplanePort(state)) .proxyProtocol(true, state.getProperties().featureFlags().enableProxyProtocolMixedMode()) .tlsCiphersOverride(state.getProperties().tlsCiphersOverride()) .endpointConnectionTtl(state.getProperties().endpointConnectionTtl()); @@ -627,19 +627,19 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { private void addCloudTokenSupport(DeployState state, ApplicationContainerCluster cluster) { var server = cluster.getHttp().getHttpServer().get(); - if (!enableTokenSupport(state, cluster)) return; + if (!enableTokenSupport(state)) return; Set<String> tokenEndpoints = tokenEndpoints(state).stream() .map(ContainerEndpoint::names) .flatMap(Collection::stream) .collect(Collectors.toSet()); var endpointCert = state.endpointCertificateSecrets().orElseThrow(); - int tokenPort = getTokenDataplanePort(state, cluster).orElseThrow(); + int tokenPort = getTokenDataplanePort(state).orElseThrow(); // Set up component to generate proxy cert if token support is enabled cluster.addSimpleComponent(DataplaneProxyCredentials.class); cluster.addSimpleComponent(DataplaneProxyService.class); var dataplaneProxy = new DataplaneProxy( - getMtlsDataplanePort(state, cluster), + getMtlsDataplanePort(state), tokenPort, endpointCert.certificate(), endpointCert.key(), @@ -659,13 +659,24 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { // Setup token filter chain var tokenChain = new HttpFilterChain("cloud-token-data-plane-secure", HttpFilterChain.Type.SYSTEM); - tokenChain.addInnerComponent(new CloudTokenDataPlaneFilter(cluster, state)); + var tokenFilter = new CloudTokenDataPlaneFilter(cluster, state); + tokenChain.addInnerComponent(tokenFilter); cluster.getHttp().getFilterChains().add(tokenChain); // Set as default filter for token port cluster.getHttp().getHttpServer().orElseThrow().getConnectorFactories().stream() .filter(c -> c.getListenPort() == tokenPort).findAny().orElseThrow() .setDefaultRequestFilterChain(tokenChain.getComponentId()); + + // Set up handler that tells what fingerprints are known to the container + class CloudTokenDataPlaneHandler extends Handler implements CloudTokenDataPlaneFilterConfig.Producer { + CloudTokenDataPlaneHandler() { + super(new ComponentModel("com.yahoo.jdisc.http.filter.security.cloud.CloudTokenDataPlaneHandler", null, "jdisc-security-filters", null)); + addServerBindings(SystemBindingPattern.fromHttpPortAndPath(Defaults.getDefaults().vespaWebServicePort(), "/data-plane-tokens/v1")); + } + @Override public void getConfig(Builder builder) { tokenFilter.getConfig(builder); } + } + cluster.addComponent(new CloudTokenDataPlaneHandler()); } // Returns the client certificates of the clients defined for an application cluster @@ -710,7 +721,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { } private Http buildHttp(DeployState deployState, ApplicationContainerCluster cluster, Element httpElement, ConfigModelContext context) { - Http http = new HttpBuilder(portBindingOverride(deployState, context, cluster)).build(deployState, cluster, httpElement); + Http http = new HttpBuilder(portBindingOverride(deployState, context)).build(deployState, cluster, httpElement); if (networking == Networking.disable) http.removeAllServers(); @@ -778,6 +789,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { !container.getHostResource().realResources().gpuResources().isZero()); onnxModel.setGpuDevice(gpuDevice, hasGpu); } + cluster.onnxModelCost().registerModel(context.getApplicationPackage().getFile(onnxModel.getFilePath())); } cluster.setModelEvaluation(new ContainerModelEvaluation(cluster, profiles, models)); @@ -815,7 +827,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { cluster.addSearchAndDocprocBundles(); addIncludes(processingElement); cluster.setProcessingChains(new DomProcessingBuilder(null).build(deployState, cluster, processingElement), - serverBindings(deployState, context, processingElement, ProcessingChains.defaultBindings, cluster).toArray(BindingPattern[]::new)); + serverBindings(deployState, context, processingElement, ProcessingChains.defaultBindings).toArray(BindingPattern[]::new)); validateAndAddConfiguredComponents(deployState, cluster, processingElement, "renderer", ContainerModelBuilder::validateRendererElement); } @@ -840,7 +852,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { private void addUserHandlers(DeployState deployState, ApplicationContainerCluster cluster, Element spec, ConfigModelContext context) { for (Element component: XML.getChildren(spec, "handler")) { cluster.addComponent( - new DomHandlerBuilder(cluster, portBindingOverride(deployState, context, cluster)).build(deployState, cluster, component)); + new DomHandlerBuilder(cluster, portBindingOverride(deployState, context)).build(deployState, cluster, component)); } } @@ -1128,28 +1140,28 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { private void addSearchHandler(DeployState deployState, ApplicationContainerCluster cluster, Element searchElement, ConfigModelContext context) { var bindingPatterns = List.<BindingPattern>of(SearchHandler.DEFAULT_BINDING); if (isHostedTenantApplication(context)) { - bindingPatterns = SearchHandler.bindingPattern(getDataplanePorts(deployState, cluster)); + bindingPatterns = SearchHandler.bindingPattern(getDataplanePorts(deployState)); } - SearchHandler searchHandler = new SearchHandler(cluster, - serverBindings(deployState, context, searchElement, bindingPatterns, cluster), - ContainerThreadpool.UserOptions.fromXml(searchElement).orElse(null)); + SearchHandler searchHandler = new SearchHandler(deployState, cluster, + serverBindings(deployState, context, searchElement, bindingPatterns), + searchElement); cluster.addComponent(searchHandler); // Add as child to SearchHandler to get the correct chains config. searchHandler.addComponent(Component.fromClassAndBundle(SearchHandler.EXECUTION_FACTORY, PlatformBundles.SEARCH_AND_DOCPROC_BUNDLE)); } - private List<BindingPattern> serverBindings(DeployState deployState, ConfigModelContext context, Element searchElement, Collection<BindingPattern> defaultBindings, ApplicationContainerCluster cluster) { + private List<BindingPattern> serverBindings(DeployState deployState, ConfigModelContext context, Element searchElement, Collection<BindingPattern> defaultBindings) { List<Element> bindings = XML.getChildren(searchElement, "binding"); if (bindings.isEmpty()) return List.copyOf(defaultBindings); - return toBindingList(deployState, context, bindings, cluster); + return toBindingList(deployState, context, bindings); } - private List<BindingPattern> toBindingList(DeployState deployState, ConfigModelContext context, List<Element> bindingElements, ApplicationContainerCluster cluster) { + private List<BindingPattern> toBindingList(DeployState deployState, ConfigModelContext context, List<Element> bindingElements) { List<BindingPattern> result = new ArrayList<>(); - var portOverride = isHostedTenantApplication(context) ? getDataplanePorts(deployState, cluster) : Set.<Integer>of(); + var portOverride = isHostedTenantApplication(context) ? getDataplanePorts(deployState) : Set.<Integer>of(); for (Element element: bindingElements) { String text = element.getTextContent().trim(); if (!text.isEmpty()) @@ -1173,13 +1185,13 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { ContainerDocumentApi.HandlerOptions documentApiOptions = DocumentApiOptionsBuilder.build(documentApiElement); Element ignoreUndefinedFields = XML.getChild(documentApiElement, "ignore-undefined-fields"); - return new ContainerDocumentApi(cluster, documentApiOptions, - "true".equals(XML.getValue(ignoreUndefinedFields)), portBindingOverride(deployState, context, cluster)); + return new ContainerDocumentApi(deployState, cluster, documentApiOptions, + "true".equals(XML.getValue(ignoreUndefinedFields)), portBindingOverride(deployState, context)); } - private Set<Integer> portBindingOverride(DeployState deployState, ConfigModelContext context, ApplicationContainerCluster cluster) { + private Set<Integer> portBindingOverride(DeployState deployState, ConfigModelContext context) { return isHostedTenantApplication(context) - ? getDataplanePorts(deployState, cluster) + ? getDataplanePorts(deployState) : Set.<Integer>of(); } @@ -1438,18 +1450,18 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { } - private static Set<Integer> getDataplanePorts(DeployState ds, ApplicationContainerCluster cluster) { - var tokenPort = getTokenDataplanePort(ds, cluster); - var mtlsPort = getMtlsDataplanePort(ds, cluster); + private static Set<Integer> getDataplanePorts(DeployState ds) { + var tokenPort = getTokenDataplanePort(ds); + var mtlsPort = getMtlsDataplanePort(ds); return tokenPort.isPresent() ? Set.of(mtlsPort, tokenPort.getAsInt()) : Set.of(mtlsPort); } - private static int getMtlsDataplanePort(DeployState ds, ApplicationContainerCluster cluster) { - return enableTokenSupport(ds, cluster) ? 8443 : 4443; + private static int getMtlsDataplanePort(DeployState ds) { + return enableTokenSupport(ds) ? 8443 : 4443; } - private static OptionalInt getTokenDataplanePort(DeployState ds, ApplicationContainerCluster cluster) { - return enableTokenSupport(ds, cluster) ? OptionalInt.of(8444) : OptionalInt.empty(); + private static OptionalInt getTokenDataplanePort(DeployState ds) { + return enableTokenSupport(ds) ? OptionalInt.of(8444) : OptionalInt.empty(); } private static Set<ContainerEndpoint> tokenEndpoints(DeployState deployState) { @@ -1458,7 +1470,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { .collect(Collectors.toSet()); } - private static boolean enableTokenSupport(DeployState state, ApplicationContainerCluster cluster) { + private static boolean enableTokenSupport(DeployState state) { Set<ContainerEndpoint> tokenEndpoints = tokenEndpoints(state); return state.isHosted() && state.zone().system().isPublic() && ! tokenEndpoints.isEmpty(); } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/DocumentApiOptionsBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/DocumentApiOptionsBuilder.java index bb1d0af1db9..cdbe62720b9 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/DocumentApiOptionsBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/DocumentApiOptionsBuilder.java @@ -3,7 +3,6 @@ package com.yahoo.vespa.model.container.xml; import com.yahoo.text.XML; import com.yahoo.vespa.model.clients.ContainerDocumentApi; -import com.yahoo.vespa.model.container.ContainerThreadpool; import org.w3c.dom.Element; import java.util.ArrayList; @@ -19,13 +18,7 @@ public class DocumentApiOptionsBuilder { private static final Logger log = Logger.getLogger(DocumentApiOptionsBuilder.class.getName()); public static ContainerDocumentApi.HandlerOptions build(Element spec) { - return new ContainerDocumentApi.HandlerOptions(getBindings(spec), threadpoolOptions(spec, "http-client-api")); - } - - private static ContainerThreadpool.UserOptions threadpoolOptions(Element spec, String elementName) { - Element element = XML.getChild(spec, elementName); - if (element == null) return null; - return ContainerThreadpool.UserOptions.fromXml(element).orElse(null); + return new ContainerDocumentApi.HandlerOptions(getBindings(spec), XML.getChild(spec, "http-client-api")); } private static List<String> getBindings(Element spec) { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java index be3ca0b8aa9..14216dd8855 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java @@ -3,7 +3,6 @@ package com.yahoo.vespa.model.container.xml; import com.yahoo.config.ModelReference; import com.yahoo.config.UrlReference; -import com.yahoo.config.model.builder.xml.XmlHelper; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.text.XML; import org.w3c.dom.Element; @@ -88,13 +87,6 @@ public class ModelIdResolver { } } - - public static ModelReference resolveToModelReference(Element elem, DeployState state) { - return resolveToModelReference( - elem.getTagName(), XmlHelper.getOptionalAttribute(elem, "model-id"), - XmlHelper.getOptionalAttribute(elem, "url"), XmlHelper.getOptionalAttribute(elem, "path"), state); - } - public static ModelReference resolveToModelReference( String paramName, Optional<String> id, Optional<String> url, Optional<String> path, DeployState state) { if (id.isEmpty()) return createModelReference(Optional.empty(), url, path, state); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/SearchHandler.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/SearchHandler.java index 6cfef153fee..3cd296c1469 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/SearchHandler.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/SearchHandler.java @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.model.container.xml; +import com.yahoo.config.model.deploy.DeployState; import com.yahoo.container.bundle.BundleInstantiationSpecification; import com.yahoo.container.handler.threadpool.ContainerThreadpoolConfig; import com.yahoo.vespa.model.container.ApplicationContainerCluster; @@ -9,6 +10,7 @@ import com.yahoo.vespa.model.container.component.BindingPattern; import com.yahoo.vespa.model.container.component.SystemBindingPattern; import com.yahoo.vespa.model.container.component.chain.ProcessingHandler; import com.yahoo.vespa.model.container.search.searchchain.SearchChains; +import org.w3c.dom.Element; import java.util.Collection; import java.util.List; @@ -30,10 +32,11 @@ class SearchHandler extends ProcessingHandler<SearchChains> { static final BundleInstantiationSpecification HANDLER_SPEC = fromSearchAndDocproc(HANDLER_CLASSNAME); static final BindingPattern DEFAULT_BINDING = SystemBindingPattern.fromHttpPath("/search/*"); - SearchHandler(ApplicationContainerCluster cluster, + SearchHandler(DeployState ds, + ApplicationContainerCluster cluster, List<BindingPattern> bindings, - ContainerThreadpool.UserOptions threadpoolOptions) { - super(cluster.getSearchChains(), HANDLER_SPEC, new Threadpool(threadpoolOptions)); + Element threadpoolOptions) { + super(cluster.getSearchChains(), HANDLER_SPEC, new Threadpool(ds, threadpoolOptions)); bindings.forEach(this::addServerBindings); } @@ -46,8 +49,8 @@ class SearchHandler extends ProcessingHandler<SearchChains> { private static class Threadpool extends ContainerThreadpool { - Threadpool(UserOptions options) { - super("search-handler", options); + Threadpool(DeployState ds, Element options) { + super(ds, "search-handler", options); } @Override diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java index bb72eda7d04..d18309ef0af 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java @@ -256,7 +256,7 @@ public class ContentCluster extends TreeConfigProducer<AnyConfigProducer> implem for (ContainerModel containerModel : containers) { Optional<String> hostClusterId = containerModel.getCluster().getHostClusterId(); if (hostClusterId.isPresent() && hostClusterId.get().equals(clusterId) && containerModel.getCluster().getMemoryPercentage().isPresent()) { - return containerModel.getCluster().getMemoryPercentage().get() * 0.01; + return containerModel.getCluster().getMemoryPercentage().get().percentage() * 0.01; } } return 0.0; diff --git a/config-model/src/main/java/com/yahoo/vespa/model/filedistribution/UserConfiguredFiles.java b/config-model/src/main/java/com/yahoo/vespa/model/filedistribution/UserConfiguredFiles.java index 8bed5e64bf5..8352a011b88 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/filedistribution/UserConfiguredFiles.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/filedistribution/UserConfiguredFiles.java @@ -5,12 +5,14 @@ import com.yahoo.config.FileReference; import com.yahoo.config.ModelReference; import com.yahoo.config.application.api.DeployLogger; import com.yahoo.config.application.api.FileRegistry; +import com.yahoo.config.model.api.ModelContext; import com.yahoo.config.model.producer.AnyConfigProducer; import com.yahoo.config.model.producer.UserConfigRepo; import com.yahoo.path.Path; import com.yahoo.vespa.config.ConfigDefinition; import com.yahoo.vespa.config.ConfigDefinitionKey; import com.yahoo.vespa.config.ConfigPayloadBuilder; + import com.yahoo.yolean.Exceptions; import java.io.File; @@ -21,19 +23,28 @@ import java.util.Map; import java.util.Optional; import java.util.logging.Level; +import static com.yahoo.vespa.model.container.ApplicationContainerCluster.UserConfiguredUrls; + /** * Utility methods for registering file distribution of files/paths/urls/models defined by the user. * * @author gjoranv + * @author hmusum */ public class UserConfiguredFiles implements Serializable { private final FileRegistry fileRegistry; private final DeployLogger logger; + private final UserConfiguredUrls userConfiguredUrls; + private final String unknownConfigDefinition; - public UserConfiguredFiles(FileRegistry fileRegistry, DeployLogger logger) { + public UserConfiguredFiles(FileRegistry fileRegistry, DeployLogger logger, + ModelContext.FeatureFlags featureFlags, + UserConfiguredUrls userConfiguredUrls) { this.fileRegistry = fileRegistry; this.logger = logger; + this.userConfiguredUrls = userConfiguredUrls; + this.unknownConfigDefinition = featureFlags.unknownConfigDefinition(); } /** @@ -47,7 +58,7 @@ public class UserConfiguredFiles implements Serializable { try { register(builder, registeredFiles, key); } catch (IllegalArgumentException e) { - throw new IllegalArgumentException("Unable to register file specified in services.xml for config '" + key + "': " + + throw new IllegalArgumentException("Invalid config in services.xml for '" + key + "': " + Exceptions.toMessageString(e)); } } @@ -56,9 +67,12 @@ public class UserConfiguredFiles implements Serializable { private void register(ConfigPayloadBuilder builder, Map<Path, FileReference> registeredFiles, ConfigDefinitionKey key) { ConfigDefinition configDefinition = builder.getConfigDefinition(); if (configDefinition == null) { - // TODO: throw new IllegalArgumentException("Unable to find config definition for " + builder); - logger.logApplicationPackage(Level.INFO, "Unable to find config definition " + key + - ". Will not register files for file distribution for this config"); + String message = "Unable to find config definition " + key + ". Will not register files for file distribution for this config"; + switch (unknownConfigDefinition) { + case "log" -> logger.logApplicationPackage(Level.INFO, message); + case "warning" -> logger.logApplicationPackage(Level.WARNING, message); + case "fail" -> throw new IllegalArgumentException("Unable to find config definition for " + key); + } return; } @@ -113,8 +127,7 @@ public class UserConfiguredFiles implements Serializable { ConfigPayloadBuilder fileEntry = builder.getObject(name); if (isEmptyOptionalPath(entry, fileEntry)) continue; if (fileEntry.getValue() == null || fileEntry.getValue().equals(".")) - throw new IllegalArgumentException("Unable to register file for field '" + name + - "': Invalid config value '" + fileEntry.getValue() + "'"); + throw new IllegalArgumentException("Invalid config value '" + fileEntry.getValue() + "' for field '" + name); registerFileEntry(fileEntry, registeredFiles, isModelType); } } @@ -133,7 +146,10 @@ public class UserConfiguredFiles implements Serializable { Path path; if (isModelType) { var modelReference = ModelReference.valueOf(builder.getValue()); - if (modelReference.path().isEmpty()) return; + if (modelReference.path().isEmpty()) { + modelReference.url().ifPresent(url -> userConfiguredUrls.add(url.value())); + return; + } path = Path.fromString(modelReference.path().get().value()); } else { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/ml/OnnxModelProbe.java b/config-model/src/main/java/com/yahoo/vespa/model/ml/OnnxModelProbe.java index 7c86267c1b6..39a8e16fad5 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/ml/OnnxModelProbe.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/ml/OnnxModelProbe.java @@ -29,6 +29,7 @@ import java.util.Map; public class OnnxModelProbe { private static final String binary = "vespa-analyze-onnx-model"; + private static final ObjectMapper jsonParser = new ObjectMapper(); static TensorType probeModel(ApplicationPackage app, Path modelPath, String outputName, Map<String, TensorType> inputTypes) { TensorType outputType = TensorType.empty; @@ -41,8 +42,9 @@ public class OnnxModelProbe { // Otherwise, run vespa-analyze-onnx-model if the model is available if (outputType.equals(TensorType.empty) && app.getFile(modelPath).exists()) { String jsonInput = createJsonInput(app.getFileReference(modelPath).getAbsolutePath(), inputTypes); - String jsonOutput = callVespaAnalyzeOnnxModel(jsonInput); + var jsonOutput = callVespaAnalyzeOnnxModel(jsonInput); outputType = outputTypeFromJson(jsonOutput, outputName); + writeMemoryStats(app, modelPath, MemoryStats.fromJson(jsonOutput)); if ( ! outputType.equals(TensorType.empty)) { writeProbedOutputType(app, modelPath, contextKey, outputType); } @@ -53,6 +55,16 @@ public class OnnxModelProbe { return outputType; } + private static void writeMemoryStats(ApplicationPackage app, Path modelPath, MemoryStats memoryStats) throws IOException { + String path = app.getFileReference(memoryStatsPath(modelPath)).getAbsolutePath(); + IOUtils.writeFile(path, memoryStats.toJson().toPrettyString(), false); + } + + private static Path memoryStatsPath(Path modelPath) { + var fileName = OnnxModelInfo.asValidIdentifier(modelPath.getRelative()) + ".memory_stats"; + return ApplicationPackage.MODELS_GENERATED_REPLICATED_DIR.append(fileName); + } + private static String createContextKey(String onnxName, Map<String, TensorType> inputTypes) { StringBuilder key = new StringBuilder().append(onnxName).append(":"); inputTypes.entrySet().stream().sorted(Map.Entry.comparingByKey()) @@ -95,9 +107,7 @@ public class OnnxModelProbe { return TensorType.empty; } - private static TensorType outputTypeFromJson(String json, String outputName) throws IOException { - ObjectMapper m = new ObjectMapper(); - JsonNode root = m.readTree(json); + private static TensorType outputTypeFromJson(JsonNode root, String outputName) throws IOException { if ( ! root.isObject() || ! root.has("outputs")) { return TensorType.empty; } @@ -123,7 +133,7 @@ public class OnnxModelProbe { return out.toString(); } - private static String callVespaAnalyzeOnnxModel(String jsonInput) throws IOException, InterruptedException { + private static JsonNode callVespaAnalyzeOnnxModel(String jsonInput) throws IOException, InterruptedException { StringBuilder output = new StringBuilder(); ProcessBuilder processBuilder = new ProcessBuilder(binary, "--probe-types"); @@ -148,7 +158,16 @@ public class OnnxModelProbe { throw new IllegalArgumentException("Error from '" + binary + "'. Return code: " + returnCode + ". " + "Output: '" + output + "'"); } - return output.toString(); + return jsonParser.readTree(output.toString()); + } + + public record MemoryStats(long vmSize, long vmRss) { + static MemoryStats fromJson(JsonNode json) { + return new MemoryStats(json.get("vm_size").asLong(), json.get("vm_rss").asLong()); + } + JsonNode toJson() { + return jsonParser.createObjectNode().put("vm_size", vmSize).put("vm_rss", vmRss); + } } } diff --git a/config-model/src/main/resources/schema/containercluster.rnc b/config-model/src/main/resources/schema/containercluster.rnc index 74f6b5b003c..c10b5a66e06 100644 --- a/config-model/src/main/resources/schema/containercluster.rnc +++ b/config-model/src/main/resources/schema/containercluster.rnc @@ -126,9 +126,15 @@ SslProvider = element ssl-provider { } Threadpool = element threadpool { - element max-threads { xsd:nonNegativeInteger } & - element min-threads { xsd:nonNegativeInteger } & - element queue-size { xsd:nonNegativeInteger } + (( + # TODO Vespa 9 Remove max-threads / min-threads / queue-size + element max-threads { xsd:nonNegativeInteger } & + element min-threads { xsd:nonNegativeInteger } & + element queue-size { xsd:nonNegativeInteger } + )|( + element threads { xsd:double { minExclusive = "0.0" } & attribute boost { xsd:double { minExclusive = "0.0" } }? }? & + element queue { xsd:double { minInclusive = "0.0" } }? + )) } Clients = element clients { |