about | summary | refs | log | tree | commit | diff | stats
path: root/config-model/src/main/java/com/yahoo/vespa/model/container
diff options
context:
space:
mode:
author bjormel <bjormel@yahooinc.com> 2023-10-01 12:23:12 +0000
committer bjormel <bjormel@yahooinc.com> 2023-10-01 12:23:12 +0000
commit e9058b555d4dfea2f6c872d9a677e8678b569569 (patch)
tree fa1b67c6e39712c1e0d9f308b0dd55573b43f913 /config-model/src/main/java/com/yahoo/vespa/model/container
parent 0ad931fa86658904fe9212b014d810236b0e00e4 (diff)
parent 16030193ec04ee41e98779a3d7ee6a6c1d0d0d6f (diff)
Merge branch 'master' into bjormel/aws-main-controller
Diffstat (limited to 'config-model/src/main/java/com/yahoo/vespa/model/container')
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java 119
-rwxr-xr-x config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java 13
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java 4
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/ContainerThreadpool.java 82
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java 17
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java 35
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/component/Handler.java 6
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java 33
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java 3
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/component/Model.java 69
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/component/chain/ChainedComponent.java 4
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java 6
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java 78
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/xml/DocumentApiOptionsBuilder.java 9
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java 8
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/xml/SearchHandler.java 13
16 files changed, 300 insertions, 199 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java
index b9021912244..ac679cc406c 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java
@@ -8,14 +8,14 @@ import com.yahoo.component.ComponentId;
import com.yahoo.component.ComponentSpecification;
import com.yahoo.config.FileReference;
import com.yahoo.config.application.api.ComponentInfo;
+import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.config.model.api.ApplicationClusterEndpoint;
import com.yahoo.config.model.api.ApplicationClusterInfo;
-import com.yahoo.config.model.api.ContainerEndpoint;
import com.yahoo.config.model.api.Model;
+import com.yahoo.config.model.api.OnnxModelCost;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.config.model.producer.TreeConfigProducer;
import com.yahoo.config.provision.AllocatedHosts;
-import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.HostSpec;
import com.yahoo.container.bundle.BundleInstantiationSpecification;
import com.yahoo.container.di.config.ApplicationBundlesConfig;
@@ -43,10 +43,12 @@ import com.yahoo.vespa.model.filedistribution.UserConfiguredFiles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
+import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
+import java.util.logging.Level;
import java.util.stream.Collectors;
import static com.yahoo.vespa.model.container.docproc.DocprocChains.DOCUMENT_TYPE_MANAGER_CLASS;
@@ -82,6 +84,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
private final Set<FileReference> applicationBundles = new LinkedHashSet<>();
private final Set<String> previousHosts;
+ private final OnnxModelCost.Calculator onnxModelCost;
+ private final DeployLogger logger;
private ContainerModelEvaluation modelEvaluation;
@@ -92,6 +96,7 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
private int zookeeperSessionTimeoutSeconds = 30;
private final int transport_events_before_wakeup;
private final int transport_connections_per_target;
+ private final boolean dynamicHeapSize;
/** The heap size % of total memory available to the JVM process. */
private final int heapSizePercentageOfAvailableMemory;
@@ -100,9 +105,12 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
private List<ApplicationClusterEndpoint> endpoints = List.of();
+ private final UserConfiguredUrls userConfiguredUrls = new UserConfiguredUrls();
+
public ApplicationContainerCluster(TreeConfigProducer<?> parent, String configSubId, String clusterId, DeployState deployState) {
super(parent, configSubId, clusterId, deployState, true, 10);
this.tlsClientAuthority = deployState.tlsClientAuthority();
+ dynamicHeapSize = deployState.featureFlags().dynamicHeapSize();
previousHosts = Collections.unmodifiableSet(deployState.getPreviousModel().stream()
.map(Model::allocatedHosts)
.map(AllocatedHosts::getHosts)
@@ -125,8 +133,13 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
heapSizePercentageOfAvailableMemory = deployState.featureFlags().heapSizePercentage() > 0
? Math.min(99, deployState.featureFlags().heapSizePercentage())
: defaultHeapSizePercentageOfAvailableMemory;
+ onnxModelCost = deployState.onnxModelCost().newCalculator(
+ deployState.getApplicationPackage(), deployState.getDeployLogger());
+ logger = deployState.getDeployLogger();
}
+ public UserConfiguredUrls userConfiguredUrls() { return userConfiguredUrls; }
+
@Override
protected void doPrepare(DeployState deployState) {
super.doPrepare(deployState);
@@ -147,7 +160,10 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
if (containers.isEmpty()) return;
// Files referenced from user configs to all components.
- UserConfiguredFiles files = new UserConfiguredFiles(deployState.getFileRegistry(), deployState.getDeployLogger());
+ UserConfiguredFiles files = new UserConfiguredFiles(deployState.getFileRegistry(),
+ deployState.getDeployLogger(),
+ deployState.featureFlags(),
+ userConfiguredUrls);
for (Component<?, ?> component : getAllComponents()) {
files.register(component);
}
@@ -182,19 +198,25 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
public void setMemoryPercentage(Integer memoryPercentage) { this.memoryPercentage = memoryPercentage; }
@Override
- public Optional<Integer> getMemoryPercentage() {
- if (memoryPercentage != null) return Optional.of(memoryPercentage);
+ public Optional<JvmMemoryPercentage> getMemoryPercentage() {
+ if (memoryPercentage != null) return Optional.of(JvmMemoryPercentage.of(memoryPercentage));
if (isHostedVespa()) {
int availableMemoryPercentage = getHostClusterId().isPresent() ?
heapSizePercentageOfTotalAvailableMemoryWhenCombinedCluster :
heapSizePercentageOfAvailableMemory;
- if (getContainers().isEmpty()) return Optional.of(availableMemoryPercentage); // Node memory is not known
+ if (getContainers().isEmpty()) return Optional.of(JvmMemoryPercentage.of(availableMemoryPercentage)); // Node memory is not known
// Node memory is known so convert available memory percentage to node memory percentage
- double totalMemory = getContainers().get(0).getHostResource().realResources().memoryGb();
- double availableMemory = totalMemory - Host.memoryOverheadGb;
- return Optional.of((int) (availableMemory / totalMemory * availableMemoryPercentage));
+ double totalMemory = dynamicHeapSize
+ ? getContainers().stream().mapToDouble(c -> c.getHostResource().realResources().memoryGb()).min().orElseThrow()
+ : getContainers().get(0).getHostResource().realResources().memoryGb();
+ double jvmHeapDeductionGb = dynamicHeapSize ? onnxModelCost.aggregatedModelCostInBytes() / (1024D * 1024 * 1024) : 0;
+ double availableMemory = Math.max(0, totalMemory - Host.memoryOverheadGb - jvmHeapDeductionGb);
+ int memoryPercentage = (int) (availableMemory / totalMemory * availableMemoryPercentage);
+ logger.log(Level.FINE, () -> "memoryPercentage=%d, availableMemory=%f, totalMemory=%f, availableMemoryPercentage=%d, jvmHeapDeductionGb=%f"
+ .formatted(memoryPercentage, availableMemory, totalMemory, availableMemoryPercentage, jvmHeapDeductionGb));
+ return Optional.of(JvmMemoryPercentage.of(memoryPercentage, availableMemory));
}
return Optional.empty();
}
@@ -203,49 +225,23 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
private void createEndpoints(DeployState deployState) {
if (!deployState.isHosted()) return;
if (deployState.getProperties().applicationId().instance().isTester()) return;
+ // Add endpoints provided by the controller
+ List<String> hosts = getContainers().stream().map(AbstractService::getHostName).sorted().toList();
List<ApplicationClusterEndpoint> endpoints = new ArrayList<>();
-
- List<String> hosts = getContainers().stream()
- .map(AbstractService::getHostName)
- .sorted()
- .toList();
-
- Set<ContainerEndpoint> endpointsFromController = deployState.getEndpoints();
- // Add zone-scoped endpoints if not provided by the controller
- // TODO(mpolden): Remove this when controller always includes zone-scope endpoints, and config models < 8.230 are gone
- if (endpointsFromController.stream().noneMatch(endpoint -> endpoint.scope() == ApplicationClusterEndpoint.Scope.zone)) {
- for (String suffix : deployState.getProperties().zoneDnsSuffixes()) {
- ApplicationClusterEndpoint.DnsName l4Name = ApplicationClusterEndpoint.DnsName.sharedL4NameFrom(
- deployState.zone().system(),
- ClusterSpec.Id.from(getName()),
- deployState.getProperties().applicationId(),
- suffix);
- endpoints.add(ApplicationClusterEndpoint.builder()
- .zoneScope()
- .sharedL4Routing()
- .dnsName(l4Name)
- .hosts(hosts)
- .clusterId(getName())
- .authMethod(ApplicationClusterEndpoint.AuthMethod.mtls)
- .build());
- }
- }
-
- // Include all endpoints provided by controller
- endpointsFromController.stream()
- .filter(ce -> ce.clusterId().equals(getName()))
- .forEach(ce -> ce.names().forEach(
- name -> endpoints.add(ApplicationClusterEndpoint.builder()
- .scope(ce.scope())
- .weight(ce.weight().orElse(1)) // Default to weight=1 if not set
- .routingMethod(ce.routingMethod())
- .dnsName(ApplicationClusterEndpoint.DnsName.from(name))
- .hosts(hosts)
- .clusterId(getName())
- .authMethod(ce.authMethod())
- .build())
- ));
- this.endpoints = List.copyOf(endpoints);
+ deployState.getEndpoints().stream()
+ .filter(ce -> ce.clusterId().equals(getName()))
+ .forEach(ce -> ce.names().forEach(
+ name -> endpoints.add(ApplicationClusterEndpoint.builder()
+ .scope(ce.scope())
+ .weight(ce.weight().orElse(1))
+ .routingMethod(ce.routingMethod())
+ .dnsName(ApplicationClusterEndpoint.DnsName.from(name))
+ .hosts(hosts)
+ .clusterId(getName())
+ .authMethod(ce.authMethod())
+ .build())
+ ));
+ this.endpoints = Collections.unmodifiableList(endpoints);
}
@Override
@@ -299,12 +295,15 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
@Override
public void getConfig(QrStartConfig.Builder builder) {
super.getConfig(builder);
+ var memoryPct = getMemoryPercentage().orElse(null);
+ int heapsize = memoryPct != null && memoryPct.availableMemoryGb().isPresent()
+ ? (int) (memoryPct.availableMemoryGb().getAsDouble() * 1024) : 1536;
builder.jvm.verbosegc(true)
.availableProcessors(0)
.compressedClassSpaceSize(0)
- .minHeapsize(1536)
- .heapsize(1536);
- getMemoryPercentage().ifPresent(percentage -> builder.jvm.heapSizeAsPercentageOfPhysicalMemory(percentage));
+ .minHeapsize(heapsize)
+ .heapsize(heapsize);
+ if (memoryPct != null) builder.jvm.heapSizeAsPercentageOfPhysicalMemory(memoryPct.percentage());
}
@Override
@@ -373,6 +372,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
@Override
public String name() { return getName(); }
+ public OnnxModelCost.Calculator onnxModelCost() { return onnxModelCost; }
+
public static class MbusParams {
// the amount of the maxpendingbytes to process concurrently, typically 0.2 (20%)
final Double maxConcurrentFactor;
@@ -390,4 +391,14 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
}
}
+ public static class UserConfiguredUrls {
+
+ private final Set<String> urls = new HashSet<>();
+
+ public void add(String url) { urls.add(url); }
+
+ public Set<String> all() { return urls; }
+
+ }
+
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java
index 6bbc24e8739..3d4ec51c8d2 100755
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java
@@ -62,6 +62,7 @@ import com.yahoo.vespa.model.container.search.ContainerSearch;
import com.yahoo.vespa.model.container.search.searchchain.SearchChains;
import com.yahoo.vespa.model.content.Content;
import com.yahoo.vespa.model.search.SearchCluster;
+
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
@@ -71,6 +72,7 @@ import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
+import java.util.OptionalDouble;
import java.util.Set;
import java.util.TreeSet;
@@ -142,7 +144,7 @@ public abstract class ContainerCluster<CONTAINER extends Container>
private ContainerDocproc containerDocproc;
private ContainerDocumentApi containerDocumentApi;
private SecretStore secretStore;
- private final ContainerThreadpool defaultHandlerThreadpool = new Handler.DefaultHandlerThreadpool();
+ private final ContainerThreadpool defaultHandlerThreadpool;
private boolean rpcServerEnabled = true;
private boolean httpServerEnabled = true;
@@ -185,6 +187,7 @@ public abstract class ContainerCluster<CONTAINER extends Container>
addCommonVespaBundles();
addSimpleComponent(VoidRequestLog.class);
addComponent(new DefaultThreadpoolProvider(this, defaultPoolNumThreads));
+ defaultHandlerThreadpool = new Handler.DefaultHandlerThreadpool(deployState, null);
addComponent(defaultHandlerThreadpool);
addSimpleComponent(com.yahoo.concurrent.classlock.ClassLocking.class);
addSimpleComponent("com.yahoo.container.jdisc.metric.MetricConsumerProviderProvider");
@@ -718,5 +721,11 @@ public abstract class ContainerCluster<CONTAINER extends Container>
* Returns the percentage of host physical memory this application has specified for nodes in this cluster,
* or empty if this is not specified by the application.
*/
- public Optional<Integer> getMemoryPercentage() { return Optional.empty(); }
+ public record JvmMemoryPercentage(int percentage, OptionalDouble availableMemoryGb) {
+ static JvmMemoryPercentage of(int percentage) { return new JvmMemoryPercentage(percentage, OptionalDouble.empty()); }
+ static JvmMemoryPercentage of(int percentage, double availableMemoryGb) {
+ return new JvmMemoryPercentage(percentage, OptionalDouble.of(availableMemoryGb));
+ }
+ }
+ public Optional<JvmMemoryPercentage> getMemoryPercentage() { return Optional.empty(); }
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java
index 906ef739ef1..1b47f59653e 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java
@@ -45,10 +45,6 @@ public class ContainerModelEvaluation implements
private final RankProfileList rankProfileList;
private final FileDistributedOnnxModels onnxModels; // For cluster specific ONNX model settings
- public ContainerModelEvaluation(ApplicationContainerCluster cluster, RankProfileList rankProfileList) {
- this(cluster, rankProfileList, null);
- }
-
public ContainerModelEvaluation(ApplicationContainerCluster cluster,
RankProfileList rankProfileList, FileDistributedOnnxModels onnxModels) {
this.rankProfileList = Objects.requireNonNull(rankProfileList, "rankProfileList cannot be null");
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerThreadpool.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerThreadpool.java
index fb4e62f5cd1..4b85c384951 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerThreadpool.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerThreadpool.java
@@ -1,16 +1,17 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.model.container;
+import com.yahoo.config.model.builder.xml.XmlHelper;
+import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.container.bundle.BundleInstantiationSpecification;
import com.yahoo.container.handler.threadpool.ContainerThreadPool;
import com.yahoo.container.handler.threadpool.ContainerThreadpoolConfig;
import com.yahoo.container.handler.threadpool.ContainerThreadpoolImpl;
import com.yahoo.osgi.provider.model.ComponentModel;
-import com.yahoo.text.XML;
import com.yahoo.vespa.model.container.component.SimpleComponent;
import org.w3c.dom.Element;
-import java.util.Optional;
+import java.util.logging.Level;
/**
* Component definition for a {@link java.util.concurrent.Executor} using {@link ContainerThreadPool}.
@@ -20,18 +21,50 @@ import java.util.Optional;
public abstract class ContainerThreadpool extends SimpleComponent implements ContainerThreadpoolConfig.Producer {
private final String name;
- private final UserOptions userOptions;
+ private final UserOptions options;
- public ContainerThreadpool(String name, UserOptions userOptions) {
+ record UserOptions(Double max, Double min, Double queue){}
+
+ protected ContainerThreadpool(DeployState ds, String name, Element parent) {
super(new ComponentModel(
BundleInstantiationSpecification.fromStrings(
"threadpool@" + name,
ContainerThreadpoolImpl.class.getName(),
null)));
this.name = name;
- this.userOptions = userOptions;
+ var threadpoolElem = XmlHelper.getOptionalChild(parent, "threadpool").orElse(null);
+ if (threadpoolElem == null) options = new UserOptions(null, null, null);
+ else {
+ // TODO Vespa 9 Remove min-threads, max-threads and queue-size
+ Double max = null;
+ Double min = null;
+ Double queue = null;
+ var minElem = XmlHelper.getOptionalChild(threadpoolElem, "min-threads").orElse(null);
+ if (minElem != null) ds.getDeployLogger().logApplicationPackage(Level.WARNING, "For <threadpool>: <min-threads> is deprecated, use <threads> instead");
+ var maxElem = XmlHelper.getOptionalChild(threadpoolElem, "max-threads").orElse(null);
+ if (maxElem != null) ds.getDeployLogger().logApplicationPackage(Level.WARNING, "For <threadpool>: <max-threads> is deprecated, use <threads> with 'boost' instead");
+ var queueElem = XmlHelper.getOptionalChild(threadpoolElem, "queue").orElse(null);
+ var queueSizeElem = XmlHelper.getOptionalChild(threadpoolElem, "queue-size").orElse(null);
+ if (queueSizeElem != null) ds.getDeployLogger().logApplicationPackage(Level.WARNING, "For <threadpool>: <queue-size> is deprecated, use <queue> instead");
+ var threadsElem = XmlHelper.getOptionalChild(threadpoolElem, "threads").orElse(null);
+ if (threadsElem != null) {
+ min = parseMultiplier(threadsElem.getTextContent());
+ max = threadsElem.hasAttribute("boost") ? parseMultiplier(threadsElem.getAttribute("boost")) : min;
+ } else if (minElem != null) {
+ min = parseFixed(minElem.getTextContent());
+ }
+ if (max == null && maxElem != null) {
+ max = parseFixed(maxElem.getTextContent());
+ }
+ if (queueElem != null) queue = parseMultiplier(queueElem.getTextContent());
+ else if (queueSizeElem != null) queue = parseFixed(queueSizeElem.getTextContent());
+ options = new UserOptions(max, min, queue);
+ }
}
+ private static Double parseMultiplier(String text) { return -parseFixed(text); }
+ private static Double parseFixed(String text) { return Double.parseDouble(text); }
+
// Must be implemented by subclasses to set values that may be overridden by user options.
protected abstract void setDefaultConfigValues(ContainerThreadpoolConfig.Builder builder);
@@ -40,35 +73,20 @@ public abstract class ContainerThreadpool extends SimpleComponent implements Con
setDefaultConfigValues(builder);
builder.name(this.name);
- if (userOptions != null) {
- builder.maxThreads(userOptions.maxThreads);
- builder.minThreads(userOptions.minThreads);
- builder.queueSize(userOptions.queueSize);
+ if (options.max() != null) {
+ int max = (int) Math.round(options.max());
+ if (options.max() != 0 && max == 0) max = options.max() > 0 ? 1 : -1;
+ builder.maxThreads(max);
}
- }
-
- public static class UserOptions {
- private final int maxThreads;
- private final int minThreads;
- private final int queueSize;
-
- private UserOptions(int maxThreads, int minThreads, int queueSize) {
- this.maxThreads = maxThreads;
- this.minThreads = minThreads;
- this.queueSize = queueSize;
- }
-
- public static Optional<UserOptions> fromXml(Element xml) {
- Element element = XML.getChild(xml, "threadpool");
- if (element == null) return Optional.empty();
- return Optional.of(new UserOptions(
- intOption(element, "max-threads"),
- intOption(element, "min-threads"),
- intOption(element, "queue-size")));
+ if (options.min() != null) {
+ int min = (int) Math.round(options.min());
+ if (options.min() != 0 && min == 0) min = options.min() > 0 ? 1 : -1;
+ builder.minThreads(min);
}
-
- private static int intOption(Element element, String name) {
- return Integer.parseInt(XML.getChild(element, name).getTextContent());
+ if (options.queue() != null) {
+ int queue = (int) Math.round(options.queue());
+ if (options.queue() != 0 && queue == 0) queue = options.queue() > 0 ? 1 : -1;
+ builder.queueSize(queue);
}
}
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java
index 205848e1b67..d02b7d0de5f 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java
@@ -5,10 +5,9 @@ package com.yahoo.vespa.model.container.component;
import com.yahoo.config.ModelReference;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.embedding.BertBaseEmbedderConfig;
-import com.yahoo.vespa.model.container.xml.ModelIdResolver;
+import com.yahoo.vespa.model.container.ApplicationContainerCluster;
import org.w3c.dom.Element;
-import static com.yahoo.text.XML.getChild;
import static com.yahoo.text.XML.getChildValue;
import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME;
@@ -17,8 +16,8 @@ import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATI
*/
public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConfig.Producer {
- private final ModelReference model;
- private final ModelReference vocab;
+ private final ModelReference modelRef;
+ private final ModelReference vocabRef;
private final Integer maxTokens;
private final String transformerInputIds;
private final String transformerAttentionMask;
@@ -33,10 +32,11 @@ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConf
private final Integer onnxGpuDevice;
- public BertEmbedder(Element xml, DeployState state) {
+ public BertEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) {
super("ai.vespa.embedding.BertBaseEmbedder", INTEGRATION_BUNDLE_NAME, xml);
- model = ModelIdResolver.resolveToModelReference(getChild(xml, "transformer-model"), state);
- vocab = ModelIdResolver.resolveToModelReference(getChild(xml, "tokenizer-vocab"), state);
+ var model = Model.fromXml(state, xml, "transformer-model").orElseThrow();
+ modelRef = model.modelReference();
+ vocabRef = Model.fromXml(state, xml, "tokenizer-vocab").orElseThrow().modelReference();
maxTokens = getChildValue(xml, "max-tokens").map(Integer::parseInt).orElse(null);
transformerInputIds = getChildValue(xml, "transformer-input-ids").orElse(null);
transformerAttentionMask = getChildValue(xml, "transformer-attention-mask").orElse(null);
@@ -49,11 +49,12 @@ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConf
onnxInteropThreads = getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null);
onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null);
onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null);
+ model.registerOnnxModelCost(cluster);
}
@Override
public void getConfig(BertBaseEmbedderConfig.Builder b) {
- b.transformerModel(model).tokenizerVocab(vocab);
+ b.transformerModel(modelRef).tokenizerVocab(vocabRef);
if (maxTokens != null) b.transformerMaxTokens(maxTokens);
if (transformerInputIds != null) b.transformerInputIds(transformerInputIds);
if (transformerAttentionMask != null) b.transformerAttentionMask(transformerAttentionMask);
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java
index c0fdfe3dc64..66e3b1c9dfd 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java
@@ -5,13 +5,9 @@ package com.yahoo.vespa.model.container.component;
import com.yahoo.config.ModelReference;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.embedding.ColBertEmbedderConfig;
-import com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig;
-import com.yahoo.vespa.model.container.xml.ModelIdResolver;
+import com.yahoo.vespa.model.container.ApplicationContainerCluster;
import org.w3c.dom.Element;
-import java.util.Optional;
-
-import static com.yahoo.config.model.builder.xml.XmlHelper.getOptionalChild;
import static com.yahoo.text.XML.getChildValue;
import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME;
@@ -20,8 +16,8 @@ import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATI
* @author bergum
*/
public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderConfig.Producer {
- private final ModelReference model;
- private final ModelReference vocab;
+ private final ModelReference modelRef;
+ private final ModelReference vocabRef;
private final Integer maxQueryTokens;
@@ -40,13 +36,13 @@ public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderCo
private final Integer onnxIntraopThreads;
private final Integer onnxGpuDevice;
- public ColBertEmbedder(Element xml, DeployState state) {
+ public ColBertEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) {
super("ai.vespa.embedding.ColBertEmbedder", INTEGRATION_BUNDLE_NAME, xml);
- var transformerModelElem = getOptionalChild(xml, "transformer-model").orElseThrow();
- model = ModelIdResolver.resolveToModelReference(transformerModelElem, state);
- vocab = getOptionalChild(xml, "tokenizer-model")
- .map(elem -> ModelIdResolver.resolveToModelReference(elem, state))
- .orElseGet(() -> resolveDefaultVocab(transformerModelElem, state));
+ var model = Model.fromXml(state, xml, "transformer-model").orElseThrow();
+ modelRef = model.modelReference();
+ vocabRef = Model.fromXml(state, xml, "tokenizer-model")
+ .map(Model::modelReference)
+ .orElseGet(() -> resolveDefaultVocab(model, state));
maxTokens = getChildValue(xml, "max-tokens").map(Integer::parseInt).orElse(null);
maxQueryTokens = getChildValue(xml, "max-query-tokens").map(Integer::parseInt).orElse(null);
maxDocumentTokens = getChildValue(xml, "max-document-tokens").map(Integer::parseInt).orElse(null);
@@ -60,21 +56,20 @@ public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderCo
onnxInteropThreads = getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null);
onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null);
onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null);
-
+ model.registerOnnxModelCost(cluster);
}
- private static ModelReference resolveDefaultVocab(Element model, DeployState state) {
- if (state.isHosted() && model.hasAttribute("model-id")) {
- var implicitVocabId = model.getAttribute("model-id") + "-vocab";
- return ModelIdResolver.resolveToModelReference(
- "tokenizer-model", Optional.of(implicitVocabId), Optional.empty(), Optional.empty(), state);
+ private static ModelReference resolveDefaultVocab(Model model, DeployState state) {
+ var modelId = model.modelId().orElse(null);
+ if (state.isHosted() && modelId != null) {
+ return Model.fromParams(state, model.name(), modelId + "-vocab", null, null).modelReference();
}
throw new IllegalArgumentException("'tokenizer-model' must be specified");
}
@Override
public void getConfig(ColBertEmbedderConfig.Builder b) {
- b.transformerModel(model).tokenizerPath(vocab);
+ b.transformerModel(modelRef).tokenizerPath(vocabRef);
if (maxTokens != null) b.transformerMaxTokens(maxTokens);
if (transformerInputIds != null) b.transformerInputIds(transformerInputIds);
if (transformerAttentionMask != null) b.transformerAttentionMask(transformerAttentionMask);
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/Handler.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/Handler.java
index 31031aa5bf2..969db6553e6 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/Handler.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/Handler.java
@@ -1,9 +1,11 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.model.container.component;
+import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.container.handler.threadpool.ContainerThreadpoolConfig;
import com.yahoo.osgi.provider.model.ComponentModel;
import com.yahoo.vespa.model.container.ContainerThreadpool;
+import org.w3c.dom.Element;
import java.util.ArrayList;
import java.util.Arrays;
@@ -76,8 +78,8 @@ public class Handler extends Component<Component<?, ?>, ComponentModel> {
*/
public static class DefaultHandlerThreadpool extends ContainerThreadpool {
- public DefaultHandlerThreadpool() {
- super("default-handler-common", null);
+ public DefaultHandlerThreadpool(DeployState ds, Element options) {
+ super(ds, "default-handler-common", options);
}
@Override
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java
index f4017339699..af47bee137a 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java
@@ -5,12 +5,9 @@ package com.yahoo.vespa.model.container.component;
import com.yahoo.config.ModelReference;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig;
-import com.yahoo.vespa.model.container.xml.ModelIdResolver;
+import com.yahoo.vespa.model.container.ApplicationContainerCluster;
import org.w3c.dom.Element;
-import java.util.Optional;
-
-import static com.yahoo.config.model.builder.xml.XmlHelper.getOptionalChild;
import static com.yahoo.text.XML.getChildValue;
import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME;
@@ -19,8 +16,8 @@ import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATI
* @author bjorncs
*/
public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEmbedderConfig.Producer {
- private final ModelReference model;
- private final ModelReference vocab;
+ private final ModelReference modelRef;
+ private final ModelReference vocabRef;
private final Integer maxTokens;
private final String transformerInputIds;
private final String transformerAttentionMask;
@@ -33,13 +30,13 @@ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEm
private final Integer onnxGpuDevice;
private final String poolingStrategy;
- public HuggingFaceEmbedder(Element xml, DeployState state) {
+ public HuggingFaceEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) {
super("ai.vespa.embedding.huggingface.HuggingFaceEmbedder", INTEGRATION_BUNDLE_NAME, xml);
- var transformerModelElem = getOptionalChild(xml, "transformer-model").orElseThrow();
- model = ModelIdResolver.resolveToModelReference(transformerModelElem, state);
- vocab = getOptionalChild(xml, "tokenizer-model")
- .map(elem -> ModelIdResolver.resolveToModelReference(elem, state))
- .orElseGet(() -> resolveDefaultVocab(transformerModelElem, state));
+ var model = Model.fromXml(state, xml, "transformer-model").orElseThrow();
+ modelRef = model.modelReference();
+ vocabRef = Model.fromXml(state, xml, "tokenizer-model")
+ .map(Model::modelReference)
+ .orElseGet(() -> resolveDefaultVocab(model, state));
maxTokens = getChildValue(xml, "max-tokens").map(Integer::parseInt).orElse(null);
transformerInputIds = getChildValue(xml, "transformer-input-ids").orElse(null);
transformerAttentionMask = getChildValue(xml, "transformer-attention-mask").orElse(null);
@@ -51,20 +48,20 @@ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEm
onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null);
onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null);
poolingStrategy = getChildValue(xml, "pooling-strategy").orElse(null);
+ model.registerOnnxModelCost(cluster);
}
- private static ModelReference resolveDefaultVocab(Element model, DeployState state) {
- if (state.isHosted() && model.hasAttribute("model-id")) {
- var implicitVocabId = model.getAttribute("model-id") + "-vocab";
- return ModelIdResolver.resolveToModelReference(
- "tokenizer-model", Optional.of(implicitVocabId), Optional.empty(), Optional.empty(), state);
+ private static ModelReference resolveDefaultVocab(Model model, DeployState state) {
+ var modelId = model.modelId().orElse(null);
+ if (state.isHosted() && modelId != null) {
+ return Model.fromParams(state, model.name(), modelId + "-vocab", null, null).modelReference();
}
throw new IllegalArgumentException("'tokenizer-model' must be specified");
}
@Override
public void getConfig(HuggingFaceEmbedderConfig.Builder b) {
- b.transformerModel(model).tokenizerPath(vocab);
+ b.transformerModel(modelRef).tokenizerPath(vocabRef);
if (maxTokens != null) b.transformerMaxTokens(maxTokens);
if (transformerInputIds != null) b.transformerInputIds(transformerInputIds);
if (transformerAttentionMask != null) b.transformerAttentionMask(transformerAttentionMask);
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java
index 0bf5491e872..e9ac93caa68 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java
@@ -7,7 +7,6 @@ import com.yahoo.language.huggingface.config.HuggingFaceTokenizerConfig;
import com.yahoo.language.huggingface.config.HuggingFaceTokenizerConfig.Padding;
import com.yahoo.language.huggingface.config.HuggingFaceTokenizerConfig.Truncation;
import com.yahoo.text.XML;
-import com.yahoo.vespa.model.container.xml.ModelIdResolver;
import org.w3c.dom.Element;
import java.util.Map;
@@ -26,7 +25,7 @@ public class HuggingFaceTokenizer extends TypedComponent implements HuggingFaceT
super("com.yahoo.language.huggingface.HuggingFaceTokenizer", LINGUISTICS_BUNDLE_NAME, xml);
for (Element element : XML.getChildren(xml, "model")) {
var lang = element.hasAttribute("language") ? element.getAttribute("language") : "unknown";
- langToModel.put(lang, ModelIdResolver.resolveToModelReference(element, state));
+ langToModel.put(lang, Model.fromXml(state, element).modelReference());
}
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/Model.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/Model.java
new file mode 100644
index 00000000000..76d93c38aee
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/Model.java
@@ -0,0 +1,69 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+package com.yahoo.vespa.model.container.component;
+
+import com.yahoo.config.ModelReference;
+import com.yahoo.config.application.api.ApplicationFile;
+import com.yahoo.config.model.builder.xml.XmlHelper;
+import com.yahoo.config.model.deploy.DeployState;
+import com.yahoo.path.Path;
+import com.yahoo.vespa.model.container.ApplicationContainerCluster;
+import com.yahoo.vespa.model.container.xml.ModelIdResolver;
+import org.w3c.dom.Element;
+
+import java.net.URI;
+import java.util.Objects;
+import java.util.Optional;
+
+/**
+ * Represents a model, e.g. an ONNX model for an embedder.
+ *
+ * @author bjorncs
+ */
+class Model {
+ private final String paramName;
+ private final String modelId;
+ private final URI url;
+ private final ApplicationFile file;
+ private final ModelReference ref;
+
+ private Model(DeployState ds, String paramName, String modelId, URI url, Path file) {
+ this.paramName = Objects.requireNonNull(paramName);
+ if (modelId == null && url == null && file == null)
+ throw new IllegalArgumentException("At least one of 'model-id', 'url' or 'path' must be specified");
+ this.modelId = modelId;
+ this.url = url;
+ this.file = file != null ? ds.getApplicationPackage().getFile(file) : null;
+ this.ref = ModelIdResolver.resolveToModelReference(
+ paramName, Optional.ofNullable(modelId), Optional.ofNullable(url).map(URI::toString),
+ Optional.ofNullable(file).map(Path::toString), ds);
+ }
+
+ static Model fromParams(DeployState ds, String paramName, String modelId, URI url, Path file) {
+ return new Model(ds, paramName, modelId, url, file);
+ }
+
+ static Optional<Model> fromXml(DeployState ds, Element parent, String name) {
+ return XmlHelper.getOptionalChild(parent, name).map(e -> fromXml(ds, e));
+ }
+
+ static Model fromXml(DeployState ds, Element model) {
+ var modelId = XmlHelper.getOptionalAttribute(model, "model-id").orElse(null);
+ var url = XmlHelper.getOptionalAttribute(model, "url").map(URI::create).orElse(null);
+ var path = XmlHelper.getOptionalAttribute(model, "path").map(Path::fromString).orElse(null);
+ return new Model(ds, model.getTagName(), modelId, url, path);
+ }
+
+ void registerOnnxModelCost(ApplicationContainerCluster c) {
+ var resolvedUrl = resolvedUrl().orElse(null);
+ if (file != null) c.onnxModelCost().registerModel(file);
+ else if (resolvedUrl != null) c.onnxModelCost().registerModel(resolvedUrl);
+ }
+
+ String name() { return paramName; }
+ Optional<String> modelId() { return Optional.ofNullable(modelId); }
+ Optional<URI> url() { return Optional.ofNullable(url); }
+ Optional<URI> resolvedUrl() { return ref.url().map(u -> URI.create(u.value())); }
+ Optional<ApplicationFile> file() { return Optional.ofNullable(file); }
+ ModelReference modelReference() { return ref; }
+}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/chain/ChainedComponent.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/chain/ChainedComponent.java
index c0431d01784..2354298779d 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/chain/ChainedComponent.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/chain/ChainedComponent.java
@@ -29,8 +29,6 @@ public class ChainedComponent<T extends ChainedComponentModel> extends Component
private ComponentId namespace() {
var owner = getParent().getParent();
- return (owner instanceof Chain) ?
- ((Chain) owner).getGlobalComponentId() :
- null;
+ return (owner instanceof Chain<?> chain) ? chain.getGlobalComponentId() : null;
}
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java
index f0296d49472..3261d454b4f 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.model.container.search;
+import com.yahoo.config.application.api.ApplicationPackage;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.container.QrSearchersConfig;
import com.yahoo.prelude.semantics.SemanticRulesConfig;
@@ -56,12 +57,14 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains>
private QueryProfiles queryProfiles;
private SemanticRules semanticRules;
private PageTemplates pageTemplates;
+ private ApplicationPackage app;
public ContainerSearch(DeployState deployState, ApplicationContainerCluster cluster, SearchChains chains) {
super(chains);
this.globalPhase = deployState.featureFlags().enableGlobalPhase();
this.useReconfigurableDispatcher = deployState.featureFlags().useReconfigurableDispatcher();
this.schemasWithGlobalPhase = getSchemasWithGlobalPhase(deployState);
+ this.app = deployState.getApplicationPackage();
this.owningCluster = cluster;
owningCluster.addComponent(Component.fromClassAndBundle(CompiledQueryProfileRegistry.class, SEARCH_AND_DOCPROC_BUNDLE));
@@ -96,6 +99,9 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains>
if ( ! schemasWithGlobalPhase.contains(documentDb.getSchemaName())) continue;
var factory = new RankProfilesEvaluatorComponent(documentDb);
if ( ! owningCluster.getComponentsMap().containsKey(factory.getComponentId())) {
+ var onnxModels = documentDb.getDerivedConfiguration().getRankProfileList().getOnnxModels();
+ onnxModels.asMap().forEach(
+ (__, model) -> owningCluster.onnxModelCost().registerModel(app.getFile(model.getFilePath())));
owningCluster.addComponent(factory);
}
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
index 35b0213bf59..1874b5fa19a 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
@@ -39,6 +39,8 @@ import com.yahoo.container.jdisc.DataplaneProxyService;
import com.yahoo.container.logging.AccessLog;
import com.yahoo.container.logging.FileConnectionLog;
import com.yahoo.io.IOUtils;
+import com.yahoo.jdisc.http.filter.security.cloud.config.CloudTokenDataPlaneFilterConfig;
+import com.yahoo.jdisc.http.filter.security.cloud.config.CloudTokenDataPlaneFilterConfig.Builder;
import com.yahoo.jdisc.http.server.jetty.DataplaneProxyCredentials;
import com.yahoo.jdisc.http.server.jetty.VoidRequestLog;
import com.yahoo.osgi.provider.model.ComponentModel;
@@ -68,7 +70,6 @@ import com.yahoo.vespa.model.container.Container;
import com.yahoo.vespa.model.container.ContainerCluster;
import com.yahoo.vespa.model.container.ContainerModel;
import com.yahoo.vespa.model.container.ContainerModelEvaluation;
-import com.yahoo.vespa.model.container.ContainerThreadpool;
import com.yahoo.vespa.model.container.DataplaneProxy;
import com.yahoo.vespa.model.container.IdentityProvider;
import com.yahoo.vespa.model.container.PlatformBundles;
@@ -240,10 +241,9 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
private void addParameterStoreValidationHandler(ApplicationContainerCluster cluster, DeployState deployState) {
+ if ( ! deployState.isHosted()) return;
// Always add platform bundle. Cannot be controlled by a feature flag as platform bundle cannot change.
- if(deployState.isHosted()) {
- cluster.addPlatformBundle(PlatformBundles.absoluteBundlePath("jdisc-cloud-aws"));
- }
+ cluster.addPlatformBundle(PlatformBundles.absoluteBundlePath("jdisc-cloud-aws"));
if (deployState.zone().system().isPublic()) {
BindingPattern bindingPattern = SystemBindingPattern.fromHttpPath("/validate-secret-store");
Handler handler = new Handler(
@@ -459,7 +459,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
private static void addCloudDataPlaneFilter(DeployState deployState, ApplicationContainerCluster cluster) {
if (!deployState.isHosted() || !deployState.zone().system().isPublic()) return;
- var dataplanePort = getMtlsDataplanePort(deployState, cluster);
+ var dataplanePort = getMtlsDataplanePort(deployState);
// Setup secure filter chain
var secureChain = new HttpFilterChain("cloud-data-plane-secure", HttpFilterChain.Type.SYSTEM);
secureChain.addInnerComponent(new CloudDataPlaneFilter(cluster, deployState));
@@ -594,7 +594,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
String serverName = server.getComponentId().getName();
// If the deployment contains certificate/private key reference, setup TLS port
- var builder = HostedSslConnectorFactory.builder(serverName, getMtlsDataplanePort(state, cluster))
+ var builder = HostedSslConnectorFactory.builder(serverName, getMtlsDataplanePort(state))
.proxyProtocol(true, state.getProperties().featureFlags().enableProxyProtocolMixedMode())
.tlsCiphersOverride(state.getProperties().tlsCiphersOverride())
.endpointConnectionTtl(state.getProperties().endpointConnectionTtl());
@@ -627,19 +627,19 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
private void addCloudTokenSupport(DeployState state, ApplicationContainerCluster cluster) {
var server = cluster.getHttp().getHttpServer().get();
- if (!enableTokenSupport(state, cluster)) return;
+ if (!enableTokenSupport(state)) return;
Set<String> tokenEndpoints = tokenEndpoints(state).stream()
.map(ContainerEndpoint::names)
.flatMap(Collection::stream)
.collect(Collectors.toSet());
var endpointCert = state.endpointCertificateSecrets().orElseThrow();
- int tokenPort = getTokenDataplanePort(state, cluster).orElseThrow();
+ int tokenPort = getTokenDataplanePort(state).orElseThrow();
// Set up component to generate proxy cert if token support is enabled
cluster.addSimpleComponent(DataplaneProxyCredentials.class);
cluster.addSimpleComponent(DataplaneProxyService.class);
var dataplaneProxy = new DataplaneProxy(
- getMtlsDataplanePort(state, cluster),
+ getMtlsDataplanePort(state),
tokenPort,
endpointCert.certificate(),
endpointCert.key(),
@@ -659,13 +659,24 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
// Setup token filter chain
var tokenChain = new HttpFilterChain("cloud-token-data-plane-secure", HttpFilterChain.Type.SYSTEM);
- tokenChain.addInnerComponent(new CloudTokenDataPlaneFilter(cluster, state));
+ var tokenFilter = new CloudTokenDataPlaneFilter(cluster, state);
+ tokenChain.addInnerComponent(tokenFilter);
cluster.getHttp().getFilterChains().add(tokenChain);
// Set as default filter for token port
cluster.getHttp().getHttpServer().orElseThrow().getConnectorFactories().stream()
.filter(c -> c.getListenPort() == tokenPort).findAny().orElseThrow()
.setDefaultRequestFilterChain(tokenChain.getComponentId());
+
+ // Set up a handler that reports which fingerprints are known to the container
+ class CloudTokenDataPlaneHandler extends Handler implements CloudTokenDataPlaneFilterConfig.Producer {
+ CloudTokenDataPlaneHandler() {
+ super(new ComponentModel("com.yahoo.jdisc.http.filter.security.cloud.CloudTokenDataPlaneHandler", null, "jdisc-security-filters", null));
+ addServerBindings(SystemBindingPattern.fromHttpPortAndPath(Defaults.getDefaults().vespaWebServicePort(), "/data-plane-tokens/v1"));
+ }
+ @Override public void getConfig(Builder builder) { tokenFilter.getConfig(builder); }
+ }
+ cluster.addComponent(new CloudTokenDataPlaneHandler());
}
// Returns the client certificates of the clients defined for an application cluster
@@ -710,7 +721,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
}
private Http buildHttp(DeployState deployState, ApplicationContainerCluster cluster, Element httpElement, ConfigModelContext context) {
- Http http = new HttpBuilder(portBindingOverride(deployState, context, cluster)).build(deployState, cluster, httpElement);
+ Http http = new HttpBuilder(portBindingOverride(deployState, context)).build(deployState, cluster, httpElement);
if (networking == Networking.disable)
http.removeAllServers();
@@ -778,6 +789,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
!container.getHostResource().realResources().gpuResources().isZero());
onnxModel.setGpuDevice(gpuDevice, hasGpu);
}
+ cluster.onnxModelCost().registerModel(context.getApplicationPackage().getFile(onnxModel.getFilePath()));
}
cluster.setModelEvaluation(new ContainerModelEvaluation(cluster, profiles, models));
@@ -815,7 +827,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
cluster.addSearchAndDocprocBundles();
addIncludes(processingElement);
cluster.setProcessingChains(new DomProcessingBuilder(null).build(deployState, cluster, processingElement),
- serverBindings(deployState, context, processingElement, ProcessingChains.defaultBindings, cluster).toArray(BindingPattern[]::new));
+ serverBindings(deployState, context, processingElement, ProcessingChains.defaultBindings).toArray(BindingPattern[]::new));
validateAndAddConfiguredComponents(deployState, cluster, processingElement, "renderer", ContainerModelBuilder::validateRendererElement);
}
@@ -840,7 +852,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
private void addUserHandlers(DeployState deployState, ApplicationContainerCluster cluster, Element spec, ConfigModelContext context) {
for (Element component: XML.getChildren(spec, "handler")) {
cluster.addComponent(
- new DomHandlerBuilder(cluster, portBindingOverride(deployState, context, cluster)).build(deployState, cluster, component));
+ new DomHandlerBuilder(cluster, portBindingOverride(deployState, context)).build(deployState, cluster, component));
}
}
@@ -1128,28 +1140,28 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
private void addSearchHandler(DeployState deployState, ApplicationContainerCluster cluster, Element searchElement, ConfigModelContext context) {
var bindingPatterns = List.<BindingPattern>of(SearchHandler.DEFAULT_BINDING);
if (isHostedTenantApplication(context)) {
- bindingPatterns = SearchHandler.bindingPattern(getDataplanePorts(deployState, cluster));
+ bindingPatterns = SearchHandler.bindingPattern(getDataplanePorts(deployState));
}
- SearchHandler searchHandler = new SearchHandler(cluster,
- serverBindings(deployState, context, searchElement, bindingPatterns, cluster),
- ContainerThreadpool.UserOptions.fromXml(searchElement).orElse(null));
+ SearchHandler searchHandler = new SearchHandler(deployState, cluster,
+ serverBindings(deployState, context, searchElement, bindingPatterns),
+ searchElement);
cluster.addComponent(searchHandler);
// Add as child to SearchHandler to get the correct chains config.
searchHandler.addComponent(Component.fromClassAndBundle(SearchHandler.EXECUTION_FACTORY, PlatformBundles.SEARCH_AND_DOCPROC_BUNDLE));
}
- private List<BindingPattern> serverBindings(DeployState deployState, ConfigModelContext context, Element searchElement, Collection<BindingPattern> defaultBindings, ApplicationContainerCluster cluster) {
+ private List<BindingPattern> serverBindings(DeployState deployState, ConfigModelContext context, Element searchElement, Collection<BindingPattern> defaultBindings) {
List<Element> bindings = XML.getChildren(searchElement, "binding");
if (bindings.isEmpty())
return List.copyOf(defaultBindings);
- return toBindingList(deployState, context, bindings, cluster);
+ return toBindingList(deployState, context, bindings);
}
- private List<BindingPattern> toBindingList(DeployState deployState, ConfigModelContext context, List<Element> bindingElements, ApplicationContainerCluster cluster) {
+ private List<BindingPattern> toBindingList(DeployState deployState, ConfigModelContext context, List<Element> bindingElements) {
List<BindingPattern> result = new ArrayList<>();
- var portOverride = isHostedTenantApplication(context) ? getDataplanePorts(deployState, cluster) : Set.<Integer>of();
+ var portOverride = isHostedTenantApplication(context) ? getDataplanePorts(deployState) : Set.<Integer>of();
for (Element element: bindingElements) {
String text = element.getTextContent().trim();
if (!text.isEmpty())
@@ -1173,13 +1185,13 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
ContainerDocumentApi.HandlerOptions documentApiOptions = DocumentApiOptionsBuilder.build(documentApiElement);
Element ignoreUndefinedFields = XML.getChild(documentApiElement, "ignore-undefined-fields");
- return new ContainerDocumentApi(cluster, documentApiOptions,
- "true".equals(XML.getValue(ignoreUndefinedFields)), portBindingOverride(deployState, context, cluster));
+ return new ContainerDocumentApi(deployState, cluster, documentApiOptions,
+ "true".equals(XML.getValue(ignoreUndefinedFields)), portBindingOverride(deployState, context));
}
- private Set<Integer> portBindingOverride(DeployState deployState, ConfigModelContext context, ApplicationContainerCluster cluster) {
+ private Set<Integer> portBindingOverride(DeployState deployState, ConfigModelContext context) {
return isHostedTenantApplication(context)
- ? getDataplanePorts(deployState, cluster)
+ ? getDataplanePorts(deployState)
: Set.<Integer>of();
}
@@ -1438,18 +1450,18 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
}
- private static Set<Integer> getDataplanePorts(DeployState ds, ApplicationContainerCluster cluster) {
- var tokenPort = getTokenDataplanePort(ds, cluster);
- var mtlsPort = getMtlsDataplanePort(ds, cluster);
+ private static Set<Integer> getDataplanePorts(DeployState ds) {
+ var tokenPort = getTokenDataplanePort(ds);
+ var mtlsPort = getMtlsDataplanePort(ds);
return tokenPort.isPresent() ? Set.of(mtlsPort, tokenPort.getAsInt()) : Set.of(mtlsPort);
}
- private static int getMtlsDataplanePort(DeployState ds, ApplicationContainerCluster cluster) {
- return enableTokenSupport(ds, cluster) ? 8443 : 4443;
+ private static int getMtlsDataplanePort(DeployState ds) {
+ return enableTokenSupport(ds) ? 8443 : 4443;
}
- private static OptionalInt getTokenDataplanePort(DeployState ds, ApplicationContainerCluster cluster) {
- return enableTokenSupport(ds, cluster) ? OptionalInt.of(8444) : OptionalInt.empty();
+ private static OptionalInt getTokenDataplanePort(DeployState ds) {
+ return enableTokenSupport(ds) ? OptionalInt.of(8444) : OptionalInt.empty();
}
private static Set<ContainerEndpoint> tokenEndpoints(DeployState deployState) {
@@ -1458,7 +1470,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
.collect(Collectors.toSet());
}
- private static boolean enableTokenSupport(DeployState state, ApplicationContainerCluster cluster) {
+ private static boolean enableTokenSupport(DeployState state) {
Set<ContainerEndpoint> tokenEndpoints = tokenEndpoints(state);
return state.isHosted() && state.zone().system().isPublic() && ! tokenEndpoints.isEmpty();
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/DocumentApiOptionsBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/DocumentApiOptionsBuilder.java
index bb1d0af1db9..cdbe62720b9 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/DocumentApiOptionsBuilder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/DocumentApiOptionsBuilder.java
@@ -3,7 +3,6 @@ package com.yahoo.vespa.model.container.xml;
import com.yahoo.text.XML;
import com.yahoo.vespa.model.clients.ContainerDocumentApi;
-import com.yahoo.vespa.model.container.ContainerThreadpool;
import org.w3c.dom.Element;
import java.util.ArrayList;
@@ -19,13 +18,7 @@ public class DocumentApiOptionsBuilder {
private static final Logger log = Logger.getLogger(DocumentApiOptionsBuilder.class.getName());
public static ContainerDocumentApi.HandlerOptions build(Element spec) {
- return new ContainerDocumentApi.HandlerOptions(getBindings(spec), threadpoolOptions(spec, "http-client-api"));
- }
-
- private static ContainerThreadpool.UserOptions threadpoolOptions(Element spec, String elementName) {
- Element element = XML.getChild(spec, elementName);
- if (element == null) return null;
- return ContainerThreadpool.UserOptions.fromXml(element).orElse(null);
+ return new ContainerDocumentApi.HandlerOptions(getBindings(spec), XML.getChild(spec, "http-client-api"));
}
private static List<String> getBindings(Element spec) {
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java
index be3ca0b8aa9..14216dd8855 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java
@@ -3,7 +3,6 @@ package com.yahoo.vespa.model.container.xml;
import com.yahoo.config.ModelReference;
import com.yahoo.config.UrlReference;
-import com.yahoo.config.model.builder.xml.XmlHelper;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.text.XML;
import org.w3c.dom.Element;
@@ -88,13 +87,6 @@ public class ModelIdResolver {
}
}
-
- public static ModelReference resolveToModelReference(Element elem, DeployState state) {
- return resolveToModelReference(
- elem.getTagName(), XmlHelper.getOptionalAttribute(elem, "model-id"),
- XmlHelper.getOptionalAttribute(elem, "url"), XmlHelper.getOptionalAttribute(elem, "path"), state);
- }
-
public static ModelReference resolveToModelReference(
String paramName, Optional<String> id, Optional<String> url, Optional<String> path, DeployState state) {
if (id.isEmpty()) return createModelReference(Optional.empty(), url, path, state);
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/SearchHandler.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/SearchHandler.java
index 6cfef153fee..3cd296c1469 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/SearchHandler.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/SearchHandler.java
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.model.container.xml;
+import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.container.bundle.BundleInstantiationSpecification;
import com.yahoo.container.handler.threadpool.ContainerThreadpoolConfig;
import com.yahoo.vespa.model.container.ApplicationContainerCluster;
@@ -9,6 +10,7 @@ import com.yahoo.vespa.model.container.component.BindingPattern;
import com.yahoo.vespa.model.container.component.SystemBindingPattern;
import com.yahoo.vespa.model.container.component.chain.ProcessingHandler;
import com.yahoo.vespa.model.container.search.searchchain.SearchChains;
+import org.w3c.dom.Element;
import java.util.Collection;
import java.util.List;
@@ -30,10 +32,11 @@ class SearchHandler extends ProcessingHandler<SearchChains> {
static final BundleInstantiationSpecification HANDLER_SPEC = fromSearchAndDocproc(HANDLER_CLASSNAME);
static final BindingPattern DEFAULT_BINDING = SystemBindingPattern.fromHttpPath("/search/*");
- SearchHandler(ApplicationContainerCluster cluster,
+ SearchHandler(DeployState ds,
+ ApplicationContainerCluster cluster,
List<BindingPattern> bindings,
- ContainerThreadpool.UserOptions threadpoolOptions) {
- super(cluster.getSearchChains(), HANDLER_SPEC, new Threadpool(threadpoolOptions));
+ Element threadpoolOptions) {
+ super(cluster.getSearchChains(), HANDLER_SPEC, new Threadpool(ds, threadpoolOptions));
bindings.forEach(this::addServerBindings);
}
@@ -46,8 +49,8 @@ class SearchHandler extends ProcessingHandler<SearchChains> {
private static class Threadpool extends ContainerThreadpool {
- Threadpool(UserOptions options) {
- super("search-handler", options);
+ Threadpool(DeployState ds, Element options) {
+ super(ds, "search-handler", options);
}
@Override