diff options
author | bjormel <bjormel@yahooinc.com> | 2023-10-01 12:23:12 +0000 |
---|---|---|
committer | bjormel <bjormel@yahooinc.com> | 2023-10-01 12:23:12 +0000 |
commit | e9058b555d4dfea2f6c872d9a677e8678b569569 (patch) | |
tree | fa1b67c6e39712c1e0d9f308b0dd55573b43f913 /config-model/src/main/java/com/yahoo/vespa/model/container | |
parent | 0ad931fa86658904fe9212b014d810236b0e00e4 (diff) | |
parent | 16030193ec04ee41e98779a3d7ee6a6c1d0d0d6f (diff) |
Merge branch 'master' into bjormel/aws-main-controller
Diffstat (limited to 'config-model/src/main/java/com/yahoo/vespa/model/container')
16 files changed, 300 insertions, 199 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java index b9021912244..ac679cc406c 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java @@ -8,14 +8,14 @@ import com.yahoo.component.ComponentId; import com.yahoo.component.ComponentSpecification; import com.yahoo.config.FileReference; import com.yahoo.config.application.api.ComponentInfo; +import com.yahoo.config.application.api.DeployLogger; import com.yahoo.config.model.api.ApplicationClusterEndpoint; import com.yahoo.config.model.api.ApplicationClusterInfo; -import com.yahoo.config.model.api.ContainerEndpoint; import com.yahoo.config.model.api.Model; +import com.yahoo.config.model.api.OnnxModelCost; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.config.model.producer.TreeConfigProducer; import com.yahoo.config.provision.AllocatedHosts; -import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.HostSpec; import com.yahoo.container.bundle.BundleInstantiationSpecification; import com.yahoo.container.di.config.ApplicationBundlesConfig; @@ -43,10 +43,12 @@ import com.yahoo.vespa.model.filedistribution.UserConfiguredFiles; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Optional; import java.util.Set; +import java.util.logging.Level; import java.util.stream.Collectors; import static com.yahoo.vespa.model.container.docproc.DocprocChains.DOCUMENT_TYPE_MANAGER_CLASS; @@ -82,6 +84,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat private final Set<FileReference> applicationBundles = new LinkedHashSet<>(); private final Set<String> previousHosts; + private final OnnxModelCost.Calculator onnxModelCost; + private final DeployLogger logger; private ContainerModelEvaluation modelEvaluation; @@ -92,6 +96,7 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat private int zookeeperSessionTimeoutSeconds = 30; private final int transport_events_before_wakeup; private final int transport_connections_per_target; + private final boolean dynamicHeapSize; /** The heap size % of total memory available to the JVM process. */ private final int heapSizePercentageOfAvailableMemory; @@ -100,9 +105,12 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat private List<ApplicationClusterEndpoint> endpoints = List.of(); + private final UserConfiguredUrls userConfiguredUrls = new UserConfiguredUrls(); + public ApplicationContainerCluster(TreeConfigProducer<?> parent, String configSubId, String clusterId, DeployState deployState) { super(parent, configSubId, clusterId, deployState, true, 10); this.tlsClientAuthority = deployState.tlsClientAuthority(); + dynamicHeapSize = deployState.featureFlags().dynamicHeapSize(); previousHosts = Collections.unmodifiableSet(deployState.getPreviousModel().stream() .map(Model::allocatedHosts) .map(AllocatedHosts::getHosts) @@ -125,8 +133,13 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat heapSizePercentageOfAvailableMemory = deployState.featureFlags().heapSizePercentage() > 0 ? Math.min(99, deployState.featureFlags().heapSizePercentage()) : defaultHeapSizePercentageOfAvailableMemory; + onnxModelCost = deployState.onnxModelCost().newCalculator( + deployState.getApplicationPackage(), deployState.getDeployLogger()); + logger = deployState.getDeployLogger(); } + public UserConfiguredUrls userConfiguredUrls() { return userConfiguredUrls; } + @Override protected void doPrepare(DeployState deployState) { super.doPrepare(deployState); @@ -147,7 +160,10 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat if (containers.isEmpty()) return; // Files referenced from user configs to all components. - UserConfiguredFiles files = new UserConfiguredFiles(deployState.getFileRegistry(), deployState.getDeployLogger()); + UserConfiguredFiles files = new UserConfiguredFiles(deployState.getFileRegistry(), + deployState.getDeployLogger(), + deployState.featureFlags(), + userConfiguredUrls); for (Component<?, ?> component : getAllComponents()) { files.register(component); } @@ -182,19 +198,25 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat public void setMemoryPercentage(Integer memoryPercentage) { this.memoryPercentage = memoryPercentage; } @Override - public Optional<Integer> getMemoryPercentage() { - if (memoryPercentage != null) return Optional.of(memoryPercentage); + public Optional<JvmMemoryPercentage> getMemoryPercentage() { + if (memoryPercentage != null) return Optional.of(JvmMemoryPercentage.of(memoryPercentage)); if (isHostedVespa()) { int availableMemoryPercentage = getHostClusterId().isPresent() ? heapSizePercentageOfTotalAvailableMemoryWhenCombinedCluster : heapSizePercentageOfAvailableMemory; - if (getContainers().isEmpty()) return Optional.of(availableMemoryPercentage); // Node memory is not known + if (getContainers().isEmpty()) return Optional.of(JvmMemoryPercentage.of(availableMemoryPercentage)); // Node memory is not known // Node memory is known so convert available memory percentage to node memory percentage - double totalMemory = getContainers().get(0).getHostResource().realResources().memoryGb(); - double availableMemory = totalMemory - Host.memoryOverheadGb; - return Optional.of((int) (availableMemory / totalMemory * availableMemoryPercentage)); + double totalMemory = dynamicHeapSize + ? getContainers().stream().mapToDouble(c -> c.getHostResource().realResources().memoryGb()).min().orElseThrow() + : getContainers().get(0).getHostResource().realResources().memoryGb(); + double jvmHeapDeductionGb = dynamicHeapSize ? onnxModelCost.aggregatedModelCostInBytes() / (1024D * 1024 * 1024) : 0; + double availableMemory = Math.max(0, totalMemory - Host.memoryOverheadGb - jvmHeapDeductionGb); + int memoryPercentage = (int) (availableMemory / totalMemory * availableMemoryPercentage); + logger.log(Level.FINE, () -> "memoryPercentage=%d, availableMemory=%f, totalMemory=%f, availableMemoryPercentage=%d, jvmHeapDeductionGb=%f" + .formatted(memoryPercentage, availableMemory, totalMemory, availableMemoryPercentage, jvmHeapDeductionGb)); + return Optional.of(JvmMemoryPercentage.of(memoryPercentage, availableMemory)); } return Optional.empty(); } @@ -203,49 +225,23 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat private void createEndpoints(DeployState deployState) { if (!deployState.isHosted()) return; if (deployState.getProperties().applicationId().instance().isTester()) return; + // Add endpoints provided by the controller + List<String> hosts = getContainers().stream().map(AbstractService::getHostName).sorted().toList(); List<ApplicationClusterEndpoint> endpoints = new ArrayList<>(); - - List<String> hosts = getContainers().stream() - .map(AbstractService::getHostName) - .sorted() - .toList(); - - Set<ContainerEndpoint> endpointsFromController = deployState.getEndpoints(); - // Add zone-scoped endpoints if not provided by the controller - // TODO(mpolden): Remove this when controller always includes zone-scope endpoints, and config models < 8.230 are gone - if (endpointsFromController.stream().noneMatch(endpoint -> endpoint.scope() == ApplicationClusterEndpoint.Scope.zone)) { - for (String suffix : deployState.getProperties().zoneDnsSuffixes()) { - ApplicationClusterEndpoint.DnsName l4Name = ApplicationClusterEndpoint.DnsName.sharedL4NameFrom( - deployState.zone().system(), - ClusterSpec.Id.from(getName()), - deployState.getProperties().applicationId(), - suffix); - endpoints.add(ApplicationClusterEndpoint.builder() - .zoneScope() - .sharedL4Routing() - .dnsName(l4Name) - .hosts(hosts) - .clusterId(getName()) - .authMethod(ApplicationClusterEndpoint.AuthMethod.mtls) - .build()); - } - } - - // Include all endpoints provided by controller - endpointsFromController.stream() - .filter(ce -> ce.clusterId().equals(getName())) - .forEach(ce -> ce.names().forEach( - name -> endpoints.add(ApplicationClusterEndpoint.builder() - .scope(ce.scope()) - .weight(ce.weight().orElse(1)) // Default to weight=1 if not set - .routingMethod(ce.routingMethod()) - .dnsName(ApplicationClusterEndpoint.DnsName.from(name)) - .hosts(hosts) - .clusterId(getName()) - .authMethod(ce.authMethod()) - .build()) - )); - this.endpoints = List.copyOf(endpoints); + deployState.getEndpoints().stream() + .filter(ce -> ce.clusterId().equals(getName())) + .forEach(ce -> ce.names().forEach( + name -> endpoints.add(ApplicationClusterEndpoint.builder() + .scope(ce.scope()) + .weight(ce.weight().orElse(1)) + .routingMethod(ce.routingMethod()) + .dnsName(ApplicationClusterEndpoint.DnsName.from(name)) + .hosts(hosts) + .clusterId(getName()) + .authMethod(ce.authMethod()) + .build()) + )); + this.endpoints = Collections.unmodifiableList(endpoints); } @Override @@ -299,12 +295,15 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat @Override public void getConfig(QrStartConfig.Builder builder) { super.getConfig(builder); + var memoryPct = getMemoryPercentage().orElse(null); + int heapsize = memoryPct != null && memoryPct.availableMemoryGb().isPresent() + ? (int) (memoryPct.availableMemoryGb().getAsDouble() * 1024) : 1536; builder.jvm.verbosegc(true) .availableProcessors(0) .compressedClassSpaceSize(0) - .minHeapsize(1536) - .heapsize(1536); - getMemoryPercentage().ifPresent(percentage -> builder.jvm.heapSizeAsPercentageOfPhysicalMemory(percentage)); + .minHeapsize(heapsize) + .heapsize(heapsize); + if (memoryPct != null) builder.jvm.heapSizeAsPercentageOfPhysicalMemory(memoryPct.percentage()); } @Override @@ -373,6 +372,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat @Override public String name() { return getName(); } + public OnnxModelCost.Calculator onnxModelCost() { return onnxModelCost; } + public static class MbusParams { // the amount of the maxpendingbytes to process concurrently, typically 0.2 (20%) final Double maxConcurrentFactor; @@ -390,4 +391,14 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat } } + public static class UserConfiguredUrls { + + private final Set<String> urls = new HashSet<>(); + + public void add(String url) { urls.add(url); } + + public Set<String> all() { return urls; } + + } + } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java index 6bbc24e8739..3d4ec51c8d2 100755 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerCluster.java @@ -62,6 +62,7 @@ import com.yahoo.vespa.model.container.search.ContainerSearch; import com.yahoo.vespa.model.container.search.searchchain.SearchChains; import com.yahoo.vespa.model.content.Content; import com.yahoo.vespa.model.search.SearchCluster; + import java.nio.file.Path; import java.util.ArrayList; import java.util.Collection; @@ -71,6 +72,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.OptionalDouble; import java.util.Set; import java.util.TreeSet; @@ -142,7 +144,7 @@ public abstract class ContainerCluster<CONTAINER extends Container> private ContainerDocproc containerDocproc; private ContainerDocumentApi containerDocumentApi; private SecretStore secretStore; - private final ContainerThreadpool defaultHandlerThreadpool = new Handler.DefaultHandlerThreadpool(); + private final ContainerThreadpool defaultHandlerThreadpool; private boolean rpcServerEnabled = true; private boolean httpServerEnabled = true; @@ -185,6 +187,7 @@ public abstract class ContainerCluster<CONTAINER extends Container> addCommonVespaBundles(); addSimpleComponent(VoidRequestLog.class); addComponent(new DefaultThreadpoolProvider(this, defaultPoolNumThreads)); + defaultHandlerThreadpool = new Handler.DefaultHandlerThreadpool(deployState, null); addComponent(defaultHandlerThreadpool); addSimpleComponent(com.yahoo.concurrent.classlock.ClassLocking.class); addSimpleComponent("com.yahoo.container.jdisc.metric.MetricConsumerProviderProvider"); @@ -718,5 +721,11 @@ public abstract class ContainerCluster<CONTAINER extends Container> * Returns the percentage of host physical memory this application has specified for nodes in this cluster, * or empty if this is not specified by the application. */ - public Optional<Integer> getMemoryPercentage() { return Optional.empty(); } + public record JvmMemoryPercentage(int percentage, OptionalDouble availableMemoryGb) { + static JvmMemoryPercentage of(int percentage) { return new JvmMemoryPercentage(percentage, OptionalDouble.empty()); } + static JvmMemoryPercentage of(int percentage, double availableMemoryGb) { + return new JvmMemoryPercentage(percentage, OptionalDouble.of(availableMemoryGb)); + } + } + public Optional<JvmMemoryPercentage> getMemoryPercentage() { return Optional.empty(); } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java index 906ef739ef1..1b47f59653e 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java @@ -45,10 +45,6 @@ public class ContainerModelEvaluation implements private final RankProfileList rankProfileList; private final FileDistributedOnnxModels onnxModels; // For cluster specific ONNX model settings - public ContainerModelEvaluation(ApplicationContainerCluster cluster, RankProfileList rankProfileList) { - this(cluster, rankProfileList, null); - } - public ContainerModelEvaluation(ApplicationContainerCluster cluster, RankProfileList rankProfileList, FileDistributedOnnxModels onnxModels) { this.rankProfileList = Objects.requireNonNull(rankProfileList, "rankProfileList cannot be null"); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerThreadpool.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerThreadpool.java index fb4e62f5cd1..4b85c384951 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerThreadpool.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerThreadpool.java @@ -1,16 +1,17 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.model.container; +import com.yahoo.config.model.builder.xml.XmlHelper; +import com.yahoo.config.model.deploy.DeployState; import com.yahoo.container.bundle.BundleInstantiationSpecification; import com.yahoo.container.handler.threadpool.ContainerThreadPool; import com.yahoo.container.handler.threadpool.ContainerThreadpoolConfig; import com.yahoo.container.handler.threadpool.ContainerThreadpoolImpl; import com.yahoo.osgi.provider.model.ComponentModel; -import com.yahoo.text.XML; import com.yahoo.vespa.model.container.component.SimpleComponent; import org.w3c.dom.Element; -import java.util.Optional; +import java.util.logging.Level; /** * Component definition for a {@link java.util.concurrent.Executor} using {@link ContainerThreadPool}. @@ -20,18 +21,50 @@ import java.util.Optional; public abstract class ContainerThreadpool extends SimpleComponent implements ContainerThreadpoolConfig.Producer { private final String name; - private final UserOptions userOptions; + private final UserOptions options; - public ContainerThreadpool(String name, UserOptions userOptions) { + record UserOptions(Double max, Double min, Double queue){} + + protected ContainerThreadpool(DeployState ds, String name, Element parent) { super(new ComponentModel( BundleInstantiationSpecification.fromStrings( "threadpool@" + name, ContainerThreadpoolImpl.class.getName(), null))); this.name = name; - this.userOptions = userOptions; + var threadpoolElem = XmlHelper.getOptionalChild(parent, "threadpool").orElse(null); + if (threadpoolElem == null) options = new UserOptions(null, null, null); + else { + // TODO Vespa 9 Remove min-threads, max-threads and queue-size + Double max = null; + Double min = null; + Double queue = null; + var minElem = XmlHelper.getOptionalChild(threadpoolElem, "min-threads").orElse(null); + if (minElem != null) ds.getDeployLogger().logApplicationPackage(Level.WARNING, "For <threadpool>: <min-threads> is deprecated, use <threads> instead"); + var maxElem = XmlHelper.getOptionalChild(threadpoolElem, "max-threads").orElse(null); + if (maxElem != null) ds.getDeployLogger().logApplicationPackage(Level.WARNING, "For <threadpool>: <max-threads> is deprecated, use <threads> with 'boost' instead"); + var queueElem = XmlHelper.getOptionalChild(threadpoolElem, "queue").orElse(null); + var queueSizeElem = XmlHelper.getOptionalChild(threadpoolElem, "queue-size").orElse(null); + if (queueSizeElem != null) ds.getDeployLogger().logApplicationPackage(Level.WARNING, "For <threadpool>: <queue-size> is deprecated, use <queue> instead"); + var threadsElem = XmlHelper.getOptionalChild(threadpoolElem, "threads").orElse(null); + if (threadsElem != null) { + min = parseMultiplier(threadsElem.getTextContent()); + max = threadsElem.hasAttribute("boost") ? parseMultiplier(threadsElem.getAttribute("boost")) : min; + } else if (minElem != null) { + min = parseFixed(minElem.getTextContent()); + } + if (max == null && maxElem != null) { + max = parseFixed(maxElem.getTextContent()); + } + if (queueElem != null) queue = parseMultiplier(queueElem.getTextContent()); + else if (queueSizeElem != null) queue = parseFixed(queueSizeElem.getTextContent()); + options = new UserOptions(max, min, queue); + } } + private static Double parseMultiplier(String text) { return -parseFixed(text); } + private static Double parseFixed(String text) { return Double.parseDouble(text); } + // Must be implemented by subclasses to set values that may be overridden by user options. protected abstract void setDefaultConfigValues(ContainerThreadpoolConfig.Builder builder); @@ -40,35 +73,20 @@ public abstract class ContainerThreadpool extends SimpleComponent implements Con setDefaultConfigValues(builder); builder.name(this.name); - if (userOptions != null) { - builder.maxThreads(userOptions.maxThreads); - builder.minThreads(userOptions.minThreads); - builder.queueSize(userOptions.queueSize); + if (options.max() != null) { + int max = (int) Math.round(options.max()); + if (options.max() != 0 && max == 0) max = options.max() > 0 ? 1 : -1; + builder.maxThreads(max); } - } - - public static class UserOptions { - private final int maxThreads; - private final int minThreads; - private final int queueSize; - - private UserOptions(int maxThreads, int minThreads, int queueSize) { - this.maxThreads = maxThreads; - this.minThreads = minThreads; - this.queueSize = queueSize; - } - - public static Optional<UserOptions> fromXml(Element xml) { - Element element = XML.getChild(xml, "threadpool"); - if (element == null) return Optional.empty(); - return Optional.of(new UserOptions( - intOption(element, "max-threads"), - intOption(element, "min-threads"), - intOption(element, "queue-size"))); + if (options.min() != null) { + int min = (int) Math.round(options.min()); + if (options.min() != 0 && min == 0) min = options.min() > 0 ? 1 : -1; + builder.minThreads(min); } - - private static int intOption(Element element, String name) { - return Integer.parseInt(XML.getChild(element, name).getTextContent()); + if (options.queue() != null) { + int queue = (int) Math.round(options.queue()); + if (options.queue() != 0 && queue == 0) queue = options.queue() > 0 ? 1 : -1; + builder.queueSize(queue); } } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java index 205848e1b67..d02b7d0de5f 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java @@ -5,10 +5,9 @@ package com.yahoo.vespa.model.container.component; import com.yahoo.config.ModelReference; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.embedding.BertBaseEmbedderConfig; -import com.yahoo.vespa.model.container.xml.ModelIdResolver; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; import org.w3c.dom.Element; -import static com.yahoo.text.XML.getChild; import static com.yahoo.text.XML.getChildValue; import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME; @@ -17,8 +16,8 @@ import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATI */ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConfig.Producer { - private final ModelReference model; - private final ModelReference vocab; + private final ModelReference modelRef; + private final ModelReference vocabRef; private final Integer maxTokens; private final String transformerInputIds; private final String transformerAttentionMask; @@ -33,10 +32,11 @@ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConf private final Integer onnxGpuDevice; - public BertEmbedder(Element xml, DeployState state) { + public BertEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) { super("ai.vespa.embedding.BertBaseEmbedder", INTEGRATION_BUNDLE_NAME, xml); - model = ModelIdResolver.resolveToModelReference(getChild(xml, "transformer-model"), state); - vocab = ModelIdResolver.resolveToModelReference(getChild(xml, "tokenizer-vocab"), state); + var model = Model.fromXml(state, xml, "transformer-model").orElseThrow(); + modelRef = model.modelReference(); + vocabRef = Model.fromXml(state, xml, "tokenizer-vocab").orElseThrow().modelReference(); maxTokens = getChildValue(xml, "max-tokens").map(Integer::parseInt).orElse(null); transformerInputIds = getChildValue(xml, "transformer-input-ids").orElse(null); transformerAttentionMask = getChildValue(xml, "transformer-attention-mask").orElse(null); @@ -49,11 +49,12 @@ public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConf onnxInteropThreads = getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null); onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null); onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null); + model.registerOnnxModelCost(cluster); } @Override public void getConfig(BertBaseEmbedderConfig.Builder b) { - b.transformerModel(model).tokenizerVocab(vocab); + b.transformerModel(modelRef).tokenizerVocab(vocabRef); if (maxTokens != null) b.transformerMaxTokens(maxTokens); if (transformerInputIds != null) b.transformerInputIds(transformerInputIds); if (transformerAttentionMask != null) b.transformerAttentionMask(transformerAttentionMask); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java index c0fdfe3dc64..66e3b1c9dfd 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/ColBertEmbedder.java @@ -5,13 +5,9 @@ package com.yahoo.vespa.model.container.component; import com.yahoo.config.ModelReference; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.embedding.ColBertEmbedderConfig; -import com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig; -import com.yahoo.vespa.model.container.xml.ModelIdResolver; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; import org.w3c.dom.Element; -import java.util.Optional; - -import static com.yahoo.config.model.builder.xml.XmlHelper.getOptionalChild; import static com.yahoo.text.XML.getChildValue; import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME; @@ -20,8 +16,8 @@ import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATI * @author bergum */ public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderConfig.Producer { - private final ModelReference model; - private final ModelReference vocab; + private final ModelReference modelRef; + private final ModelReference vocabRef; private final Integer maxQueryTokens; @@ -40,13 +36,13 @@ public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderCo private final Integer onnxIntraopThreads; private final Integer onnxGpuDevice; - public ColBertEmbedder(Element xml, DeployState state) { + public ColBertEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) { super("ai.vespa.embedding.ColBertEmbedder", INTEGRATION_BUNDLE_NAME, xml); - var transformerModelElem = getOptionalChild(xml, "transformer-model").orElseThrow(); - model = ModelIdResolver.resolveToModelReference(transformerModelElem, state); - vocab = getOptionalChild(xml, "tokenizer-model") - .map(elem -> ModelIdResolver.resolveToModelReference(elem, state)) - .orElseGet(() -> resolveDefaultVocab(transformerModelElem, state)); + var model = Model.fromXml(state, xml, "transformer-model").orElseThrow(); + modelRef = model.modelReference(); + vocabRef = Model.fromXml(state, xml, "tokenizer-model") + .map(Model::modelReference) + .orElseGet(() -> resolveDefaultVocab(model, state)); maxTokens = getChildValue(xml, "max-tokens").map(Integer::parseInt).orElse(null); maxQueryTokens = getChildValue(xml, "max-query-tokens").map(Integer::parseInt).orElse(null); maxDocumentTokens = getChildValue(xml, "max-document-tokens").map(Integer::parseInt).orElse(null); @@ -60,21 +56,20 @@ public class ColBertEmbedder extends TypedComponent implements ColBertEmbedderCo onnxInteropThreads = getChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null); onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null); onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null); - + model.registerOnnxModelCost(cluster); } - private static ModelReference resolveDefaultVocab(Element model, DeployState state) { - if (state.isHosted() && model.hasAttribute("model-id")) { - var implicitVocabId = model.getAttribute("model-id") + "-vocab"; - return ModelIdResolver.resolveToModelReference( - "tokenizer-model", Optional.of(implicitVocabId), Optional.empty(), Optional.empty(), state); + private static ModelReference resolveDefaultVocab(Model model, DeployState state) { + var modelId = model.modelId().orElse(null); + if (state.isHosted() && modelId != null) { + return Model.fromParams(state, model.name(), modelId + "-vocab", null, null).modelReference(); } throw new IllegalArgumentException("'tokenizer-model' must be specified"); } @Override public void getConfig(ColBertEmbedderConfig.Builder b) { - b.transformerModel(model).tokenizerPath(vocab); + b.transformerModel(modelRef).tokenizerPath(vocabRef); if (maxTokens != null) b.transformerMaxTokens(maxTokens); if (transformerInputIds != null) b.transformerInputIds(transformerInputIds); if (transformerAttentionMask != null) b.transformerAttentionMask(transformerAttentionMask); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/Handler.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/Handler.java index 31031aa5bf2..969db6553e6 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/Handler.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/Handler.java @@ -1,9 +1,11 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.model.container.component; +import com.yahoo.config.model.deploy.DeployState; import com.yahoo.container.handler.threadpool.ContainerThreadpoolConfig; import com.yahoo.osgi.provider.model.ComponentModel; import com.yahoo.vespa.model.container.ContainerThreadpool; +import org.w3c.dom.Element; import java.util.ArrayList; import java.util.Arrays; @@ -76,8 +78,8 @@ public class Handler extends Component<Component<?, ?>, ComponentModel> { */ public static class DefaultHandlerThreadpool extends ContainerThreadpool { - public DefaultHandlerThreadpool() { - super("default-handler-common", null); + public DefaultHandlerThreadpool(DeployState ds, Element options) { + super(ds, "default-handler-common", options); } @Override diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java index f4017339699..af47bee137a 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java @@ -5,12 +5,9 @@ package com.yahoo.vespa.model.container.component; import com.yahoo.config.ModelReference; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig; -import com.yahoo.vespa.model.container.xml.ModelIdResolver; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; import org.w3c.dom.Element; -import java.util.Optional; - -import static com.yahoo.config.model.builder.xml.XmlHelper.getOptionalChild; import static com.yahoo.text.XML.getChildValue; import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME; @@ -19,8 +16,8 @@ import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATI * @author bjorncs */ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEmbedderConfig.Producer { - private final ModelReference model; - private final ModelReference vocab; + private final ModelReference modelRef; + private final ModelReference vocabRef; private final Integer maxTokens; private final String transformerInputIds; private final String transformerAttentionMask; @@ -33,13 +30,13 @@ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEm private final Integer onnxGpuDevice; private final String poolingStrategy; - public HuggingFaceEmbedder(Element xml, DeployState state) { + public HuggingFaceEmbedder(ApplicationContainerCluster cluster, Element xml, DeployState state) { super("ai.vespa.embedding.huggingface.HuggingFaceEmbedder", INTEGRATION_BUNDLE_NAME, xml); - var transformerModelElem = getOptionalChild(xml, "transformer-model").orElseThrow(); - model = ModelIdResolver.resolveToModelReference(transformerModelElem, state); - vocab = getOptionalChild(xml, "tokenizer-model") - .map(elem -> ModelIdResolver.resolveToModelReference(elem, state)) - .orElseGet(() -> resolveDefaultVocab(transformerModelElem, state)); + var model = Model.fromXml(state, xml, "transformer-model").orElseThrow(); + modelRef = model.modelReference(); + vocabRef = Model.fromXml(state, xml, "tokenizer-model") + .map(Model::modelReference) + .orElseGet(() -> resolveDefaultVocab(model, state)); maxTokens = getChildValue(xml, "max-tokens").map(Integer::parseInt).orElse(null); transformerInputIds = getChildValue(xml, "transformer-input-ids").orElse(null); transformerAttentionMask = getChildValue(xml, "transformer-attention-mask").orElse(null); @@ -51,20 +48,20 @@ public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEm onnxIntraopThreads = getChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null); onnxGpuDevice = getChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null); poolingStrategy = getChildValue(xml, "pooling-strategy").orElse(null); + model.registerOnnxModelCost(cluster); } - private static ModelReference resolveDefaultVocab(Element model, DeployState state) { - if (state.isHosted() && model.hasAttribute("model-id")) { - var implicitVocabId = model.getAttribute("model-id") + "-vocab"; - return ModelIdResolver.resolveToModelReference( - "tokenizer-model", Optional.of(implicitVocabId), Optional.empty(), Optional.empty(), state); + private static ModelReference resolveDefaultVocab(Model model, DeployState state) { + var modelId = model.modelId().orElse(null); + if (state.isHosted() && modelId != null) { + return Model.fromParams(state, model.name(), modelId + "-vocab", null, null).modelReference(); } throw new IllegalArgumentException("'tokenizer-model' must be specified"); } @Override public void getConfig(HuggingFaceEmbedderConfig.Builder b) { - b.transformerModel(model).tokenizerPath(vocab); + b.transformerModel(modelRef).tokenizerPath(vocabRef); if (maxTokens != null) b.transformerMaxTokens(maxTokens); if (transformerInputIds != null) b.transformerInputIds(transformerInputIds); if (transformerAttentionMask != null) b.transformerAttentionMask(transformerAttentionMask); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java index 0bf5491e872..e9ac93caa68 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java @@ -7,7 +7,6 @@ import com.yahoo.language.huggingface.config.HuggingFaceTokenizerConfig; import com.yahoo.language.huggingface.config.HuggingFaceTokenizerConfig.Padding; import com.yahoo.language.huggingface.config.HuggingFaceTokenizerConfig.Truncation; import com.yahoo.text.XML; -import com.yahoo.vespa.model.container.xml.ModelIdResolver; import org.w3c.dom.Element; import java.util.Map; @@ -26,7 +25,7 @@ public class HuggingFaceTokenizer extends TypedComponent implements HuggingFaceT super("com.yahoo.language.huggingface.HuggingFaceTokenizer", LINGUISTICS_BUNDLE_NAME, xml); for (Element element : XML.getChildren(xml, "model")) { var lang = element.hasAttribute("language") ? element.getAttribute("language") : "unknown"; - langToModel.put(lang, ModelIdResolver.resolveToModelReference(element, state)); + langToModel.put(lang, Model.fromXml(state, element).modelReference()); } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/Model.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/Model.java new file mode 100644 index 00000000000..76d93c38aee --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/Model.java @@ -0,0 +1,69 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.model.container.component; + +import com.yahoo.config.ModelReference; +import com.yahoo.config.application.api.ApplicationFile; +import com.yahoo.config.model.builder.xml.XmlHelper; +import com.yahoo.config.model.deploy.DeployState; +import com.yahoo.path.Path; +import com.yahoo.vespa.model.container.ApplicationContainerCluster; +import com.yahoo.vespa.model.container.xml.ModelIdResolver; +import org.w3c.dom.Element; + +import java.net.URI; +import java.util.Objects; +import java.util.Optional; + +/** + * Represents a model, e.g ONNX model for an embedder. + * + * @author bjorncs + */ +class Model { + private final String paramName; + private final String modelId; + private final URI url; + private final ApplicationFile file; + private final ModelReference ref; + + private Model(DeployState ds, String paramName, String modelId, URI url, Path file) { + this.paramName = Objects.requireNonNull(paramName); + if (modelId == null && url == null && file == null) + throw new IllegalArgumentException("At least one of 'model-id', 'url' or 'path' must be specified"); + this.modelId = modelId; + this.url = url; + this.file = file != null ? ds.getApplicationPackage().getFile(file) : null; + this.ref = ModelIdResolver.resolveToModelReference( + paramName, Optional.ofNullable(modelId), Optional.ofNullable(url).map(URI::toString), + Optional.ofNullable(file).map(Path::toString), ds); + } + + static Model fromParams(DeployState ds, String paramName, String modelId, URI url, Path file) { + return new Model(ds, paramName, modelId, url, file); + } + + static Optional<Model> fromXml(DeployState ds, Element parent, String name) { + return XmlHelper.getOptionalChild(parent, name).map(e -> fromXml(ds, e)); + } + + static Model fromXml(DeployState ds, Element model) { + var modelId = XmlHelper.getOptionalAttribute(model, "model-id").orElse(null); + var url = XmlHelper.getOptionalAttribute(model, "url").map(URI::create).orElse(null); + var path = XmlHelper.getOptionalAttribute(model, "path").map(Path::fromString).orElse(null); + return new Model(ds, model.getTagName(), modelId, url, path); + } + + void registerOnnxModelCost(ApplicationContainerCluster c) { + var resolvedUrl = resolvedUrl().orElse(null); + if (file != null) c.onnxModelCost().registerModel(file); + else if (resolvedUrl != null) c.onnxModelCost().registerModel(resolvedUrl); + } + + String name() { return paramName; } + Optional<String> modelId() { return Optional.ofNullable(modelId); } + Optional<URI> url() { return Optional.ofNullable(url); } + Optional<URI> resolvedUrl() { return ref.url().map(u -> URI.create(u.value())); } + Optional<ApplicationFile> file() { return Optional.ofNullable(file); } + ModelReference modelReference() { return ref; } +} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/chain/ChainedComponent.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/chain/ChainedComponent.java index c0431d01784..2354298779d 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/component/chain/ChainedComponent.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/chain/ChainedComponent.java @@ -29,8 +29,6 @@ public class ChainedComponent<T extends ChainedComponentModel> extends Component private ComponentId namespace() { var owner = getParent().getParent(); - return (owner instanceof Chain) ? - ((Chain) owner).getGlobalComponentId() : - null; + return (owner instanceof Chain<?> chain) ? chain.getGlobalComponentId() : null; } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java index f0296d49472..3261d454b4f 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/search/ContainerSearch.java @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.model.container.search; +import com.yahoo.config.application.api.ApplicationPackage; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.container.QrSearchersConfig; import com.yahoo.prelude.semantics.SemanticRulesConfig; @@ -56,12 +57,14 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains> private QueryProfiles queryProfiles; private SemanticRules semanticRules; private PageTemplates pageTemplates; + private ApplicationPackage app; public ContainerSearch(DeployState deployState, ApplicationContainerCluster cluster, SearchChains chains) { super(chains); this.globalPhase = deployState.featureFlags().enableGlobalPhase(); this.useReconfigurableDispatcher = deployState.featureFlags().useReconfigurableDispatcher(); this.schemasWithGlobalPhase = getSchemasWithGlobalPhase(deployState); + this.app = deployState.getApplicationPackage(); this.owningCluster = cluster; owningCluster.addComponent(Component.fromClassAndBundle(CompiledQueryProfileRegistry.class, SEARCH_AND_DOCPROC_BUNDLE)); @@ -96,6 +99,9 @@ public class ContainerSearch extends ContainerSubsystem<SearchChains> if ( ! schemasWithGlobalPhase.contains(documentDb.getSchemaName())) continue; var factory = new RankProfilesEvaluatorComponent(documentDb); if ( ! owningCluster.getComponentsMap().containsKey(factory.getComponentId())) { + var onnxModels = documentDb.getDerivedConfiguration().getRankProfileList().getOnnxModels(); + onnxModels.asMap().forEach( + (__, model) -> owningCluster.onnxModelCost().registerModel(app.getFile(model.getFilePath()))); owningCluster.addComponent(factory); } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java index 35b0213bf59..1874b5fa19a 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java @@ -39,6 +39,8 @@ import com.yahoo.container.jdisc.DataplaneProxyService; import com.yahoo.container.logging.AccessLog; import com.yahoo.container.logging.FileConnectionLog; import com.yahoo.io.IOUtils; +import com.yahoo.jdisc.http.filter.security.cloud.config.CloudTokenDataPlaneFilterConfig; +import com.yahoo.jdisc.http.filter.security.cloud.config.CloudTokenDataPlaneFilterConfig.Builder; import com.yahoo.jdisc.http.server.jetty.DataplaneProxyCredentials; import com.yahoo.jdisc.http.server.jetty.VoidRequestLog; import com.yahoo.osgi.provider.model.ComponentModel; @@ -68,7 +70,6 @@ import com.yahoo.vespa.model.container.Container; import com.yahoo.vespa.model.container.ContainerCluster; import com.yahoo.vespa.model.container.ContainerModel; import com.yahoo.vespa.model.container.ContainerModelEvaluation; -import com.yahoo.vespa.model.container.ContainerThreadpool; import com.yahoo.vespa.model.container.DataplaneProxy; import com.yahoo.vespa.model.container.IdentityProvider; import com.yahoo.vespa.model.container.PlatformBundles; @@ -240,10 +241,9 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { private void addParameterStoreValidationHandler(ApplicationContainerCluster cluster, DeployState deployState) { + if ( ! deployState.isHosted()) return; // Always add platform bundle. Cannot be controlled by a feature flag as platform bundle cannot change. - if(deployState.isHosted()) { - cluster.addPlatformBundle(PlatformBundles.absoluteBundlePath("jdisc-cloud-aws")); - } + cluster.addPlatformBundle(PlatformBundles.absoluteBundlePath("jdisc-cloud-aws")); if (deployState.zone().system().isPublic()) { BindingPattern bindingPattern = SystemBindingPattern.fromHttpPath("/validate-secret-store"); Handler handler = new Handler( @@ -459,7 +459,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { private static void addCloudDataPlaneFilter(DeployState deployState, ApplicationContainerCluster cluster) { if (!deployState.isHosted() || !deployState.zone().system().isPublic()) return; - var dataplanePort = getMtlsDataplanePort(deployState, cluster); + var dataplanePort = getMtlsDataplanePort(deployState); // Setup secure filter chain var secureChain = new HttpFilterChain("cloud-data-plane-secure", HttpFilterChain.Type.SYSTEM); secureChain.addInnerComponent(new CloudDataPlaneFilter(cluster, deployState)); @@ -594,7 +594,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { String serverName = server.getComponentId().getName(); // If the deployment contains certificate/private key reference, setup TLS port - var builder = HostedSslConnectorFactory.builder(serverName, getMtlsDataplanePort(state, cluster)) + var builder = HostedSslConnectorFactory.builder(serverName, getMtlsDataplanePort(state)) .proxyProtocol(true, state.getProperties().featureFlags().enableProxyProtocolMixedMode()) .tlsCiphersOverride(state.getProperties().tlsCiphersOverride()) .endpointConnectionTtl(state.getProperties().endpointConnectionTtl()); @@ -627,19 +627,19 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { private void addCloudTokenSupport(DeployState state, ApplicationContainerCluster cluster) { var server = cluster.getHttp().getHttpServer().get(); - if (!enableTokenSupport(state, cluster)) return; + if (!enableTokenSupport(state)) return; Set<String> tokenEndpoints = tokenEndpoints(state).stream() .map(ContainerEndpoint::names) .flatMap(Collection::stream) .collect(Collectors.toSet()); var endpointCert = state.endpointCertificateSecrets().orElseThrow(); - int tokenPort = getTokenDataplanePort(state, cluster).orElseThrow(); + int tokenPort = getTokenDataplanePort(state).orElseThrow(); // Set up component to generate proxy cert if token support is enabled cluster.addSimpleComponent(DataplaneProxyCredentials.class); cluster.addSimpleComponent(DataplaneProxyService.class); var dataplaneProxy = new DataplaneProxy( - getMtlsDataplanePort(state, cluster), + getMtlsDataplanePort(state), tokenPort, endpointCert.certificate(), endpointCert.key(), @@ -659,13 +659,24 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { // Setup token filter chain var tokenChain = new HttpFilterChain("cloud-token-data-plane-secure", HttpFilterChain.Type.SYSTEM); - tokenChain.addInnerComponent(new CloudTokenDataPlaneFilter(cluster, state)); + var tokenFilter = new CloudTokenDataPlaneFilter(cluster, state); + tokenChain.addInnerComponent(tokenFilter); cluster.getHttp().getFilterChains().add(tokenChain); // Set as default filter for token port cluster.getHttp().getHttpServer().orElseThrow().getConnectorFactories().stream() .filter(c -> c.getListenPort() == tokenPort).findAny().orElseThrow() .setDefaultRequestFilterChain(tokenChain.getComponentId()); + + // Set up handler that tells what fingerprints are known to the container + class CloudTokenDataPlaneHandler extends Handler implements CloudTokenDataPlaneFilterConfig.Producer { + CloudTokenDataPlaneHandler() { + super(new ComponentModel("com.yahoo.jdisc.http.filter.security.cloud.CloudTokenDataPlaneHandler", null, "jdisc-security-filters", null)); + addServerBindings(SystemBindingPattern.fromHttpPortAndPath(Defaults.getDefaults().vespaWebServicePort(), "/data-plane-tokens/v1")); + } + @Override public void getConfig(Builder builder) { tokenFilter.getConfig(builder); } + } + cluster.addComponent(new CloudTokenDataPlaneHandler()); } // Returns the client certificates of the clients defined for an application cluster @@ -710,7 +721,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { } private Http buildHttp(DeployState deployState, ApplicationContainerCluster cluster, Element httpElement, ConfigModelContext context) { - Http http = new HttpBuilder(portBindingOverride(deployState, context, cluster)).build(deployState, cluster, httpElement); + Http http = new HttpBuilder(portBindingOverride(deployState, context)).build(deployState, cluster, httpElement); if (networking == Networking.disable) http.removeAllServers(); @@ -778,6 +789,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { !container.getHostResource().realResources().gpuResources().isZero()); onnxModel.setGpuDevice(gpuDevice, hasGpu); } + cluster.onnxModelCost().registerModel(context.getApplicationPackage().getFile(onnxModel.getFilePath())); } cluster.setModelEvaluation(new ContainerModelEvaluation(cluster, profiles, models)); @@ -815,7 +827,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { cluster.addSearchAndDocprocBundles(); addIncludes(processingElement); cluster.setProcessingChains(new DomProcessingBuilder(null).build(deployState, cluster, processingElement), - serverBindings(deployState, context, processingElement, ProcessingChains.defaultBindings, cluster).toArray(BindingPattern[]::new)); + serverBindings(deployState, context, processingElement, ProcessingChains.defaultBindings).toArray(BindingPattern[]::new)); validateAndAddConfiguredComponents(deployState, cluster, processingElement, "renderer", ContainerModelBuilder::validateRendererElement); } @@ -840,7 +852,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { private void addUserHandlers(DeployState deployState, ApplicationContainerCluster cluster, Element spec, ConfigModelContext context) { for (Element component: XML.getChildren(spec, "handler")) { cluster.addComponent( - new DomHandlerBuilder(cluster, portBindingOverride(deployState, context, cluster)).build(deployState, cluster, component)); + new DomHandlerBuilder(cluster, portBindingOverride(deployState, context)).build(deployState, cluster, component)); } } @@ -1128,28 +1140,28 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { private void addSearchHandler(DeployState deployState, ApplicationContainerCluster cluster, Element searchElement, ConfigModelContext context) { var bindingPatterns = List.<BindingPattern>of(SearchHandler.DEFAULT_BINDING); if (isHostedTenantApplication(context)) { - bindingPatterns = SearchHandler.bindingPattern(getDataplanePorts(deployState, cluster)); + bindingPatterns = SearchHandler.bindingPattern(getDataplanePorts(deployState)); } - SearchHandler searchHandler = new SearchHandler(cluster, - serverBindings(deployState, context, searchElement, bindingPatterns, cluster), - ContainerThreadpool.UserOptions.fromXml(searchElement).orElse(null)); + SearchHandler searchHandler = new SearchHandler(deployState, cluster, + serverBindings(deployState, context, searchElement, bindingPatterns), + searchElement); cluster.addComponent(searchHandler); // Add as child to SearchHandler to get the correct chains config. searchHandler.addComponent(Component.fromClassAndBundle(SearchHandler.EXECUTION_FACTORY, PlatformBundles.SEARCH_AND_DOCPROC_BUNDLE)); } - private List<BindingPattern> serverBindings(DeployState deployState, ConfigModelContext context, Element searchElement, Collection<BindingPattern> defaultBindings, ApplicationContainerCluster cluster) { + private List<BindingPattern> serverBindings(DeployState deployState, ConfigModelContext context, Element searchElement, Collection<BindingPattern> defaultBindings) { List<Element> bindings = XML.getChildren(searchElement, "binding"); if (bindings.isEmpty()) return List.copyOf(defaultBindings); - return toBindingList(deployState, context, bindings, cluster); + return toBindingList(deployState, context, bindings); } - private List<BindingPattern> toBindingList(DeployState deployState, ConfigModelContext context, List<Element> bindingElements, ApplicationContainerCluster cluster) { + private List<BindingPattern> toBindingList(DeployState deployState, ConfigModelContext context, List<Element> bindingElements) { List<BindingPattern> result = new ArrayList<>(); - var portOverride = isHostedTenantApplication(context) ? getDataplanePorts(deployState, cluster) : Set.<Integer>of(); + var portOverride = isHostedTenantApplication(context) ? getDataplanePorts(deployState) : Set.<Integer>of(); for (Element element: bindingElements) { String text = element.getTextContent().trim(); if (!text.isEmpty()) @@ -1173,13 +1185,13 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { ContainerDocumentApi.HandlerOptions documentApiOptions = DocumentApiOptionsBuilder.build(documentApiElement); Element ignoreUndefinedFields = XML.getChild(documentApiElement, "ignore-undefined-fields"); - return new ContainerDocumentApi(cluster, documentApiOptions, - "true".equals(XML.getValue(ignoreUndefinedFields)), portBindingOverride(deployState, context, cluster)); + return new ContainerDocumentApi(deployState, cluster, documentApiOptions, + "true".equals(XML.getValue(ignoreUndefinedFields)), portBindingOverride(deployState, context)); } - private Set<Integer> portBindingOverride(DeployState deployState, ConfigModelContext context, ApplicationContainerCluster cluster) { + private Set<Integer> portBindingOverride(DeployState deployState, ConfigModelContext context) { return isHostedTenantApplication(context) - ? getDataplanePorts(deployState, cluster) + ? getDataplanePorts(deployState) : Set.<Integer>of(); } @@ -1438,18 +1450,18 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { } - private static Set<Integer> getDataplanePorts(DeployState ds, ApplicationContainerCluster cluster) { - var tokenPort = getTokenDataplanePort(ds, cluster); - var mtlsPort = getMtlsDataplanePort(ds, cluster); + private static Set<Integer> getDataplanePorts(DeployState ds) { + var tokenPort = getTokenDataplanePort(ds); + var mtlsPort = getMtlsDataplanePort(ds); return tokenPort.isPresent() ? Set.of(mtlsPort, tokenPort.getAsInt()) : Set.of(mtlsPort); } - private static int getMtlsDataplanePort(DeployState ds, ApplicationContainerCluster cluster) { - return enableTokenSupport(ds, cluster) ? 8443 : 4443; + private static int getMtlsDataplanePort(DeployState ds) { + return enableTokenSupport(ds) ? 8443 : 4443; } - private static OptionalInt getTokenDataplanePort(DeployState ds, ApplicationContainerCluster cluster) { - return enableTokenSupport(ds, cluster) ? OptionalInt.of(8444) : OptionalInt.empty(); + private static OptionalInt getTokenDataplanePort(DeployState ds) { + return enableTokenSupport(ds) ? OptionalInt.of(8444) : OptionalInt.empty(); } private static Set<ContainerEndpoint> tokenEndpoints(DeployState deployState) { @@ -1458,7 +1470,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { .collect(Collectors.toSet()); } - private static boolean enableTokenSupport(DeployState state, ApplicationContainerCluster cluster) { + private static boolean enableTokenSupport(DeployState state) { Set<ContainerEndpoint> tokenEndpoints = tokenEndpoints(state); return state.isHosted() && state.zone().system().isPublic() && ! tokenEndpoints.isEmpty(); } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/DocumentApiOptionsBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/DocumentApiOptionsBuilder.java index bb1d0af1db9..cdbe62720b9 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/DocumentApiOptionsBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/DocumentApiOptionsBuilder.java @@ -3,7 +3,6 @@ package com.yahoo.vespa.model.container.xml; import com.yahoo.text.XML; import com.yahoo.vespa.model.clients.ContainerDocumentApi; -import com.yahoo.vespa.model.container.ContainerThreadpool; import org.w3c.dom.Element; import java.util.ArrayList; @@ -19,13 +18,7 @@ public class DocumentApiOptionsBuilder { private static final Logger log = Logger.getLogger(DocumentApiOptionsBuilder.class.getName()); public static ContainerDocumentApi.HandlerOptions build(Element spec) { - return new ContainerDocumentApi.HandlerOptions(getBindings(spec), threadpoolOptions(spec, "http-client-api")); - } - - private static ContainerThreadpool.UserOptions threadpoolOptions(Element spec, String elementName) { - Element element = XML.getChild(spec, elementName); - if (element == null) return null; - return ContainerThreadpool.UserOptions.fromXml(element).orElse(null); + return new ContainerDocumentApi.HandlerOptions(getBindings(spec), XML.getChild(spec, "http-client-api")); } private static List<String> getBindings(Element spec) { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java index be3ca0b8aa9..14216dd8855 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java @@ -3,7 +3,6 @@ package com.yahoo.vespa.model.container.xml; import com.yahoo.config.ModelReference; import com.yahoo.config.UrlReference; -import com.yahoo.config.model.builder.xml.XmlHelper; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.text.XML; import org.w3c.dom.Element; @@ -88,13 +87,6 @@ public class ModelIdResolver { } } - - public static ModelReference resolveToModelReference(Element elem, DeployState state) { - return resolveToModelReference( - elem.getTagName(), XmlHelper.getOptionalAttribute(elem, "model-id"), - XmlHelper.getOptionalAttribute(elem, "url"), XmlHelper.getOptionalAttribute(elem, "path"), state); - } - public static ModelReference resolveToModelReference( String paramName, Optional<String> id, Optional<String> url, Optional<String> path, DeployState state) { if (id.isEmpty()) return createModelReference(Optional.empty(), url, path, state); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/SearchHandler.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/SearchHandler.java index 6cfef153fee..3cd296c1469 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/SearchHandler.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/SearchHandler.java @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.model.container.xml; +import com.yahoo.config.model.deploy.DeployState; import com.yahoo.container.bundle.BundleInstantiationSpecification; import com.yahoo.container.handler.threadpool.ContainerThreadpoolConfig; import com.yahoo.vespa.model.container.ApplicationContainerCluster; @@ -9,6 +10,7 @@ import com.yahoo.vespa.model.container.component.BindingPattern; import com.yahoo.vespa.model.container.component.SystemBindingPattern; import com.yahoo.vespa.model.container.component.chain.ProcessingHandler; import com.yahoo.vespa.model.container.search.searchchain.SearchChains; +import org.w3c.dom.Element; import java.util.Collection; import java.util.List; @@ -30,10 +32,11 @@ class SearchHandler extends ProcessingHandler<SearchChains> { static final BundleInstantiationSpecification HANDLER_SPEC = fromSearchAndDocproc(HANDLER_CLASSNAME); static final BindingPattern DEFAULT_BINDING = SystemBindingPattern.fromHttpPath("/search/*"); - SearchHandler(ApplicationContainerCluster cluster, + SearchHandler(DeployState ds, + ApplicationContainerCluster cluster, List<BindingPattern> bindings, - ContainerThreadpool.UserOptions threadpoolOptions) { - super(cluster.getSearchChains(), HANDLER_SPEC, new Threadpool(threadpoolOptions)); + Element threadpoolOptions) { + super(cluster.getSearchChains(), HANDLER_SPEC, new Threadpool(ds, threadpoolOptions)); bindings.forEach(this::addServerBindings); } @@ -46,8 +49,8 @@ class SearchHandler extends ProcessingHandler<SearchChains> { private static class Threadpool extends ContainerThreadpool { - Threadpool(UserOptions options) { - super("search-handler", options); + Threadpool(DeployState ds, Element options) { + super(ds, "search-handler", options); } @Override |