diff options
author | Ola Aunrønning <olaa@yahooinc.com> | 2023-06-08 10:58:17 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-08 10:58:17 +0200 |
commit | 79de2d78433c11932357d0c244704b16fc87da21 (patch) | |
tree | 382a06ce4e1217e395fbba25c1ce74a1afaadbe5 /config-model | |
parent | 96d3814b80a693ee46640ae89a88fdb2d78dcd40 (diff) | |
parent | d7568f0d450df2287657ac18c37955a1867496f5 (diff) |
Merge branch 'master' into olaa/dataplane-proxy-config
Diffstat (limited to 'config-model')
50 files changed, 1032 insertions, 379 deletions
diff --git a/config-model/src/main/java/com/yahoo/config/model/ConfigModelContext.java b/config-model/src/main/java/com/yahoo/config/model/ConfigModelContext.java index 13d87b852e4..b4b3dccd440 100644 --- a/config-model/src/main/java/com/yahoo/config/model/ConfigModelContext.java +++ b/config-model/src/main/java/com/yahoo/config/model/ConfigModelContext.java @@ -2,16 +2,21 @@ package com.yahoo.config.model; import com.yahoo.config.application.api.ApplicationPackage; +import com.yahoo.config.application.api.Bcp.Group; import com.yahoo.config.application.api.DeployLogger; +import com.yahoo.config.application.api.DeploymentSpec; import com.yahoo.config.model.api.ModelContext; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.config.model.producer.AnyConfigProducer; import com.yahoo.config.model.producer.TreeConfigProducer; import com.yahoo.config.provision.ClusterInfo; +import com.yahoo.config.provision.ClusterInfo.Builder; +import com.yahoo.config.provision.zone.ZoneId; import com.yahoo.vespa.model.VespaModel; import java.time.Duration; import java.util.Comparator; +import java.util.Optional; import java.util.stream.Stream; /** @@ -72,14 +77,22 @@ public final class ConfigModelContext { /** Returns a cluster info builder pre-populated with info known in this context. */ public ClusterInfo.Builder clusterInfo() { - var instance = getApplicationPackage().getDeploymentSpec().instance(properties().applicationId().instance()); - if ( ! instance.isPresent()) return new ClusterInfo.Builder(); - var maxDeadline = instance.get().bcp().groups().stream() - .filter(group -> group.memberRegions().contains(properties().zone().region())) - .map(group -> group.deadline()) - .min(Comparator.comparing(deadline -> deadline)) - .orElse(Duration.ofMinutes(0)); - return new ClusterInfo.Builder().bcpDeadline(maxDeadline); + DeploymentSpec spec = getApplicationPackage().getDeploymentSpec(); + ClusterInfo.Builder builder = new ClusterInfo.Builder(); + spec.hostTTL(properties().applicationId().instance(), deployState.zone().environment(), deployState.zone().region()) + .ifPresent(ttl -> { + ZoneId zoneId = ZoneId.from(deployState.zone().environment(), deployState.zone().region()); + if (spec.cloudAccount(deployState.zone().cloud().name(), properties().applicationId().instance(), zoneId).isUnspecified()) + throw new IllegalArgumentException("deployment spec specifies host TTL for " + zoneId + + " but no cloud account is specified for this zone"); + }); + spec.instance(properties().applicationId().instance()) + .flatMap(instance -> instance.bcp().groups().stream() + .filter(group -> group.memberRegions().contains(properties().zone().region())) + .map(Group::deadline) + .min(Comparator.naturalOrder())) + .ifPresent(builder::bcpDeadline); + return builder; } /** diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java index 56f999f85b4..0e39b7b5c3a 100644 --- a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java +++ b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java @@ -78,7 +78,7 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea private int rpc_num_targets = 2; private int rpc_events_before_wakeup = 1; private int mbus_network_threads = 1; - private int heapSizePercentage = ApplicationContainerCluster.defaultHeapSizePercentageOfTotalNodeMemory; + private int heapSizePercentage = ApplicationContainerCluster.defaultHeapSizePercentageOfAvailableMemory; private Architecture adminClusterNodeResourcesArchitecture = Architecture.getDefault(); private boolean useRestrictedDataPlaneBindings = false; private Optional<CloudAccount> cloudAccount = Optional.empty(); diff --git a/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java b/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java index 4b993f8e244..585e69d9141 100644 --- a/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java +++ b/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java @@ -160,7 +160,7 @@ public class InMemoryProvisioner implements HostProvisioner { public List<HostSpec> prepare(ClusterSpec cluster, Capacity requested, ProvisionLogger logger) { provisioned.add(cluster.id(), requested); clusters.add(cluster); - if (environment == Environment.dev) { + if (environment == Environment.dev && ! requested.isRequired()) { requested = requested.withLimits(requested.minResources().withNodes(1), requested.maxResources().withNodes(1)); } @@ -233,13 +233,8 @@ public class InMemoryProvisioner implements HostProvisioner { // Minimal capacity policies private NodeResources decideResources(NodeResources resources) { - if (resources.vcpuIsUnspecified()) - resources = resources.withVcpu(defaultNodeResources.vcpu()); - if (resources.memoryGbIsUnspecified()) - resources = resources.withMemoryGb(defaultNodeResources.memoryGb()); - if (resources.diskGbIsUnspecified()) - resources = resources.withDiskGb(defaultNodeResources.diskGb()); - return resources; + if (defaultNodeResources.isUnspecified()) return resources; + return resources.withUnspecifiedNumbersFrom(defaultNodeResources); } private List<HostSpec> allocateHostGroup(ClusterSpec clusterGroup, NodeResources requestedResourcesOrUnspecified, diff --git a/config-model/src/main/java/com/yahoo/vespa/model/Host.java b/config-model/src/main/java/com/yahoo/vespa/model/Host.java index 047a6ef9bd5..581f20cbfe9 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/Host.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/Host.java @@ -8,13 +8,17 @@ import com.yahoo.config.model.producer.TreeConfigProducer; import java.util.Objects; /** - * A physical host, running a set of services. + * A node with an identity, with some dedicated compute resources, running a set of services. * The identity of a host is its hostname. Hosts are comparable on their host name. * * @author gjoranv */ public final class Host extends TreeConfigProducer<AnyConfigProducer> implements SentinelConfig.Producer, Comparable<Host> { + // Memory needed for auxiliary processes always running on the node (config-proxy, metrics-proxy). + // Keep in sync with node-repository/ClusterModel. + public static final double memoryOverheadGb = 0.7; + private ConfigSentinel configSentinel = null; private final String hostname; private final boolean runsConfigServer; diff --git a/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java b/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java index 9e867a4c3bc..28ff8dff620 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java @@ -25,6 +25,7 @@ import com.yahoo.config.model.api.ValidationParameters; import com.yahoo.config.model.application.provider.ApplicationPackageXmlFilesValidator; import com.yahoo.config.model.builder.xml.ConfigModelBuilder; import com.yahoo.config.model.deploy.DeployState; +import com.yahoo.config.provision.QuotaExceededException; import com.yahoo.config.provision.TransientException; import com.yahoo.config.provision.Zone; import com.yahoo.vespa.config.VespaVersion; @@ -222,7 +223,7 @@ public class VespaModelFactory implements ModelFactory { Exceptions.toMessageString(e)); else rethrowUnlessIgnoreErrors(e, validationParameters.ignoreValidationErrors()); - } catch (IllegalArgumentException | TransientException e) { + } catch (IllegalArgumentException | TransientException | QuotaExceededException e) { rethrowUnlessIgnoreErrors(e, validationParameters.ignoreValidationErrors()); } catch (Exception e) { throw new RuntimeException(e); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java index 362bc7b0964..8a2bae364a1 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java @@ -131,6 +131,35 @@ public class VespaMetricSet { addMetric(metrics, ConfigServerMetrics.ZK_CONNECTIONS.last()); addMetric(metrics, ConfigServerMetrics.ZK_OUTSTANDING_REQUESTS.last()); + // Node repository metrics + addMetric(metrics, ConfigServerMetrics.NODES_NON_ACTIVE_FRACTION.last()); + addMetric(metrics, ConfigServerMetrics.CLUSTER_COST.last()); + addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_CPU.last()); + addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_MEMORY.last()); + addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_DISK.last()); + addMetric(metrics, ConfigServerMetrics.WANT_TO_REBOOT.max()); + addMetric(metrics, ConfigServerMetrics.WANT_TO_RESTART.max()); + addMetric(metrics, ConfigServerMetrics.RETIRED.max()); + addMetric(metrics, ConfigServerMetrics.WANT_TO_CHANGE_VESPA_VERSION.max()); + addMetric(metrics, ConfigServerMetrics.HAS_WIRE_GUARD_KEY.last()); + addMetric(metrics, ConfigServerMetrics.WANT_TO_DEPROVISION.max()); + addMetric(metrics, ConfigServerMetrics.SUSPENDED.max()); + addMetric(metrics, ConfigServerMetrics.SOME_SERVICES_DOWN.max()); + addMetric(metrics, ConfigServerMetrics.NODE_FAILER_BAD_NODE.last()); + addMetric(metrics, ConfigServerMetrics.LOCK_ATTEMPT_LOCKED_LOAD, EnumSet.of(max,average)); + + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_ALLOCATED_CAPACITY_CPU.average()); + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_ALLOCATED_CAPACITY_MEM.average()); + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_ALLOCATED_CAPACITY_DISK.average()); + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_FREE_CAPACITY_CPU.max()); + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_FREE_CAPACITY_MEM.max()); + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_FREE_CAPACITY_DISK.max()); + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_CPU, EnumSet.of(max,average)); + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_DISK, EnumSet.of(max,average)); + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_MEM, EnumSet.of(max,average)); + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_SKEW.last()); + addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PENDING_REDEPLOYMENTS.last()); + return metrics; } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/AccessControlFilterExcludeValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/AccessControlFilterExcludeValidator.java index ef695770987..5735a632085 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/AccessControlFilterExcludeValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/AccessControlFilterExcludeValidator.java @@ -34,7 +34,7 @@ public class AccessControlFilterExcludeValidator extends Validator { private void verifyNoExclusions(String clusterId, AccessControl accessControl, DeployState deployState) { if (!accessControl.excludedBindings().isEmpty()) { String message = "Application cluster %s excludes paths from access control, this is not allowed and should be removed.".formatted(clusterId); - if (deployState.zone().cloud().name() == CloudName.AWS) { + if (deployState.zone().cloud().name().equals(CloudName.AWS)) { throw new IllegalArgumentException(message); } else { deployState.getDeployLogger().log(Level.WARNING, message); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java index 66da43856b1..eccb6910866 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java @@ -28,11 +28,14 @@ public class ConstantTensorJsonValidator { private static final String FIELD_CELLS = "cells"; private static final String FIELD_ADDRESS = "address"; private static final String FIELD_VALUE = "value"; + private static final String FIELD_VALUES = "values"; private static final JsonFactory jsonFactory = new JsonFactory(); private JsonParser parser; private Map<String, TensorType.Dimension> tensorDimensions; + private boolean isSingleDenseType = false; + private boolean isSingleMappedType = false; public void validate(String fileName, TensorType type, Reader tensorData) { if (fileName.endsWith(".json")) { @@ -57,19 +60,69 @@ public class ConstantTensorJsonValidator { .dimensions() .stream() .collect(Collectors.toMap(TensorType.Dimension::name, Function.identity())); + if (type.dimensions().size() == 1) { + this.isSingleMappedType = (type.indexedSubtype() == TensorType.empty); + this.isSingleDenseType = (type.mappedSubtype() == TensorType.empty); + } + var top = parser.nextToken(); + if (top == JsonToken.START_ARRAY) { + consumeValuesArray(); + } else if (top == JsonToken.START_OBJECT) { + consumeTopObject(); + } + }); + } - assertNextTokenIs(JsonToken.START_OBJECT); - assertNextTokenIs(JsonToken.FIELD_NAME); - assertFieldNameIs(FIELD_CELLS); + private void consumeValuesArray() throws IOException { + if (! isSingleDenseType) { + throw new InvalidConstantTensorException(parser, String.format("Field 'values' is only valid for simple vectors (1-d dense tensors")); + } + assertCurrentTokenIs(JsonToken.START_ARRAY); + while (parser.nextToken() != JsonToken.END_ARRAY) { + validateNumeric(parser.getCurrentToken()); + } + } + private void consumeTopObject() throws IOException { + assertCurrentTokenIs(JsonToken.START_OBJECT); + assertNextTokenIs(JsonToken.FIELD_NAME); + String fieldName = parser.getCurrentName(); + if (fieldName.equals(FIELD_VALUES)) { assertNextTokenIs(JsonToken.START_ARRAY); + consumeValuesArray(); + } else if (fieldName.equals(FIELD_CELLS)) { + consumeCellsField(); + } else { + throw new InvalidConstantTensorException(parser, String.format("Expected 'cells' or 'values', got '%s'", fieldName)); + } + assertNextTokenIs(JsonToken.END_OBJECT); + } - while (parser.nextToken() != JsonToken.END_ARRAY) { - validateTensorCell(); - } + private void consumeCellsField() throws IOException { + var token = parser.nextToken(); + if (token == JsonToken.START_ARRAY) { + consumeLiteralFormArray(); + } else if (token == JsonToken.START_OBJECT) { + consumeSimpleMappedObject(); + } else { + throw new InvalidConstantTensorException(parser, String.format("Field 'cells' must be object or array, but got %s", token.toString())); + } + } - assertNextTokenIs(JsonToken.END_OBJECT); - }); + private void consumeLiteralFormArray() throws IOException { + while (parser.nextToken() != JsonToken.END_ARRAY) { + validateTensorCell(); + } + } + + private void consumeSimpleMappedObject() throws IOException { + if (! isSingleMappedType) { + throw new InvalidConstantTensorException(parser, String.format("Field 'cells' must be an array of address/value objects")); + } + while (parser.nextToken() != JsonToken.END_OBJECT) { + assertCurrentTokenIs(JsonToken.FIELD_NAME); + validateTensorCellValue(); + } } private void validateTensorCell() { @@ -87,7 +140,7 @@ public class ConstantTensorJsonValidator { if (fieldName.equals(FIELD_ADDRESS)) { validateTensorAddress(); } else if (fieldName.equals(FIELD_VALUE)) { - validateTensorValue(); + validateTensorCellValue(); } } else { throw new InvalidConstantTensorException(parser, "Only 'address' or 'value' fields are permitted within a cell object"); @@ -169,9 +222,12 @@ public class ConstantTensorJsonValidator { throw new InvalidConstantTensorException(parser, String.format("Index '%s' for dimension '%s' is not an integer", value, dimensionName)); } - private void validateTensorValue() throws IOException { + private void validateTensorCellValue() throws IOException { JsonToken token = parser.nextToken(); + validateNumeric(token); + } + private void validateNumeric(JsonToken token) throws IOException { if (token != JsonToken.VALUE_NUMBER_FLOAT && token != JsonToken.VALUE_NUMBER_INT) { throw new InvalidConstantTensorException(parser, String.format("Tensor value is not a number (%s)", token.toString())); } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java index 4ea74147aaf..f0c29c74705 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java @@ -6,7 +6,9 @@ import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.NodeResources; +import com.yahoo.config.provision.QuotaExceededException; import com.yahoo.config.provision.SystemName; +import com.yahoo.config.provision.Zone; import com.yahoo.vespa.model.VespaModel; import java.math.BigDecimal; @@ -31,11 +33,10 @@ public class QuotaValidator extends Validator { public void validate(VespaModel model, DeployState deployState) { var quota = deployState.getProperties().quota(); quota.maxClusterSize().ifPresent(maxClusterSize -> validateMaxClusterSize(maxClusterSize, model)); - quota.budgetAsDecimal().ifPresent(budget -> validateBudget(budget, model, deployState.getProperties().zone().system())); + quota.budgetAsDecimal().ifPresent(budget -> validateBudget(budget, model, deployState.getProperties().zone())); } - private void validateBudget(BigDecimal budget, VespaModel model, SystemName systemName) { - + private void validateBudget(BigDecimal budget, VespaModel model, Zone zone) { var maxSpend = model.allClusters().stream() .filter(id -> !adminClusterIds(model).contains(id)) .map(id -> model.provisioned().all().getOrDefault(id, zeroCapacity)) @@ -52,9 +53,10 @@ public class QuotaValidator extends Validator { return; } - throwIfBudgetNegative(actualSpend, budget, systemName); - throwIfBudgetExceeded(actualSpend, budget, systemName); - throwIfBudgetExceeded(maxSpend, budget, systemName); + throwIfBudgetNegative(actualSpend, budget, zone.system()); + throwIfBudgetExceeded(actualSpend, budget, zone.system(), true); + if ( ! zone.environment().isTest()) // Usage is constant after deploy in test zones + throwIfBudgetExceeded(maxSpend, budget, zone.system(), false); } private Set<ClusterSpec.Id> adminClusterIds(VespaModel model) { @@ -80,24 +82,28 @@ public class QuotaValidator extends Validator { if (!invalidClusters.isEmpty()) { var clusterNames = String.join(", ", invalidClusters); - throw new IllegalArgumentException("Clusters " + clusterNames + " exceeded max cluster size of " + maxClusterSize); + throw new QuotaExceededException("Clusters " + clusterNames + " exceeded max cluster size of " + maxClusterSize); } } private static void throwIfBudgetNegative(double spend, BigDecimal budget, SystemName systemName) { if (budget.doubleValue() < 0) { - throw new IllegalArgumentException(quotaMessage("Please free up some capacity.", systemName, spend, budget)); + throw new QuotaExceededException(quotaMessage("Please free up some capacity.", systemName, spend, budget, true)); } } - private static void throwIfBudgetExceeded(double spend, BigDecimal budget, SystemName systemName) { + private static void throwIfBudgetExceeded(double spend, BigDecimal budget, SystemName systemName, boolean actual) { if (budget.doubleValue() < spend) { - throw new IllegalArgumentException(quotaMessage("Contact support to upgrade your plan.", systemName, spend, budget)); + throw new QuotaExceededException(quotaMessage("Contact support to upgrade your plan.", systemName, spend, budget, actual)); } } - private static String quotaMessage(String message, SystemName system, double spend, BigDecimal budget) { - String quotaDescription = String.format(Locale.ENGLISH, "The max resources specified cost $%.2f but your quota is $%.2f", spend, budget); + private static String quotaMessage(String message, SystemName system, double spend, BigDecimal budget, boolean actual) { + String quotaDescription = String.format(Locale.ENGLISH, + "The %s cost $%.2f but your quota is $%.2f", + actual ? "resources used" : "max resources specified", + spend, + budget); return (system == SystemName.Public ? "" : system.value() + ": ") + quotaDescription + ": " + message; } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java index c57122e5bf5..d0e1ede2cfa 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java @@ -3,12 +3,14 @@ package com.yahoo.vespa.model.builder.xml.dom; import com.yahoo.component.ComponentId; import com.yahoo.config.model.deploy.DeployState; -import com.yahoo.container.bundle.BundleInstantiationSpecification; -import com.yahoo.osgi.provider.model.ComponentModel; import com.yahoo.config.model.producer.AnyConfigProducer; import com.yahoo.config.model.producer.TreeConfigProducer; +import com.yahoo.osgi.provider.model.ComponentModel; import com.yahoo.text.XML; +import com.yahoo.vespa.model.container.component.BertEmbedder; import com.yahoo.vespa.model.container.component.Component; +import com.yahoo.vespa.model.container.component.HuggingFaceEmbedder; +import com.yahoo.vespa.model.container.component.HuggingFaceTokenizer; import com.yahoo.vespa.model.container.xml.BundleInstantiationSpecificationBuilder; import org.w3c.dom.Element; @@ -31,17 +33,25 @@ public class DomComponentBuilder extends VespaDomBuilder.DomConfigProducerBuilde } @Override - protected Component doBuild(DeployState deployState, TreeConfigProducer<AnyConfigProducer> ancestor, Element spec) { - Component component = buildComponent(spec); + protected Component<? super Component<?, ?>, ?> doBuild(DeployState deployState, TreeConfigProducer<AnyConfigProducer> ancestor, Element spec) { + var component = buildComponent(spec, deployState); addChildren(deployState, ancestor, spec, component); return component; } - private Component buildComponent(Element spec) { - BundleInstantiationSpecification bundleSpec = - BundleInstantiationSpecificationBuilder.build(spec).nestInNamespace(namespace); - - return new Component<Component<?, ?>, ComponentModel>(new ComponentModel(bundleSpec)); + private Component<? super Component<?, ?>, ?> buildComponent(Element spec, DeployState state) { + if (spec.hasAttribute("type")) { + var type = spec.getAttribute("type"); + return switch (type) { + case "hugging-face-embedder" -> new HuggingFaceEmbedder(spec, state); + case "hugging-face-tokenizer" -> new HuggingFaceTokenizer(spec, state); + case "bert-embedder" -> new BertEmbedder(spec, state); + default -> throw new IllegalArgumentException("Unknown component type '%s'".formatted(type)); + }; + } else { + var bundleSpec = BundleInstantiationSpecificationBuilder.build(spec).nestInNamespace(namespace); + return new Component<>(new ComponentModel(bundleSpec)); + } } public static void addChildren(DeployState deployState, TreeConfigProducer<AnyConfigProducer> ancestor, Element componentNode, Component<? super Component<?, ?>, ?> component) { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomSearchTuningBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomSearchTuningBuilder.java index d9ef5fd2123..64592e75c41 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomSearchTuningBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomSearchTuningBuilder.java @@ -66,10 +66,22 @@ public class DomSearchTuningBuilder extends VespaDomBuilder.DomConfigProducerBui handleFeeding(e, t.searchNode); } else if (equals("removed-db", e)) { handleRemovedDB(e, t.searchNode); + } else if (equals("lidspace", e)) { + handleLidSpace(e, t.searchNode); } } } + private void handleLidSpace(Element spec, Tuning.SearchNode t) { + t.lidSpace = new Tuning.SearchNode.LidSpace(); + for (Element e : XML.getChildren(spec)) { + if (equals("max-bloat-factor", e)) { + t.lidSpace.bloatFactor = asDouble(e); + } + } + + } + private void handleRequestThreads(Element spec, Tuning.SearchNode sn) { sn.threads = new Tuning.SearchNode.RequestThreads(); Tuning.SearchNode.RequestThreads rt = sn.threads; @@ -180,9 +192,9 @@ public class DomSearchTuningBuilder extends VespaDomBuilder.DomConfigProducerBui Tuning.SearchNode.Index.Warmup warmup = sn.index.warmup; for (Element e2 : XML.getChildren(e)) { if (equals("time", e2)) { - warmup.time = Double.valueOf(asString(e2)); + warmup.time = asDouble(e2); } else if (equals("unpack", e2)) { - warmup.unpack = Boolean.valueOf(asString(e2)); + warmup.unpack = Boolean.parseBoolean(asString(e2)); } } } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java index 6977a5ca465..3c1c4867f13 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java @@ -31,6 +31,7 @@ import com.yahoo.vespa.config.search.core.OnnxModelsConfig; import com.yahoo.vespa.config.search.core.RankingConstantsConfig; import com.yahoo.vespa.config.search.core.RankingExpressionsConfig; import com.yahoo.vespa.model.AbstractService; +import com.yahoo.vespa.model.Host; import com.yahoo.vespa.model.admin.metricsproxy.MetricsProxyContainer; import com.yahoo.vespa.model.container.component.BindingPattern; import com.yahoo.vespa.model.container.component.Component; @@ -75,8 +76,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat private static final BindingPattern PROMETHEUS_V1_HANDLER_BINDING_1 = SystemBindingPattern.fromHttpPath(PrometheusV1Handler.V1_PATH); private static final BindingPattern PROMETHEUS_V1_HANDLER_BINDING_2 = SystemBindingPattern.fromHttpPath(PrometheusV1Handler.V1_PATH + "/*"); - public static final int defaultHeapSizePercentageOfTotalNodeMemory = 70; - public static final int heapSizePercentageOfTotalNodeMemoryWhenCombinedCluster = 18; + public static final int defaultHeapSizePercentageOfAvailableMemory = 85; + public static final int heapSizePercentageOfTotalAvailableMemoryWhenCombinedCluster = 24; private final Set<FileReference> applicationBundles = new LinkedHashSet<>(); @@ -91,7 +92,9 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat private int zookeeperSessionTimeoutSeconds = 30; private final int transport_events_before_wakeup; private final int transport_connections_per_target; - private final int heapSizePercentageOfTotalNodeMemory; + + /** The heap size % of total memory available to the JVM process. */ + private final int heapSizePercentageOfAvailableMemory; private Integer memoryPercentage = null; @@ -119,9 +122,9 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat addTestrunnerComponentsIfTester(deployState); transport_connections_per_target = deployState.featureFlags().mbusJavaRpcNumTargets(); transport_events_before_wakeup = deployState.featureFlags().mbusJavaEventsBeforeWakeup(); - heapSizePercentageOfTotalNodeMemory = deployState.featureFlags().heapSizePercentage() > 0 + heapSizePercentageOfAvailableMemory = deployState.featureFlags().heapSizePercentage() > 0 ? Math.min(99, deployState.featureFlags().heapSizePercentage()) - : defaultHeapSizePercentageOfTotalNodeMemory; + : defaultHeapSizePercentageOfAvailableMemory; } @Override @@ -178,12 +181,18 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat @Override public Optional<Integer> getMemoryPercentage() { - if (memoryPercentage != null) { - return Optional.of(memoryPercentage); - } else if (isHostedVespa()) { - return getHostClusterId().isPresent() ? - Optional.of(heapSizePercentageOfTotalNodeMemoryWhenCombinedCluster) : - Optional.of(heapSizePercentageOfTotalNodeMemory); + if (memoryPercentage != null) return Optional.of(memoryPercentage); + + if (isHostedVespa()) { + int availableMemoryPercentage = getHostClusterId().isPresent() ? + heapSizePercentageOfTotalAvailableMemoryWhenCombinedCluster : + heapSizePercentageOfAvailableMemory; + if (getContainers().isEmpty()) return Optional.of(availableMemoryPercentage); // Node memory is not known + + // Node memory is known so convert available memory percentage to node memory percentage + double totalMemory = getContainers().get(0).getHostResource().realResources().memoryGb(); + double availableMemory = totalMemory - Host.memoryOverheadGb; + return Optional.of((int) (availableMemory / totalMemory * availableMemoryPercentage)); } return Optional.empty(); } @@ -289,9 +298,7 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat .compressedClassSpaceSize(0) .minHeapsize(1536) .heapsize(1536); - if (getMemoryPercentage().isPresent()) { - builder.jvm.heapSizeAsPercentageOfPhysicalMemory(getMemoryPercentage().get()); - } + getMemoryPercentage().ifPresent(percentage -> builder.jvm.heapSizeAsPercentageOfPhysicalMemory(percentage)); } @Override diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java index 34c565871db..c227700733e 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java @@ -26,6 +26,7 @@ public class ContainerModelEvaluation implements OnnxModelsConfig.Producer, RankingExpressionsConfig.Producer { + public final static String LINGUISTICS_BUNDLE_NAME = "linguistics-components"; public final static String EVALUATION_BUNDLE_NAME = "model-evaluation"; public final static String INTEGRATION_BUNDLE_NAME = "model-integration"; public final static String ONNXRUNTIME_BUNDLE_NAME = "container-onnxruntime.jar"; diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/PlatformBundles.java b/config-model/src/main/java/com/yahoo/vespa/model/container/PlatformBundles.java index 19df9a4064f..dbc7cd62fbd 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/PlatformBundles.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/PlatformBundles.java @@ -12,6 +12,7 @@ import java.util.stream.Stream; import static com.yahoo.vespa.model.container.ContainerModelEvaluation.EVALUATION_BUNDLE_NAME; import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME; +import static com.yahoo.vespa.model.container.ContainerModelEvaluation.LINGUISTICS_BUNDLE_NAME; import static com.yahoo.vespa.model.container.ContainerModelEvaluation.ONNXRUNTIME_BUNDLE_NAME; /** @@ -57,7 +58,7 @@ public class PlatformBundles { public static final Set<Path> SEARCH_AND_DOCPROC_BUNDLES = toBundlePaths( SEARCH_AND_DOCPROC_BUNDLE, "docprocs", - "linguistics-components", + LINGUISTICS_BUNDLE_NAME, EVALUATION_BUNDLE_NAME, INTEGRATION_BUNDLE_NAME, ONNXRUNTIME_BUNDLE_NAME diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java new file mode 100644 index 00000000000..56aa974da48 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java @@ -0,0 +1,70 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.model.container.component; + +import com.yahoo.config.ModelReference; +import com.yahoo.config.model.deploy.DeployState; +import com.yahoo.embedding.BertBaseEmbedderConfig; +import com.yahoo.vespa.model.container.xml.ModelIdResolver; +import org.w3c.dom.Element; + +import static com.yahoo.config.model.builder.xml.XmlHelper.getOptionalChildValue; +import static com.yahoo.text.XML.getChild; +import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME; + +/** + * @author bjorncs + */ +public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConfig.Producer { + + private final ModelReference model; + private final ModelReference vocab; + private final Integer maxTokens; + private final String transformerInputIds; + private final String transformerAttentionMask; + private final String transformerTokenTypeIds; + private final String transformerOutput; + private final Integer tranformerStartSequenceToken; + private final Integer transformerEndSequenceToken; + private final String poolingStrategy; + private final String onnxExecutionMode; + private final Integer onnxInteropThreads; + private final Integer onnxIntraopThreads; + private final Integer onnxGpuDevice; + + + public BertEmbedder(Element xml, DeployState state) { + super("ai.vespa.embedding.BertBaseEmbedder", INTEGRATION_BUNDLE_NAME, xml); + model = ModelIdResolver.resolveToModelReference(getChild(xml, "transformer-model"), state); + vocab = ModelIdResolver.resolveToModelReference(getChild(xml, "tokenizer-vocab"), state); + maxTokens = getOptionalChildValue(xml, "max-tokens").map(Integer::parseInt).orElse(null); + transformerInputIds = getOptionalChildValue(xml, "transformer-input-ids").orElse(null); + transformerAttentionMask = getOptionalChildValue(xml, "transformer-attention-mask").orElse(null); + transformerTokenTypeIds = getOptionalChildValue(xml, "transformer-token-type-ids").orElse(null); + transformerOutput = getOptionalChildValue(xml, "transformer-output").orElse(null); + tranformerStartSequenceToken = getOptionalChildValue(xml, "transformer-start-sequence-token").map(Integer::parseInt).orElse(null); + transformerEndSequenceToken = getOptionalChildValue(xml, "transformer-end-sequence-token").map(Integer::parseInt).orElse(null); + poolingStrategy = getOptionalChildValue(xml, "pooling-strategy").orElse(null); + onnxExecutionMode = getOptionalChildValue(xml, "onnx-execution-mode").orElse(null); + onnxInteropThreads = getOptionalChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null); + onnxIntraopThreads = getOptionalChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null); + onnxGpuDevice = getOptionalChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null); + } + + @Override + public void getConfig(BertBaseEmbedderConfig.Builder b) { + b.transformerModel(model).tokenizerVocab(vocab); + if (maxTokens != null) b.transformerMaxTokens(maxTokens); + if (transformerInputIds != null) b.transformerInputIds(transformerInputIds); + if (transformerAttentionMask != null) b.transformerAttentionMask(transformerAttentionMask); + if (transformerTokenTypeIds != null) b.transformerTokenTypeIds(transformerTokenTypeIds); + if (transformerOutput != null) b.transformerOutput(transformerOutput); + if (tranformerStartSequenceToken != null) b.transformerStartSequenceToken(tranformerStartSequenceToken); + if (transformerEndSequenceToken != null) b.transformerEndSequenceToken(transformerEndSequenceToken); + if (poolingStrategy != null) b.poolingStrategy(BertBaseEmbedderConfig.PoolingStrategy.Enum.valueOf(poolingStrategy)); + if (onnxExecutionMode != null) b.onnxExecutionMode(BertBaseEmbedderConfig.OnnxExecutionMode.Enum.valueOf(onnxExecutionMode)); + if (onnxInteropThreads != null) b.onnxInterOpThreads(onnxInteropThreads); + if (onnxIntraopThreads != null) b.onnxIntraOpThreads(onnxIntraopThreads); + if (onnxGpuDevice != null) b.onnxGpuDevice(onnxGpuDevice); + } +} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java new file mode 100644 index 00000000000..6e7a1cc31dd --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java @@ -0,0 +1,81 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.model.container.component; + +import com.yahoo.config.ModelReference; +import com.yahoo.config.model.deploy.DeployState; +import com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig; +import com.yahoo.vespa.model.container.xml.ModelIdResolver; +import org.w3c.dom.Element; + +import java.util.Optional; + +import static com.yahoo.config.model.builder.xml.XmlHelper.getOptionalChild; +import static com.yahoo.config.model.builder.xml.XmlHelper.getOptionalChildValue; +import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME; + + +/** + * @author bjorncs + */ +public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEmbedderConfig.Producer { + private final ModelReference model; + private final ModelReference vocab; + private final Integer maxTokens; + private final String transformerInputIds; + private final String transformerAttentionMask; + private final String transformerTokenTypeIds; + private final String transformerOutput; + private final Boolean normalize; + private final String onnxExecutionMode; + private final Integer onnxInteropThreads; + private final Integer onnxIntraopThreads; + private final Integer onnxGpuDevice; + private final String poolingStrategy; + + public HuggingFaceEmbedder(Element xml, DeployState state) { + super("ai.vespa.embedding.huggingface.HuggingFaceEmbedder", INTEGRATION_BUNDLE_NAME, xml); + var transformerModelElem = getOptionalChild(xml, "transformer-model").orElseThrow(); + model = ModelIdResolver.resolveToModelReference(transformerModelElem, state); + vocab = getOptionalChild(xml, "tokenizer-model") + .map(elem -> ModelIdResolver.resolveToModelReference(elem, state)) + .orElseGet(() -> resolveDefaultVocab(transformerModelElem, state)); + maxTokens = getOptionalChildValue(xml, "max-tokens").map(Integer::parseInt).orElse(null); + transformerInputIds = getOptionalChildValue(xml, "transformer-input-ids").orElse(null); + transformerAttentionMask = getOptionalChildValue(xml, "transformer-attention-mask").orElse(null); + transformerTokenTypeIds = getOptionalChildValue(xml, "transformer-token-type-ids").orElse(null); + transformerOutput = getOptionalChildValue(xml, "transformer-output").orElse(null); + normalize = getOptionalChildValue(xml, "normalize").map(Boolean::parseBoolean).orElse(null); + onnxExecutionMode = getOptionalChildValue(xml, "onnx-execution-mode").orElse(null); + onnxInteropThreads = getOptionalChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null); + onnxIntraopThreads = getOptionalChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null); + onnxGpuDevice = getOptionalChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null); + poolingStrategy = getOptionalChildValue(xml, "pooling-strategy").orElse(null); + } + + private static ModelReference resolveDefaultVocab(Element model, DeployState state) { + if (state.isHosted() && model.hasAttribute("model-id")) { + var implicitVocabId = model.getAttribute("model-id") + "-vocab"; + return ModelIdResolver.resolveToModelReference( + "tokenizer-model", Optional.of(implicitVocabId), Optional.empty(), Optional.empty(), state); + } + throw new IllegalArgumentException("'tokenizer-model' must be specified"); + } + + @Override + public void getConfig(HuggingFaceEmbedderConfig.Builder b) { + b.transformerModel(model).tokenizerPath(vocab); + if (maxTokens != null) b.transformerMaxTokens(maxTokens); + if (transformerInputIds != null) b.transformerInputIds(transformerInputIds); + if (transformerAttentionMask != null) b.transformerAttentionMask(transformerAttentionMask); + if (transformerTokenTypeIds != null) b.transformerTokenTypeIds(transformerTokenTypeIds); + if (transformerOutput != null) b.transformerOutput(transformerOutput); + if (normalize != null) b.normalize(normalize); + if (onnxExecutionMode != null) b.transformerExecutionMode( + HuggingFaceEmbedderConfig.TransformerExecutionMode.Enum.valueOf(onnxExecutionMode)); + if (onnxInteropThreads != null) b.transformerInterOpThreads(onnxInteropThreads); + if (onnxIntraopThreads != null) b.transformerIntraOpThreads(onnxIntraopThreads); + if (onnxGpuDevice != null) b.transformerGpuDevice(onnxGpuDevice); + if (poolingStrategy != null) b.poolingStrategy(HuggingFaceEmbedderConfig.PoolingStrategy.Enum.valueOf(poolingStrategy)); + } +} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java new file mode 100644 index 00000000000..966dbe8260a --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java @@ -0,0 +1,47 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.model.container.component; + +import com.yahoo.config.ModelReference; +import com.yahoo.config.model.deploy.DeployState; +import com.yahoo.language.huggingface.config.HuggingFaceTokenizerConfig; +import com.yahoo.text.XML; +import com.yahoo.vespa.model.container.xml.ModelIdResolver; +import org.w3c.dom.Element; + +import java.util.Map; +import java.util.TreeMap; + +import static com.yahoo.config.model.builder.xml.XmlHelper.getOptionalChildValue; +import static com.yahoo.vespa.model.container.ContainerModelEvaluation.LINGUISTICS_BUNDLE_NAME; + +/** + * @author bjorncs + */ +public class HuggingFaceTokenizer extends TypedComponent implements HuggingFaceTokenizerConfig.Producer { + + private final Map<String, ModelReference> langToModel = new TreeMap<>(); + private final Boolean specialTokens; + private final Integer maxLength; + private final Boolean truncation; + + public HuggingFaceTokenizer(Element xml, DeployState state) { + super("com.yahoo.language.huggingface.HuggingFaceTokenizer", LINGUISTICS_BUNDLE_NAME, xml); + for (Element element : XML.getChildren(xml, "model")) { + var lang = element.hasAttribute("language") ? element.getAttribute("language") : "unknown"; + langToModel.put(lang, ModelIdResolver.resolveToModelReference(element, state)); + } + specialTokens = getOptionalChildValue(xml, "special-tokens").map(Boolean::parseBoolean).orElse(null); + maxLength = getOptionalChildValue(xml, "max-length").map(Integer::parseInt).orElse(null); + truncation = getOptionalChildValue(xml, "truncation").map(Boolean::parseBoolean).orElse(null); + } + + @Override + public void getConfig(HuggingFaceTokenizerConfig.Builder builder) { + langToModel.forEach((lang, vocab) -> { + builder.model.add(new HuggingFaceTokenizerConfig.Model.Builder().language(lang).path(vocab)); + }); + if (specialTokens != null) builder.addSpecialTokens(specialTokens); + if (maxLength != null) builder.maxLength(maxLength); + if (truncation != null) builder.truncation(truncation); + } +} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/TypedComponent.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/TypedComponent.java new file mode 100644 index 00000000000..522c78f2f25 --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/TypedComponent.java @@ -0,0 +1,20 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +package com.yahoo.vespa.model.container.component; + +import com.yahoo.osgi.provider.model.ComponentModel; +import org.w3c.dom.Element; + +/** + * @author bjorncs + */ +abstract class TypedComponent extends SimpleComponent { + + private final Element xml; + + protected TypedComponent(String className, String bundle, Element xml) { + super(new ComponentModel(xml.getAttribute("id"), className, bundle)); + this.xml = xml; + } + +} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java index 57303d6c9b3..bcebf1a9fdd 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java @@ -933,22 +933,19 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { } private static boolean applyMemoryPercentage(ApplicationContainerCluster cluster, String memoryPercentage) { - if (memoryPercentage == null || memoryPercentage.isEmpty()) return false; - memoryPercentage = memoryPercentage.trim(); - - if ( ! memoryPercentage.endsWith("%")) - throw new IllegalArgumentException("The memory percentage given for nodes in " + cluster + - " must be an integer percentage ending by the '%' sign"); - memoryPercentage = memoryPercentage.substring(0, memoryPercentage.length()-1).trim(); - try { + if (memoryPercentage == null || memoryPercentage.isEmpty()) return false; + memoryPercentage = memoryPercentage.trim(); + if ( ! memoryPercentage.endsWith("%")) + throw new IllegalArgumentException("Missing % sign"); + memoryPercentage = memoryPercentage.substring(0, memoryPercentage.length()-1).trim(); cluster.setMemoryPercentage(Integer.parseInt(memoryPercentage)); + return true; } catch (NumberFormatException e) { throw new IllegalArgumentException("The memory percentage given for nodes in " + cluster + - " must be an integer percentage ending by the '%' sign"); + " must be an integer percentage ending by the '%' sign", e); } - return true; } /** Allocate a container cluster without a nodes tag */ @@ -960,9 +957,11 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { int nodeCount = deployState.zone().environment().isProduction() ? 2 : 1; deployState.getDeployLogger().logApplicationPackage(Level.INFO, "Using " + nodeCount + " nodes in " + cluster); var nodesSpec = NodesSpecification.dedicated(nodeCount, context); + ClusterSpec.Id clusterId = ClusterSpec.Id.from(cluster.getName()); var hosts = nodesSpec.provision(hostSystem, ClusterSpec.Type.container, - ClusterSpec.Id.from(cluster.getName()), + clusterId, + zoneEndpoint(context, clusterId), deployState.getDeployLogger(), false, context.clusterInfo().build()); @@ -1192,9 +1191,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> { DeploymentSpec spec) { spec.athenzDomain() .ifPresent(domain -> { - AthenzService service = spec.instance(app.getApplicationId().instance()) - .flatMap(instanceSpec -> instanceSpec.athenzService(zone.environment(), zone.region())) - .or(spec::athenzService) + AthenzService service = spec.athenzService(app.getApplicationId().instance(), zone.environment(), zone.region()) .orElseThrow(() -> new IllegalArgumentException("Missing Athenz service configuration in instance '" + app.getApplicationId().instance() + "'")); String zoneDnsSuffix = zone.environment().value() + "-" + zone.region().value() + "." + athenzDnsSuffix; diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java index ff261d2b83a..96f653bf793 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java @@ -1,12 +1,17 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.model.container.xml; +import com.yahoo.config.ModelReference; +import com.yahoo.config.UrlReference; +import com.yahoo.config.model.builder.xml.XmlHelper; +import com.yahoo.config.model.deploy.DeployState; import com.yahoo.text.XML; import org.w3c.dom.Element; import java.util.Collections; import java.util.HashMap; import java.util.Map; +import java.util.Optional; import java.util.stream.Collectors; /** @@ -29,6 +34,19 @@ public class ModelIdResolver { models.put("flan-t5-base-decoder", "https://data.vespa.oath.cloud/onnx_models/flan-t5-base-decoder-model.onnx"); models.put("flan-t5-large-encoder", "https://data.vespa.oath.cloud/onnx_models/flan-t5-large-encoder-model.onnx"); models.put("flan-t5-large-decoder", "https://data.vespa.oath.cloud/onnx_models/flan-t5-large-decoder-model.onnx"); + + models.put("multilingual-e5-base", "https://data.vespa.oath.cloud/onnx_models/multilingual-e5-base/model.onnx"); + models.put("multilingual-e5-base-vocab", "https://data.vespa.oath.cloud/onnx_models/multilingual-e5-base/tokenizer.json"); + + models.put("e5-small-v2", "https://data.vespa.oath.cloud/onnx_models/e5-small-v2/model.onnx"); + models.put("e5-small-v2-vocab", "https://data.vespa.oath.cloud/onnx_models/e5-small-v2/tokenizer.json"); + + models.put("e5-base-v2", "https://data.vespa.oath.cloud/onnx_models/e5-base-v2/model.onnx"); + models.put("e5-base-v2-vocab", "https://data.vespa.oath.cloud/onnx_models/e5-base-v2/tokenizer.json"); + + models.put("e5-large-v2", "https://data.vespa.oath.cloud/onnx_models/e5-large-v2/model.onnx"); + models.put("e5-large-v2-vocab", "https://data.vespa.oath.cloud/onnx_models/e5-large-v2/tokenizer.json"); + return Collections.unmodifiableMap(models); } @@ -57,11 +75,36 @@ public class ModelIdResolver { value.removeAttribute("path"); } else if ( ! value.hasAttribute("url") && ! value.hasAttribute("path")) { - throw new IllegalArgumentException(value.getTagName() + " is configured with only a 'model-id'. " + - "Add a 'path' or 'url' to deploy this outside Vespa Cloud"); + throw onlyModelIdInHostedException(value.getTagName()); } } + + public static ModelReference resolveToModelReference(Element elem, DeployState state) { + return resolveToModelReference( + elem.getTagName(), XmlHelper.getOptionalAttribute(elem, "model-id"), + XmlHelper.getOptionalAttribute(elem, "url"), XmlHelper.getOptionalAttribute(elem, "path"), state); + } + + public static ModelReference resolveToModelReference( + String paramName, Optional<String> id, Optional<String> url, Optional<String> path, DeployState state) { + if (id.isEmpty()) return createModelReference(Optional.empty(), url, path, state); + else if (state.isHosted()) + return createModelReference(id, Optional.of(modelIdToUrl(paramName, id.get())), Optional.empty(), state); + else if (url.isEmpty() && path.isEmpty()) throw onlyModelIdInHostedException(paramName); + else return createModelReference(id, url, path, state); + } + + private static ModelReference createModelReference(Optional<String> id, Optional<String> url, Optional<String> path, DeployState state) { + var fileRef = path.map(p -> state.getFileRegistry().addFile(p)); + return ModelReference.unresolved(id, url.map(UrlReference::valueOf), fileRef); + } + + private static IllegalArgumentException onlyModelIdInHostedException(String paramName) { + return new IllegalArgumentException(paramName + " is configured with only a 'model-id'. " + + "Add a 'path' or 'url' to deploy this outside Vespa Cloud"); + } + private static String modelIdToUrl(String valueName, String modelId) { if ( ! providedModels.containsKey(modelId)) throw new IllegalArgumentException("Unknown model id '" + modelId + "' on '" + valueName + "'. Available models are [" + diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/Content.java b/config-model/src/main/java/com/yahoo/vespa/model/content/Content.java index e044b97546c..43f045940c9 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/content/Content.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/content/Content.java @@ -28,6 +28,7 @@ import com.yahoo.vespa.model.container.docproc.DocprocChain; import com.yahoo.vespa.model.container.docproc.DocprocChains; import com.yahoo.vespa.model.content.cluster.ContentCluster; import com.yahoo.vespa.model.search.IndexedSearchCluster; +import com.yahoo.vespa.model.search.IndexingDocproc; import com.yahoo.vespa.model.search.IndexingDocprocChain; import com.yahoo.vespa.model.search.SearchCluster; import com.yahoo.vespa.model.search.SearchNode; @@ -213,13 +214,17 @@ public class Content extends ConfigModel { /** Select/creates and initializes the indexing cluster coupled to this */ private void buildIndexingClusters(Content content, ConfigModelContext modelContext, ApplicationConfigProducerRoot root) { - if ( ! content.getCluster().getSearch().hasIndexedCluster()) return; - - IndexedSearchCluster indexedSearchCluster = content.getCluster().getSearch().getIndexed(); - if (indexedSearchCluster.hasExplicitIndexingCluster()) { - setExistingIndexingCluster(indexedSearchCluster, content.containers); + var search = content.getCluster().getSearch(); + if (!search.getIndexingDocproc().isPresent()) { + return; + } + var indexingDocproc = search.getIndexingDocproc().get(); + if (indexingDocproc.hasExplicitCluster()) { + setExistingIndexingCluster(content, indexingDocproc, content.containers); } else { - setContainerAsIndexingCluster(indexedSearchCluster, content, modelContext, root); + if (search.hasIndexedCluster()) { + setContainerAsIndexingCluster(search.getIndexed(), content, modelContext, root); + } } } @@ -237,18 +242,19 @@ public class Content extends ConfigModel { targetCluster = content.containers.iterator().next().getCluster(); addDocproc(targetCluster); - indexedSearchCluster.setIndexingClusterName(targetCluster.getName()); - addIndexingChainsTo(targetCluster, indexedSearchCluster); + var indexingDocproc = indexedSearchCluster.getIndexingDocproc(); + indexingDocproc.setClusterName(targetCluster.getName()); + addIndexingChainsTo(targetCluster, content, indexingDocproc); } } - private void setExistingIndexingCluster(IndexedSearchCluster cluster, Collection<ContainerModel> containers) { - String indexingClusterName = cluster.getIndexingClusterName(); + private void setExistingIndexingCluster(Content content, IndexingDocproc indexingDocproc, Collection<ContainerModel> containers) { + String indexingClusterName = indexingDocproc.getClusterName(content.getCluster().getName()); ContainerModel containerModel = findByName(indexingClusterName, containers); if (containerModel == null) - throw new IllegalArgumentException("Content cluster '" + cluster.getClusterName() + "' refers to docproc " + + throw new IllegalArgumentException("Content cluster '" + content.getCluster().getName() + "' refers to docproc " + "cluster '" + indexingClusterName + "', but this cluster does not exist."); - addIndexingChainsTo(containerModel.getCluster(), cluster); + addIndexingChainsTo(containerModel.getCluster(), content, indexingDocproc); } private ContainerModel findByName(String name, Collection<ContainerModel> containers) { @@ -258,19 +264,19 @@ public class Content extends ConfigModel { return null; } - private void addIndexingChainsTo(ContainerCluster<?> indexer, IndexedSearchCluster cluster) { + private void addIndexingChainsTo(ContainerCluster<?> indexer, Content content, IndexingDocproc indexingDocproc) { addIndexingChain(indexer); DocprocChain indexingChain; ComponentRegistry<DocprocChain> allChains = indexer.getDocprocChains().allChains(); - if (cluster.hasExplicitIndexingChain()) { - indexingChain = allChains.getComponent(cluster.getIndexingChainName()); + if (indexingDocproc.hasExplicitChain() && !indexingDocproc.getChainName().equals(IndexingDocprocChain.NAME)) { + indexingChain = allChains.getComponent(indexingDocproc.getChainName()); if (indexingChain == null) { - throw new IllegalArgumentException(cluster + " refers to docproc " + - "chain '" + cluster.getIndexingChainName() + + throw new IllegalArgumentException(content.getCluster() + " refers to docproc " + + "chain '" + indexingDocproc.getChainName() + "' for indexing, but this chain does not exist"); } else if (indexingChain.getId().getName().equals("default")) { - throw new IllegalArgumentException(cluster + " specifies the chain " + + throw new IllegalArgumentException(content.getCluster() + " specifies the chain " + "'default' as indexing chain. As the 'default' chain is run by default, " + "using it as the indexing chain will run it twice. " + "Use a different name for the indexing chain."); @@ -282,7 +288,7 @@ public class Content extends ConfigModel { indexingChain = allChains.getComponent(IndexingDocprocChain.NAME); } - cluster.setIndexingChain(indexingChain); + indexingDocproc.setChain(indexingChain); } private TreeConfigProducer<AnyConfigProducer> getDocProc(ApplicationConfigProducerRoot root) { @@ -301,7 +307,7 @@ public class Content extends ConfigModel { Content content, ConfigModelContext modelContext, ApplicationConfigProducerRoot root) { - String indexerName = cluster.getIndexingClusterName(); + String indexerName = cluster.getIndexingDocproc().getClusterName(content.getCluster().getName()); TreeConfigProducer<AnyConfigProducer> parent = getDocProc(root); ApplicationContainerCluster indexingCluster = new ApplicationContainerCluster(parent, "cluster." + indexerName, indexerName, modelContext.getDeployState()); ContainerModel indexingClusterModel = new ContainerModel(modelContext.withParent(parent).withId(indexingCluster.getSubId())); @@ -334,7 +340,7 @@ public class Content extends ConfigModel { indexingCluster.addContainers(nodes); addIndexingChain(indexingCluster); - cluster.setIndexingChain(indexingCluster.getDocprocChains().allChains().getComponent(IndexingDocprocChain.NAME)); + cluster.getIndexingDocproc().setChain(indexingCluster.getDocprocChains().allChains().getComponent(IndexingDocprocChain.NAME)); } private ContainerCluster<?> getContainerWithDocproc(Collection<ContainerModel> containers) { diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/ContentSearchCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/content/ContentSearchCluster.java index a0240d28a3c..ec7acaf819f 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/content/ContentSearchCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/content/ContentSearchCluster.java @@ -16,6 +16,7 @@ import com.yahoo.vespa.model.builder.xml.dom.ModelElement; import com.yahoo.vespa.model.builder.xml.dom.VespaDomBuilder; import com.yahoo.vespa.model.content.cluster.ContentCluster; import com.yahoo.vespa.model.search.IndexedSearchCluster; +import com.yahoo.vespa.model.search.IndexingDocproc; import com.yahoo.vespa.model.search.NodeSpec; import com.yahoo.vespa.model.search.SchemaDefinitionXMLHandler; import com.yahoo.vespa.model.search.SearchCluster; @@ -57,6 +58,7 @@ public class ContentSearchCluster extends TreeConfigProducer<AnyConfigProducer> /** The single, indexed search cluster this sets up (supporting multiple document types), or null if none */ private IndexedSearchCluster indexedCluster; + private Optional<IndexingDocproc> indexingDocproc; private Redundancy redundancy; private final String clusterName; @@ -206,6 +208,7 @@ public class ContentSearchCluster extends TreeConfigProducer<AnyConfigProducer> double fractionOfMemoryReserved) { super(parent, "search"); + this.indexingDocproc = Optional.empty(); this.clusterName = clusterName; this.documentDefinitions = documentDefinitions; this.globallyDistributedDocuments = globallyDistributedDocuments; @@ -259,6 +262,10 @@ public class ContentSearchCluster extends TreeConfigProducer<AnyConfigProducer> throw new IllegalArgumentException("Duplicate indexed cluster '" + indexedCluster.getClusterName() + "'"); } indexedCluster = (IndexedSearchCluster)sc; + if (indexingDocproc.isPresent()) { + throw new IllegalArgumentException("Indexing docproc has previously been setup for streaming search"); + } + indexingDocproc = Optional.of(indexedCluster.getIndexingDocproc()); } clusters.put(sc.getClusterName(), sc); } @@ -458,6 +465,12 @@ public class ContentSearchCluster extends TreeConfigProducer<AnyConfigProducer> public Map<String, SearchCluster> getClusters() { return clusters; } public IndexedSearchCluster getIndexed() { return indexedCluster; } public boolean hasIndexedCluster() { return indexedCluster != null; } + public Optional<IndexingDocproc> getIndexingDocproc() { return indexingDocproc; } + public void setupStreamingSearchIndexingDocProc() { + if (indexingDocproc.isEmpty()) { + indexingDocproc = Optional.of(new IndexingDocproc()); + } + } public String getClusterName() { return clusterName; } @Override diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java index 66a99e1993c..dfdfa9303a7 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java @@ -203,19 +203,24 @@ public class ContentCluster extends TreeConfigProducer<AnyConfigProducer> implem if (docprocCluster != null) { docprocCluster = docprocCluster.trim(); } - if (c.getSearch().hasIndexedCluster()) { - if (docprocCluster != null && !docprocCluster.isEmpty()) { - c.getSearch().getIndexed().setIndexingClusterName(docprocCluster); - } - } - String docprocChain = e.stringAttribute("chain"); if (docprocChain != null) { docprocChain = docprocChain.trim(); } - if (c.getSearch().hasIndexedCluster()) { - if (docprocChain != null && !docprocChain.isEmpty()) { - c.getSearch().getIndexed().setIndexingChainName(docprocChain); + if (docprocCluster != null && !docprocCluster.isEmpty()) { + if (!c.getSearch().hasIndexedCluster() && !c.getSearch().getIndexingDocproc().isPresent() && + docprocChain != null && !docprocChain.isEmpty()) { + c.getSearch().setupStreamingSearchIndexingDocProc(); + } + var indexingDocproc = c.getSearch().getIndexingDocproc(); + if (indexingDocproc.isPresent()) { + indexingDocproc.get().setClusterName(docprocCluster); + } + } + if (docprocChain != null && !docprocChain.isEmpty()) { + var indexingDocproc = c.getSearch().getIndexingDocproc(); + if (indexingDocproc.isPresent()) { + indexingDocproc.get().setChainName(docprocChain); } } } @@ -451,7 +456,7 @@ public class ContentCluster extends TreeConfigProducer<AnyConfigProducer> implem @Override public void getConfig(MessagetyperouteselectorpolicyConfig.Builder builder) { - if ( ! getSearch().hasIndexedCluster()) return; + if ( ! getSearch().getIndexingDocproc().isPresent()) return; DocumentProtocol.getConfig(builder, getConfigId()); } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/routing/DocumentProtocol.java b/config-model/src/main/java/com/yahoo/vespa/model/routing/DocumentProtocol.java index ad0312705ca..6623efb599d 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/routing/DocumentProtocol.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/routing/DocumentProtocol.java @@ -110,7 +110,7 @@ public final class DocumentProtocol implements Protocol, for (ContentCluster cluster : Content.getContentClusters(repo)) { DocumentProtocolPoliciesConfig.Cluster.Builder clusterBuilder = new DocumentProtocolPoliciesConfig.Cluster.Builder(); addSelector(cluster.getConfigId(), cluster.getRoutingSelector(), clusterBuilder); - if (cluster.getSearch().hasIndexedCluster()) + if (cluster.getSearch().getIndexingDocproc().isPresent()) addRoutes(getDirectRouteName(cluster.getConfigId()), getIndexedRouteName(cluster.getConfigId()), clusterBuilder); else clusterBuilder.defaultRoute(cluster.getConfigId()); @@ -227,10 +227,11 @@ public final class DocumentProtocol implements Protocol, for (ContentCluster cluster : content) { RouteSpec spec = new RouteSpec(cluster.getConfigId()); - if (cluster.getSearch().hasIndexedCluster()) { + if (cluster.getSearch().getIndexingDocproc().isPresent()) { + var indexingDocproc = cluster.getSearch().getIndexingDocproc().get(); table.addRoute(spec.addHop("[MessageType:" + cluster.getConfigId() + "]")); table.addRoute(new RouteSpec(getIndexedRouteName(cluster.getConfigId())) - .addHop(cluster.getSearch().getIndexed().getIndexingServiceName()) + .addHop(indexingDocproc.getServiceName()) .addHop("[Content:cluster=" + cluster.getName() + "]")); table.addRoute(new RouteSpec(getDirectRouteName(cluster.getConfigId())) .addHop("[Content:cluster=" + cluster.getName() + "]")); diff --git a/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java index 670460a9f9f..080a2ca43dc 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java @@ -43,11 +43,7 @@ public class IndexedSearchCluster extends SearchCluster DispatchNodesConfig.Producer, ConfigInstance.Producer { - private String indexingClusterName = null; // The name of the docproc cluster to run indexing, by config. - private String indexingChainName = null; - - private DocprocChain indexingChain; // The actual docproc chain indexing for this. - + private IndexingDocproc indexingDocproc; private Tuning tuning; private SearchCoverage searchCoverage; @@ -77,6 +73,7 @@ public class IndexedSearchCluster extends SearchCluster public IndexedSearchCluster(TreeConfigProducer<AnyConfigProducer> parent, String clusterName, int index, ModelContext.FeatureFlags featureFlags) { super(parent, clusterName, index); + indexingDocproc = new IndexingDocproc(); documentDbsConfigProducer = new MultipleDocumentDatabasesConfigProducer(this, documentDbs); rootDispatch = new DispatchGroup(this); defaultDispatchPolicy = DispatchTuning.Builder.toDispatchPolicy(featureFlags.queryDispatchPolicy()); @@ -87,58 +84,7 @@ public class IndexedSearchCluster extends SearchCluster @Override protected IndexingMode getIndexingMode() { return IndexingMode.REALTIME; } - public final boolean hasExplicitIndexingCluster() { - return indexingClusterName != null; - } - - public final boolean hasExplicitIndexingChain() { - return indexingChainName != null; - } - - /** - * Returns the name of the docproc cluster running indexing for this search cluster. This is derived from the - * services file on initialization, this can NOT be used at runtime to determine indexing chain. When initialization - * is done, the {@link #getIndexingServiceName()} method holds the actual indexing docproc chain object. - * - * @return the name of the docproc cluster associated with this - */ - public String getIndexingClusterName() { - return hasExplicitIndexingCluster() ? indexingClusterName : getClusterName() + ".indexing"; - } - - public String getIndexingChainName() { - return indexingChainName; - } - - public void setIndexingChainName(String indexingChainName) { - this.indexingChainName = indexingChainName; - } - - /** - * Sets the name of the docproc cluster running indexing for this search cluster. This is for initial configuration, - * and will not reflect the actual indexing chain. See {@link #getIndexingClusterName} for more detail. - * - * @param name the name of the docproc cluster associated with this - */ - public void setIndexingClusterName(String name) { - indexingClusterName = name; - } - - public String getIndexingServiceName() { - return indexingChain.getServiceName(); - } - - /** - * Sets the docproc chain that will be running indexing for this search cluster. This is set by the - * {@link com.yahoo.vespa.model.content.Content} model during build. - * - * @param chain the chain that is to run indexing for this cluster - * @return this, to allow chaining - */ - public SearchCluster setIndexingChain(DocprocChain chain) { - indexingChain = chain; - return this; - } + public IndexingDocproc getIndexingDocproc() { return indexingDocproc; } public DispatchGroup getRootDispatch() { return rootDispatch; } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/search/IndexingDocproc.java b/config-model/src/main/java/com/yahoo/vespa/model/search/IndexingDocproc.java new file mode 100644 index 00000000000..46f3e6f459d --- /dev/null +++ b/config-model/src/main/java/com/yahoo/vespa/model/search/IndexingDocproc.java @@ -0,0 +1,68 @@ +package com.yahoo.vespa.model.search; + +import com.yahoo.vespa.model.container.docproc.DocprocChain; + +/** + * Utility class to track configuration for which indexing docproc to use by a search cluster. + */ +public class IndexingDocproc { + private String clusterName; // The name of the docproc cluster to run indexing, by config. + private String chainName; + + private DocprocChain chain; // The actual docproc chain indexing for this. + + public boolean hasExplicitCluster() { + return clusterName != null; + } + + public boolean hasExplicitChain() { + return chainName != null; + } + + /** + * Returns the name of the docproc cluster running indexing for this search cluster. This is derived from the + * services file on initialization, this can NOT be used at runtime to determine indexing chain. When initialization + * is done, the {@link #getServiceName()} method holds the actual indexing docproc chain object. + * + * @return the name of the docproc cluster associated with this + */ + public String getClusterName(String searchClusterName) { + return hasExplicitCluster() ? clusterName : searchClusterName + ".indexing"; + } + + public String getChainName() { + return chainName; + } + + public void setChainName(String name) { + chainName = name; + } + + /** + * Sets the name of the docproc cluster running indexing for this search cluster. This is for initial configuration, + * and will not reflect the actual indexing chain. See {@link #getClusterName} for more detail. + * + * @param name the name of the docproc cluster associated with this + */ + public void setClusterName(String name) { + clusterName = name; + } + + public String getServiceName() { + return chain.getServiceName(); + } + + /** + * Sets the docproc chain that will be running indexing for this search cluster. This is set by the + * {@link com.yahoo.vespa.model.content.Content} model during build. + * + * @param chain the chain that is to run indexing for this cluster + */ + public void setChain(DocprocChain chain) { this.chain = chain; } + + public IndexingDocproc() { + clusterName = null; + chainName = null; + chain = null; + } +} diff --git a/config-model/src/main/java/com/yahoo/vespa/model/search/NodeResourcesTuning.java b/config-model/src/main/java/com/yahoo/vespa/model/search/NodeResourcesTuning.java index 5b747b93268..1ad99404823 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/search/NodeResourcesTuning.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/search/NodeResourcesTuning.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.model.search; import com.yahoo.config.provision.NodeResources; import com.yahoo.vespa.config.search.core.ProtonConfig; +import com.yahoo.vespa.model.Host; import static java.lang.Long.min; import static java.lang.Long.max; @@ -27,9 +28,6 @@ public class NodeResourcesTuning implements ProtonConfig.Producer { private final int threadsPerSearch; private final double fractionOfMemoryReserved; - // "Reserve" 0.5GB of memory for other processes running on the content node (config-proxy, metrics-proxy). - public static final double reservedMemoryGb = 0.7; - public NodeResourcesTuning(NodeResources resources, int threadsPerSearch, double fractionOfMemoryReserved) { @@ -128,7 +126,7 @@ public class NodeResourcesTuning implements ProtonConfig.Producer { /** Returns the memory we can expect will be available for the content node processes */ private double usableMemoryGb() { - double usableMemoryGb = resources.memoryGb() - reservedMemoryGb; + double usableMemoryGb = resources.memoryGb() - Host.memoryOverheadGb; return usableMemoryGb * (1 - fractionOfMemoryReserved); } diff --git a/config-model/src/main/java/com/yahoo/vespa/model/search/Tuning.java b/config-model/src/main/java/com/yahoo/vespa/model/search/Tuning.java index a29b7b90b44..93e3a6e7a19 100644 --- a/config-model/src/main/java/com/yahoo/vespa/model/search/Tuning.java +++ b/config-model/src/main/java/com/yahoo/vespa/model/search/Tuning.java @@ -53,6 +53,16 @@ public class Tuning extends AnyConfigProducer implements ProtonConfig.Producer { } } + public static class LidSpace implements ProtonConfig.Producer { + public Double bloatFactor = null; + + @Override + public void getConfig(ProtonConfig.Builder builder) { + if (bloatFactor != null) builder.lidspacecompaction.allowedlidbloatfactor(bloatFactor); + } + + } + public static class RemovedDB implements ProtonConfig.Producer { public static class Prune implements ProtonConfig.Producer { @@ -371,6 +381,7 @@ public class Tuning extends AnyConfigProducer implements ProtonConfig.Producer { } public RequestThreads threads = null; + public LidSpace lidSpace = null; public FlushStrategy strategy = null; public Resizing resizing = null; public Index index = null; @@ -383,6 +394,7 @@ public class Tuning extends AnyConfigProducer implements ProtonConfig.Producer { @Override public void getConfig(ProtonConfig.Builder builder) { if (threads != null) threads.getConfig(builder); + if (lidSpace != null) lidSpace.getConfig(builder); if (strategy != null) strategy.getConfig(builder); if (resizing != null) resizing.getConfig(builder); if (index != null) index.getConfig(builder); diff --git a/config-model/src/main/resources/schema/common.rnc b/config-model/src/main/resources/schema/common.rnc index 21f3399a027..061e54740f1 100644 --- a/config-model/src/main/resources/schema/common.rnc +++ b/config-model/src/main/resources/schema/common.rnc @@ -53,6 +53,11 @@ GenericConfig = element config { anyElement* } +ModelReference = + attribute model-id { xsd:string }? & + attribute path { xsd:string }? & + attribute url { xsd:string }? + ComponentSpec = ( attribute id { xsd:Name | JavaId } | attribute idref { xsd:Name } | attribute ident { xsd:Name } ) @@ -64,7 +69,7 @@ BundleSpec = attribute bundle { xsd:Name }? Component = element component { - ComponentDefinition + (ComponentDefinition | TypedComponentDefinition) } ComponentDefinition = @@ -72,3 +77,51 @@ ComponentDefinition = BundleSpec & GenericConfig* & Component* + +TypedComponentDefinition = + attribute id { xsd:Name } & + (HuggingFaceEmbedder | HuggingFaceTokenizer | BertBaseEmbedder) & + GenericConfig* & + Component* + +HuggingFaceEmbedder = + attribute type { "hugging-face-embedder" } & + element transformer-model { ModelReference } & + element tokenizer-model { ModelReference }? & + element max-tokens { xsd:nonNegativeInteger }? & + element transformer-input-ids { xsd:string }? & + element transformer-attention-mask { xsd:string }? & + element transformer-token-type-ids { xsd:string }? & + element transformer-output { xsd:string }? & + element normalize { xsd:boolean }? & + OnnxModelExecutionParams & + EmbedderPoolingStrategy + +HuggingFaceTokenizer = + attribute type { "hugging-face-tokenizer" } & + element model { attribute language { xsd:string }? & ModelReference }+ & + element special-tokens { xsd:boolean }? & + element max-length { xsd:integer }? & + element truncation { xsd:boolean }? + +BertBaseEmbedder = + attribute type { "bert-embedder" } & + element transformer-model { ModelReference } & + element tokenizer-vocab { ModelReference } & + element max-tokens { xsd:nonNegativeInteger }? & + element transformer-input-ids { xsd:string }? & + element transformer-attention-mask { xsd:string }? & + element transformer-token-type-ids { xsd:string }? & + element transformer-output { xsd:string }? & + element transformer-start-sequence-token { xsd:integer }? & + element transformer-end-sequence-token { xsd:integer }? & + OnnxModelExecutionParams & + EmbedderPoolingStrategy + +OnnxModelExecutionParams = + element onnx-execution-mode { "parallel" | "sequential" }? & + element onnx-interop-threads { xsd:integer }? & + element onnx-intraop-threads { xsd:integer }? & + element onnx-gpu-device { xsd:integer }? + +EmbedderPoolingStrategy = element pooling-strategy { "cls" | "mean" }?
\ No newline at end of file diff --git a/config-model/src/main/resources/schema/content.rnc b/config-model/src/main/resources/schema/content.rnc index 6486fdacc18..5833b575a74 100644 --- a/config-model/src/main/resources/schema/content.rnc +++ b/config-model/src/main/resources/schema/content.rnc @@ -301,6 +301,9 @@ Tuning = element tuning { element persearch { xsd:nonNegativeInteger }? & element summary { xsd:nonNegativeInteger }? }? & + element lidspace { + element max-bloat-factor { xsd:double { minInclusive = "0.0" maxInclusive = "1.0" } }? + }? & element flushstrategy { element native { element total { diff --git a/config-model/src/main/resources/schema/deployment.rnc b/config-model/src/main/resources/schema/deployment.rnc index ede05ad65ef..0f2eed3f72b 100644 --- a/config-model/src/main/resources/schema/deployment.rnc +++ b/config-model/src/main/resources/schema/deployment.rnc @@ -8,6 +8,7 @@ start = element deployment { attribute athenz-domain { xsd:string }? & attribute athenz-service { xsd:string }? & attribute cloud-account { xsd:string }? & + attribute empty-host-ttl { xsd:string }? & Step } @@ -39,6 +40,7 @@ Instance = element instance { attribute tags { xsd:string }? & attribute athenz-service { xsd:string }? & attribute cloud-account { xsd:string }? & + attribute empty-host-ttl { xsd:string }? & StepExceptInstance } @@ -106,11 +108,13 @@ Staging = element staging { } Dev = element dev { - attribute cloud-account { xsd:string }? + attribute cloud-account { xsd:string }? & + attribute empty-host-ttl { xsd:string }? } Perf = element perf { - attribute cloud-account { xsd:string }? + attribute cloud-account { xsd:string }? & + attribute empty-host-ttl { xsd:string }? } Prod = element prod { @@ -118,6 +122,7 @@ Prod = element prod { attribute athenz-service { xsd:string }? & attribute tester-flavor { xsd:string }? & attribute cloud-account { xsd:string }? & + attribute empty-host-ttl { xsd:string }? & Region* & Delay* & ProdTest* & @@ -132,6 +137,7 @@ Region = element region { attribute active { xsd:boolean }? & attribute athenz-service { xsd:string }? & attribute cloud-account { xsd:string }? & + attribute empty-host-ttl { xsd:string }? & text } diff --git a/config-model/src/test/cfg/application/embed/configdefinitions/embedding.bert-base-embedder.def b/config-model/src/test/cfg/application/embed/configdefinitions/embedding.bert-base-embedder.def deleted file mode 100644 index 144dfbd0001..00000000000 --- a/config-model/src/test/cfg/application/embed/configdefinitions/embedding.bert-base-embedder.def +++ /dev/null @@ -1,30 +0,0 @@ -# Copy of this Vespa config stored here because Vespa config definitions are not -# available in unit tests, and are needed (by DomConfigPayloadBuilder.parseLeaf) -# Alternatively, we could make that not need it as it is not strictly necessaery. - -namespace=embedding - -# Wordpiece tokenizer -tokenizerVocab model - -transformerModel model - -# Max length of token sequence model can handle -transformerMaxTokens int default=384 - -# Pooling strategy -poolingStrategy enum { cls, mean } default=mean - -# Input names -transformerInputIds string default=input_ids -transformerAttentionMask string default=attention_mask -transformerTokenTypeIds string default=token_type_ids - -# Output name -transformerOutput string default=output_0 - -# Settings for ONNX model evaluation -onnxExecutionMode enum { parallel, sequential } default=sequential -onnxInterOpThreads int default=1 -onnxIntraOpThreads int default=-4 # n=number of threads -> n<0: CPUs/(-n), n==0: CPUs, n>0: n - diff --git a/config-model/src/test/cfg/application/embed/configdefinitions/sentence-embedder.def b/config-model/src/test/cfg/application/embed/configdefinitions/sentence-embedder.def new file mode 100644 index 00000000000..87b80f1051a --- /dev/null +++ b/config-model/src/test/cfg/application/embed/configdefinitions/sentence-embedder.def @@ -0,0 +1,26 @@ +package=ai.vespa.example.paragraph + +# WordPiece tokenizer vocabulary +vocab model + +model model + +myValue string + +# Max length of token sequence model can handle +transforerMaxTokens int default=128 + +# Pooling strategy +poolingStrategy enum { cls, mean } default=mean + +# Input names +transformerInputIds string default=input_ids +transformerAttentionMask string default=attention_mask + +# Output name +transformerOutput string default=last_hidden_state + +# Settings for ONNX model evaluation +onnxExecutionMode enum { parallel, sequential } default=sequential +onnxInterOpThreads int default=1 +onnxIntraOpThreads int default=-4 diff --git a/config-model/src/test/cfg/application/embed/services.xml b/config-model/src/test/cfg/application/embed/services.xml index fcb1f10f32c..6823ef900ae 100644 --- a/config-model/src/test/cfg/application/embed/services.xml +++ b/config-model/src/test/cfg/application/embed/services.xml @@ -3,16 +3,47 @@ <services version="1.0"> <container version="1.0"> + <component id="hf-embedder" type="hugging-face-embedder"> + <transformer-model model-id="e5-base-v2" url="https://my/url/model.onnx"/> + <tokenizer-model model-id="e5-base-v2-vocab" path="app/tokenizer.json"/> + <max-tokens>1024</max-tokens> + <transformer-input-ids>my_input_ids</transformer-input-ids> + <transformer-attention-mask>my_attention_mask</transformer-attention-mask> + <transformer-token-type-ids>my_token_type_ids</transformer-token-type-ids> + <transformer-output>my_output</transformer-output> + <normalize>true</normalize> + <onnx-execution-mode>parallel</onnx-execution-mode> + <onnx-intraop-threads>10</onnx-intraop-threads> + <onnx-interop-threads>8</onnx-interop-threads> + <onnx-gpu-device>1</onnx-gpu-device> + <pooling-strategy>mean</pooling-strategy> + </component> + + <component id="hf-tokenizer" type="hugging-face-tokenizer"> + <model language="no" model-id="multilingual-e5-base-vocab" url="https://my/url/tokenizer.json"/> + <special-tokens>true</special-tokens> + <max-length>768</max-length> + <truncation>true</truncation> + </component> + + <component id="bert-embedder" type="bert-embedder"> + <!-- model specifics --> + <transformer-model model-id="minilm-l6-v2" url="application-url"/> + <tokenizer-vocab path="files/vocab.txt"/> + <max-tokens>512</max-tokens> + <transformer-input-ids>my_input_ids</transformer-input-ids> + <transformer-attention-mask>my_attention_mask</transformer-attention-mask> + <transformer-token-type-ids>my_token_type_ids</transformer-token-type-ids> + <transformer-output>my_output</transformer-output> + <transformer-start-sequence-token>101</transformer-start-sequence-token> + <transformer-end-sequence-token>102</transformer-end-sequence-token> - <component id="transformer" class="ai.vespa.embedding.BertBaseEmbedder" bundle="model-integration"> - <config name="embedding.bert-base-embedder"> - <!-- model specifics --> - <transformerModel model-id="minilm-l6-v2" url="application-url"/> - <tokenizerVocab path="files/vocab.txt"/> - <!-- tunable parameters: number of threads etc --> - <onnxIntraOpThreads>4</onnxIntraOpThreads> - </config> + <!-- tunable parameters: number of threads etc --> + <onnx-execution-mode>parallel</onnx-execution-mode> + <onnx-intraop-threads>4</onnx-intraop-threads> + <onnx-interop-threads>8</onnx-interop-threads> + <onnx-gpu-device>1</onnx-gpu-device> </component> <nodes> diff --git a/config-model/src/test/cfg/application/embed_cloud_only/configdefinitions/embedding.bert-base-embedder.def b/config-model/src/test/cfg/application/embed_cloud_only/configdefinitions/embedding.bert-base-embedder.def deleted file mode 100644 index 144dfbd0001..00000000000 --- a/config-model/src/test/cfg/application/embed_cloud_only/configdefinitions/embedding.bert-base-embedder.def +++ /dev/null @@ -1,30 +0,0 @@ -# Copy of this Vespa config stored here because Vespa config definitions are not -# available in unit tests, and are needed (by DomConfigPayloadBuilder.parseLeaf) -# Alternatively, we could make that not need it as it is not strictly necessaery. - -namespace=embedding - -# Wordpiece tokenizer -tokenizerVocab model - -transformerModel model - -# Max length of token sequence model can handle -transformerMaxTokens int default=384 - -# Pooling strategy -poolingStrategy enum { cls, mean } default=mean - -# Input names -transformerInputIds string default=input_ids -transformerAttentionMask string default=attention_mask -transformerTokenTypeIds string default=token_type_ids - -# Output name -transformerOutput string default=output_0 - -# Settings for ONNX model evaluation -onnxExecutionMode enum { parallel, sequential } default=sequential -onnxInterOpThreads int default=1 -onnxIntraOpThreads int default=-4 # n=number of threads -> n<0: CPUs/(-n), n==0: CPUs, n>0: n - diff --git a/config-model/src/test/cfg/application/embed_cloud_only/configdefinitions/sentence-embedder.def b/config-model/src/test/cfg/application/embed_cloud_only/configdefinitions/sentence-embedder.def new file mode 100644 index 00000000000..87b80f1051a --- /dev/null +++ b/config-model/src/test/cfg/application/embed_cloud_only/configdefinitions/sentence-embedder.def @@ -0,0 +1,26 @@ +package=ai.vespa.example.paragraph + +# WordPiece tokenizer vocabulary +vocab model + +model model + +myValue string + +# Max length of token sequence model can handle +transforerMaxTokens int default=128 + +# Pooling strategy +poolingStrategy enum { cls, mean } default=mean + +# Input names +transformerInputIds string default=input_ids +transformerAttentionMask string default=attention_mask + +# Output name +transformerOutput string default=last_hidden_state + +# Settings for ONNX model evaluation +onnxExecutionMode enum { parallel, sequential } default=sequential +onnxInterOpThreads int default=1 +onnxIntraOpThreads int default=-4 diff --git a/config-model/src/test/cfg/application/embed_cloud_only/services.xml b/config-model/src/test/cfg/application/embed_cloud_only/services.xml index 57db4f5bfae..e203ec56669 100644 --- a/config-model/src/test/cfg/application/embed_cloud_only/services.xml +++ b/config-model/src/test/cfg/application/embed_cloud_only/services.xml @@ -4,14 +4,11 @@ <container version="1.0"> - <component id="transformer" class="ai.vespa.embedding.BertBaseEmbedder" bundle="model-integration"> - <config name="embedding.bert-base-embedder"> - <!-- No fallback to url or path when deploying outside cloud --> - <transformerModel model-id="minilm-l6-v2"/> - <tokenizerVocab path="files/vocab.txt"/> - - <!-- tunable parameters: number of threads etc --> - <onnxIntraOpThreads>4</onnxIntraOpThreads> + <component id="transformer" class="ai.vespa.example.paragraph.ApplicationSpecificEmbedder" bundle="app"> + <config name='ai.vespa.example.paragraph.sentence-embedder'> + <model model-id="minilm-l6-v2"/> + <vocab path="files/vocab.txt"/> + <myValue>foo</myValue> </config> </component> diff --git a/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java b/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java index 5472ea2ca82..8b8191ebbbb 100644 --- a/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java +++ b/config-model/src/test/java/com/yahoo/config/model/provision/ModelProvisioningTest.java @@ -57,7 +57,7 @@ import static com.yahoo.config.provision.NodeResources.DiskSpeed; import static com.yahoo.config.provision.NodeResources.StorageType; import static com.yahoo.vespa.defaults.Defaults.getDefaults; import static com.yahoo.vespa.model.search.NodeResourcesTuning.GB; -import static com.yahoo.vespa.model.search.NodeResourcesTuning.reservedMemoryGb; +import static com.yahoo.vespa.model.Host.memoryOverheadGb; import static com.yahoo.vespa.model.test.utils.ApplicationPackageUtils.generateSchemas; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -202,6 +202,12 @@ public class ModelProvisioningTest { " <search/>" + " <nodes count='1'/>" + " </container>" + + " <container version='1.0' id='container2'>" + + " <search/>" + + " <nodes count='1'>" + + " <resources vcpu='10' memory='100Gb' disk='1Tb'/>" + + " </nodes>" + + " </container>" + " <content version='1.0' id='content1'>" + " <redundancy>2</redundancy>" + " <documents>" + @@ -219,12 +225,14 @@ public class ModelProvisioningTest { "</services>"; VespaModelTester tester = new VespaModelTester(); tester.addHosts(8); + tester.addHosts(new NodeResources(20, 200, 2000, 1.0), 1); VespaModel model = tester.createModel(xmlWithNodes, true); assertEquals(2, model.getContentClusters().get("content1").getRootGroup().getNodes().size(), "Nodes in content1"); assertEquals(1, model.getContainerClusters().get("container1").getContainers().size(), "Nodes in container1"); assertEquals(2, model.getContentClusters().get("content").getRootGroup().getNodes().size(), "Nodes in cluster without ID"); - assertEquals(ApplicationContainerCluster.defaultHeapSizePercentageOfTotalNodeMemory, physicalMemoryPercentage(model.getContainerClusters().get("container1")), "Heap size for container"); + assertEquals(65, physicalMemoryPercentage(model.getContainerClusters().get("container1")), "Heap size for container1"); + assertEquals(84, physicalMemoryPercentage(model.getContainerClusters().get("container2")), "Heap size for container2"); assertProvisioned(2, ClusterSpec.Id.from("content1"), ClusterSpec.Type.content, model); assertProvisioned(1, ClusterSpec.Id.from("container1"), ClusterSpec.Type.container, model); assertProvisioned(2, ClusterSpec.Id.from("content"), ClusterSpec.Type.content, model); @@ -277,8 +285,7 @@ public class ModelProvisioningTest { assertEquals(2, model.getContentClusters().get("content1").getRootGroup().getNodes().size(), "Nodes in content1"); assertEquals(2, model.getContainerClusters().get("container1").getContainers().size(), "Nodes in container1"); assertEquals(18, physicalMemoryPercentage(model.getContainerClusters().get("container1")), "Heap size is lowered with combined clusters"); - assertEquals((long) ((3 - reservedMemoryGb) * (Math.pow(1024, 3)) * (1 - 0.18)), protonMemorySize(model.getContentClusters() - .get("content1")), "Memory for proton is lowered to account for the jvm heap"); + assertEquals(2025077080L, protonMemorySize(model.getContentClusters().get("content1")), "Memory for proton is lowered to account for the jvm heap"); assertProvisioned(0, ClusterSpec.Id.from("container1"), ClusterSpec.Type.container, model); assertProvisioned(2, ClusterSpec.Id.from("content1"), ClusterSpec.Id.from("container1"), ClusterSpec.Type.combined, model); assertEquals(1, logger.msgs().size()); @@ -314,8 +321,8 @@ public class ModelProvisioningTest { assertEquals(2, model.getContentClusters().get("content1").getRootGroup().getNodes().size(), "Nodes in content1"); assertEquals(2, model.getContainerClusters().get("container1").getContainers().size(), "Nodes in container1"); assertEquals(30, physicalMemoryPercentage(model.getContainerClusters().get("container1")), "Heap size is lowered with combined clusters"); - assertEquals((long) ((3 - reservedMemoryGb) * (Math.pow(1024, 3)) * (1 - 0.30)), protonMemorySize(model.getContentClusters() - .get("content1")), "Memory for proton is lowered to account for the jvm heap"); + assertEquals((long) ((3 - memoryOverheadGb) * (Math.pow(1024, 3)) * (1 - 0.30)), protonMemorySize(model.getContentClusters() + .get("content1")), "Memory for proton is lowered to account for the jvm heap"); assertProvisioned(0, ClusterSpec.Id.from("container1"), ClusterSpec.Type.container, model); assertProvisioned(2, ClusterSpec.Id.from("content1"), ClusterSpec.Id.from("container1"), ClusterSpec.Type.combined, model); } @@ -345,8 +352,8 @@ public class ModelProvisioningTest { VespaModel model = tester.createModel(xmlWithNodes, true); assertEquals(2, model.getContentClusters().get("content1").getRootGroup().getNodes().size(), "Nodes in content1"); assertEquals(2, model.getContainerClusters().get("container1").getContainers().size(), "Nodes in container1"); - assertEquals(ApplicationContainerCluster.defaultHeapSizePercentageOfTotalNodeMemory, physicalMemoryPercentage(model.getContainerClusters().get("container1")), "Heap size is normal"); - assertEquals((long) ((3 - reservedMemoryGb) * (Math.pow(1024, 3))), protonMemorySize(model.getContentClusters().get("content1")), "Memory for proton is normal"); + assertEquals(65, physicalMemoryPercentage(model.getContainerClusters().get("container1")), "Heap size is normal"); + assertEquals((long) ((3 - memoryOverheadGb) * (Math.pow(1024, 3))), protonMemorySize(model.getContentClusters().get("content1")), "Memory for proton is normal"); } @Test @@ -2569,7 +2576,7 @@ public class ModelProvisioningTest { ProtonConfig cfg = getProtonConfig(model, cluster.getSearchNodes().get(0).getConfigId()); assertEquals(2000, cfg.flush().memory().maxtlssize()); // from config override assertEquals(1000, cfg.flush().memory().maxmemory()); // from explicit tuning - assertEquals((long) ((128 - reservedMemoryGb) * GB * 0.08), cfg.flush().memory().each().maxmemory()); // from default node flavor tuning + assertEquals((long) ((128 - memoryOverheadGb) * GB * 0.08), cfg.flush().memory().each().maxmemory()); // from default node flavor tuning } private static ProtonConfig getProtonConfig(VespaModel model, String configId) { diff --git a/config-model/src/test/java/com/yahoo/schema/processing/NGramTestCase.java b/config-model/src/test/java/com/yahoo/schema/processing/NGramTestCase.java index c143aa43d53..06ea202b9c3 100644 --- a/config-model/src/test/java/com/yahoo/schema/processing/NGramTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/processing/NGramTestCase.java @@ -52,7 +52,7 @@ public class NGramTestCase extends AbstractSchemaTestCase { @Test void testInvalidNGramSetting1() throws IOException, ParseException { try { - Schema schema = ApplicationBuilder.buildFromFile("src/test/examples/invalidngram1.sd"); + ApplicationBuilder.buildFromFile("src/test/examples/invalidngram1.sd"); fail("Should cause an exception"); } catch (IllegalArgumentException e) { @@ -63,7 +63,7 @@ public class NGramTestCase extends AbstractSchemaTestCase { @Test void testInvalidNGramSetting2() throws IOException, ParseException { try { - Schema schema = ApplicationBuilder.buildFromFile("src/test/examples/invalidngram2.sd"); + ApplicationBuilder.buildFromFile("src/test/examples/invalidngram2.sd"); fail("Should cause an exception"); } catch (IllegalArgumentException e) { @@ -74,7 +74,7 @@ public class NGramTestCase extends AbstractSchemaTestCase { @Test void testInvalidNGramSetting3() throws IOException, ParseException { try { - Schema schema = ApplicationBuilder.buildFromFile("src/test/examples/invalidngram3.sd"); + ApplicationBuilder.buildFromFile("src/test/examples/invalidngram3.sd"); fail("Should cause an exception"); } catch (IllegalArgumentException e) { diff --git a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidatorTest.java b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidatorTest.java index 80643917a58..42be1592eca 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidatorTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidatorTest.java @@ -281,7 +281,25 @@ public class ConstantTensorJsonValidatorTest { " }", "}")); }); - assertTrue(exception.getMessage().contains("Expected field name 'cells', got 'stats'")); + System.err.println("msg: " + exception.getMessage()); + assertTrue(exception.getMessage().contains("Expected 'cells' or 'values', got 'stats'")); + } + + @Test + void ensure_that_values_array_for_vector_works() { + validateTensorJson( + TensorType.fromSpec("tensor(x[5])"), + inputJsonToReader("[5,4.0,3.1,-2,-1.0]")); + validateTensorJson( + TensorType.fromSpec("tensor(x[5])"), + inputJsonToReader("{'values':[5,4.0,3.1,-2,-1.0]}")); + } + + @Test + void ensure_that_simple_object_for_map_works() { + validateTensorJson( + TensorType.fromSpec("tensor(x{})"), + inputJsonToReader("{'cells':{'a':5,'b':4.0,'c':3.1,'d':-2,'e':-1.0}}")); } } diff --git a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/QuotaValidatorTest.java b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/QuotaValidatorTest.java index 1a7b3d62cb7..a1a3b40a858 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/QuotaValidatorTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/QuotaValidatorTest.java @@ -21,19 +21,20 @@ public class QuotaValidatorTest { private final Zone publicZone = new Zone(SystemName.Public, Environment.prod, RegionName.from("foo")); private final Zone publicCdZone = new Zone(SystemName.PublicCd, Environment.prod, RegionName.from("foo")); + private final Zone devZone = new Zone(SystemName.Public, Environment.dev, RegionName.from("foo")); private final Quota quota = Quota.unlimited().withClusterSize(10).withBudget(BigDecimal.valueOf(1.25)); @Test void test_deploy_under_quota() { var tester = new ValidationTester(8, false, new TestProperties().setHostedVespa(true).setQuota(quota).setZone(publicZone)); - tester.deploy(null, getServices("testCluster", 4), Environment.prod, null); + tester.deploy(null, getServices(4), Environment.prod, null); } @Test void test_deploy_above_quota_clustersize() { var tester = new ValidationTester(14, false, new TestProperties().setHostedVespa(true).setQuota(quota).setZone(publicZone)); try { - tester.deploy(null, getServices("testCluster", 11), Environment.prod, null); + tester.deploy(null, getServices(11), Environment.prod, null); fail(); } catch (RuntimeException e) { assertEquals("Clusters testCluster exceeded max cluster size of 10", e.getMessage()); @@ -44,10 +45,10 @@ public class QuotaValidatorTest { void test_deploy_above_quota_budget() { var tester = new ValidationTester(13, false, new TestProperties().setHostedVespa(true).setQuota(quota).setZone(publicZone)); try { - tester.deploy(null, getServices("testCluster", 10), Environment.prod, null); + tester.deploy(null, getServices(10), Environment.prod, null); fail(); } catch (RuntimeException e) { - assertEquals("The max resources specified cost $1.63 but your quota is $1.25: Contact support to upgrade your plan.", e.getMessage()); + assertEquals("The resources used cost $1.63 but your quota is $1.25: Contact support to upgrade your plan.", e.getMessage()); } } @@ -55,10 +56,10 @@ public class QuotaValidatorTest { void test_deploy_above_quota_budget_in_publiccd() { var tester = new ValidationTester(13, false, new TestProperties().setHostedVespa(true).setQuota(quota.withBudget(BigDecimal.ONE)).setZone(publicCdZone)); try { - tester.deploy(null, getServices("testCluster", 10), Environment.prod, null); + tester.deploy(null, getServices(10), Environment.prod, null); fail(); } catch (RuntimeException e) { - assertEquals("publiccd: The max resources specified cost $1.63 but your quota is $1.00: Contact support to upgrade your plan.", e.getMessage()); + assertEquals("publiccd: The resources used cost $1.63 but your quota is $1.00: Contact support to upgrade your plan.", e.getMessage()); } } @@ -66,11 +67,33 @@ public class QuotaValidatorTest { void test_deploy_max_resources_above_quota() { var tester = new ValidationTester(13, false, new TestProperties().setHostedVespa(true).setQuota(quota).setZone(publicCdZone)); try { - tester.deploy(null, getServices("testCluster", 10), Environment.prod, null); + tester.deploy(null, getServices(10), Environment.prod, null); fail(); } catch (RuntimeException e) { - assertEquals("publiccd: The max resources specified cost $1.63 but your quota is $1.25: Contact support to upgrade your plan.", e.getMessage()); + assertEquals("publiccd: The resources used cost $1.63 but your quota is $1.25: Contact support to upgrade your plan.", e.getMessage()); + } + } + + + @Test + void test_deploy_above_quota_budget_in_dev() { + var quota = Quota.unlimited().withBudget(BigDecimal.valueOf(0.01)); + var tester = new ValidationTester(5, false, new TestProperties().setHostedVespa(true).setQuota(quota).setZone(devZone)); + // There is downscaling to 1 node per cluster in dev + try { + tester.deploy(null, getServices(2, false), Environment.dev, null); + fail(); + } catch (RuntimeException e) { + assertEquals("The resources used cost $0.16 but your quota is $0.01: Contact support to upgrade your plan.", e.getMessage()); + } + + // Override so that we will get 2 nodes in content cluster + try { + tester.deploy(null, getServices(2, true), Environment.dev, null); + fail(); + } catch (RuntimeException e) { + assertEquals("The resources used cost $0.33 but your quota is $0.01: Contact support to upgrade your plan.", e.getMessage()); } } @@ -79,25 +102,26 @@ public class QuotaValidatorTest { var quota = Quota.unlimited().withBudget(BigDecimal.valueOf(-1)); var tester = new ValidationTester(13, false, new TestProperties().setHostedVespa(true).setQuota(quota).setZone(publicZone)); try { - tester.deploy(null, getServices("testCluster", 10), Environment.prod, null); + tester.deploy(null, getServices(10), Environment.prod, null); fail(); } catch (RuntimeException e) { - assertEquals("The max resources specified cost $-.-- but your quota is $--.--: Please free up some capacity.", + assertEquals("The resources used cost $-.-- but your quota is $--.--: Please free up some capacity.", ValidationTester.censorNumbers(e.getMessage())); } } - private static String getServices(String contentClusterId, int nodeCount) { - return "<services version='1.0'>" + - " <content id='" + contentClusterId + "' version='1.0'>" + + private static String getServices(int nodeCount) { + return getServices(nodeCount, false); + } + + private static String getServices(int nodeCount, boolean devOverride) { + return "<services version='1.0' xmlns:deploy='vespa' xmlns:preprocess='properties'>" + + " <content id='" + "testCluster" + "' version='1.0'>" + " <redundancy>1</redundancy>" + - " <engine>" + - " <proton/>" + - " </engine>" + " <documents>" + " <document type='music' mode='index'/>" + " </documents>" + - " <nodes count='" + nodeCount + "'>" + + " <nodes count='" + nodeCount + "' " + (devOverride ? "required='true'" : "") + " >\n" + " <resources vcpu=\"[0.5, 2]\" memory=\"[1Gb, 6Gb]\" disk=\"[1Gb, 18Gb]\"/>\n" + " </nodes>" + " </content>" + diff --git a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/ValidationTester.java b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/ValidationTester.java index 78d3838d39d..1517f7971ed 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/ValidationTester.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/ValidationTester.java @@ -57,6 +57,7 @@ public class ValidationTester { public ValidationTester(InMemoryProvisioner hostProvisioner, TestProperties testProperties) { this.hostProvisioner = hostProvisioner; this.properties = testProperties; + hostProvisioner.setEnvironment(testProperties.zone().environment()); } /** diff --git a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/CloudAccountChangeValidatorTest.java b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/CloudAccountChangeValidatorTest.java index a8a063cb5fb..77704817045 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/CloudAccountChangeValidatorTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/application/validation/change/CloudAccountChangeValidatorTest.java @@ -1,14 +1,14 @@ package com.yahoo.vespa.model.application.validation.change; -import com.yahoo.config.provision.ClusterInfo; -import com.yahoo.config.provision.IntRange; import com.yahoo.config.model.api.Provisioned; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.config.model.deploy.TestProperties; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.CloudAccount; +import com.yahoo.config.provision.ClusterInfo; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.IntRange; import com.yahoo.config.provision.NodeResources; import com.yahoo.vespa.model.VespaModel; import com.yahoo.vespa.model.test.utils.VespaModelCreatorWithMockPkg; @@ -36,7 +36,7 @@ class CloudAccountChangeValidatorTest { fail("Expected exception"); } catch (IllegalArgumentException e) { assertEquals(e.getMessage(), "Cannot change cloud account from unspecified account to " + - "account '000000000000'. The existing deployment must be removed before " + + "account '000000000000' in aws. The existing deployment must be removed before " + "changing accounts"); } assertEquals(List.of(), validator.validate(model0, model0, new DeployState.Builder().build())); diff --git a/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilderTest.java b/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilderTest.java index ed3073a0ef4..78c95c03b44 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilderTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilderTest.java @@ -30,13 +30,13 @@ public class DomComponentBuilderTest extends DomBuilderTest { @Test @SuppressWarnings("unchecked") void components_can_be_nested() { - Component<Component<?, ?>, ?> parent = new DomComponentBuilder().doBuild(root.getDeployState(), root, parse( + Component<? super Component<?, ?>, ?> parent = new DomComponentBuilder().doBuild(root.getDeployState(), root, parse( "<component id='parent'>", " <component id='child' />", "</component>")); assertEquals(ComponentId.fromString("parent"), parent.getGlobalComponentId()); - Component<?, ?> child = first(parent.getChildren().values()); + Component<?, ?> child = (Component<?, ?>) first(parent.getChildren().values()); assertNotNull(child); assertEquals(ComponentId.fromString("child@parent"), child.getGlobalComponentId()); diff --git a/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/DomSchemaTuningBuilderTest.java b/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/DomSchemaTuningBuilderTest.java index 41cef783b68..e3e9fc1a232 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/DomSchemaTuningBuilderTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/builder/xml/dom/DomSchemaTuningBuilderTest.java @@ -58,6 +58,16 @@ public class DomSchemaTuningBuilderTest extends DomBuilderTest { } @Test + void requireThatWeCanParseLidSpaceTag() { + Tuning t = createTuning(parseXml("<lidspace>", + "<max-bloat-factor>0.5</max-bloat-factor>", + "</lidspace>")); + assertEquals(0.5, t.searchNode.lidSpace.bloatFactor.doubleValue()); + ProtonConfig cfg = getProtonCfg(t); + assertEquals(cfg.lidspacecompaction().allowedlidbloatfactor(), 0.5); + } + + @Test void requireThatWeCanParseFlushStrategyTag() { Tuning t = createTuning(parseXml("<flushstrategy>", "<native>", "<total>", diff --git a/config-model/src/test/java/com/yahoo/vespa/model/container/ContainerClusterTest.java b/config-model/src/test/java/com/yahoo/vespa/model/container/ContainerClusterTest.java index 5973ef56962..2562e1e3124 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/container/ContainerClusterTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/container/ContainerClusterTest.java @@ -129,10 +129,10 @@ public class ContainerClusterTest { int heapSizeInFlag = 89; boolean hosted = true; boolean combined = true; // a cluster running on content nodes (only relevant with hosted) - verifyHeapSizeAsPercentageOfPhysicalMemory(createRoot(hosted), !combined, null, ApplicationContainerCluster.defaultHeapSizePercentageOfTotalNodeMemory); + verifyHeapSizeAsPercentageOfPhysicalMemory(createRoot(hosted), !combined, null, ApplicationContainerCluster.defaultHeapSizePercentageOfAvailableMemory); verifyHeapSizeAsPercentageOfPhysicalMemory(createRoot(hosted, heapSizeInFlag), !combined, null, heapSizeInFlag); - verifyHeapSizeAsPercentageOfPhysicalMemory(createRoot(hosted), combined, null, 18); - verifyHeapSizeAsPercentageOfPhysicalMemory(createRoot(hosted, heapSizeInFlag), combined, null, 18); + verifyHeapSizeAsPercentageOfPhysicalMemory(createRoot(hosted), combined, null, 24); + verifyHeapSizeAsPercentageOfPhysicalMemory(createRoot(hosted, heapSizeInFlag), combined, null, 24); verifyHeapSizeAsPercentageOfPhysicalMemory(createRoot(!hosted), !combined, null, 0); verifyHeapSizeAsPercentageOfPhysicalMemory(createRoot(!hosted, heapSizeInFlag), !combined, null, 0); diff --git a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java index 50416d50fe5..2a82daef9e3 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/container/xml/EmbedderTestCase.java @@ -2,16 +2,26 @@ package com.yahoo.vespa.model.container.xml; import com.yahoo.component.ComponentId; +import com.yahoo.config.InnerNode; +import com.yahoo.config.ModelNode; +import com.yahoo.config.ModelReference; import com.yahoo.config.model.application.provider.FilesApplicationPackage; import com.yahoo.config.model.deploy.DeployState; import com.yahoo.config.model.deploy.TestProperties; +import com.yahoo.embedding.BertBaseEmbedderConfig; +import com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig; +import com.yahoo.language.huggingface.config.HuggingFaceTokenizerConfig; import com.yahoo.path.Path; import com.yahoo.text.XML; import com.yahoo.vespa.config.ConfigDefinitionKey; import com.yahoo.vespa.config.ConfigPayloadBuilder; import com.yahoo.vespa.model.VespaModel; import com.yahoo.vespa.model.container.ApplicationContainerCluster; +import com.yahoo.vespa.model.container.component.BertEmbedder; import com.yahoo.vespa.model.container.component.Component; +import com.yahoo.vespa.model.container.component.HuggingFaceEmbedder; +import com.yahoo.vespa.model.container.component.HuggingFaceTokenizer; +import com.yahoo.vespa.model.test.utils.VespaModelCreatorWithFilePkg; import com.yahoo.yolean.Exceptions; import org.junit.jupiter.api.Test; import org.w3c.dom.Document; @@ -30,55 +40,18 @@ import static org.junit.jupiter.api.Assertions.fail; public class EmbedderTestCase { - private static final String BUNDLED_EMBEDDER_CLASS = "ai.vespa.embedding.BertBaseEmbedder"; - private static final String BUNDLED_EMBEDDER_CONFIG = "embedding.bert-base-embedder"; - - @Test - void testBundledEmbedder_selfhosted() throws IOException, SAXException { - String input = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" + - " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + - " <transformerModel id='my_model_id' url='my-model-url' />" + - " <tokenizerVocab id='my_vocab_id' url='my-vocab-url' />" + - " </config>" + - "</component>"; - String component = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" + - " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + - " <transformerModel id='my_model_id' url='my-model-url' />" + - " <tokenizerVocab id='my_vocab_id' url='my-vocab-url' />" + - " </config>" + - "</component>"; - assertTransform(input, component, false); - } - - @Test - void testBundledEmbedder_hosted() throws IOException, SAXException { - String input = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" + - " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + - " <transformerModel model-id='minilm-l6-v2' />" + - " <tokenizerVocab model-id='bert-base-uncased' path='ignored.txt'/>" + - " </config>" + - "</component>"; - String component = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "' bundle='model-integration'>" + - " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + - " <transformerModel model-id='minilm-l6-v2' url='https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx' />" + - " <tokenizerVocab model-id='bert-base-uncased' url='https://data.vespa.oath.cloud/onnx_models/bert-base-uncased-vocab.txt' />" + - " </config>" + - "</component>"; - assertTransform(input, component, true); - } - @Test void testApplicationComponentWithModelReference_hosted() throws IOException, SAXException { - String input = "<component id='test' class='ApplicationSpecificEmbedder' bundle='model-integration'>" + - " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + - " <transformerModel model-id='minilm-l6-v2' />" + - " <tokenizerVocab model-id='bert-base-uncased' />" + + String input = "<component id='test' class='ai.vespa.example.paragraph.ApplicationSpecificEmbedder' bundle='app'>" + + " <config name='ai.vespa.example.paragraph.sentence-embedder'>" + + " <model model-id='minilm-l6-v2' />" + + " <vocab model-id='bert-base-uncased' />" + " </config>" + "</component>"; - String component = "<component id='test' class='ApplicationSpecificEmbedder' bundle='model-integration'>" + - " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + - " <transformerModel model-id='minilm-l6-v2' url='https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx' />" + - " <tokenizerVocab model-id='bert-base-uncased' url='https://data.vespa.oath.cloud/onnx_models/bert-base-uncased-vocab.txt' />" + + String component = "<component id='test' class='ai.vespa.example.paragraph.ApplicationSpecificEmbedder' bundle='app'>" + + " <config name='ai.vespa.example.paragraph.sentence-embedder'>" + + " <model model-id='minilm-l6-v2' url='https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx' />" + + " <vocab model-id='bert-base-uncased' url='https://data.vespa.oath.cloud/onnx_models/bert-base-uncased-vocab.txt' />" + " </config>" + "</component>"; assertTransform(input, component, true); @@ -86,42 +59,65 @@ public class EmbedderTestCase { @Test void testUnknownModelId_hosted() throws IOException, SAXException { - String embedder = "<component id='test' class='" + BUNDLED_EMBEDDER_CLASS + "'>" + - " <config name='" + BUNDLED_EMBEDDER_CONFIG + "'>" + - " <transformerModel model-id='my_model_id' />" + - " <tokenizerVocab model-id='my_vocab_id' />" + + String embedder = "<component id='test' class='ai.vespa.example.paragraph.ApplicationSpecificEmbedder'>" + + " <config name='ai.vespa.example.paragraph.sentence-embedder'>" + + " <model model-id='my_model_id' />" + + " <vocab model-id='my_vocab_id' />" + " </config>" + "</component>"; assertTransformThrows(embedder, - "Unknown model id 'my_model_id' on 'transformerModel'", + "Unknown model id 'my_model_id' on 'model'", true); } @Test - void testApplicationPackageWithEmbedder_selfhosted() throws Exception { - Path applicationDir = Path.fromString("src/test/cfg/application/embed/"); - VespaModel model = loadModel(applicationDir, false); - ApplicationContainerCluster containerCluster = model.getContainerClusters().get("container"); + void huggingfaceEmbedder_selfhosted() throws Exception { + var model = loadModel(Path.fromString("src/test/cfg/application/embed/"), false); + var cluster = model.getContainerClusters().get("container"); + var embedderCfg = assertHuggingfaceEmbedderComponentPresent(cluster); + assertEquals("my_input_ids", embedderCfg.transformerInputIds()); + assertEquals("https://my/url/model.onnx", modelReference(embedderCfg, "transformerModel").url().orElseThrow().value()); + var tokenizerCfg = assertHuggingfaceTokenizerComponentPresent(cluster); + assertEquals("https://my/url/tokenizer.json", modelReference(tokenizerCfg.model().get(0), "path").url().orElseThrow().value()); + assertEquals(768, tokenizerCfg.maxLength()); + } - Component<?, ?> transformer = containerCluster.getComponentsMap().get(new ComponentId("transformer")); - ConfigPayloadBuilder config = transformer.getUserConfigs().get(new ConfigDefinitionKey("bert-base-embedder", "embedding")); - assertEquals("minilm-l6-v2 application-url \"\"", config.getObject("transformerModel").getValue()); - assertEquals("\"\" \"\" files/vocab.txt", config.getObject("tokenizerVocab").getValue()); - assertEquals("4", config.getObject("onnxIntraOpThreads").getValue()); + @Test + void huggingfaceEmbedder_hosted() throws Exception { + var model = loadModel(Path.fromString("src/test/cfg/application/embed/"), true); + var cluster = model.getContainerClusters().get("container"); + var embedderCfg = assertHuggingfaceEmbedderComponentPresent(cluster); + assertEquals("my_input_ids", embedderCfg.transformerInputIds()); + assertEquals("https://data.vespa.oath.cloud/onnx_models/e5-base-v2/model.onnx", modelReference(embedderCfg, "transformerModel").url().orElseThrow().value()); + var tokenizerCfg = assertHuggingfaceTokenizerComponentPresent(cluster); + assertEquals("https://data.vespa.oath.cloud/onnx_models/multilingual-e5-base/tokenizer.json", modelReference(tokenizerCfg.model().get(0), "path").url().orElseThrow().value()); + assertEquals(768, tokenizerCfg.maxLength()); } + @Test - void testApplicationPackageWithEmbedder_hosted() throws Exception { - Path applicationDir = Path.fromString("src/test/cfg/application/embed/"); - VespaModel model = loadModel(applicationDir, true); - ApplicationContainerCluster containerCluster = model.getContainerClusters().get("container"); + void bertEmbedder_selfhosted() throws Exception { + var model = loadModel(Path.fromString("src/test/cfg/application/embed/"), false); + var cluster = model.getContainerClusters().get("container"); + var embedderCfg = assertBertEmbedderComponentPresent(cluster); + assertEquals("application-url", modelReference(embedderCfg, "transformerModel").url().orElseThrow().value()); + assertEquals("files/vocab.txt", modelReference(embedderCfg, "tokenizerVocab").path().orElseThrow().value()); + } - Component<?, ?> transformer = containerCluster.getComponentsMap().get(new ComponentId("transformer")); - ConfigPayloadBuilder config = transformer.getUserConfigs().get(new ConfigDefinitionKey("bert-base-embedder", "embedding")); - assertEquals("minilm-l6-v2 https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx \"\"", - config.getObject("transformerModel").getValue()); - assertEquals("\"\" \"\" files/vocab.txt", config.getObject("tokenizerVocab").getValue()); - assertEquals("4", config.getObject("onnxIntraOpThreads").getValue()); + @Test + void bertEmbedder_hosted() throws Exception { + var model = loadModel(Path.fromString("src/test/cfg/application/embed/"), true); + var cluster = model.getContainerClusters().get("container"); + var embedderCfg = assertBertEmbedderComponentPresent(cluster); + assertEquals("https://data.vespa.oath.cloud/onnx_models/sentence_all_MiniLM_L6_v2.onnx", + modelReference(embedderCfg, "transformerModel").url().orElseThrow().value()); + assertTrue(modelReference(embedderCfg, "tokenizerVocab").url().isEmpty()); + assertEquals("files/vocab.txt", modelReference(embedderCfg, "tokenizerVocab").path().orElseThrow().value()); + } + + @Test + void passesXmlValidation() { + new VespaModelCreatorWithFilePkg("src/test/cfg/application/embed/").create(); } @Test @@ -157,7 +153,7 @@ public class EmbedderTestCase { fail("Expected failure"); } catch (IllegalArgumentException e) { - assertEquals("transformerModel is configured with only a 'model-id'. Add a 'path' or 'url' to deploy this outside Vespa Cloud", + assertEquals("model is configured with only a 'model-id'. Add a 'path' or 'url' to deploy this outside Vespa Cloud", Exceptions.toMessageString(e)); } } @@ -217,4 +213,39 @@ public class EmbedderTestCase { return (Element) doc.getFirstChild(); } + private static HuggingFaceTokenizerConfig assertHuggingfaceTokenizerComponentPresent(ApplicationContainerCluster cluster) { + var hfTokenizer = (HuggingFaceTokenizer) cluster.getComponentsMap().get(new ComponentId("hf-tokenizer")); + assertEquals("com.yahoo.language.huggingface.HuggingFaceTokenizer", hfTokenizer.getClassId().getName()); + var cfgBuilder = new HuggingFaceTokenizerConfig.Builder(); + hfTokenizer.getConfig(cfgBuilder); + return cfgBuilder.build(); + } + + private static HuggingFaceEmbedderConfig assertHuggingfaceEmbedderComponentPresent(ApplicationContainerCluster cluster) { + var hfEmbedder = (HuggingFaceEmbedder) cluster.getComponentsMap().get(new ComponentId("hf-embedder")); + assertEquals("ai.vespa.embedding.huggingface.HuggingFaceEmbedder", hfEmbedder.getClassId().getName()); + var cfgBuilder = new HuggingFaceEmbedderConfig.Builder(); + hfEmbedder.getConfig(cfgBuilder); + return cfgBuilder.build(); + } + + private static BertBaseEmbedderConfig assertBertEmbedderComponentPresent(ApplicationContainerCluster cluster) { + var bertEmbedder = (BertEmbedder) cluster.getComponentsMap().get(new ComponentId("bert-embedder")); + assertEquals("ai.vespa.embedding.BertBaseEmbedder", bertEmbedder.getClassId().getName()); + var cfgBuilder = new BertBaseEmbedderConfig.Builder(); + bertEmbedder.getConfig(cfgBuilder); + return cfgBuilder.build(); + } + + // Ugly hack to read underlying model reference from config instance + private static ModelReference modelReference(InnerNode cfg, String name) { + try { + var f = cfg.getClass().getDeclaredField(name); + f.setAccessible(true); + return ((ModelNode) f.get(cfg)).getModelReference(); + } catch (NoSuchFieldException | IllegalAccessException e) { + throw new RuntimeException(e); + } + } + } diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java index 14fe7bbcc36..f9b1edf4f35 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/content/ContentClusterTest.java @@ -1397,4 +1397,42 @@ public class ContentClusterTest extends ContentBaseTest { assertEquals(2, fleetControllerConfigBuilder.build().max_number_of_groups_allowed_to_be_down()); } + private void assertIndexingDocprocEnabled(boolean indexed, boolean force, boolean expEnabled) + { + String services = "<?xml version='1.0' encoding='UTF-8' ?>" + + "<services version='1.0'>" + + " <container id='default' version='1.0'>" + + " <document-processing/>" + + " </container>" + + " <content id='search' version='1.0'>" + + " <redundancy>1</redundancy>" + + " <documents>" + + " <document-processing cluster='default'" + (force ? " chain='indexing'" : "") + "/>" + + " <document type='type1' mode='" + (indexed ? "index" : "streaming") + "'/>" + + " </documents>" + + " </content>" + + "</services>"; + VespaModel model = createEnd2EndOneNode(new TestProperties(), services); + var searchCluster = model.getContentClusters().get("search").getSearch(); + assertEquals(expEnabled, searchCluster.getIndexingDocproc().isPresent()); + } + + @Test + void testIndexingDocprocEnabledWhenIndexMode() + { + assertIndexingDocprocEnabled(true, false, true); + } + + @Test + void testIndexingDocprocNotEnabledWhenStreamingMode() + { + assertIndexingDocprocEnabled(false, false, false); + } + + @Test + void testIndexingDocprocEnabledWhenStreamingModeAndForced() + { + assertIndexingDocprocEnabled(false, true, true); + } + } diff --git a/config-model/src/test/java/com/yahoo/vespa/model/content/IndexingAndDocprocRoutingTest.java b/config-model/src/test/java/com/yahoo/vespa/model/content/IndexingAndDocprocRoutingTest.java index 4476e128196..ac9d0ad8724 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/content/IndexingAndDocprocRoutingTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/content/IndexingAndDocprocRoutingTest.java @@ -220,7 +220,7 @@ public class IndexingAndDocprocRoutingTest extends ContentBaseTest { fail("Expected exception"); } catch (IllegalArgumentException e) { - assertTrue(e.getMessage().startsWith("Indexing cluster 'musiccluster' specifies the chain 'default' as indexing chain")); + assertTrue(e.getMessage().startsWith("content cluster 'musiccluster' specifies the chain 'default' as indexing chain")); } } diff --git a/config-model/src/test/java/com/yahoo/vespa/model/search/NodeResourcesTuningTest.java b/config-model/src/test/java/com/yahoo/vespa/model/search/NodeResourcesTuningTest.java index 5831090c261..8e719fa90c3 100644 --- a/config-model/src/test/java/com/yahoo/vespa/model/search/NodeResourcesTuningTest.java +++ b/config-model/src/test/java/com/yahoo/vespa/model/search/NodeResourcesTuningTest.java @@ -11,7 +11,7 @@ import org.junit.jupiter.api.Test; import java.util.Arrays; import java.util.List; -import static com.yahoo.vespa.model.search.NodeResourcesTuning.reservedMemoryGb; +import static com.yahoo.vespa.model.Host.memoryOverheadGb; import static org.junit.jupiter.api.Assertions.assertEquals; import static com.yahoo.vespa.model.search.NodeResourcesTuning.MB; import static com.yahoo.vespa.model.search.NodeResourcesTuning.GB; @@ -33,13 +33,13 @@ public class NodeResourcesTuningTest { @Test void require_that_hwinfo_memory_size_is_set() { - assertEquals(24 * GB, configFromMemorySetting(24 + reservedMemoryGb, 0).hwinfo().memory().size()); - assertEquals(combinedFactor * 24 * GB, configFromMemorySetting(24 + reservedMemoryGb, ApplicationContainerCluster.heapSizePercentageOfTotalNodeMemoryWhenCombinedCluster * 0.01).hwinfo().memory().size(), 1000); + assertEquals(24 * GB, configFromMemorySetting(24 + memoryOverheadGb, 0).hwinfo().memory().size()); + assertEquals(1.9585050869E10, configFromMemorySetting(24 + memoryOverheadGb, ApplicationContainerCluster.heapSizePercentageOfTotalAvailableMemoryWhenCombinedCluster * 0.01).hwinfo().memory().size(), 1000); } @Test void reserved_memory_on_content_node() { - assertEquals(0.7, reservedMemoryGb, delta); + assertEquals(0.7, memoryOverheadGb, delta); } private ProtonConfig getProtonMemoryConfig(List<Pair<String, String>> sdAndMode, double gb) { @@ -54,7 +54,7 @@ public class NodeResourcesTuningTest { } private void verify_that_initial_numdocs_is_dependent_of_mode() { - ProtonConfig cfg = getProtonMemoryConfig(Arrays.asList(new Pair<>("a", "INDEX"), new Pair<>("b", "STREAMING"), new Pair<>("c", "STORE_ONLY")), 24 + reservedMemoryGb); + ProtonConfig cfg = getProtonMemoryConfig(Arrays.asList(new Pair<>("a", "INDEX"), new Pair<>("b", "STREAMING"), new Pair<>("c", "STORE_ONLY")), 24 + memoryOverheadGb); assertEquals(3, cfg.documentdb().size()); assertEquals(1024, cfg.documentdb(0).allocation().initialnumdocs()); assertEquals("a", cfg.documentdb(0).inputdoctypename()); @@ -162,14 +162,14 @@ public class NodeResourcesTuningTest { @Test void require_that_summary_cache_max_bytes_is_set_based_on_memory() { - assertEquals(1 * GB / 25, configFromMemorySetting(1 + reservedMemoryGb, 0).summary().cache().maxbytes()); - assertEquals(256 * GB / 25, configFromMemorySetting(256 + reservedMemoryGb, 0).summary().cache().maxbytes()); + assertEquals(1 * GB / 25, configFromMemorySetting(1 + memoryOverheadGb, 0).summary().cache().maxbytes()); + assertEquals(256 * GB / 25, configFromMemorySetting(256 + memoryOverheadGb, 0).summary().cache().maxbytes()); } @Test void require_that_summary_cache_memory_is_reduced_with_combined_cluster() { - assertEquals(combinedFactor * 1 * GB / 25, configFromMemorySetting(1 + reservedMemoryGb, ApplicationContainerCluster.heapSizePercentageOfTotalNodeMemoryWhenCombinedCluster * 0.01).summary().cache().maxbytes(), 1000); - assertEquals(combinedFactor * 256 * GB / 25, configFromMemorySetting(256 + reservedMemoryGb, ApplicationContainerCluster.heapSizePercentageOfTotalNodeMemoryWhenCombinedCluster * 0.01).summary().cache().maxbytes(), 1000); + assertEquals(3.2641751E7, configFromMemorySetting(1 + memoryOverheadGb, ApplicationContainerCluster.heapSizePercentageOfTotalAvailableMemoryWhenCombinedCluster * 0.01).summary().cache().maxbytes(), 1000); + assertEquals(8.356288371E9, configFromMemorySetting(256 + memoryOverheadGb, ApplicationContainerCluster.heapSizePercentageOfTotalAvailableMemoryWhenCombinedCluster * 0.01).summary().cache().maxbytes(), 1000); } @Test @@ -191,12 +191,12 @@ public class NodeResourcesTuningTest { } private static void assertDocumentStoreMaxFileSize(long expFileSizeBytes, int wantedMemoryGb) { - assertEquals(expFileSizeBytes, configFromMemorySetting(wantedMemoryGb + reservedMemoryGb, 0).summary().log().maxfilesize()); + assertEquals(expFileSizeBytes, configFromMemorySetting(wantedMemoryGb + memoryOverheadGb, 0).summary().log().maxfilesize()); } private static void assertFlushStrategyMemory(long expMemoryBytes, int wantedMemoryGb) { - assertEquals(expMemoryBytes, configFromMemorySetting(wantedMemoryGb + reservedMemoryGb, 0).flush().memory().maxmemory()); - assertEquals(expMemoryBytes, configFromMemorySetting(wantedMemoryGb + reservedMemoryGb, 0).flush().memory().each().maxmemory()); + assertEquals(expMemoryBytes, configFromMemorySetting(wantedMemoryGb + memoryOverheadGb, 0).flush().memory().maxmemory()); + assertEquals(expMemoryBytes, configFromMemorySetting(wantedMemoryGb + memoryOverheadGb, 0).flush().memory().each().maxmemory()); } private static void assertFlushStrategyTlsSize(long expTlsSizeBytes, int diskGb) { |