about summary refs log tree commit diff stats
path: root/config-model/src/main
diff options
context:
space:
mode:
author Ola Aunrønning <olaa@yahooinc.com> 2023-06-08 10:58:17 +0200
committer GitHub <noreply@github.com> 2023-06-08 10:58:17 +0200
commit 79de2d78433c11932357d0c244704b16fc87da21 (patch)
tree 382a06ce4e1217e395fbba25c1ce74a1afaadbe5 /config-model/src/main
parent 96d3814b80a693ee46640ae89a88fdb2d78dcd40 (diff)
parent d7568f0d450df2287657ac18c37955a1867496f5 (diff)
Merge branch 'master' into olaa/dataplane-proxy-config
Diffstat (limited to 'config-model/src/main')
-rw-r--r-- config-model/src/main/java/com/yahoo/config/model/ConfigModelContext.java 29
-rw-r--r-- config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java 2
-rw-r--r-- config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java 11
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/Host.java 6
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java 3
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java 29
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/application/validation/AccessControlFilterExcludeValidator.java 2
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java 76
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java 30
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java 28
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomSearchTuningBuilder.java 16
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java 35
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java 1
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/PlatformBundles.java 3
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java 70
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java 81
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java 47
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/component/TypedComponent.java 20
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java 25
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java 47
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/content/Content.java 48
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/content/ContentSearchCluster.java 13
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java 25
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/routing/DocumentProtocol.java 7
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java 60
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/search/IndexingDocproc.java 68
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/search/NodeResourcesTuning.java 6
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/model/search/Tuning.java 12
-rw-r--r-- config-model/src/main/resources/schema/common.rnc 55
-rw-r--r-- config-model/src/main/resources/schema/content.rnc 3
-rw-r--r-- config-model/src/main/resources/schema/deployment.rnc 10
31 files changed, 686 insertions, 182 deletions
diff --git a/config-model/src/main/java/com/yahoo/config/model/ConfigModelContext.java b/config-model/src/main/java/com/yahoo/config/model/ConfigModelContext.java
index 13d87b852e4..b4b3dccd440 100644
--- a/config-model/src/main/java/com/yahoo/config/model/ConfigModelContext.java
+++ b/config-model/src/main/java/com/yahoo/config/model/ConfigModelContext.java
@@ -2,16 +2,21 @@
package com.yahoo.config.model;
import com.yahoo.config.application.api.ApplicationPackage;
+import com.yahoo.config.application.api.Bcp.Group;
import com.yahoo.config.application.api.DeployLogger;
+import com.yahoo.config.application.api.DeploymentSpec;
import com.yahoo.config.model.api.ModelContext;
import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.config.model.producer.AnyConfigProducer;
import com.yahoo.config.model.producer.TreeConfigProducer;
import com.yahoo.config.provision.ClusterInfo;
+import com.yahoo.config.provision.ClusterInfo.Builder;
+import com.yahoo.config.provision.zone.ZoneId;
import com.yahoo.vespa.model.VespaModel;
import java.time.Duration;
import java.util.Comparator;
+import java.util.Optional;
import java.util.stream.Stream;
/**
@@ -72,14 +77,22 @@ public final class ConfigModelContext {
/** Returns a cluster info builder pre-populated with info known in this context. */
public ClusterInfo.Builder clusterInfo() {
- var instance = getApplicationPackage().getDeploymentSpec().instance(properties().applicationId().instance());
- if ( ! instance.isPresent()) return new ClusterInfo.Builder();
- var maxDeadline = instance.get().bcp().groups().stream()
- .filter(group -> group.memberRegions().contains(properties().zone().region()))
- .map(group -> group.deadline())
- .min(Comparator.comparing(deadline -> deadline))
- .orElse(Duration.ofMinutes(0));
- return new ClusterInfo.Builder().bcpDeadline(maxDeadline);
+ DeploymentSpec spec = getApplicationPackage().getDeploymentSpec();
+ ClusterInfo.Builder builder = new ClusterInfo.Builder();
+ spec.hostTTL(properties().applicationId().instance(), deployState.zone().environment(), deployState.zone().region())
+ .ifPresent(ttl -> {
+ ZoneId zoneId = ZoneId.from(deployState.zone().environment(), deployState.zone().region());
+ if (spec.cloudAccount(deployState.zone().cloud().name(), properties().applicationId().instance(), zoneId).isUnspecified())
+ throw new IllegalArgumentException("deployment spec specifies host TTL for " + zoneId +
+ " but no cloud account is specified for this zone");
+ });
+ spec.instance(properties().applicationId().instance())
+ .flatMap(instance -> instance.bcp().groups().stream()
+ .filter(group -> group.memberRegions().contains(properties().zone().region()))
+ .map(Group::deadline)
+ .min(Comparator.naturalOrder()))
+ .ifPresent(builder::bcpDeadline);
+ return builder;
}
/**
diff --git a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java
index 56f999f85b4..0e39b7b5c3a 100644
--- a/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java
+++ b/config-model/src/main/java/com/yahoo/config/model/deploy/TestProperties.java
@@ -78,7 +78,7 @@ public class TestProperties implements ModelContext.Properties, ModelContext.Fea
private int rpc_num_targets = 2;
private int rpc_events_before_wakeup = 1;
private int mbus_network_threads = 1;
- private int heapSizePercentage = ApplicationContainerCluster.defaultHeapSizePercentageOfTotalNodeMemory;
+ private int heapSizePercentage = ApplicationContainerCluster.defaultHeapSizePercentageOfAvailableMemory;
private Architecture adminClusterNodeResourcesArchitecture = Architecture.getDefault();
private boolean useRestrictedDataPlaneBindings = false;
private Optional<CloudAccount> cloudAccount = Optional.empty();
diff --git a/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java b/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java
index 4b993f8e244..585e69d9141 100644
--- a/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java
+++ b/config-model/src/main/java/com/yahoo/config/model/provision/InMemoryProvisioner.java
@@ -160,7 +160,7 @@ public class InMemoryProvisioner implements HostProvisioner {
public List<HostSpec> prepare(ClusterSpec cluster, Capacity requested, ProvisionLogger logger) {
provisioned.add(cluster.id(), requested);
clusters.add(cluster);
- if (environment == Environment.dev) {
+ if (environment == Environment.dev && ! requested.isRequired()) {
requested = requested.withLimits(requested.minResources().withNodes(1),
requested.maxResources().withNodes(1));
}
@@ -233,13 +233,8 @@ public class InMemoryProvisioner implements HostProvisioner {
// Minimal capacity policies
private NodeResources decideResources(NodeResources resources) {
- if (resources.vcpuIsUnspecified())
- resources = resources.withVcpu(defaultNodeResources.vcpu());
- if (resources.memoryGbIsUnspecified())
- resources = resources.withMemoryGb(defaultNodeResources.memoryGb());
- if (resources.diskGbIsUnspecified())
- resources = resources.withDiskGb(defaultNodeResources.diskGb());
- return resources;
+ if (defaultNodeResources.isUnspecified()) return resources;
+ return resources.withUnspecifiedNumbersFrom(defaultNodeResources);
}
private List<HostSpec> allocateHostGroup(ClusterSpec clusterGroup, NodeResources requestedResourcesOrUnspecified,
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/Host.java b/config-model/src/main/java/com/yahoo/vespa/model/Host.java
index 047a6ef9bd5..581f20cbfe9 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/Host.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/Host.java
@@ -8,13 +8,17 @@ import com.yahoo.config.model.producer.TreeConfigProducer;
import java.util.Objects;
/**
- * A physical host, running a set of services.
+ * A node with an identity, with some dedicated compute resources, running a set of services.
* The identity of a host is its hostname. Hosts are comparable on their host name.
*
* @author gjoranv
*/
public final class Host extends TreeConfigProducer<AnyConfigProducer> implements SentinelConfig.Producer, Comparable<Host> {
+ // Memory needed for auxiliary processes always running on the node (config-proxy, metrics-proxy).
+ // Keep in sync with node-repository/ClusterModel.
+ public static final double memoryOverheadGb = 0.7;
+
private ConfigSentinel configSentinel = null;
private final String hostname;
private final boolean runsConfigServer;
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java b/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java
index 9e867a4c3bc..28ff8dff620 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/VespaModelFactory.java
@@ -25,6 +25,7 @@ import com.yahoo.config.model.api.ValidationParameters;
import com.yahoo.config.model.application.provider.ApplicationPackageXmlFilesValidator;
import com.yahoo.config.model.builder.xml.ConfigModelBuilder;
import com.yahoo.config.model.deploy.DeployState;
+import com.yahoo.config.provision.QuotaExceededException;
import com.yahoo.config.provision.TransientException;
import com.yahoo.config.provision.Zone;
import com.yahoo.vespa.config.VespaVersion;
@@ -222,7 +223,7 @@ public class VespaModelFactory implements ModelFactory {
Exceptions.toMessageString(e));
else
rethrowUnlessIgnoreErrors(e, validationParameters.ignoreValidationErrors());
- } catch (IllegalArgumentException | TransientException e) {
+ } catch (IllegalArgumentException | TransientException | QuotaExceededException e) {
rethrowUnlessIgnoreErrors(e, validationParameters.ignoreValidationErrors());
} catch (Exception e) {
throw new RuntimeException(e);
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java
index 362bc7b0964..8a2bae364a1 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/admin/monitoring/VespaMetricSet.java
@@ -131,6 +131,35 @@ public class VespaMetricSet {
addMetric(metrics, ConfigServerMetrics.ZK_CONNECTIONS.last());
addMetric(metrics, ConfigServerMetrics.ZK_OUTSTANDING_REQUESTS.last());
+ // Node repository metrics
+ addMetric(metrics, ConfigServerMetrics.NODES_NON_ACTIVE_FRACTION.last());
+ addMetric(metrics, ConfigServerMetrics.CLUSTER_COST.last());
+ addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_CPU.last());
+ addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_MEMORY.last());
+ addMetric(metrics, ConfigServerMetrics.CLUSTER_LOAD_IDEAL_DISK.last());
+ addMetric(metrics, ConfigServerMetrics.WANT_TO_REBOOT.max());
+ addMetric(metrics, ConfigServerMetrics.WANT_TO_RESTART.max());
+ addMetric(metrics, ConfigServerMetrics.RETIRED.max());
+ addMetric(metrics, ConfigServerMetrics.WANT_TO_CHANGE_VESPA_VERSION.max());
+ addMetric(metrics, ConfigServerMetrics.HAS_WIRE_GUARD_KEY.last());
+ addMetric(metrics, ConfigServerMetrics.WANT_TO_DEPROVISION.max());
+ addMetric(metrics, ConfigServerMetrics.SUSPENDED.max());
+ addMetric(metrics, ConfigServerMetrics.SOME_SERVICES_DOWN.max());
+ addMetric(metrics, ConfigServerMetrics.NODE_FAILER_BAD_NODE.last());
+ addMetric(metrics, ConfigServerMetrics.LOCK_ATTEMPT_LOCKED_LOAD, EnumSet.of(max,average));
+
+ addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_ALLOCATED_CAPACITY_CPU.average());
+ addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_ALLOCATED_CAPACITY_MEM.average());
+ addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_ALLOCATED_CAPACITY_DISK.average());
+ addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_FREE_CAPACITY_CPU.max());
+ addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_FREE_CAPACITY_MEM.max());
+ addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_FREE_CAPACITY_DISK.max());
+ addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_CPU, EnumSet.of(max,average));
+ addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_DISK, EnumSet.of(max,average));
+ addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_TOTAL_CAPACITY_MEM, EnumSet.of(max,average));
+ addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_DOCKER_SKEW.last());
+ addMetric(metrics, ConfigServerMetrics.HOSTED_VESPA_PENDING_REDEPLOYMENTS.last());
+
return metrics;
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/AccessControlFilterExcludeValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/AccessControlFilterExcludeValidator.java
index ef695770987..5735a632085 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/AccessControlFilterExcludeValidator.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/AccessControlFilterExcludeValidator.java
@@ -34,7 +34,7 @@ public class AccessControlFilterExcludeValidator extends Validator {
private void verifyNoExclusions(String clusterId, AccessControl accessControl, DeployState deployState) {
if (!accessControl.excludedBindings().isEmpty()) {
String message = "Application cluster %s excludes paths from access control, this is not allowed and should be removed.".formatted(clusterId);
- if (deployState.zone().cloud().name() == CloudName.AWS) {
+ if (deployState.zone().cloud().name().equals(CloudName.AWS)) {
throw new IllegalArgumentException(message);
} else {
deployState.getDeployLogger().log(Level.WARNING, message);
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java
index 66da43856b1..eccb6910866 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/ConstantTensorJsonValidator.java
@@ -28,11 +28,14 @@ public class ConstantTensorJsonValidator {
private static final String FIELD_CELLS = "cells";
private static final String FIELD_ADDRESS = "address";
private static final String FIELD_VALUE = "value";
+ private static final String FIELD_VALUES = "values";
private static final JsonFactory jsonFactory = new JsonFactory();
private JsonParser parser;
private Map<String, TensorType.Dimension> tensorDimensions;
+ private boolean isSingleDenseType = false;
+ private boolean isSingleMappedType = false;
public void validate(String fileName, TensorType type, Reader tensorData) {
if (fileName.endsWith(".json")) {
@@ -57,19 +60,69 @@ public class ConstantTensorJsonValidator {
.dimensions()
.stream()
.collect(Collectors.toMap(TensorType.Dimension::name, Function.identity()));
+ if (type.dimensions().size() == 1) {
+ this.isSingleMappedType = (type.indexedSubtype() == TensorType.empty);
+ this.isSingleDenseType = (type.mappedSubtype() == TensorType.empty);
+ }
+ var top = parser.nextToken();
+ if (top == JsonToken.START_ARRAY) {
+ consumeValuesArray();
+ } else if (top == JsonToken.START_OBJECT) {
+ consumeTopObject();
+ }
+ });
+ }
- assertNextTokenIs(JsonToken.START_OBJECT);
- assertNextTokenIs(JsonToken.FIELD_NAME);
- assertFieldNameIs(FIELD_CELLS);
+ private void consumeValuesArray() throws IOException {
+ if (! isSingleDenseType) {
+ throw new InvalidConstantTensorException(parser, String.format("Field 'values' is only valid for simple vectors (1-d dense tensors"));
+ }
+ assertCurrentTokenIs(JsonToken.START_ARRAY);
+ while (parser.nextToken() != JsonToken.END_ARRAY) {
+ validateNumeric(parser.getCurrentToken());
+ }
+ }
+ private void consumeTopObject() throws IOException {
+ assertCurrentTokenIs(JsonToken.START_OBJECT);
+ assertNextTokenIs(JsonToken.FIELD_NAME);
+ String fieldName = parser.getCurrentName();
+ if (fieldName.equals(FIELD_VALUES)) {
assertNextTokenIs(JsonToken.START_ARRAY);
+ consumeValuesArray();
+ } else if (fieldName.equals(FIELD_CELLS)) {
+ consumeCellsField();
+ } else {
+ throw new InvalidConstantTensorException(parser, String.format("Expected 'cells' or 'values', got '%s'", fieldName));
+ }
+ assertNextTokenIs(JsonToken.END_OBJECT);
+ }
- while (parser.nextToken() != JsonToken.END_ARRAY) {
- validateTensorCell();
- }
+ private void consumeCellsField() throws IOException {
+ var token = parser.nextToken();
+ if (token == JsonToken.START_ARRAY) {
+ consumeLiteralFormArray();
+ } else if (token == JsonToken.START_OBJECT) {
+ consumeSimpleMappedObject();
+ } else {
+ throw new InvalidConstantTensorException(parser, String.format("Field 'cells' must be object or array, but got %s", token.toString()));
+ }
+ }
- assertNextTokenIs(JsonToken.END_OBJECT);
- });
+ private void consumeLiteralFormArray() throws IOException {
+ while (parser.nextToken() != JsonToken.END_ARRAY) {
+ validateTensorCell();
+ }
+ }
+
+ private void consumeSimpleMappedObject() throws IOException {
+ if (! isSingleMappedType) {
+ throw new InvalidConstantTensorException(parser, String.format("Field 'cells' must be an array of address/value objects"));
+ }
+ while (parser.nextToken() != JsonToken.END_OBJECT) {
+ assertCurrentTokenIs(JsonToken.FIELD_NAME);
+ validateTensorCellValue();
+ }
}
private void validateTensorCell() {
@@ -87,7 +140,7 @@ public class ConstantTensorJsonValidator {
if (fieldName.equals(FIELD_ADDRESS)) {
validateTensorAddress();
} else if (fieldName.equals(FIELD_VALUE)) {
- validateTensorValue();
+ validateTensorCellValue();
}
} else {
throw new InvalidConstantTensorException(parser, "Only 'address' or 'value' fields are permitted within a cell object");
@@ -169,9 +222,12 @@ public class ConstantTensorJsonValidator {
throw new InvalidConstantTensorException(parser, String.format("Index '%s' for dimension '%s' is not an integer", value, dimensionName));
}
- private void validateTensorValue() throws IOException {
+ private void validateTensorCellValue() throws IOException {
JsonToken token = parser.nextToken();
+ validateNumeric(token);
+ }
+ private void validateNumeric(JsonToken token) throws IOException {
if (token != JsonToken.VALUE_NUMBER_FLOAT && token != JsonToken.VALUE_NUMBER_INT) {
throw new InvalidConstantTensorException(parser, String.format("Tensor value is not a number (%s)", token.toString()));
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java
index 4ea74147aaf..f0c29c74705 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/application/validation/QuotaValidator.java
@@ -6,7 +6,9 @@ import com.yahoo.config.provision.Capacity;
import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.NodeResources;
+import com.yahoo.config.provision.QuotaExceededException;
import com.yahoo.config.provision.SystemName;
+import com.yahoo.config.provision.Zone;
import com.yahoo.vespa.model.VespaModel;
import java.math.BigDecimal;
@@ -31,11 +33,10 @@ public class QuotaValidator extends Validator {
public void validate(VespaModel model, DeployState deployState) {
var quota = deployState.getProperties().quota();
quota.maxClusterSize().ifPresent(maxClusterSize -> validateMaxClusterSize(maxClusterSize, model));
- quota.budgetAsDecimal().ifPresent(budget -> validateBudget(budget, model, deployState.getProperties().zone().system()));
+ quota.budgetAsDecimal().ifPresent(budget -> validateBudget(budget, model, deployState.getProperties().zone()));
}
- private void validateBudget(BigDecimal budget, VespaModel model, SystemName systemName) {
-
+ private void validateBudget(BigDecimal budget, VespaModel model, Zone zone) {
var maxSpend = model.allClusters().stream()
.filter(id -> !adminClusterIds(model).contains(id))
.map(id -> model.provisioned().all().getOrDefault(id, zeroCapacity))
@@ -52,9 +53,10 @@ public class QuotaValidator extends Validator {
return;
}
- throwIfBudgetNegative(actualSpend, budget, systemName);
- throwIfBudgetExceeded(actualSpend, budget, systemName);
- throwIfBudgetExceeded(maxSpend, budget, systemName);
+ throwIfBudgetNegative(actualSpend, budget, zone.system());
+ throwIfBudgetExceeded(actualSpend, budget, zone.system(), true);
+ if ( ! zone.environment().isTest()) // Usage is constant after deploy in test zones
+ throwIfBudgetExceeded(maxSpend, budget, zone.system(), false);
}
private Set<ClusterSpec.Id> adminClusterIds(VespaModel model) {
@@ -80,24 +82,28 @@ public class QuotaValidator extends Validator {
if (!invalidClusters.isEmpty()) {
var clusterNames = String.join(", ", invalidClusters);
- throw new IllegalArgumentException("Clusters " + clusterNames + " exceeded max cluster size of " + maxClusterSize);
+ throw new QuotaExceededException("Clusters " + clusterNames + " exceeded max cluster size of " + maxClusterSize);
}
}
private static void throwIfBudgetNegative(double spend, BigDecimal budget, SystemName systemName) {
if (budget.doubleValue() < 0) {
- throw new IllegalArgumentException(quotaMessage("Please free up some capacity.", systemName, spend, budget));
+ throw new QuotaExceededException(quotaMessage("Please free up some capacity.", systemName, spend, budget, true));
}
}
- private static void throwIfBudgetExceeded(double spend, BigDecimal budget, SystemName systemName) {
+ private static void throwIfBudgetExceeded(double spend, BigDecimal budget, SystemName systemName, boolean actual) {
if (budget.doubleValue() < spend) {
- throw new IllegalArgumentException(quotaMessage("Contact support to upgrade your plan.", systemName, spend, budget));
+ throw new QuotaExceededException(quotaMessage("Contact support to upgrade your plan.", systemName, spend, budget, actual));
}
}
- private static String quotaMessage(String message, SystemName system, double spend, BigDecimal budget) {
- String quotaDescription = String.format(Locale.ENGLISH, "The max resources specified cost $%.2f but your quota is $%.2f", spend, budget);
+ private static String quotaMessage(String message, SystemName system, double spend, BigDecimal budget, boolean actual) {
+ String quotaDescription = String.format(Locale.ENGLISH,
+ "The %s cost $%.2f but your quota is $%.2f",
+ actual ? "resources used" : "max resources specified",
+ spend,
+ budget);
return (system == SystemName.Public ? "" : system.value() + ": ") + quotaDescription + ": " + message;
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java
index c57122e5bf5..d0e1ede2cfa 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomComponentBuilder.java
@@ -3,12 +3,14 @@ package com.yahoo.vespa.model.builder.xml.dom;
import com.yahoo.component.ComponentId;
import com.yahoo.config.model.deploy.DeployState;
-import com.yahoo.container.bundle.BundleInstantiationSpecification;
-import com.yahoo.osgi.provider.model.ComponentModel;
import com.yahoo.config.model.producer.AnyConfigProducer;
import com.yahoo.config.model.producer.TreeConfigProducer;
+import com.yahoo.osgi.provider.model.ComponentModel;
import com.yahoo.text.XML;
+import com.yahoo.vespa.model.container.component.BertEmbedder;
import com.yahoo.vespa.model.container.component.Component;
+import com.yahoo.vespa.model.container.component.HuggingFaceEmbedder;
+import com.yahoo.vespa.model.container.component.HuggingFaceTokenizer;
import com.yahoo.vespa.model.container.xml.BundleInstantiationSpecificationBuilder;
import org.w3c.dom.Element;
@@ -31,17 +33,25 @@ public class DomComponentBuilder extends VespaDomBuilder.DomConfigProducerBuilde
}
@Override
- protected Component doBuild(DeployState deployState, TreeConfigProducer<AnyConfigProducer> ancestor, Element spec) {
- Component component = buildComponent(spec);
+ protected Component<? super Component<?, ?>, ?> doBuild(DeployState deployState, TreeConfigProducer<AnyConfigProducer> ancestor, Element spec) {
+ var component = buildComponent(spec, deployState);
addChildren(deployState, ancestor, spec, component);
return component;
}
- private Component buildComponent(Element spec) {
- BundleInstantiationSpecification bundleSpec =
- BundleInstantiationSpecificationBuilder.build(spec).nestInNamespace(namespace);
-
- return new Component<Component<?, ?>, ComponentModel>(new ComponentModel(bundleSpec));
+ private Component<? super Component<?, ?>, ?> buildComponent(Element spec, DeployState state) {
+ if (spec.hasAttribute("type")) {
+ var type = spec.getAttribute("type");
+ return switch (type) {
+ case "hugging-face-embedder" -> new HuggingFaceEmbedder(spec, state);
+ case "hugging-face-tokenizer" -> new HuggingFaceTokenizer(spec, state);
+ case "bert-embedder" -> new BertEmbedder(spec, state);
+ default -> throw new IllegalArgumentException("Unknown component type '%s'".formatted(type));
+ };
+ } else {
+ var bundleSpec = BundleInstantiationSpecificationBuilder.build(spec).nestInNamespace(namespace);
+ return new Component<>(new ComponentModel(bundleSpec));
+ }
}
public static void addChildren(DeployState deployState, TreeConfigProducer<AnyConfigProducer> ancestor, Element componentNode, Component<? super Component<?, ?>, ?> component) {
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomSearchTuningBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomSearchTuningBuilder.java
index d9ef5fd2123..64592e75c41 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomSearchTuningBuilder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/builder/xml/dom/DomSearchTuningBuilder.java
@@ -66,10 +66,22 @@ public class DomSearchTuningBuilder extends VespaDomBuilder.DomConfigProducerBui
handleFeeding(e, t.searchNode);
} else if (equals("removed-db", e)) {
handleRemovedDB(e, t.searchNode);
+ } else if (equals("lidspace", e)) {
+ handleLidSpace(e, t.searchNode);
}
}
}
+ private void handleLidSpace(Element spec, Tuning.SearchNode t) {
+ t.lidSpace = new Tuning.SearchNode.LidSpace();
+ for (Element e : XML.getChildren(spec)) {
+ if (equals("max-bloat-factor", e)) {
+ t.lidSpace.bloatFactor = asDouble(e);
+ }
+ }
+
+ }
+
private void handleRequestThreads(Element spec, Tuning.SearchNode sn) {
sn.threads = new Tuning.SearchNode.RequestThreads();
Tuning.SearchNode.RequestThreads rt = sn.threads;
@@ -180,9 +192,9 @@ public class DomSearchTuningBuilder extends VespaDomBuilder.DomConfigProducerBui
Tuning.SearchNode.Index.Warmup warmup = sn.index.warmup;
for (Element e2 : XML.getChildren(e)) {
if (equals("time", e2)) {
- warmup.time = Double.valueOf(asString(e2));
+ warmup.time = asDouble(e2);
} else if (equals("unpack", e2)) {
- warmup.unpack = Boolean.valueOf(asString(e2));
+ warmup.unpack = Boolean.parseBoolean(asString(e2));
}
}
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java
index 6977a5ca465..3c1c4867f13 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ApplicationContainerCluster.java
@@ -31,6 +31,7 @@ import com.yahoo.vespa.config.search.core.OnnxModelsConfig;
import com.yahoo.vespa.config.search.core.RankingConstantsConfig;
import com.yahoo.vespa.config.search.core.RankingExpressionsConfig;
import com.yahoo.vespa.model.AbstractService;
+import com.yahoo.vespa.model.Host;
import com.yahoo.vespa.model.admin.metricsproxy.MetricsProxyContainer;
import com.yahoo.vespa.model.container.component.BindingPattern;
import com.yahoo.vespa.model.container.component.Component;
@@ -75,8 +76,8 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
private static final BindingPattern PROMETHEUS_V1_HANDLER_BINDING_1 = SystemBindingPattern.fromHttpPath(PrometheusV1Handler.V1_PATH);
private static final BindingPattern PROMETHEUS_V1_HANDLER_BINDING_2 = SystemBindingPattern.fromHttpPath(PrometheusV1Handler.V1_PATH + "/*");
- public static final int defaultHeapSizePercentageOfTotalNodeMemory = 70;
- public static final int heapSizePercentageOfTotalNodeMemoryWhenCombinedCluster = 18;
+ public static final int defaultHeapSizePercentageOfAvailableMemory = 85;
+ public static final int heapSizePercentageOfTotalAvailableMemoryWhenCombinedCluster = 24;
private final Set<FileReference> applicationBundles = new LinkedHashSet<>();
@@ -91,7 +92,9 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
private int zookeeperSessionTimeoutSeconds = 30;
private final int transport_events_before_wakeup;
private final int transport_connections_per_target;
- private final int heapSizePercentageOfTotalNodeMemory;
+
+ /** The heap size % of total memory available to the JVM process. */
+ private final int heapSizePercentageOfAvailableMemory;
private Integer memoryPercentage = null;
@@ -119,9 +122,9 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
addTestrunnerComponentsIfTester(deployState);
transport_connections_per_target = deployState.featureFlags().mbusJavaRpcNumTargets();
transport_events_before_wakeup = deployState.featureFlags().mbusJavaEventsBeforeWakeup();
- heapSizePercentageOfTotalNodeMemory = deployState.featureFlags().heapSizePercentage() > 0
+ heapSizePercentageOfAvailableMemory = deployState.featureFlags().heapSizePercentage() > 0
? Math.min(99, deployState.featureFlags().heapSizePercentage())
- : defaultHeapSizePercentageOfTotalNodeMemory;
+ : defaultHeapSizePercentageOfAvailableMemory;
}
@Override
@@ -178,12 +181,18 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
@Override
public Optional<Integer> getMemoryPercentage() {
- if (memoryPercentage != null) {
- return Optional.of(memoryPercentage);
- } else if (isHostedVespa()) {
- return getHostClusterId().isPresent() ?
- Optional.of(heapSizePercentageOfTotalNodeMemoryWhenCombinedCluster) :
- Optional.of(heapSizePercentageOfTotalNodeMemory);
+ // An explicitly set percentage always takes precedence
+ if (memoryPercentage != null) return Optional.of(memoryPercentage);
+
+ if (isHostedVespa()) {
+ int availableMemoryPercentage = getHostClusterId().isPresent() ?
+ heapSizePercentageOfTotalAvailableMemoryWhenCombinedCluster :
+ heapSizePercentageOfAvailableMemory;
+ if (getContainers().isEmpty()) return Optional.of(availableMemoryPercentage); // Node memory is not known
+
+ // Node memory is known so convert available memory percentage to node memory percentage
+ double totalMemory = getContainers().get(0).getHostResource().realResources().memoryGb();
+ // Clamp at 0: if the node has less memory than Host.memoryOverheadGb the subtraction goes negative,
+ // which would otherwise produce a negative heap percentage
+ double availableMemory = Math.max(0, totalMemory - Host.memoryOverheadGb);
+ return Optional.of((int) (availableMemory / totalMemory * availableMemoryPercentage));
}
return Optional.empty();
}
@@ -289,9 +298,7 @@ public final class ApplicationContainerCluster extends ContainerCluster<Applicat
.compressedClassSpaceSize(0)
.minHeapsize(1536)
.heapsize(1536);
- if (getMemoryPercentage().isPresent()) {
- builder.jvm.heapSizeAsPercentageOfPhysicalMemory(getMemoryPercentage().get());
- }
+ getMemoryPercentage().ifPresent(percentage -> builder.jvm.heapSizeAsPercentageOfPhysicalMemory(percentage));
}
@Override
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java
index 34c565871db..c227700733e 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/ContainerModelEvaluation.java
@@ -26,6 +26,7 @@ public class ContainerModelEvaluation implements
OnnxModelsConfig.Producer,
RankingExpressionsConfig.Producer {
+ public final static String LINGUISTICS_BUNDLE_NAME = "linguistics-components";
public final static String EVALUATION_BUNDLE_NAME = "model-evaluation";
public final static String INTEGRATION_BUNDLE_NAME = "model-integration";
public final static String ONNXRUNTIME_BUNDLE_NAME = "container-onnxruntime.jar";
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/PlatformBundles.java b/config-model/src/main/java/com/yahoo/vespa/model/container/PlatformBundles.java
index 19df9a4064f..dbc7cd62fbd 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/PlatformBundles.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/PlatformBundles.java
@@ -12,6 +12,7 @@ import java.util.stream.Stream;
import static com.yahoo.vespa.model.container.ContainerModelEvaluation.EVALUATION_BUNDLE_NAME;
import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME;
+import static com.yahoo.vespa.model.container.ContainerModelEvaluation.LINGUISTICS_BUNDLE_NAME;
import static com.yahoo.vespa.model.container.ContainerModelEvaluation.ONNXRUNTIME_BUNDLE_NAME;
/**
@@ -57,7 +58,7 @@ public class PlatformBundles {
public static final Set<Path> SEARCH_AND_DOCPROC_BUNDLES = toBundlePaths(
SEARCH_AND_DOCPROC_BUNDLE,
"docprocs",
- "linguistics-components",
+ LINGUISTICS_BUNDLE_NAME,
EVALUATION_BUNDLE_NAME,
INTEGRATION_BUNDLE_NAME,
ONNXRUNTIME_BUNDLE_NAME
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java
new file mode 100644
index 00000000000..56aa974da48
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/BertEmbedder.java
@@ -0,0 +1,70 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+package com.yahoo.vespa.model.container.component;
+
+import com.yahoo.config.ModelReference;
+import com.yahoo.config.model.deploy.DeployState;
+import com.yahoo.embedding.BertBaseEmbedderConfig;
+import com.yahoo.vespa.model.container.xml.ModelIdResolver;
+import org.w3c.dom.Element;
+
+import static com.yahoo.config.model.builder.xml.XmlHelper.getOptionalChildValue;
+import static com.yahoo.text.XML.getChild;
+import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME;
+
+/**
+ * Model of an {@code ai.vespa.embedding.BertBaseEmbedder} component, built from its XML element.
+ * The transformer model and tokenizer vocabulary references are always written to the config;
+ * every other setting is optional and is only written when the XML supplies a value for it.
+ *
+ * @author bjorncs
+ */
+public class BertEmbedder extends TypedComponent implements BertBaseEmbedderConfig.Producer {
+
+    private final ModelReference model;
+    private final ModelReference vocab;
+
+    // Optional settings: null means no value was found in the XML, and the builder is left untouched
+    private final Integer maxTokens;
+    private final String transformerInputIds;
+    private final String transformerAttentionMask;
+    private final String transformerTokenTypeIds;
+    private final String transformerOutput;
+    private final Integer transformerStartSequenceToken;
+    private final Integer transformerEndSequenceToken;
+    private final String poolingStrategy;
+    private final String onnxExecutionMode;
+    private final Integer onnxInteropThreads;
+    private final Integer onnxIntraopThreads;
+    private final Integer onnxGpuDevice;
+
+    /**
+     * Reads the embedder settings from the given XML element.
+     *
+     * @param xml the component element; its 'transformer-model' and 'tokenizer-vocab' children
+     *            are resolved to model references
+     * @param state the deploy state used when resolving model references
+     * @throws NumberFormatException if an integer-valued setting cannot be parsed
+     */
+    public BertEmbedder(Element xml, DeployState state) {
+        super("ai.vespa.embedding.BertBaseEmbedder", INTEGRATION_BUNDLE_NAME, xml);
+        model = ModelIdResolver.resolveToModelReference(getChild(xml, "transformer-model"), state);
+        vocab = ModelIdResolver.resolveToModelReference(getChild(xml, "tokenizer-vocab"), state);
+        maxTokens = getOptionalChildValue(xml, "max-tokens").map(Integer::parseInt).orElse(null);
+        transformerInputIds = getOptionalChildValue(xml, "transformer-input-ids").orElse(null);
+        transformerAttentionMask = getOptionalChildValue(xml, "transformer-attention-mask").orElse(null);
+        transformerTokenTypeIds = getOptionalChildValue(xml, "transformer-token-type-ids").orElse(null);
+        transformerOutput = getOptionalChildValue(xml, "transformer-output").orElse(null);
+        transformerStartSequenceToken = getOptionalChildValue(xml, "transformer-start-sequence-token").map(Integer::parseInt).orElse(null);
+        transformerEndSequenceToken = getOptionalChildValue(xml, "transformer-end-sequence-token").map(Integer::parseInt).orElse(null);
+        poolingStrategy = getOptionalChildValue(xml, "pooling-strategy").orElse(null);
+        onnxExecutionMode = getOptionalChildValue(xml, "onnx-execution-mode").orElse(null);
+        onnxInteropThreads = getOptionalChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null);
+        onnxIntraopThreads = getOptionalChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null);
+        onnxGpuDevice = getOptionalChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null);
+    }
+
+    /**
+     * Writes the model references, plus every optional setting that has a value, to the builder.
+     *
+     * @throws IllegalArgumentException if the pooling strategy or execution mode string is not
+     *         the name of a constant of the corresponding config enum
+     */
+    @Override
+    public void getConfig(BertBaseEmbedderConfig.Builder b) {
+        b.transformerModel(model).tokenizerVocab(vocab);
+        if (maxTokens != null) b.transformerMaxTokens(maxTokens);
+        if (transformerInputIds != null) b.transformerInputIds(transformerInputIds);
+        if (transformerAttentionMask != null) b.transformerAttentionMask(transformerAttentionMask);
+        if (transformerTokenTypeIds != null) b.transformerTokenTypeIds(transformerTokenTypeIds);
+        if (transformerOutput != null) b.transformerOutput(transformerOutput);
+        if (transformerStartSequenceToken != null) b.transformerStartSequenceToken(transformerStartSequenceToken);
+        if (transformerEndSequenceToken != null) b.transformerEndSequenceToken(transformerEndSequenceToken);
+        if (poolingStrategy != null) b.poolingStrategy(BertBaseEmbedderConfig.PoolingStrategy.Enum.valueOf(poolingStrategy));
+        if (onnxExecutionMode != null) b.onnxExecutionMode(BertBaseEmbedderConfig.OnnxExecutionMode.Enum.valueOf(onnxExecutionMode));
+        if (onnxInteropThreads != null) b.onnxInterOpThreads(onnxInteropThreads);
+        if (onnxIntraopThreads != null) b.onnxIntraOpThreads(onnxIntraopThreads);
+        if (onnxGpuDevice != null) b.onnxGpuDevice(onnxGpuDevice);
+    }
+}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java
new file mode 100644
index 00000000000..6e7a1cc31dd
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceEmbedder.java
@@ -0,0 +1,81 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+package com.yahoo.vespa.model.container.component;
+
+import com.yahoo.config.ModelReference;
+import com.yahoo.config.model.deploy.DeployState;
+import com.yahoo.embedding.huggingface.HuggingFaceEmbedderConfig;
+import com.yahoo.vespa.model.container.xml.ModelIdResolver;
+import org.w3c.dom.Element;
+
+import java.util.Optional;
+
+import static com.yahoo.config.model.builder.xml.XmlHelper.getOptionalChild;
+import static com.yahoo.config.model.builder.xml.XmlHelper.getOptionalChildValue;
+import static com.yahoo.vespa.model.container.ContainerModelEvaluation.INTEGRATION_BUNDLE_NAME;
+
+
+/**
+ * Model of an {@code ai.vespa.embedding.huggingface.HuggingFaceEmbedder} component, built from its
+ * XML element. The transformer model and tokenizer references are always written to the config;
+ * every other setting is optional and is only written when the XML supplies a value for it.
+ *
+ * @author bjorncs
+ */
+public class HuggingFaceEmbedder extends TypedComponent implements HuggingFaceEmbedderConfig.Producer {
+    private final ModelReference model;
+    private final ModelReference vocab;
+
+    // Optional settings: null means no value was found in the XML, and the builder is left untouched
+    private final Integer maxTokens;
+    private final String transformerInputIds;
+    private final String transformerAttentionMask;
+    private final String transformerTokenTypeIds;
+    private final String transformerOutput;
+    private final Boolean normalize;
+    private final String onnxExecutionMode;
+    private final Integer onnxInteropThreads;
+    private final Integer onnxIntraopThreads;
+    private final Integer onnxGpuDevice;
+    private final String poolingStrategy;
+
+    /**
+     * Reads the embedder settings from the given XML element.
+     *
+     * @param xml the component element
+     * @param state the deploy state used when resolving model references
+     * @throws IllegalArgumentException if 'transformer-model' is missing, or if 'tokenizer-model'
+     *         is missing and no default tokenizer can be derived (see {@link #resolveDefaultVocab})
+     * @throws NumberFormatException if an integer-valued setting cannot be parsed
+     */
+    public HuggingFaceEmbedder(Element xml, DeployState state) {
+        super("ai.vespa.embedding.huggingface.HuggingFaceEmbedder", INTEGRATION_BUNDLE_NAME, xml);
+        // Fail with a descriptive message rather than a bare NoSuchElementException
+        var transformerModelElem = getOptionalChild(xml, "transformer-model")
+                .orElseThrow(() -> new IllegalArgumentException("'transformer-model' must be specified"));
+        model = ModelIdResolver.resolveToModelReference(transformerModelElem, state);
+        vocab = getOptionalChild(xml, "tokenizer-model")
+                .map(elem -> ModelIdResolver.resolveToModelReference(elem, state))
+                .orElseGet(() -> resolveDefaultVocab(transformerModelElem, state));
+        maxTokens = getOptionalChildValue(xml, "max-tokens").map(Integer::parseInt).orElse(null);
+        transformerInputIds = getOptionalChildValue(xml, "transformer-input-ids").orElse(null);
+        transformerAttentionMask = getOptionalChildValue(xml, "transformer-attention-mask").orElse(null);
+        transformerTokenTypeIds = getOptionalChildValue(xml, "transformer-token-type-ids").orElse(null);
+        transformerOutput = getOptionalChildValue(xml, "transformer-output").orElse(null);
+        normalize = getOptionalChildValue(xml, "normalize").map(Boolean::parseBoolean).orElse(null);
+        onnxExecutionMode = getOptionalChildValue(xml, "onnx-execution-mode").orElse(null);
+        onnxInteropThreads = getOptionalChildValue(xml, "onnx-interop-threads").map(Integer::parseInt).orElse(null);
+        onnxIntraopThreads = getOptionalChildValue(xml, "onnx-intraop-threads").map(Integer::parseInt).orElse(null);
+        onnxGpuDevice = getOptionalChildValue(xml, "onnx-gpu-device").map(Integer::parseInt).orElse(null);
+        poolingStrategy = getOptionalChildValue(xml, "pooling-strategy").orElse(null);
+    }
+
+    /**
+     * Resolves the tokenizer when no 'tokenizer-model' element is given. When hosted and the
+     * transformer model element has a 'model-id', the tokenizer is looked up under the implicit
+     * id {@code <model-id>-vocab}.
+     *
+     * @throws IllegalArgumentException if not hosted, or the transformer model has no 'model-id'
+     */
+    private static ModelReference resolveDefaultVocab(Element model, DeployState state) {
+        if (state.isHosted() && model.hasAttribute("model-id")) {
+            var implicitVocabId = model.getAttribute("model-id") + "-vocab";
+            return ModelIdResolver.resolveToModelReference(
+                    "tokenizer-model", Optional.of(implicitVocabId), Optional.empty(), Optional.empty(), state);
+        }
+        throw new IllegalArgumentException("'tokenizer-model' must be specified");
+    }
+
+    /**
+     * Writes the model references, plus every optional setting that has a value, to the builder.
+     *
+     * @throws IllegalArgumentException if the execution mode or pooling strategy string is not
+     *         the name of a constant of the corresponding config enum
+     */
+    @Override
+    public void getConfig(HuggingFaceEmbedderConfig.Builder b) {
+        b.transformerModel(model).tokenizerPath(vocab);
+        if (maxTokens != null) b.transformerMaxTokens(maxTokens);
+        if (transformerInputIds != null) b.transformerInputIds(transformerInputIds);
+        if (transformerAttentionMask != null) b.transformerAttentionMask(transformerAttentionMask);
+        if (transformerTokenTypeIds != null) b.transformerTokenTypeIds(transformerTokenTypeIds);
+        if (transformerOutput != null) b.transformerOutput(transformerOutput);
+        if (normalize != null) b.normalize(normalize);
+        if (onnxExecutionMode != null) b.transformerExecutionMode(
+                HuggingFaceEmbedderConfig.TransformerExecutionMode.Enum.valueOf(onnxExecutionMode));
+        if (onnxInteropThreads != null) b.transformerInterOpThreads(onnxInteropThreads);
+        if (onnxIntraopThreads != null) b.transformerIntraOpThreads(onnxIntraopThreads);
+        if (onnxGpuDevice != null) b.transformerGpuDevice(onnxGpuDevice);
+        if (poolingStrategy != null) b.poolingStrategy(HuggingFaceEmbedderConfig.PoolingStrategy.Enum.valueOf(poolingStrategy));
+    }
+}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java
new file mode 100644
index 00000000000..966dbe8260a
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/HuggingFaceTokenizer.java
@@ -0,0 +1,47 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.model.container.component;
+
+import com.yahoo.config.ModelReference;
+import com.yahoo.config.model.deploy.DeployState;
+import com.yahoo.language.huggingface.config.HuggingFaceTokenizerConfig;
+import com.yahoo.text.XML;
+import com.yahoo.vespa.model.container.xml.ModelIdResolver;
+import org.w3c.dom.Element;
+
+import java.util.Map;
+import java.util.TreeMap;
+
+import static com.yahoo.config.model.builder.xml.XmlHelper.getOptionalChildValue;
+import static com.yahoo.vespa.model.container.ContainerModelEvaluation.LINGUISTICS_BUNDLE_NAME;
+
+/**
+ * Model of a {@code com.yahoo.language.huggingface.HuggingFaceTokenizer} component, built from its
+ * XML element. Each 'model' child contributes one language-to-model entry; the remaining settings
+ * are optional and only written to the config when the XML supplies a value.
+ *
+ * @author bjorncs
+ */
+public class HuggingFaceTokenizer extends TypedComponent implements HuggingFaceTokenizerConfig.Producer {
+
+    /** Tokenizer model per language, kept sorted by language. */
+    private final Map<String, ModelReference> langToModel = new TreeMap<>();
+    private final Boolean specialTokens;
+    private final Integer maxLength;
+    private final Boolean truncation;
+
+    /**
+     * Reads the tokenizer settings from the given XML element.
+     *
+     * @param xml the component element
+     * @param state the deploy state used when resolving model references
+     */
+    public HuggingFaceTokenizer(Element xml, DeployState state) {
+        super("com.yahoo.language.huggingface.HuggingFaceTokenizer", LINGUISTICS_BUNDLE_NAME, xml);
+        for (Element modelElement : XML.getChildren(xml, "model")) {
+            // A model without a 'language' attribute is registered under "unknown"
+            String language = modelElement.hasAttribute("language")
+                    ? modelElement.getAttribute("language")
+                    : "unknown";
+            ModelReference reference = ModelIdResolver.resolveToModelReference(modelElement, state);
+            langToModel.put(language, reference);
+        }
+        specialTokens = optionalBoolean(xml, "special-tokens");
+        maxLength = getOptionalChildValue(xml, "max-length").map(Integer::parseInt).orElse(null);
+        truncation = optionalBoolean(xml, "truncation");
+    }
+
+    /** Returns the boolean value of the named child, or null if getOptionalChildValue finds none. */
+    private static Boolean optionalBoolean(Element xml, String childName) {
+        return getOptionalChildValue(xml, childName).map(Boolean::parseBoolean).orElse(null);
+    }
+
+    /** Writes one model entry per language, plus every optional setting that has a value. */
+    @Override
+    public void getConfig(HuggingFaceTokenizerConfig.Builder builder) {
+        for (Map.Entry<String, ModelReference> entry : langToModel.entrySet()) {
+            builder.model.add(new HuggingFaceTokenizerConfig.Model.Builder()
+                                      .language(entry.getKey())
+                                      .path(entry.getValue()));
+        }
+        if (specialTokens != null) builder.addSpecialTokens(specialTokens);
+        if (maxLength != null) builder.maxLength(maxLength);
+        if (truncation != null) builder.truncation(truncation);
+    }
+}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/component/TypedComponent.java b/config-model/src/main/java/com/yahoo/vespa/model/container/component/TypedComponent.java
new file mode 100644
index 00000000000..522c78f2f25
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/component/TypedComponent.java
@@ -0,0 +1,20 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+package com.yahoo.vespa.model.container.component;
+
+import com.yahoo.osgi.provider.model.ComponentModel;
+import org.w3c.dom.Element;
+
+/**
+ * Base class for components that are modelled by a dedicated class and configured from an
+ * XML element. The component id is taken from the element's 'id' attribute.
+ *
+ * @author bjorncs
+ */
+abstract class TypedComponent extends SimpleComponent {
+
+    /**
+     * @param className the class name of the component
+     * @param bundle the bundle the component class is loaded from
+     * @param xml the component element; only its 'id' attribute is read here
+     */
+    protected TypedComponent(String className, String bundle, Element xml) {
+        // The element itself is not retained: nothing in this class reads it after construction
+        super(new ComponentModel(xml.getAttribute("id"), className, bundle));
+    }
+
+}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
index 57303d6c9b3..bcebf1a9fdd 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ContainerModelBuilder.java
@@ -933,22 +933,19 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
}
private static boolean applyMemoryPercentage(ApplicationContainerCluster cluster, String memoryPercentage) {
- if (memoryPercentage == null || memoryPercentage.isEmpty()) return false;
- memoryPercentage = memoryPercentage.trim();
-
- if ( ! memoryPercentage.endsWith("%"))
- throw new IllegalArgumentException("The memory percentage given for nodes in " + cluster +
- " must be an integer percentage ending by the '%' sign");
- memoryPercentage = memoryPercentage.substring(0, memoryPercentage.length()-1).trim();
-
try {
+ // Nothing to apply when no percentage is given
+ if (memoryPercentage == null || memoryPercentage.isEmpty()) return false;
+ memoryPercentage = memoryPercentage.trim();
+ // Thrown as NumberFormatException so that the catch below wraps it in the message naming the cluster;
+ // a plain IllegalArgumentException would bypass that catch and lose the context
+ if ( ! memoryPercentage.endsWith("%"))
+ throw new NumberFormatException("Missing % sign");
+ memoryPercentage = memoryPercentage.substring(0, memoryPercentage.length()-1).trim();
cluster.setMemoryPercentage(Integer.parseInt(memoryPercentage));
+ return true;
}
catch (NumberFormatException e) {
throw new IllegalArgumentException("The memory percentage given for nodes in " + cluster +
- " must be an integer percentage ending by the '%' sign");
+ " must be an integer percentage ending by the '%' sign", e);
}
- return true;
}
/** Allocate a container cluster without a nodes tag */
@@ -960,9 +957,11 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
int nodeCount = deployState.zone().environment().isProduction() ? 2 : 1;
deployState.getDeployLogger().logApplicationPackage(Level.INFO, "Using " + nodeCount + " nodes in " + cluster);
var nodesSpec = NodesSpecification.dedicated(nodeCount, context);
+ ClusterSpec.Id clusterId = ClusterSpec.Id.from(cluster.getName());
var hosts = nodesSpec.provision(hostSystem,
ClusterSpec.Type.container,
- ClusterSpec.Id.from(cluster.getName()),
+ clusterId,
+ zoneEndpoint(context, clusterId),
deployState.getDeployLogger(),
false,
context.clusterInfo().build());
@@ -1192,9 +1191,7 @@ public class ContainerModelBuilder extends ConfigModelBuilder<ContainerModel> {
DeploymentSpec spec) {
spec.athenzDomain()
.ifPresent(domain -> {
- AthenzService service = spec.instance(app.getApplicationId().instance())
- .flatMap(instanceSpec -> instanceSpec.athenzService(zone.environment(), zone.region()))
- .or(spec::athenzService)
+ AthenzService service = spec.athenzService(app.getApplicationId().instance(), zone.environment(), zone.region())
.orElseThrow(() -> new IllegalArgumentException("Missing Athenz service configuration in instance '" +
app.getApplicationId().instance() + "'"));
String zoneDnsSuffix = zone.environment().value() + "-" + zone.region().value() + "." + athenzDnsSuffix;
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java
index ff261d2b83a..96f653bf793 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/container/xml/ModelIdResolver.java
@@ -1,12 +1,17 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.model.container.xml;
+import com.yahoo.config.ModelReference;
+import com.yahoo.config.UrlReference;
+import com.yahoo.config.model.builder.xml.XmlHelper;
+import com.yahoo.config.model.deploy.DeployState;
import com.yahoo.text.XML;
import org.w3c.dom.Element;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
+import java.util.Optional;
import java.util.stream.Collectors;
/**
@@ -29,6 +34,19 @@ public class ModelIdResolver {
models.put("flan-t5-base-decoder", "https://data.vespa.oath.cloud/onnx_models/flan-t5-base-decoder-model.onnx");
models.put("flan-t5-large-encoder", "https://data.vespa.oath.cloud/onnx_models/flan-t5-large-encoder-model.onnx");
models.put("flan-t5-large-decoder", "https://data.vespa.oath.cloud/onnx_models/flan-t5-large-decoder-model.onnx");
+
+ models.put("multilingual-e5-base", "https://data.vespa.oath.cloud/onnx_models/multilingual-e5-base/model.onnx");
+ models.put("multilingual-e5-base-vocab", "https://data.vespa.oath.cloud/onnx_models/multilingual-e5-base/tokenizer.json");
+
+ models.put("e5-small-v2", "https://data.vespa.oath.cloud/onnx_models/e5-small-v2/model.onnx");
+ models.put("e5-small-v2-vocab", "https://data.vespa.oath.cloud/onnx_models/e5-small-v2/tokenizer.json");
+
+ models.put("e5-base-v2", "https://data.vespa.oath.cloud/onnx_models/e5-base-v2/model.onnx");
+ models.put("e5-base-v2-vocab", "https://data.vespa.oath.cloud/onnx_models/e5-base-v2/tokenizer.json");
+
+ models.put("e5-large-v2", "https://data.vespa.oath.cloud/onnx_models/e5-large-v2/model.onnx");
+ models.put("e5-large-v2-vocab", "https://data.vespa.oath.cloud/onnx_models/e5-large-v2/tokenizer.json");
+
return Collections.unmodifiableMap(models);
}
@@ -57,11 +75,36 @@ public class ModelIdResolver {
value.removeAttribute("path");
}
else if ( ! value.hasAttribute("url") && ! value.hasAttribute("path")) {
- throw new IllegalArgumentException(value.getTagName() + " is configured with only a 'model-id'. " +
- "Add a 'path' or 'url' to deploy this outside Vespa Cloud");
+ throw onlyModelIdInHostedException(value.getTagName());
}
}
+
+    /**
+     * Resolves a model reference from the 'model-id', 'url' and 'path' attributes of the given
+     * element, using the element's tag name as the parameter name in error messages.
+     */
+    public static ModelReference resolveToModelReference(Element elem, DeployState state) {
+        String tagName = elem.getTagName();
+        Optional<String> modelId = XmlHelper.getOptionalAttribute(elem, "model-id");
+        Optional<String> url = XmlHelper.getOptionalAttribute(elem, "url");
+        Optional<String> path = XmlHelper.getOptionalAttribute(elem, "path");
+        return resolveToModelReference(tagName, modelId, url, path, state);
+    }
+
+    /**
+     * Resolves a model reference from an optional model id, url and path.
+     * <ul>
+     *   <li>No id: the url and path are used as given.</li>
+     *   <li>Id, hosted: the id is mapped to its url; any given url or path is ignored.</li>
+     *   <li>Id, not hosted: a url or path must also be given, and is used together with the id.</li>
+     * </ul>
+     *
+     * @param paramName the name used to identify the setting in error messages
+     * @throws IllegalArgumentException if not hosted and only an id is given, or if the id is
+     *         rejected when it is mapped to a url
+     */
+    public static ModelReference resolveToModelReference(
+            String paramName, Optional<String> id, Optional<String> url, Optional<String> path, DeployState state) {
+        if (id.isEmpty()) return createModelReference(Optional.empty(), url, path, state);
+
+        if (state.isHosted()) {
+            String providedUrl = modelIdToUrl(paramName, id.get());
+            return createModelReference(id, Optional.of(providedUrl), Optional.empty(), state);
+        }
+
+        if (url.isEmpty() && path.isEmpty()) throw onlyModelIdInHostedException(paramName);
+        return createModelReference(id, url, path, state);
+    }
+
+    /** Creates an unresolved model reference, adding the path (if any) to the file registry first. */
+    private static ModelReference createModelReference(Optional<String> id, Optional<String> url, Optional<String> path, DeployState state) {
+        var fileReference = path.map(file -> state.getFileRegistry().addFile(file));
+        var urlReference = url.map(UrlReference::valueOf);
+        return ModelReference.unresolved(id, urlReference, fileReference);
+    }
+
+    /** Creates the exception used when only a 'model-id' is given and neither url nor path is available. */
+    private static IllegalArgumentException onlyModelIdInHostedException(String paramName) {
+        String message = paramName + " is configured with only a 'model-id'. " +
+                         "Add a 'path' or 'url' to deploy this outside Vespa Cloud";
+        return new IllegalArgumentException(message);
+    }
+
private static String modelIdToUrl(String valueName, String modelId) {
if ( ! providedModels.containsKey(modelId))
throw new IllegalArgumentException("Unknown model id '" + modelId + "' on '" + valueName + "'. Available models are [" +
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/Content.java b/config-model/src/main/java/com/yahoo/vespa/model/content/Content.java
index e044b97546c..43f045940c9 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/content/Content.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/content/Content.java
@@ -28,6 +28,7 @@ import com.yahoo.vespa.model.container.docproc.DocprocChain;
import com.yahoo.vespa.model.container.docproc.DocprocChains;
import com.yahoo.vespa.model.content.cluster.ContentCluster;
import com.yahoo.vespa.model.search.IndexedSearchCluster;
+import com.yahoo.vespa.model.search.IndexingDocproc;
import com.yahoo.vespa.model.search.IndexingDocprocChain;
import com.yahoo.vespa.model.search.SearchCluster;
import com.yahoo.vespa.model.search.SearchNode;
@@ -213,13 +214,17 @@ public class Content extends ConfigModel {
/** Select/creates and initializes the indexing cluster coupled to this */
private void buildIndexingClusters(Content content, ConfigModelContext modelContext,
ApplicationConfigProducerRoot root) {
- if ( ! content.getCluster().getSearch().hasIndexedCluster()) return;
-
- IndexedSearchCluster indexedSearchCluster = content.getCluster().getSearch().getIndexed();
- if (indexedSearchCluster.hasExplicitIndexingCluster()) {
- setExistingIndexingCluster(indexedSearchCluster, content.containers);
+ var contentSearch = content.getCluster().getSearch();
+ var docproc = contentSearch.getIndexingDocproc();
+ if (docproc.isEmpty()) return; // No indexing docproc is set on the search cluster: nothing to build
+
+ if (docproc.get().hasExplicitCluster()) {
+ setExistingIndexingCluster(content, docproc.get(), content.containers);
} else {
- setContainerAsIndexingCluster(indexedSearchCluster, content, modelContext, root);
+ // Only a search cluster with an indexed cluster gets a container assigned as indexing cluster
+ if (contentSearch.hasIndexedCluster()) {
+ setContainerAsIndexingCluster(contentSearch.getIndexed(), content, modelContext, root);
+ }
}
}
@@ -237,18 +242,19 @@ public class Content extends ConfigModel {
targetCluster = content.containers.iterator().next().getCluster();
addDocproc(targetCluster);
- indexedSearchCluster.setIndexingClusterName(targetCluster.getName());
- addIndexingChainsTo(targetCluster, indexedSearchCluster);
+ var indexingDocproc = indexedSearchCluster.getIndexingDocproc();
+ indexingDocproc.setClusterName(targetCluster.getName());
+ addIndexingChainsTo(targetCluster, content, indexingDocproc);
}
}
- private void setExistingIndexingCluster(IndexedSearchCluster cluster, Collection<ContainerModel> containers) {
- String indexingClusterName = cluster.getIndexingClusterName();
+ /** Adds the indexing chains to the container cluster named by the indexing docproc, which must exist */
+ private void setExistingIndexingCluster(Content content, IndexingDocproc indexingDocproc, Collection<ContainerModel> containers) {
+ var contentClusterName = content.getCluster().getName();
+ String indexingClusterName = indexingDocproc.getClusterName(contentClusterName);
ContainerModel containerModel = findByName(indexingClusterName, containers);
if (containerModel == null)
- throw new IllegalArgumentException("Content cluster '" + cluster.getClusterName() + "' refers to docproc " +
+ throw new IllegalArgumentException("Content cluster '" + contentClusterName + "' refers to docproc " +
"cluster '" + indexingClusterName + "', but this cluster does not exist.");
- addIndexingChainsTo(containerModel.getCluster(), cluster);
+ addIndexingChainsTo(containerModel.getCluster(), content, indexingDocproc);
}
private ContainerModel findByName(String name, Collection<ContainerModel> containers) {
@@ -258,19 +264,19 @@ public class Content extends ConfigModel {
return null;
}
- private void addIndexingChainsTo(ContainerCluster<?> indexer, IndexedSearchCluster cluster) {
+ private void addIndexingChainsTo(ContainerCluster<?> indexer, Content content, IndexingDocproc indexingDocproc) {
addIndexingChain(indexer);
DocprocChain indexingChain;
ComponentRegistry<DocprocChain> allChains = indexer.getDocprocChains().allChains();
- if (cluster.hasExplicitIndexingChain()) {
- indexingChain = allChains.getComponent(cluster.getIndexingChainName());
+ if (indexingDocproc.hasExplicitChain() && !indexingDocproc.getChainName().equals(IndexingDocprocChain.NAME)) {
+ indexingChain = allChains.getComponent(indexingDocproc.getChainName());
if (indexingChain == null) {
- throw new IllegalArgumentException(cluster + " refers to docproc " +
- "chain '" + cluster.getIndexingChainName() +
+ throw new IllegalArgumentException(content.getCluster() + " refers to docproc " +
+ "chain '" + indexingDocproc.getChainName() +
"' for indexing, but this chain does not exist");
}
else if (indexingChain.getId().getName().equals("default")) {
- throw new IllegalArgumentException(cluster + " specifies the chain " +
+ throw new IllegalArgumentException(content.getCluster() + " specifies the chain " +
"'default' as indexing chain. As the 'default' chain is run by default, " +
"using it as the indexing chain will run it twice. " +
"Use a different name for the indexing chain.");
@@ -282,7 +288,7 @@ public class Content extends ConfigModel {
indexingChain = allChains.getComponent(IndexingDocprocChain.NAME);
}
- cluster.setIndexingChain(indexingChain);
+ indexingDocproc.setChain(indexingChain);
}
private TreeConfigProducer<AnyConfigProducer> getDocProc(ApplicationConfigProducerRoot root) {
@@ -301,7 +307,7 @@ public class Content extends ConfigModel {
Content content,
ConfigModelContext modelContext,
ApplicationConfigProducerRoot root) {
- String indexerName = cluster.getIndexingClusterName();
+ String indexerName = cluster.getIndexingDocproc().getClusterName(content.getCluster().getName());
TreeConfigProducer<AnyConfigProducer> parent = getDocProc(root);
ApplicationContainerCluster indexingCluster = new ApplicationContainerCluster(parent, "cluster." + indexerName, indexerName, modelContext.getDeployState());
ContainerModel indexingClusterModel = new ContainerModel(modelContext.withParent(parent).withId(indexingCluster.getSubId()));
@@ -334,7 +340,7 @@ public class Content extends ConfigModel {
indexingCluster.addContainers(nodes);
addIndexingChain(indexingCluster);
- cluster.setIndexingChain(indexingCluster.getDocprocChains().allChains().getComponent(IndexingDocprocChain.NAME));
+ cluster.getIndexingDocproc().setChain(indexingCluster.getDocprocChains().allChains().getComponent(IndexingDocprocChain.NAME));
}
private ContainerCluster<?> getContainerWithDocproc(Collection<ContainerModel> containers) {
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/ContentSearchCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/content/ContentSearchCluster.java
index a0240d28a3c..ec7acaf819f 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/content/ContentSearchCluster.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/content/ContentSearchCluster.java
@@ -16,6 +16,7 @@ import com.yahoo.vespa.model.builder.xml.dom.ModelElement;
import com.yahoo.vespa.model.builder.xml.dom.VespaDomBuilder;
import com.yahoo.vespa.model.content.cluster.ContentCluster;
import com.yahoo.vespa.model.search.IndexedSearchCluster;
+import com.yahoo.vespa.model.search.IndexingDocproc;
import com.yahoo.vespa.model.search.NodeSpec;
import com.yahoo.vespa.model.search.SchemaDefinitionXMLHandler;
import com.yahoo.vespa.model.search.SearchCluster;
@@ -57,6 +58,7 @@ public class ContentSearchCluster extends TreeConfigProducer<AnyConfigProducer>
/** The single, indexed search cluster this sets up (supporting multiple document types), or null if none */
private IndexedSearchCluster indexedCluster;
+ private Optional<IndexingDocproc> indexingDocproc;
private Redundancy redundancy;
private final String clusterName;
@@ -206,6 +208,7 @@ public class ContentSearchCluster extends TreeConfigProducer<AnyConfigProducer>
double fractionOfMemoryReserved)
{
super(parent, "search");
+ this.indexingDocproc = Optional.empty();
this.clusterName = clusterName;
this.documentDefinitions = documentDefinitions;
this.globallyDistributedDocuments = globallyDistributedDocuments;
@@ -259,6 +262,10 @@ public class ContentSearchCluster extends TreeConfigProducer<AnyConfigProducer>
throw new IllegalArgumentException("Duplicate indexed cluster '" + indexedCluster.getClusterName() + "'");
}
indexedCluster = (IndexedSearchCluster)sc;
+ if (indexingDocproc.isPresent()) {
+ throw new IllegalArgumentException("Indexing docproc has previously been setup for streaming search");
+ }
+ indexingDocproc = Optional.of(indexedCluster.getIndexingDocproc());
}
clusters.put(sc.getClusterName(), sc);
}
@@ -458,6 +465,12 @@ public class ContentSearchCluster extends TreeConfigProducer<AnyConfigProducer>
public Map<String, SearchCluster> getClusters() { return clusters; }
public IndexedSearchCluster getIndexed() { return indexedCluster; }
public boolean hasIndexedCluster() { return indexedCluster != null; }
+ public Optional<IndexingDocproc> getIndexingDocproc() { return indexingDocproc; }
+ public void setupStreamingSearchIndexingDocProc() {
+ if (indexingDocproc.isEmpty()) {
+ indexingDocproc = Optional.of(new IndexingDocproc());
+ }
+ }
public String getClusterName() { return clusterName; }
@Override
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java
index 66a99e1993c..dfdfa9303a7 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/content/cluster/ContentCluster.java
@@ -203,19 +203,24 @@ public class ContentCluster extends TreeConfigProducer<AnyConfigProducer> implem
if (docprocCluster != null) {
docprocCluster = docprocCluster.trim();
}
- if (c.getSearch().hasIndexedCluster()) {
- if (docprocCluster != null && !docprocCluster.isEmpty()) {
- c.getSearch().getIndexed().setIndexingClusterName(docprocCluster);
- }
- }
-
String docprocChain = e.stringAttribute("chain");
if (docprocChain != null) {
docprocChain = docprocChain.trim();
}
- if (c.getSearch().hasIndexedCluster()) {
- if (docprocChain != null && !docprocChain.isEmpty()) {
- c.getSearch().getIndexed().setIndexingChainName(docprocChain);
+ if (docprocCluster != null && !docprocCluster.isEmpty()) {
+ if (!c.getSearch().hasIndexedCluster() && !c.getSearch().getIndexingDocproc().isPresent() &&
+ docprocChain != null && !docprocChain.isEmpty()) {
+ c.getSearch().setupStreamingSearchIndexingDocProc();
+ }
+ var indexingDocproc = c.getSearch().getIndexingDocproc();
+ if (indexingDocproc.isPresent()) {
+ indexingDocproc.get().setClusterName(docprocCluster);
+ }
+ }
+ if (docprocChain != null && !docprocChain.isEmpty()) {
+ var indexingDocproc = c.getSearch().getIndexingDocproc();
+ if (indexingDocproc.isPresent()) {
+ indexingDocproc.get().setChainName(docprocChain);
}
}
}
@@ -451,7 +456,7 @@ public class ContentCluster extends TreeConfigProducer<AnyConfigProducer> implem
@Override
public void getConfig(MessagetyperouteselectorpolicyConfig.Builder builder) {
- if ( ! getSearch().hasIndexedCluster()) return;
+ if ( ! getSearch().getIndexingDocproc().isPresent()) return;
DocumentProtocol.getConfig(builder, getConfigId());
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/routing/DocumentProtocol.java b/config-model/src/main/java/com/yahoo/vespa/model/routing/DocumentProtocol.java
index ad0312705ca..6623efb599d 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/routing/DocumentProtocol.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/routing/DocumentProtocol.java
@@ -110,7 +110,7 @@ public final class DocumentProtocol implements Protocol,
for (ContentCluster cluster : Content.getContentClusters(repo)) {
DocumentProtocolPoliciesConfig.Cluster.Builder clusterBuilder = new DocumentProtocolPoliciesConfig.Cluster.Builder();
addSelector(cluster.getConfigId(), cluster.getRoutingSelector(), clusterBuilder);
- if (cluster.getSearch().hasIndexedCluster())
+ if (cluster.getSearch().getIndexingDocproc().isPresent())
addRoutes(getDirectRouteName(cluster.getConfigId()), getIndexedRouteName(cluster.getConfigId()), clusterBuilder);
else
clusterBuilder.defaultRoute(cluster.getConfigId());
@@ -227,10 +227,11 @@ public final class DocumentProtocol implements Protocol,
for (ContentCluster cluster : content) {
RouteSpec spec = new RouteSpec(cluster.getConfigId());
- if (cluster.getSearch().hasIndexedCluster()) {
+ if (cluster.getSearch().getIndexingDocproc().isPresent()) {
+ var indexingDocproc = cluster.getSearch().getIndexingDocproc().get();
table.addRoute(spec.addHop("[MessageType:" + cluster.getConfigId() + "]"));
table.addRoute(new RouteSpec(getIndexedRouteName(cluster.getConfigId()))
- .addHop(cluster.getSearch().getIndexed().getIndexingServiceName())
+ .addHop(indexingDocproc.getServiceName())
.addHop("[Content:cluster=" + cluster.getName() + "]"));
table.addRoute(new RouteSpec(getDirectRouteName(cluster.getConfigId()))
.addHop("[Content:cluster=" + cluster.getName() + "]"));
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java b/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java
index 670460a9f9f..080a2ca43dc 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/search/IndexedSearchCluster.java
@@ -43,11 +43,7 @@ public class IndexedSearchCluster extends SearchCluster
DispatchNodesConfig.Producer,
ConfigInstance.Producer {
- private String indexingClusterName = null; // The name of the docproc cluster to run indexing, by config.
- private String indexingChainName = null;
-
- private DocprocChain indexingChain; // The actual docproc chain indexing for this.
-
+ private IndexingDocproc indexingDocproc;
private Tuning tuning;
private SearchCoverage searchCoverage;
@@ -77,6 +73,7 @@ public class IndexedSearchCluster extends SearchCluster
public IndexedSearchCluster(TreeConfigProducer<AnyConfigProducer> parent, String clusterName, int index, ModelContext.FeatureFlags featureFlags) {
super(parent, clusterName, index);
+ indexingDocproc = new IndexingDocproc();
documentDbsConfigProducer = new MultipleDocumentDatabasesConfigProducer(this, documentDbs);
rootDispatch = new DispatchGroup(this);
defaultDispatchPolicy = DispatchTuning.Builder.toDispatchPolicy(featureFlags.queryDispatchPolicy());
@@ -87,58 +84,7 @@ public class IndexedSearchCluster extends SearchCluster
@Override
protected IndexingMode getIndexingMode() { return IndexingMode.REALTIME; }
- public final boolean hasExplicitIndexingCluster() {
- return indexingClusterName != null;
- }
-
- public final boolean hasExplicitIndexingChain() {
- return indexingChainName != null;
- }
-
- /**
- * Returns the name of the docproc cluster running indexing for this search cluster. This is derived from the
- * services file on initialization, this can NOT be used at runtime to determine indexing chain. When initialization
- * is done, the {@link #getIndexingServiceName()} method holds the actual indexing docproc chain object.
- *
- * @return the name of the docproc cluster associated with this
- */
- public String getIndexingClusterName() {
- return hasExplicitIndexingCluster() ? indexingClusterName : getClusterName() + ".indexing";
- }
-
- public String getIndexingChainName() {
- return indexingChainName;
- }
-
- public void setIndexingChainName(String indexingChainName) {
- this.indexingChainName = indexingChainName;
- }
-
- /**
- * Sets the name of the docproc cluster running indexing for this search cluster. This is for initial configuration,
- * and will not reflect the actual indexing chain. See {@link #getIndexingClusterName} for more detail.
- *
- * @param name the name of the docproc cluster associated with this
- */
- public void setIndexingClusterName(String name) {
- indexingClusterName = name;
- }
-
- public String getIndexingServiceName() {
- return indexingChain.getServiceName();
- }
-
- /**
- * Sets the docproc chain that will be running indexing for this search cluster. This is set by the
- * {@link com.yahoo.vespa.model.content.Content} model during build.
- *
- * @param chain the chain that is to run indexing for this cluster
- * @return this, to allow chaining
- */
- public SearchCluster setIndexingChain(DocprocChain chain) {
- indexingChain = chain;
- return this;
- }
+ public IndexingDocproc getIndexingDocproc() { return indexingDocproc; }
public DispatchGroup getRootDispatch() { return rootDispatch; }
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/search/IndexingDocproc.java b/config-model/src/main/java/com/yahoo/vespa/model/search/IndexingDocproc.java
new file mode 100644
index 00000000000..46f3e6f459d
--- /dev/null
+++ b/config-model/src/main/java/com/yahoo/vespa/model/search/IndexingDocproc.java
@@ -0,0 +1,68 @@
+package com.yahoo.vespa.model.search;
+
+import com.yahoo.vespa.model.container.docproc.DocprocChain;
+
+/**
+ * Utility class tracking which indexing docproc (cluster name, chain name and chain) a search cluster uses.
+ */
+public class IndexingDocproc {
+ private String clusterName; // The name of the docproc cluster to run indexing, by config.
+ private String chainName;
+
+ private DocprocChain chain; // The actual docproc chain indexing for this.
+
+ public boolean hasExplicitCluster() {
+ return clusterName != null;
+ }
+
+ public boolean hasExplicitChain() {
+ return chainName != null;
+ }
+
+ /**
+ * Returns the name of the docproc cluster running indexing for this search cluster. This is derived from the
+ * services file on initialization and can NOT be used at runtime to determine the indexing chain. When initialization
+ * is done, {@link #getServiceName()} returns the service name of the actual indexing docproc chain.
+ *
+ * @return the name of the docproc cluster associated with this
+ */
+ public String getClusterName(String searchClusterName) {
+ return hasExplicitCluster() ? clusterName : searchClusterName + ".indexing";
+ }
+
+ public String getChainName() {
+ return chainName;
+ }
+
+ public void setChainName(String name) {
+ chainName = name;
+ }
+
+ /**
+ * Sets the name of the docproc cluster running indexing for this search cluster. This is for initial configuration,
+ * and will not reflect the actual indexing chain. See {@link #getClusterName} for more detail.
+ *
+ * @param name the name of the docproc cluster associated with this
+ */
+ public void setClusterName(String name) {
+ clusterName = name;
+ }
+
+ public String getServiceName() {
+ return chain.getServiceName();
+ }
+
+ /**
+ * Sets the docproc chain that will be running indexing for this search cluster. This is set by the
+ * {@link com.yahoo.vespa.model.content.Content} model during build.
+ *
+ * @param chain the chain that is to run indexing for this cluster
+ */
+ public void setChain(DocprocChain chain) { this.chain = chain; }
+
+ public IndexingDocproc() {
+ clusterName = null;
+ chainName = null;
+ chain = null;
+ }
+}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/search/NodeResourcesTuning.java b/config-model/src/main/java/com/yahoo/vespa/model/search/NodeResourcesTuning.java
index 5b747b93268..1ad99404823 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/search/NodeResourcesTuning.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/search/NodeResourcesTuning.java
@@ -3,6 +3,7 @@ package com.yahoo.vespa.model.search;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.vespa.config.search.core.ProtonConfig;
+import com.yahoo.vespa.model.Host;
import static java.lang.Long.min;
import static java.lang.Long.max;
@@ -27,9 +28,6 @@ public class NodeResourcesTuning implements ProtonConfig.Producer {
private final int threadsPerSearch;
private final double fractionOfMemoryReserved;
- // "Reserve" 0.5GB of memory for other processes running on the content node (config-proxy, metrics-proxy).
- public static final double reservedMemoryGb = 0.7;
-
public NodeResourcesTuning(NodeResources resources,
int threadsPerSearch,
double fractionOfMemoryReserved) {
@@ -128,7 +126,7 @@ public class NodeResourcesTuning implements ProtonConfig.Producer {
/** Returns the memory we can expect will be available for the content node processes */
private double usableMemoryGb() {
- double usableMemoryGb = resources.memoryGb() - reservedMemoryGb;
+ double usableMemoryGb = resources.memoryGb() - Host.memoryOverheadGb;
return usableMemoryGb * (1 - fractionOfMemoryReserved);
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/model/search/Tuning.java b/config-model/src/main/java/com/yahoo/vespa/model/search/Tuning.java
index a29b7b90b44..93e3a6e7a19 100644
--- a/config-model/src/main/java/com/yahoo/vespa/model/search/Tuning.java
+++ b/config-model/src/main/java/com/yahoo/vespa/model/search/Tuning.java
@@ -53,6 +53,16 @@ public class Tuning extends AnyConfigProducer implements ProtonConfig.Producer {
}
}
+ public static class LidSpace implements ProtonConfig.Producer {
+ public Double bloatFactor = null;
+
+ @Override
+ public void getConfig(ProtonConfig.Builder builder) {
+ if (bloatFactor != null) builder.lidspacecompaction.allowedlidbloatfactor(bloatFactor);
+ }
+
+ }
+
public static class RemovedDB implements ProtonConfig.Producer {
public static class Prune implements ProtonConfig.Producer {
@@ -371,6 +381,7 @@ public class Tuning extends AnyConfigProducer implements ProtonConfig.Producer {
}
public RequestThreads threads = null;
+ public LidSpace lidSpace = null;
public FlushStrategy strategy = null;
public Resizing resizing = null;
public Index index = null;
@@ -383,6 +394,7 @@ public class Tuning extends AnyConfigProducer implements ProtonConfig.Producer {
@Override
public void getConfig(ProtonConfig.Builder builder) {
if (threads != null) threads.getConfig(builder);
+ if (lidSpace != null) lidSpace.getConfig(builder);
if (strategy != null) strategy.getConfig(builder);
if (resizing != null) resizing.getConfig(builder);
if (index != null) index.getConfig(builder);
diff --git a/config-model/src/main/resources/schema/common.rnc b/config-model/src/main/resources/schema/common.rnc
index 21f3399a027..061e54740f1 100644
--- a/config-model/src/main/resources/schema/common.rnc
+++ b/config-model/src/main/resources/schema/common.rnc
@@ -53,6 +53,11 @@ GenericConfig = element config {
anyElement*
}
+ModelReference =
+ attribute model-id { xsd:string }? &
+ attribute path { xsd:string }? &
+ attribute url { xsd:string }?
+
ComponentSpec =
( attribute id { xsd:Name | JavaId } | attribute idref { xsd:Name } | attribute ident { xsd:Name } )
@@ -64,7 +69,7 @@ BundleSpec =
attribute bundle { xsd:Name }?
Component = element component {
- ComponentDefinition
+ (ComponentDefinition | TypedComponentDefinition)
}
ComponentDefinition =
@@ -72,3 +77,51 @@ ComponentDefinition =
BundleSpec &
GenericConfig* &
Component*
+
+TypedComponentDefinition =
+ attribute id { xsd:Name } &
+ (HuggingFaceEmbedder | HuggingFaceTokenizer | BertBaseEmbedder) &
+ GenericConfig* &
+ Component*
+
+HuggingFaceEmbedder =
+ attribute type { "hugging-face-embedder" } &
+ element transformer-model { ModelReference } &
+ element tokenizer-model { ModelReference }? &
+ element max-tokens { xsd:nonNegativeInteger }? &
+ element transformer-input-ids { xsd:string }? &
+ element transformer-attention-mask { xsd:string }? &
+ element transformer-token-type-ids { xsd:string }? &
+ element transformer-output { xsd:string }? &
+ element normalize { xsd:boolean }? &
+ OnnxModelExecutionParams &
+ EmbedderPoolingStrategy
+
+HuggingFaceTokenizer =
+ attribute type { "hugging-face-tokenizer" } &
+ element model { attribute language { xsd:string }? & ModelReference }+ &
+ element special-tokens { xsd:boolean }? &
+ element max-length { xsd:integer }? &
+ element truncation { xsd:boolean }?
+
+BertBaseEmbedder =
+ attribute type { "bert-embedder" } &
+ element transformer-model { ModelReference } &
+ element tokenizer-vocab { ModelReference } &
+ element max-tokens { xsd:nonNegativeInteger }? &
+ element transformer-input-ids { xsd:string }? &
+ element transformer-attention-mask { xsd:string }? &
+ element transformer-token-type-ids { xsd:string }? &
+ element transformer-output { xsd:string }? &
+ element transformer-start-sequence-token { xsd:integer }? &
+ element transformer-end-sequence-token { xsd:integer }? &
+ OnnxModelExecutionParams &
+ EmbedderPoolingStrategy
+
+OnnxModelExecutionParams =
+ element onnx-execution-mode { "parallel" | "sequential" }? &
+ element onnx-interop-threads { xsd:integer }? &
+ element onnx-intraop-threads { xsd:integer }? &
+ element onnx-gpu-device { xsd:integer }?
+
+EmbedderPoolingStrategy = element pooling-strategy { "cls" | "mean" }? \ No newline at end of file
diff --git a/config-model/src/main/resources/schema/content.rnc b/config-model/src/main/resources/schema/content.rnc
index 6486fdacc18..5833b575a74 100644
--- a/config-model/src/main/resources/schema/content.rnc
+++ b/config-model/src/main/resources/schema/content.rnc
@@ -301,6 +301,9 @@ Tuning = element tuning {
element persearch { xsd:nonNegativeInteger }? &
element summary { xsd:nonNegativeInteger }?
}? &
+ element lidspace {
+ element max-bloat-factor { xsd:double { minInclusive = "0.0" maxInclusive = "1.0" } }?
+ }? &
element flushstrategy {
element native {
element total {
diff --git a/config-model/src/main/resources/schema/deployment.rnc b/config-model/src/main/resources/schema/deployment.rnc
index ede05ad65ef..0f2eed3f72b 100644
--- a/config-model/src/main/resources/schema/deployment.rnc
+++ b/config-model/src/main/resources/schema/deployment.rnc
@@ -8,6 +8,7 @@ start = element deployment {
attribute athenz-domain { xsd:string }? &
attribute athenz-service { xsd:string }? &
attribute cloud-account { xsd:string }? &
+ attribute empty-host-ttl { xsd:string }? &
Step
}
@@ -39,6 +40,7 @@ Instance = element instance {
attribute tags { xsd:string }? &
attribute athenz-service { xsd:string }? &
attribute cloud-account { xsd:string }? &
+ attribute empty-host-ttl { xsd:string }? &
StepExceptInstance
}
@@ -106,11 +108,13 @@ Staging = element staging {
}
Dev = element dev {
- attribute cloud-account { xsd:string }?
+ attribute cloud-account { xsd:string }? &
+ attribute empty-host-ttl { xsd:string }?
}
Perf = element perf {
- attribute cloud-account { xsd:string }?
+ attribute cloud-account { xsd:string }? &
+ attribute empty-host-ttl { xsd:string }?
}
Prod = element prod {
@@ -118,6 +122,7 @@ Prod = element prod {
attribute athenz-service { xsd:string }? &
attribute tester-flavor { xsd:string }? &
attribute cloud-account { xsd:string }? &
+ attribute empty-host-ttl { xsd:string }? &
Region* &
Delay* &
ProdTest* &
@@ -132,6 +137,7 @@ Region = element region {
attribute active { xsd:boolean }? &
attribute athenz-service { xsd:string }? &
attribute cloud-account { xsd:string }? &
+ attribute empty-host-ttl { xsd:string }? &
text
}