aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--config-provisioning/src/main/resources/configdefinitions/config.provisioning.node-repository.def3
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java11
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ContainerImages.java14
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java1
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java1
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTester.java1
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java1
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ContainerImagesTest.java38
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java1
9 files changed, 49 insertions, 22 deletions
diff --git a/config-provisioning/src/main/resources/configdefinitions/config.provisioning.node-repository.def b/config-provisioning/src/main/resources/configdefinitions/config.provisioning.node-repository.def
index b054f434322..6a9c388f8b0 100644
--- a/config-provisioning/src/main/resources/configdefinitions/config.provisioning.node-repository.def
+++ b/config-provisioning/src/main/resources/configdefinitions/config.provisioning.node-repository.def
@@ -7,6 +7,9 @@ containerImage string default="registry.example.com:9999/myorg/vespa"
# Default container image to use for tenant nodes. If this is unset (empty), it defaults to containerImage.
tenantContainerImage string default=""
+# Default container image to use for tenant nodes with GPU resources. If this is unset (empty), starting nodes with GPUs will fail.
+tenantGpuContainerImage string default=""
+
# Whether to cache data read from ZooKeeper in-memory.
useCuratorClientCache bool default=false
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
index fb21b009a30..c490c50c940 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
@@ -7,7 +7,6 @@ import com.yahoo.concurrent.maintenance.JobControl;
import com.yahoo.config.provision.ApplicationTransaction;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.DockerImage;
-import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.NodeFlavors;
import com.yahoo.config.provision.Zone;
import com.yahoo.config.provisioning.NodeRepositoryConfig;
@@ -88,7 +87,8 @@ public class NodeRepository extends AbstractComponent {
zone,
new DnsNameResolver(),
DockerImage.fromString(config.containerImage()),
- Optional.of(config.tenantContainerImage()).filter(s -> !s.isEmpty()).map(DockerImage::fromString),
+ optionalImage(config.tenantContainerImage()),
+ optionalImage(config.tenantGpuContainerImage()),
flagSource,
metricsDb,
orchestrator,
@@ -109,6 +109,7 @@ public class NodeRepository extends AbstractComponent {
NameResolver nameResolver,
DockerImage containerImage,
Optional<DockerImage> tenantContainerImage,
+ Optional<DockerImage> tenantGpuContainerImage,
FlagSource flagSource,
MetricsDb metricsDb,
Orchestrator orchestrator,
@@ -132,7 +133,7 @@ public class NodeRepository extends AbstractComponent {
this.osVersions = new OsVersions(this);
this.infrastructureVersions = new InfrastructureVersions(db);
this.firmwareChecks = new FirmwareChecks(db, clock);
- this.containerImages = new ContainerImages(containerImage, tenantContainerImage);
+ this.containerImages = new ContainerImages(containerImage, tenantContainerImage, tenantGpuContainerImage);
this.archiveUris = new ArchiveUris(db);
this.jobControl = new JobControl(new JobControlFlags(db, flagSource));
this.loadBalancers = new LoadBalancers(db);
@@ -231,4 +232,8 @@ public class NodeRepository extends AbstractComponent {
applications.remove(transaction);
}
+ private static Optional<DockerImage> optionalImage(String image) { // an empty config value means "no image configured"
+ return image.isEmpty() ? Optional.empty() : Optional.ofNullable(DockerImage.fromString(image));
+ }
+
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ContainerImages.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ContainerImages.java
index f1358788c17..8553172cef3 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ContainerImages.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/ContainerImages.java
@@ -12,7 +12,7 @@ import java.util.Optional;
* This class decides the container image to use for a given node. Two sources are considered, in the following order:
*
* 1. Requested image (from node allocation, this is set by either a feature flag or through services.xml)
- * 2. Default image, specified in the node repository config file
+ * 2. Default image for the node type/configuration, specified in the node repository config file.
*
* Independent of source, the registry part of the image is rewritten to match the one set in the node repository config
* file.
@@ -24,10 +24,12 @@ public class ContainerImages {
private final DockerImage defaultImage;
private final Optional<DockerImage> tenantImage;
+ private final Optional<DockerImage> tenantGpuImage;
- public ContainerImages(DockerImage defaultImage, Optional<DockerImage> tenantContainerImage) {
+ public ContainerImages(DockerImage defaultImage, Optional<DockerImage> tenantContainerImage, Optional<DockerImage> tenantGpuImage) {
this.defaultImage = Objects.requireNonNull(defaultImage);
this.tenantImage = Objects.requireNonNull(tenantContainerImage);
+ this.tenantGpuImage = Objects.requireNonNull(tenantGpuImage);
}
/** Returns the container image to use for given node */
@@ -39,7 +41,13 @@ public class ContainerImages {
if (requestedImage.isPresent()) {
image = requestedImage.get();
} else if (nodeType == NodeType.tenant) {
- image = tenantImage.orElse(defaultImage);
+ // GPU nodes must get the GPU image even when a regular tenant image is configured;
+ // checking tenantImage first would make tenantGpuImage unreachable in that setup.
+ if (!node.resources().gpuResources().isZero()) {
+ image = tenantGpuImage.orElseThrow(() -> new IllegalArgumentException(node + " has GPU resources, but there is no GPU container image available"));
+ } else {
+ image = tenantImage.orElse(defaultImage);
+ }
} else {
image = defaultImage;
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java
index 93e33051616..5bd53a2f8af 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockNodeRepository.java
@@ -81,6 +81,7 @@ public class MockNodeRepository extends NodeRepository {
new MockNameResolver().mockAnyLookup(),
DockerImage.fromString("docker-registry.domain.tld:8080/dist/vespa"),
Optional.empty(),
+ Optional.empty(),
new InMemoryFlagSource(),
new MemoryMetricsDb(Clock.fixed(Instant.ofEpochMilli(123), ZoneId.of("Z"))),
new OrchestratorMock(),
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java
index cd73914850d..b964bf871c1 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java
@@ -45,6 +45,7 @@ public class NodeRepositoryTester {
new MockNameResolver().mockAnyLookup(),
DockerImage.fromString("docker-registry.domain.tld:8080/dist/vespa"),
Optional.empty(),
+ Optional.empty(),
new InMemoryFlagSource(),
new MemoryMetricsDb(clock),
new OrchestratorMock(),
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTester.java
index d9eef310c20..606bc55fdd2 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTester.java
@@ -72,6 +72,7 @@ public class CapacityCheckerTester {
new MockNameResolver().mockAnyLookup(),
DockerImage.fromString("docker-registry.domain.tld:8080/dist/vespa"),
Optional.empty(),
+ Optional.empty(),
new InMemoryFlagSource(),
new MemoryMetricsDb(clock),
new OrchestratorMock(),
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java
index 00fff017836..c9421f098e7 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java
@@ -266,6 +266,7 @@ public class SpareCapacityMaintainerTest {
new MockNameResolver().mockAnyLookup(),
DockerImage.fromString("docker-registry.domain.tld:8080/dist/vespa"),
Optional.empty(),
+ Optional.empty(),
new InMemoryFlagSource(),
new MemoryMetricsDb(clock),
new OrchestratorMock(),
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ContainerImagesTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ContainerImagesTest.java
index 217ead40b81..20b299c85bc 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ContainerImagesTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ContainerImagesTest.java
@@ -5,14 +5,11 @@ import com.yahoo.component.Version;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ClusterMembership;
import com.yahoo.config.provision.DockerImage;
-import com.yahoo.config.provision.Flavor;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.node.Allocation;
import com.yahoo.vespa.hosted.provision.node.Generation;
-import com.yahoo.vespa.hosted.provision.node.IP;
-import com.yahoo.vespa.hosted.provision.testutils.MockNodeFlavors;
import org.junit.Test;
import java.util.Optional;
@@ -29,7 +26,8 @@ public class ContainerImagesTest {
public void image_selection() {
DockerImage defaultImage = DockerImage.fromString("registry.example.com/vespa/default");
DockerImage tenantImage = DockerImage.fromString("registry.example.com/vespa/tenant");
- ContainerImages images = new ContainerImages(defaultImage, Optional.of(tenantImage));
+ DockerImage gpuImage = DockerImage.fromString("registry.example.com/vespa/tenant-gpu");
+ ContainerImages images = new ContainerImages(defaultImage, Optional.of(tenantImage), Optional.of(gpuImage));
assertEquals(defaultImage, images.get(node(NodeType.confighost))); // For preload purposes
assertEquals(defaultImage, images.get(node(NodeType.config)));
@@ -45,27 +43,35 @@ public class ContainerImagesTest {
assertEquals(requested, images.get(node(NodeType.tenant, requested)));
// When there is no custom tenant image, the default one is used
- images = new ContainerImages(defaultImage, Optional.empty());
+ images = new ContainerImages(defaultImage, Optional.empty(), Optional.of(gpuImage));
assertEquals(defaultImage, images.get(node(NodeType.host)));
assertEquals(defaultImage, images.get(node(NodeType.tenant)));
+
+ // Choose GPU when node has GPU resources
+ assertEquals(gpuImage, images.get(node(NodeType.tenant, null, true)));
}
private static Node node(NodeType type) {
- return node(type, null);
+ return node(type, null, false);
}
private static Node node(NodeType type, DockerImage requested) {
- Flavor flavor = new MockNodeFlavors().getFlavorOrThrow("default");
- Node.Builder b = Node.create(type + "1", new IP.Config(Set.of(), Set.of()), type + "1.example.com", flavor, type);
- if (requested != null) {
- b.allocation(new Allocation(ApplicationId.defaultId(),
- ClusterMembership.from("container/id1/4/37",
- Version.fromString("1.2.3"),
- Optional.of(requested)),
- NodeResources.unspecified(),
- Generation.initial(),
- false));
+ return node(type, requested, false);
+ }
+
+ private static Node node(NodeType type, DockerImage requested, boolean gpu) {
+ NodeResources resources = new NodeResources(4, 8, 100, 0.3);
+ if (gpu) {
+ resources = resources.with(new NodeResources.GpuResources(1, 16));
}
+ Node.Builder b = Node.reserve(Set.of("::1"), type + "1", "parent1", resources, type);
+ b.allocation(new Allocation(ApplicationId.defaultId(),
+ ClusterMembership.from("container/id1/4/37",
+ Version.fromString("1.2.3"),
+ Optional.ofNullable(requested)),
+ resources,
+ Generation.initial(),
+ false));
return b.build();
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
index 405d9578c95..110569a371a 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java
@@ -113,6 +113,7 @@ public class ProvisioningTester {
nameResolver,
containerImage,
Optional.empty(),
+ Optional.empty(),
flagSource,
new MemoryMetricsDb(clock),
orchestrator,