aboutsummaryrefslogtreecommitdiffstats
path: root/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
diff options
context:
space:
mode:
authorHarald Musum <musum@yahooinc.com>2023-09-28 11:58:40 +0200
committerHarald Musum <musum@yahooinc.com>2023-09-28 11:58:40 +0200
commit360d24d05499f5cc7b928a5c1bc7dcf28bc83618 (patch)
treed45ba4311763531bb1681c0d4cefbb6a3ad53594 /controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
parent49999d72e441418f31389918bb46866d3c2313fc (diff)
Don't check convergence for a specific version in manually deployed zones
In manually deployed zones we allow failure when building config models (as long as one model builds successfully). So we cannot check for convergence for a specific version, as we don't know which versions have built successfully.
Diffstat (limited to 'controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java')
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java43
1 files changed, 24 insertions, 19 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
index 919facee0c1..11c47d8f481 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
@@ -23,6 +23,7 @@ import com.yahoo.vespa.hosted.controller.Instance;
import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId;
import com.yahoo.vespa.hosted.controller.api.integration.LogEntry;
import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCertificateException;
+import com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServer;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServerException;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.DeploymentResult;
import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node;
@@ -62,7 +63,6 @@ import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Consumer;
-import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -362,21 +362,24 @@ public class InternalStepRunner implements StepRunner {
Version platform = setTheStage ? versions.sourcePlatform().orElse(versions.targetPlatform()) : versions.targetPlatform();
Run run = controller.jobController().run(id);
- Optional<ServiceConvergence> services = controller.serviceRegistry().configServer().serviceConvergence(new DeploymentId(id.application(), id.type().zone()),
- Optional.of(platform));
+ // In manually deployed zones it is allowed for some model versions not being built (e.g due to incompatibility)
+ // but deployment still succeeding, so we cannot use version when checking for config convergence
+ Optional<Version> platformVersion = id.type().environment().isManuallyDeployed() ? Optional.empty() : Optional.of(platform);
+ Optional<ServiceConvergence> services = configServer().serviceConvergence(new DeploymentId(id.application(), id.type().zone()),
+ platformVersion);
if (services.isEmpty()) {
logger.log("Config status not currently available -- will retry.");
return Optional.empty();
}
- List<Node> nodes = controller.serviceRegistry().configServer().nodeRepository().list(id.type().zone(),
- NodeFilter.all()
- .applications(id.application())
- .states(active));
+ List<Node> nodes = configServer().nodeRepository().list(id.type().zone(),
+ NodeFilter.all()
+ .applications(id.application())
+ .states(active));
Set<HostName> parentHostnames = nodes.stream().map(node -> node.parentHostname().get()).collect(toSet());
- List<Node> parents = controller.serviceRegistry().configServer().nodeRepository().list(id.type().zone(),
- NodeFilter.all()
- .hostnames(parentHostnames));
+ List<Node> parents = configServer().nodeRepository().list(id.type().zone(),
+ NodeFilter.all()
+ .hostnames(parentHostnames));
boolean firstTick = run.convergenceSummary().isEmpty();
NodeList nodeList = NodeList.of(nodes, parents, services.get());
ConvergenceSummary summary = nodeList.summary();
@@ -496,8 +499,8 @@ public class InternalStepRunner implements StepRunner {
ZoneId zone = id.type().zone();
ApplicationId testerId = id.tester().id();
- Optional<ServiceConvergence> services = controller.serviceRegistry().configServer().serviceConvergence(new DeploymentId(testerId, zone),
- Optional.of(platform));
+ Optional<ServiceConvergence> services = configServer().serviceConvergence(new DeploymentId(testerId, zone),
+ Optional.of(platform));
if (services.isEmpty()) {
if (run.stepInfo(installTester).get().startTime().get().isBefore(controller.clock().instant().minus(Duration.ofMinutes(30)))) {
logger.log(WARNING, "Config status not available after 30 minutes; giving up!");
@@ -508,14 +511,14 @@ public class InternalStepRunner implements StepRunner {
return Optional.empty();
}
}
- List<Node> nodes = controller.serviceRegistry().configServer().nodeRepository().list(zone,
- NodeFilter.all()
- .applications(testerId)
- .states(active, reserved));
+ List<Node> nodes = configServer().nodeRepository().list(zone,
+ NodeFilter.all()
+ .applications(testerId)
+ .states(active, reserved));
Set<HostName> parentHostnames = nodes.stream().map(node -> node.parentHostname().get()).collect(toSet());
- List<Node> parents = controller.serviceRegistry().configServer().nodeRepository().list(zone,
- NodeFilter.all()
- .hostnames(parentHostnames));
+ List<Node> parents = configServer().nodeRepository().list(zone,
+ NodeFilter.all()
+ .hostnames(parentHostnames));
NodeList nodeList = NodeList.of(nodes, parents, services.get());
logger.log(nodeList.asList().stream()
.flatMap(node -> nodeDetails(node, false))
@@ -534,6 +537,8 @@ public class InternalStepRunner implements StepRunner {
return Optional.empty();
}
+ private ConfigServer configServer() { return controller.serviceRegistry().configServer(); }
+
/** Returns true iff all containers in the tester deployment give 100 consecutive 200 OK responses on /status.html. */
private boolean testerContainersAreUp(ApplicationId id, ZoneId zoneId, DualLogger logger) {
DeploymentId deploymentId = new DeploymentId(id, zoneId);