summaryrefslogtreecommitdiffstats
path: root/configserver
diff options
context:
space:
mode:
authorHarald Musum <musum@verizonmedia.com>2021-08-12 09:28:25 +0200
committerHarald Musum <musum@verizonmedia.com>2021-08-12 09:28:25 +0200
commit4a96669b25874280bf49419ebe58edd6114237f8 (patch)
treea14c32938d72ebe4fe842bea942cf02080414f87 /configserver
parent6b10b52e73df77667eb60df3a7b8ae41d1129793 (diff)
Prepare all applications before activating when bootstrapping config server
When upgrading to a new version when bootstrapping config server: If we fail to prepare an application it might be because there is something wrong with the release. In that case we don't want to activate any application. This change makes sure we prepare all applications successfully before we activate any application..
Diffstat (limited to 'configserver')
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/ConfigServerBootstrap.java165
1 files changed, 126 insertions, 39 deletions
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/ConfigServerBootstrap.java b/configserver/src/main/java/com/yahoo/vespa/config/server/ConfigServerBootstrap.java
index 4c2aa33a886..7ea6f5f8504 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/ConfigServerBootstrap.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/ConfigServerBootstrap.java
@@ -20,10 +20,9 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
-import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
-import java.util.Set;
+import java.util.Optional;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -32,6 +31,7 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.logging.Level;
import java.util.logging.Logger;
+import java.util.stream.Collectors;
import static com.yahoo.vespa.config.server.ConfigServerBootstrap.Mode.BOOTSTRAP_IN_CONSTRUCTOR;
import static com.yahoo.vespa.config.server.ConfigServerBootstrap.Mode.FOR_TESTING_NO_BOOTSTRAP_OF_APPS;
@@ -225,80 +225,167 @@ public class ConfigServerBootstrap extends AbstractComponent implements Runnable
private List<ApplicationId> redeployApplications(List<ApplicationId> applicationIds) throws InterruptedException {
ExecutorService executor = Executors.newFixedThreadPool(configserverConfig.numRedeploymentThreads(),
new DaemonThreadFactory("redeploy-apps-"));
- // Keep track of deployment status per application
- Map<ApplicationId, Future<?>> deployments = new HashMap<>();
log.log(Level.INFO, () -> "Redeploying " + applicationIds.size() + " apps: " + applicationIds);
- applicationIds.forEach(appId -> deployments.put(appId, executor.submit(() -> {
- log.log(Level.INFO, () -> "Starting redeployment of " + appId);
- applicationRepository.deployFromLocalActive(appId, true /* bootstrap */)
- .ifPresent(Deployment::activate);
- log.log(Level.INFO, () -> appId + " redeployed");
- })));
- List<ApplicationId> failedDeployments = checkDeployments(deployments);
+ PreparedApplications preparedApplications = prepare(applicationIds, executor);
+ if (preparedApplications.hasPrepareFailures()) {
+ log.log(Level.INFO, "Failed preparing applications: " + preparedApplications.failed());
+ return preparedApplications.failed();
+ }
+
+ List<ApplicationId> failed = activate(preparedApplications.deploymentInfos());
executor.shutdown();
- executor.awaitTermination(365, TimeUnit.DAYS); // Timeout should never happen
+ if ( ! executor.awaitTermination(1, TimeUnit.HOURS)) {
+ log.log(Level.WARNING, "Awaiting termination of executor failed");
+ executor.shutdownNow();
+ }
- return failedDeployments;
+ return failed;
}
- private enum DeploymentStatus { inProgress, done, failed};
+ private PreparedApplications prepare(List<ApplicationId> applicationIds, ExecutorService executor) {
+ Map<ApplicationId, Future<Optional<Deployment>>> prepared = new HashMap<>();
+ applicationIds.forEach(appId -> prepared.put(appId, executor.submit(() -> {
+ log.log(Level.INFO, () -> "Preparing " + appId);
+ Optional<Deployment> deployment = applicationRepository.deployFromLocalActive(appId, true /* bootstrap */);
+ if (deployment.isPresent()) {
+ deployment.get().prepare();
+ log.log(Level.INFO, () -> appId + " prepared");
+ } else {
+ log.log(Level.INFO, () -> "No deployment present for appId, not prepared");
+ }
+ return deployment;
+ })));
+
+ return waitForPrepare(prepared);
+ }
+
+ private List<ApplicationId> activate(List<DeploymentInfo> deployments) {
+ List<ApplicationId> failedActivations = new ArrayList<>();
+ deployments.forEach(d -> {
+ ApplicationId applicationId = d.applicationId();
+ log.log(Level.INFO, () -> "Activating " + applicationId);
+ try {
+ d.deployment().ifPresent(Deployment::activate);
+ } catch (Exception e) {
+ log.log(Level.INFO, () -> "Failed activating " + applicationId + ":" + e.getMessage());
+ failedActivations.add(applicationId);
+ }
+ log.log(Level.INFO, () -> applicationId + " activated");
+ });
+ return failedActivations;
+ }
- private List<ApplicationId> checkDeployments(Map<ApplicationId, Future<?>> deployments) {
+ private PreparedApplications waitForPrepare(Map<ApplicationId, Future<Optional<Deployment>>> deployments) {
int applicationCount = deployments.size();
- Set<ApplicationId> failedDeployments = new LinkedHashSet<>();
- Set<ApplicationId> finishedDeployments = new LinkedHashSet<>();
+ PreparedApplications applications = new PreparedApplications();
Instant lastLogged = Instant.EPOCH;
do {
deployments.forEach((applicationId, future) -> {
- if (finishedDeployments.contains(applicationId) || failedDeployments.contains(applicationId)) return;
+ if (applications.prepareFinished(applicationId)) return;
- DeploymentStatus status = getDeploymentStatus(applicationId, future);
- switch (status) {
- case done:
- finishedDeployments.add(applicationId);
+ DeploymentInfo status = getDeploymentStatus(applicationId, future);
+ switch (status.status()) {
+ case success:
+ case failed:
+ applications.add(status);
break;
case inProgress:
break;
- case failed:
- failedDeployments.add(applicationId);
- break;
default:
throw new IllegalArgumentException("Unknown deployment status " + status);
}
});
if ( ! Duration.between(lastLogged, Instant.now()).minus(Duration.ofSeconds(10)).isNegative()) {
- logProgress(applicationCount, failedDeployments, finishedDeployments);
+ logProgress(applicationCount, applications);
lastLogged = Instant.now();
}
- } while (failedDeployments.size() + finishedDeployments.size() < applicationCount);
+ } while (applications.failed().size() + applications.successCount() < applicationCount);
- logProgress(applicationCount, failedDeployments, finishedDeployments);
- return new ArrayList<>(failedDeployments);
+ logProgress(applicationCount, applications);
+ return applications;
}
- private void logProgress(int applicationCount, Set<ApplicationId> failedDeployments, Set<ApplicationId> finishedDeployments) {
- log.log(Level.INFO, () -> finishedDeployments.size() + " of " + applicationCount + " apps redeployed " +
- "(" + failedDeployments.size() + " failed)");
+ private void logProgress(int applicationCount, PreparedApplications preparedApplications) {
+ log.log(Level.INFO, () -> preparedApplications.successCount() + " of " + applicationCount + " apps prepared " +
+ "(" + preparedApplications.failCount() + " failed)");
}
- private DeploymentStatus getDeploymentStatus(ApplicationId applicationId, Future<?> future) {
+ private DeploymentInfo getDeploymentStatus(ApplicationId applicationId, Future<Optional<Deployment>> future) {
try {
- future.get(1, TimeUnit.MILLISECONDS);
- return DeploymentStatus.done;
+ Optional<Deployment> deployment = future.get(1, TimeUnit.MILLISECONDS);
+ return new DeploymentInfo(applicationId, DeploymentInfo.Status.success, deployment);
} catch (ExecutionException | InterruptedException e) {
if (e.getCause() instanceof TransientException) {
- log.log(Level.INFO, "Redeploying " + applicationId +
+ log.log(Level.INFO, "Preparing" + " " + applicationId +
" failed with transient error, will retry after bootstrap: " + Exceptions.toMessageString(e));
} else {
- log.log(Level.WARNING, "Redeploying " + applicationId + " failed, will retry", e);
+ log.log(Level.WARNING, "Preparing" + " " + applicationId + " failed, will retry", e);
}
- return DeploymentStatus.failed;
+ return new DeploymentInfo(applicationId, DeploymentInfo.Status.failed);
} catch (TimeoutException e) {
- return DeploymentStatus.inProgress;
+ return new DeploymentInfo(applicationId, DeploymentInfo.Status.inProgress);
+ }
+ }
+
+ private static class DeploymentInfo {
+
+ public enum Status { inProgress, success, failed }
+
+ private final ApplicationId applicationId;
+ private final Status status;
+ private final Optional<Deployment> deployment;
+
+ public DeploymentInfo(ApplicationId applicationId, Status status) {
+ this(applicationId, status, Optional.empty());
+ }
+
+ public DeploymentInfo(ApplicationId applicationId, Status status, Optional<Deployment> deployment) {
+ this.applicationId = applicationId;
+ this.status = status;
+ this.deployment = deployment;
+ }
+
+ public ApplicationId applicationId() { return applicationId; }
+
+ public Status status() { return status; }
+
+ public Optional<Deployment> deployment() { return deployment; }
+
+ }
+
+ private static class PreparedApplications {
+ private final List<DeploymentInfo> deploymentInfos = new ArrayList<>();
+
+ public void add(DeploymentInfo deploymentInfo) {
+ this.deploymentInfos.add(deploymentInfo);
}
+
+ List<ApplicationId> success() { return withStatus(DeploymentInfo.Status.success); }
+
+ List<ApplicationId> failed() { return withStatus(DeploymentInfo.Status.failed); }
+
+ List<ApplicationId> withStatus(DeploymentInfo.Status status) {
+ return deploymentInfos.stream()
+ .filter(deploymentInfo -> deploymentInfo.status() == status)
+ .map(DeploymentInfo::applicationId)
+ .collect(Collectors.toList());
+ }
+
+ int successCount() { return success().size(); }
+
+ int failCount() { return failed().size(); }
+
+ boolean hasPrepareFailures() { return failCount() > 0; }
+
+ List<DeploymentInfo> deploymentInfos() { return deploymentInfos; }
+
+ boolean prepareFinished(ApplicationId applicationId) {
+ return failed().contains(applicationId) || success().contains(applicationId);
+ }
+
}
}