diff options
author | Jon Marius Venstad <jonmv@users.noreply.github.com> | 2021-05-11 10:10:46 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-05-11 10:10:46 +0200 |
commit | 17d23bbc0506589ffa6058d08fea16a06e76bede (patch) | |
tree | 1c4a19710ec9e104a7306c695ee7e69cfa949f4f | |
parent | 1ccefefdc6ce348398692f17bc8539c24e3cb7fb (diff) | |
parent | e18a3708ef1c03f1910e5ee228064b1810da4502 (diff) |
Merge pull request #17814 from vespa-engine/hmusum/cleanup-9
Log more info when bootstrapping config server
-rw-r--r-- | configserver/src/main/java/com/yahoo/vespa/config/server/ConfigServerBootstrap.java | 84 | ||||
-rw-r--r-- | configserver/src/test/java/com/yahoo/vespa/config/server/ConfigServerBootstrapTest.java | 19 |
2 files changed, 63 insertions, 40 deletions
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/ConfigServerBootstrap.java b/configserver/src/main/java/com/yahoo/vespa/config/server/ConfigServerBootstrap.java index 62f3e40cb50..772c2bf5125 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/ConfigServerBootstrap.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/ConfigServerBootstrap.java @@ -16,22 +16,25 @@ import com.yahoo.yolean.Exceptions; import java.time.Duration; import java.time.Instant; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; -import java.util.Optional; +import java.util.Set; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.logging.Level; import java.util.logging.Logger; -import java.util.stream.Collectors; import static com.yahoo.vespa.config.server.ConfigServerBootstrap.Mode.BOOTSTRAP_IN_CONSTRUCTOR; +import static com.yahoo.vespa.config.server.ConfigServerBootstrap.Mode.FOR_TESTING_NO_BOOTSTRAP_OF_APPS; import static com.yahoo.vespa.config.server.ConfigServerBootstrap.RedeployingApplicationsFails.CONTINUE; import static com.yahoo.vespa.config.server.ConfigServerBootstrap.RedeployingApplicationsFails.EXIT_JVM; @@ -51,8 +54,7 @@ public class ConfigServerBootstrap extends AbstractComponent implements Runnable private static final Logger log = Logger.getLogger(ConfigServerBootstrap.class.getName()); - // INITIALIZE_ONLY is for testing only - enum Mode { BOOTSTRAP_IN_CONSTRUCTOR, BOOTSTRAP_IN_SEPARATE_THREAD, INITIALIZE_ONLY } + enum Mode { BOOTSTRAP_IN_CONSTRUCTOR, FOR_TESTING_NO_BOOTSTRAP_OF_APPS} enum RedeployingApplicationsFails { EXIT_JVM, CONTINUE } enum VipStatusMode { VIP_STATUS_FILE, VIP_STATUS_PROGRAMMATICALLY } @@ -66,7 +68,6 @@ public class ConfigServerBootstrap extends AbstractComponent implements Runnable private final Duration sleepTimeWhenRedeployingFails; private final RedeployingApplicationsFails exitIfRedeployingApplicationsFails; private final ExecutorService rpcServerExecutor; - private final Optional<ExecutorService> bootstrapExecutor; @Inject public ConfigServerBootstrap(ApplicationRepository applicationRepository, RpcServer server, @@ -79,8 +80,9 @@ public class ConfigServerBootstrap extends AbstractComponent implements Runnable // For testing only ConfigServerBootstrap(ApplicationRepository applicationRepository, RpcServer server, VersionState versionState, - StateMonitor stateMonitor, VipStatus vipStatus, Mode mode, VipStatusMode vipStatusMode) { - this(applicationRepository, server, versionState, stateMonitor, vipStatus, mode, CONTINUE, vipStatusMode); + StateMonitor stateMonitor, VipStatus vipStatus, VipStatusMode vipStatusMode) { + this(applicationRepository, server, versionState, stateMonitor, vipStatus, + FOR_TESTING_NO_BOOTSTRAP_OF_APPS, CONTINUE, vipStatusMode); } private ConfigServerBootstrap(ApplicationRepository applicationRepository, RpcServer server, @@ -102,16 +104,10 @@ public class ConfigServerBootstrap extends AbstractComponent implements Runnable initializing(vipStatusMode); switch (mode) { - case BOOTSTRAP_IN_SEPARATE_THREAD: - bootstrapExecutor = Optional.of(Executors.newSingleThreadExecutor(new DaemonThreadFactory("config server bootstrap"))); - bootstrapExecutor.get().execute(this); - break; case BOOTSTRAP_IN_CONSTRUCTOR: - bootstrapExecutor = Optional.empty(); start(); break; - case INITIALIZE_ONLY: - bootstrapExecutor = Optional.empty(); + case FOR_TESTING_NO_BOOTSTRAP_OF_APPS: break; default: throw new IllegalArgumentException("Unknown bootstrap mode " + mode + ", legal values: " + Arrays.toString(Mode.values())); @@ -125,7 +121,6 @@ public class ConfigServerBootstrap extends AbstractComponent implements Runnable server.stop(); log.log(Level.FINE, "RPC server stopped"); rpcServerExecutor.shutdown(); - bootstrapExecutor.ifPresent(ExecutorService::shutdownNow); } @Override @@ -230,9 +225,9 @@ public class ConfigServerBootstrap extends AbstractComponent implements Runnable private List<ApplicationId> redeployApplications(List<ApplicationId> applicationIds) throws InterruptedException { ExecutorService executor = Executors.newFixedThreadPool(configserverConfig.numRedeploymentThreads(), new DaemonThreadFactory("redeploy-apps-")); - // Keep track of deployment per application + // Keep track of deployment status per application Map<ApplicationId, Future<?>> deployments = new HashMap<>(); - log.log(Level.INFO, () -> "Redeploying " + applicationIds); + log.log(Level.INFO, () -> "Redeploying " + applicationIds.size() + " apps: " + applicationIds); applicationIds.forEach(appId -> deployments.put(appId, executor.submit(() -> { log.log(Level.INFO, () -> "Starting redeployment of " + appId); applicationRepository.deployFromLocalActive(appId, true /* bootstrap */) @@ -240,32 +235,65 @@ public class ConfigServerBootstrap extends AbstractComponent implements Runnable log.log(Level.INFO, () -> appId + " redeployed"); }))); - List<ApplicationId> failedDeployments = - deployments.entrySet().stream() - .map(entry -> checkDeployment(entry.getKey(), entry.getValue())) - .filter(Optional::isPresent) - .map(Optional::get) - .collect(Collectors.toList()); + List<ApplicationId> failedDeployments = checkDeployments(deployments); executor.shutdown(); executor.awaitTermination(365, TimeUnit.DAYS); // Timeout should never happen + return failedDeployments; } - // Returns an application id if deployment failed - private Optional<ApplicationId> checkDeployment(ApplicationId applicationId, Future<?> future) { + private enum DeploymentStatus { inProgress, done, failed}; + + private List<ApplicationId> checkDeployments(Map<ApplicationId, Future<?>> deployments) { + int applicationCount = deployments.size(); + Set<ApplicationId> failedDeployments = new LinkedHashSet<>(); + Set<ApplicationId> finishedDeployments = new LinkedHashSet<>(); + Instant lastLogged = Instant.EPOCH; + + do { + deployments.forEach((applicationId, future) -> { + if (finishedDeployments.contains(applicationId) || failedDeployments.contains(applicationId)) return; + + DeploymentStatus status = getDeploymentStatus(applicationId, future); + switch (status) { + case done: + finishedDeployments.add(applicationId); + break; + case inProgress: + break; + case failed: + failedDeployments.add(applicationId); + break; + default: + throw new IllegalArgumentException("Unknown deployment status " + status); + } + }); + if ( ! Duration.between(lastLogged, Instant.now()).minus(Duration.ofSeconds(10)).isNegative()) { + log.log(Level.INFO, () -> finishedDeployments.size() + " of " + applicationCount + " apps redeployed " + + "(" + failedDeployments.size() + " failed)"); + lastLogged = Instant.now(); + } + } while (failedDeployments.size() + finishedDeployments.size() < applicationCount); + + return new ArrayList<>(failedDeployments); + } + + private DeploymentStatus getDeploymentStatus(ApplicationId applicationId, Future<?> future) { try { - future.get(); + future.get(1, TimeUnit.MILLISECONDS); + return DeploymentStatus.done; } catch (ExecutionException | InterruptedException e) { if (e.getCause() instanceof TransientException) { log.log(Level.INFO, "Redeploying " + applicationId + " failed with transient error, will retry after bootstrap: " + Exceptions.toMessageString(e)); } else { log.log(Level.WARNING, "Redeploying " + applicationId + " failed, will retry", e); - return Optional.of(applicationId); } + return DeploymentStatus.failed; + } catch (TimeoutException e) { + return DeploymentStatus.inProgress; } - return Optional.empty(); } } diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/ConfigServerBootstrapTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/ConfigServerBootstrapTest.java index 9a08375887c..0df80c4d5c0 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/ConfigServerBootstrapTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/ConfigServerBootstrapTest.java @@ -38,8 +38,6 @@ import java.util.List; import java.util.Optional; import java.util.function.BooleanSupplier; -import static com.yahoo.vespa.config.server.ConfigServerBootstrap.Mode.BOOTSTRAP_IN_SEPARATE_THREAD; -import static com.yahoo.vespa.config.server.ConfigServerBootstrap.Mode.INITIALIZE_ONLY; import static com.yahoo.vespa.config.server.ConfigServerBootstrap.VipStatusMode.VIP_STATUS_FILE; import static com.yahoo.vespa.config.server.ConfigServerBootstrap.VipStatusMode.VIP_STATUS_PROGRAMMATICALLY; import static com.yahoo.vespa.config.server.deploy.DeployTester.createHostedModelFactory; @@ -74,9 +72,8 @@ public class ConfigServerBootstrapTest { provisioner.allocations().values().iterator().next().remove(0); StateMonitor stateMonitor = StateMonitor.createForTesting(); VipStatus vipStatus = createVipStatus(stateMonitor); - ConfigServerBootstrap bootstrap = new ConfigServerBootstrap(tester.applicationRepository(), rpcServer, - versionState, stateMonitor, - vipStatus, INITIALIZE_ONLY, VIP_STATUS_PROGRAMMATICALLY); + ConfigServerBootstrap bootstrap = new ConfigServerBootstrap(tester.applicationRepository(), rpcServer, versionState, + stateMonitor, vipStatus, VIP_STATUS_PROGRAMMATICALLY); assertFalse(vipStatus.isInRotation()); bootstrap.start(); waitUntil(rpcServer::isRunning, "failed waiting for Rpc server running"); @@ -105,9 +102,8 @@ public class ConfigServerBootstrapTest { RpcServer rpcServer = createRpcServer(configserverConfig); StateMonitor stateMonitor = StateMonitor.createForTesting(); VipStatus vipStatus = createVipStatus(stateMonitor); - ConfigServerBootstrap bootstrap = new ConfigServerBootstrap(tester.applicationRepository(), rpcServer, - versionState, stateMonitor, - vipStatus, INITIALIZE_ONLY, VIP_STATUS_FILE); + ConfigServerBootstrap bootstrap = new ConfigServerBootstrap(tester.applicationRepository(), rpcServer, versionState, + stateMonitor, vipStatus, VIP_STATUS_FILE); assertTrue(vipStatus.isInRotation()); // default is in rotation when using status file bootstrap.start(); @@ -137,8 +133,7 @@ public class ConfigServerBootstrapTest { StateMonitor stateMonitor = StateMonitor.createForTesting(); VipStatus vipStatus = createVipStatus(stateMonitor); ConfigServerBootstrap bootstrap = new ConfigServerBootstrap(tester.applicationRepository(), rpcServer, versionState, - stateMonitor, - vipStatus, INITIALIZE_ONLY, VIP_STATUS_PROGRAMMATICALLY); + stateMonitor, vipStatus, VIP_STATUS_PROGRAMMATICALLY); assertFalse(vipStatus.isInRotation()); // Call method directly, to be sure that it is finished redeploying all applications and we can check status bootstrap.start(); @@ -181,8 +176,8 @@ public class ConfigServerBootstrapTest { StateMonitor stateMonitor = StateMonitor.createForTesting(); VipStatus vipStatus = createVipStatus(stateMonitor); ConfigServerBootstrap bootstrap = new ConfigServerBootstrap(tester.applicationRepository(), rpcServer, versionState, - stateMonitor, vipStatus, - BOOTSTRAP_IN_SEPARATE_THREAD, VIP_STATUS_PROGRAMMATICALLY); + stateMonitor, vipStatus, VIP_STATUS_PROGRAMMATICALLY); + bootstrap.start(); waitUntil(rpcServer::isRunning, "failed waiting for Rpc server running"); waitUntil(() -> bootstrap.status() == StateMonitor.Status.up, "failed waiting for status 'up'"); waitUntil(vipStatus::isInRotation, "failed waiting for server to be in rotation"); |