summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJon Marius Venstad <jonmv@users.noreply.github.com>2021-05-11 10:10:46 +0200
committerGitHub <noreply@github.com>2021-05-11 10:10:46 +0200
commit17d23bbc0506589ffa6058d08fea16a06e76bede (patch)
tree1c4a19710ec9e104a7306c695ee7e69cfa949f4f
parent1ccefefdc6ce348398692f17bc8539c24e3cb7fb (diff)
parente18a3708ef1c03f1910e5ee228064b1810da4502 (diff)
Merge pull request #17814 from vespa-engine/hmusum/cleanup-9
Log more info when bootstrapping config server
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/ConfigServerBootstrap.java84
-rw-r--r--configserver/src/test/java/com/yahoo/vespa/config/server/ConfigServerBootstrapTest.java19
2 files changed, 63 insertions, 40 deletions
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/ConfigServerBootstrap.java b/configserver/src/main/java/com/yahoo/vespa/config/server/ConfigServerBootstrap.java
index 62f3e40cb50..772c2bf5125 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/ConfigServerBootstrap.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/ConfigServerBootstrap.java
@@ -16,22 +16,25 @@ import com.yahoo.yolean.Exceptions;
import java.time.Duration;
import java.time.Instant;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
+import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
-import java.util.Optional;
+import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
import java.util.logging.Level;
import java.util.logging.Logger;
-import java.util.stream.Collectors;
import static com.yahoo.vespa.config.server.ConfigServerBootstrap.Mode.BOOTSTRAP_IN_CONSTRUCTOR;
+import static com.yahoo.vespa.config.server.ConfigServerBootstrap.Mode.FOR_TESTING_NO_BOOTSTRAP_OF_APPS;
import static com.yahoo.vespa.config.server.ConfigServerBootstrap.RedeployingApplicationsFails.CONTINUE;
import static com.yahoo.vespa.config.server.ConfigServerBootstrap.RedeployingApplicationsFails.EXIT_JVM;
@@ -51,8 +54,7 @@ public class ConfigServerBootstrap extends AbstractComponent implements Runnable
private static final Logger log = Logger.getLogger(ConfigServerBootstrap.class.getName());
- // INITIALIZE_ONLY is for testing only
- enum Mode { BOOTSTRAP_IN_CONSTRUCTOR, BOOTSTRAP_IN_SEPARATE_THREAD, INITIALIZE_ONLY }
+ enum Mode { BOOTSTRAP_IN_CONSTRUCTOR, FOR_TESTING_NO_BOOTSTRAP_OF_APPS}
enum RedeployingApplicationsFails { EXIT_JVM, CONTINUE }
enum VipStatusMode { VIP_STATUS_FILE, VIP_STATUS_PROGRAMMATICALLY }
@@ -66,7 +68,6 @@ public class ConfigServerBootstrap extends AbstractComponent implements Runnable
private final Duration sleepTimeWhenRedeployingFails;
private final RedeployingApplicationsFails exitIfRedeployingApplicationsFails;
private final ExecutorService rpcServerExecutor;
- private final Optional<ExecutorService> bootstrapExecutor;
@Inject
public ConfigServerBootstrap(ApplicationRepository applicationRepository, RpcServer server,
@@ -79,8 +80,9 @@ public class ConfigServerBootstrap extends AbstractComponent implements Runnable
// For testing only
ConfigServerBootstrap(ApplicationRepository applicationRepository, RpcServer server, VersionState versionState,
- StateMonitor stateMonitor, VipStatus vipStatus, Mode mode, VipStatusMode vipStatusMode) {
- this(applicationRepository, server, versionState, stateMonitor, vipStatus, mode, CONTINUE, vipStatusMode);
+ StateMonitor stateMonitor, VipStatus vipStatus, VipStatusMode vipStatusMode) {
+ this(applicationRepository, server, versionState, stateMonitor, vipStatus,
+ FOR_TESTING_NO_BOOTSTRAP_OF_APPS, CONTINUE, vipStatusMode);
}
private ConfigServerBootstrap(ApplicationRepository applicationRepository, RpcServer server,
@@ -102,16 +104,10 @@ public class ConfigServerBootstrap extends AbstractComponent implements Runnable
initializing(vipStatusMode);
switch (mode) {
- case BOOTSTRAP_IN_SEPARATE_THREAD:
- bootstrapExecutor = Optional.of(Executors.newSingleThreadExecutor(new DaemonThreadFactory("config server bootstrap")));
- bootstrapExecutor.get().execute(this);
- break;
case BOOTSTRAP_IN_CONSTRUCTOR:
- bootstrapExecutor = Optional.empty();
start();
break;
- case INITIALIZE_ONLY:
- bootstrapExecutor = Optional.empty();
+ case FOR_TESTING_NO_BOOTSTRAP_OF_APPS:
break;
default:
throw new IllegalArgumentException("Unknown bootstrap mode " + mode + ", legal values: " + Arrays.toString(Mode.values()));
@@ -125,7 +121,6 @@ public class ConfigServerBootstrap extends AbstractComponent implements Runnable
server.stop();
log.log(Level.FINE, "RPC server stopped");
rpcServerExecutor.shutdown();
- bootstrapExecutor.ifPresent(ExecutorService::shutdownNow);
}
@Override
@@ -230,9 +225,9 @@ public class ConfigServerBootstrap extends AbstractComponent implements Runnable
private List<ApplicationId> redeployApplications(List<ApplicationId> applicationIds) throws InterruptedException {
ExecutorService executor = Executors.newFixedThreadPool(configserverConfig.numRedeploymentThreads(),
new DaemonThreadFactory("redeploy-apps-"));
- // Keep track of deployment per application
+ // Keep track of deployment status per application
Map<ApplicationId, Future<?>> deployments = new HashMap<>();
- log.log(Level.INFO, () -> "Redeploying " + applicationIds);
+ log.log(Level.INFO, () -> "Redeploying " + applicationIds.size() + " apps: " + applicationIds);
applicationIds.forEach(appId -> deployments.put(appId, executor.submit(() -> {
log.log(Level.INFO, () -> "Starting redeployment of " + appId);
applicationRepository.deployFromLocalActive(appId, true /* bootstrap */)
@@ -240,32 +235,65 @@ public class ConfigServerBootstrap extends AbstractComponent implements Runnable
log.log(Level.INFO, () -> appId + " redeployed");
})));
- List<ApplicationId> failedDeployments =
- deployments.entrySet().stream()
- .map(entry -> checkDeployment(entry.getKey(), entry.getValue()))
- .filter(Optional::isPresent)
- .map(Optional::get)
- .collect(Collectors.toList());
+ List<ApplicationId> failedDeployments = checkDeployments(deployments);
executor.shutdown();
executor.awaitTermination(365, TimeUnit.DAYS); // Timeout should never happen
+
return failedDeployments;
}
- // Returns an application id if deployment failed
- private Optional<ApplicationId> checkDeployment(ApplicationId applicationId, Future<?> future) {
+ private enum DeploymentStatus { inProgress, done, failed};
+
+ private List<ApplicationId> checkDeployments(Map<ApplicationId, Future<?>> deployments) {
+ int applicationCount = deployments.size();
+ Set<ApplicationId> failedDeployments = new LinkedHashSet<>();
+ Set<ApplicationId> finishedDeployments = new LinkedHashSet<>();
+ Instant lastLogged = Instant.EPOCH;
+
+ do {
+ deployments.forEach((applicationId, future) -> {
+ if (finishedDeployments.contains(applicationId) || failedDeployments.contains(applicationId)) return;
+
+ DeploymentStatus status = getDeploymentStatus(applicationId, future);
+ switch (status) {
+ case done:
+ finishedDeployments.add(applicationId);
+ break;
+ case inProgress:
+ break;
+ case failed:
+ failedDeployments.add(applicationId);
+ break;
+ default:
+ throw new IllegalArgumentException("Unknown deployment status " + status);
+ }
+ });
+ if ( ! Duration.between(lastLogged, Instant.now()).minus(Duration.ofSeconds(10)).isNegative()) {
+ log.log(Level.INFO, () -> finishedDeployments.size() + " of " + applicationCount + " apps redeployed " +
+ "(" + failedDeployments.size() + " failed)");
+ lastLogged = Instant.now();
+ }
+ } while (failedDeployments.size() + finishedDeployments.size() < applicationCount);
+
+ return new ArrayList<>(failedDeployments);
+ }
+
+ private DeploymentStatus getDeploymentStatus(ApplicationId applicationId, Future<?> future) {
try {
- future.get();
+ future.get(1, TimeUnit.MILLISECONDS);
+ return DeploymentStatus.done;
} catch (ExecutionException | InterruptedException e) {
if (e.getCause() instanceof TransientException) {
log.log(Level.INFO, "Redeploying " + applicationId +
" failed with transient error, will retry after bootstrap: " + Exceptions.toMessageString(e));
} else {
log.log(Level.WARNING, "Redeploying " + applicationId + " failed, will retry", e);
- return Optional.of(applicationId);
}
+ return DeploymentStatus.failed;
+ } catch (TimeoutException e) {
+ return DeploymentStatus.inProgress;
}
- return Optional.empty();
}
}
diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/ConfigServerBootstrapTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/ConfigServerBootstrapTest.java
index 9a08375887c..0df80c4d5c0 100644
--- a/configserver/src/test/java/com/yahoo/vespa/config/server/ConfigServerBootstrapTest.java
+++ b/configserver/src/test/java/com/yahoo/vespa/config/server/ConfigServerBootstrapTest.java
@@ -38,8 +38,6 @@ import java.util.List;
import java.util.Optional;
import java.util.function.BooleanSupplier;
-import static com.yahoo.vespa.config.server.ConfigServerBootstrap.Mode.BOOTSTRAP_IN_SEPARATE_THREAD;
-import static com.yahoo.vespa.config.server.ConfigServerBootstrap.Mode.INITIALIZE_ONLY;
import static com.yahoo.vespa.config.server.ConfigServerBootstrap.VipStatusMode.VIP_STATUS_FILE;
import static com.yahoo.vespa.config.server.ConfigServerBootstrap.VipStatusMode.VIP_STATUS_PROGRAMMATICALLY;
import static com.yahoo.vespa.config.server.deploy.DeployTester.createHostedModelFactory;
@@ -74,9 +72,8 @@ public class ConfigServerBootstrapTest {
provisioner.allocations().values().iterator().next().remove(0);
StateMonitor stateMonitor = StateMonitor.createForTesting();
VipStatus vipStatus = createVipStatus(stateMonitor);
- ConfigServerBootstrap bootstrap = new ConfigServerBootstrap(tester.applicationRepository(), rpcServer,
- versionState, stateMonitor,
- vipStatus, INITIALIZE_ONLY, VIP_STATUS_PROGRAMMATICALLY);
+ ConfigServerBootstrap bootstrap = new ConfigServerBootstrap(tester.applicationRepository(), rpcServer, versionState,
+ stateMonitor, vipStatus, VIP_STATUS_PROGRAMMATICALLY);
assertFalse(vipStatus.isInRotation());
bootstrap.start();
waitUntil(rpcServer::isRunning, "failed waiting for Rpc server running");
@@ -105,9 +102,8 @@ public class ConfigServerBootstrapTest {
RpcServer rpcServer = createRpcServer(configserverConfig);
StateMonitor stateMonitor = StateMonitor.createForTesting();
VipStatus vipStatus = createVipStatus(stateMonitor);
- ConfigServerBootstrap bootstrap = new ConfigServerBootstrap(tester.applicationRepository(), rpcServer,
- versionState, stateMonitor,
- vipStatus, INITIALIZE_ONLY, VIP_STATUS_FILE);
+ ConfigServerBootstrap bootstrap = new ConfigServerBootstrap(tester.applicationRepository(), rpcServer, versionState,
+ stateMonitor, vipStatus, VIP_STATUS_FILE);
assertTrue(vipStatus.isInRotation()); // default is in rotation when using status file
bootstrap.start();
@@ -137,8 +133,7 @@ public class ConfigServerBootstrapTest {
StateMonitor stateMonitor = StateMonitor.createForTesting();
VipStatus vipStatus = createVipStatus(stateMonitor);
ConfigServerBootstrap bootstrap = new ConfigServerBootstrap(tester.applicationRepository(), rpcServer, versionState,
- stateMonitor,
- vipStatus, INITIALIZE_ONLY, VIP_STATUS_PROGRAMMATICALLY);
+ stateMonitor, vipStatus, VIP_STATUS_PROGRAMMATICALLY);
assertFalse(vipStatus.isInRotation());
// Call method directly, to be sure that it is finished redeploying all applications and we can check status
bootstrap.start();
@@ -181,8 +176,8 @@ public class ConfigServerBootstrapTest {
StateMonitor stateMonitor = StateMonitor.createForTesting();
VipStatus vipStatus = createVipStatus(stateMonitor);
ConfigServerBootstrap bootstrap = new ConfigServerBootstrap(tester.applicationRepository(), rpcServer, versionState,
- stateMonitor, vipStatus,
- BOOTSTRAP_IN_SEPARATE_THREAD, VIP_STATUS_PROGRAMMATICALLY);
+ stateMonitor, vipStatus, VIP_STATUS_PROGRAMMATICALLY);
+ bootstrap.start();
waitUntil(rpcServer::isRunning, "failed waiting for Rpc server running");
waitUntil(() -> bootstrap.status() == StateMonitor.Status.up, "failed waiting for status 'up'");
waitUntil(vipStatus::isInRotation, "failed waiting for server to be in rotation");