summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Harald Musum <musum@oath.com> 2018-06-22 09:50:47 +0200
committer Harald Musum <musum@oath.com> 2018-06-22 09:50:47 +0200
commit 77df3ad739ba6eead6d5b8417c2974c1a00d566d (patch)
tree 996f2e33c6e53cd4f7edb7ecf244b56968c6dd76
parent ea9a1eea042617764dfe474cf161e422d1123338 (diff)
Retry redeployment of applications if that fails
Retry redeployment of applications if they fail when bootstrapping; call System.exit if it takes longer than a configured amount of time
-rw-r--r-- configdefinitions/src/vespa/configserver.def 3
-rw-r--r-- configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java 46
-rw-r--r-- configserver/src/main/java/com/yahoo/vespa/config/server/ConfigServerBootstrap.java 24
-rw-r--r-- configserver/src/test/java/com/yahoo/vespa/config/server/ConfigServerBootstrapTest.java 7
-rw-r--r-- configserver/src/test/java/com/yahoo/vespa/config/server/deploy/DeployTester.java 2
-rw-r--r-- configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandlerTest.java 1
6 files changed, 63 insertions, 20 deletions
diff --git a/configdefinitions/src/vespa/configserver.def b/configdefinitions/src/vespa/configserver.def
index bf4c9599f4a..228a5c6fb4f 100644
--- a/configdefinitions/src/vespa/configserver.def
+++ b/configdefinitions/src/vespa/configserver.def
@@ -58,3 +58,6 @@ nodeAdminInContainer bool default=true
maintainerIntervalMinutes int default=30
# TODO: Default set to a high value (1 year) => maintainer will not run, change when maintainer verified out in prod
tenantsMaintainerIntervalMinutes int default=525600
+
+# How long bootstrapping can take before giving up (in seconds)
+maxDurationOfBootstrap long default=7200
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java
index 821162353d6..ab7702e26d1 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java
@@ -110,16 +110,16 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye
public ApplicationRepository(TenantRepository tenantRepository,
Provisioner hostProvisioner,
Clock clock) {
- this(tenantRepository, new ConfigConvergenceChecker(), hostProvisioner, clock);
+ this(tenantRepository, hostProvisioner, clock, new ConfigserverConfig(new ConfigserverConfig.Builder()));
}
+ // For testing
public ApplicationRepository(TenantRepository tenantRepository,
- ConfigConvergenceChecker convergenceChecker,
Provisioner hostProvisioner,
- Clock clock) {
- this(tenantRepository, Optional.of(hostProvisioner),
- convergenceChecker, new HttpProxy(new SimpleHttpFetcher()),
- new ConfigserverConfig(new ConfigserverConfig.Builder()), clock, new FileDistributionStatus());
+ Clock clock,
+ ConfigserverConfig configserverConfig) {
+ this(tenantRepository, Optional.of(hostProvisioner), new ConfigConvergenceChecker(), new HttpProxy(new SimpleHttpFetcher()),
+ configserverConfig, clock, new FileDistributionStatus());
}
private ApplicationRepository(TenantRepository tenantRepository,
@@ -492,6 +492,10 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye
return getLocalSession(tenant, sessionId).getMetaData();
}
+ ConfigserverConfig configserverConfig() {
+ return configserverConfig;
+ }
+
private void validateThatLocalSessionIsNotActive(Tenant tenant, long sessionId) {
LocalSession session = getLocalSession(tenant, sessionId);
if (Session.Status.ACTIVATE.equals(session.getStatus())) {
@@ -557,24 +561,42 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye
}
}
- void redeployAllApplications() throws InterruptedException {
+ boolean redeployAllApplications(Duration maxDuration) throws InterruptedException {
+ Instant end = Instant.now().plus(maxDuration);
+ Set<ApplicationId> applicationIds = listApplications();
+ do {
+ applicationIds = redeployApplications(applicationIds);
+ } while ( ! applicationIds.isEmpty() && Instant.now().isBefore(end));
+
+ if ( ! applicationIds.isEmpty()) {
+ log.log(LogLevel.ERROR, "Redeploying applications not finished after " + maxDuration +
+ ", exiting, applications that failed redeployment: " + applicationIds);
+ return false;
+ }
+ return true;
+ }
+
+ // Returns the set of applications that failed to redeploy
+ private Set<ApplicationId> redeployApplications(Set<ApplicationId> applicationIds) throws InterruptedException {
ExecutorService executor = Executors.newFixedThreadPool(configserverConfig.numParallelTenantLoaders(),
new DaemonThreadFactory("redeploy apps"));
// Keep track of deployment per application
Map<ApplicationId, Future<?>> futures = new HashMap<>();
- tenantRepository.getAllTenants()
- .forEach(tenant -> listApplicationIds(tenant)
- .forEach(appId -> deployFromLocalActive(appId).ifPresent(
- deployment -> futures.put(appId,executor.submit(deployment::activate)))));
+ Set<ApplicationId> failedDeployments = new HashSet<>();
+ applicationIds.forEach(appId -> deployFromLocalActive(appId).ifPresent(
+ deployment -> futures.put(appId, executor.submit(deployment::activate))));
for (Map.Entry<ApplicationId, Future<?>> f : futures.entrySet()) {
try {
f.getValue().get();
} catch (ExecutionException e) {
- throw new RuntimeException("Redeploying of " + f.getKey() + " failed", e);
+ ApplicationId app = f.getKey();
+ log.log(LogLevel.WARNING, "Redeploying " + app + " failed, will retry");
+ failedDeployments.add(app);
}
}
executor.shutdown();
executor.awaitTermination(365, TimeUnit.DAYS); // Timeout should never happen
+ return failedDeployments;
}
private LocalSession getExistingSession(Tenant tenant, ApplicationId applicationId) {
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/ConfigServerBootstrap.java b/configserver/src/main/java/com/yahoo/vespa/config/server/ConfigServerBootstrap.java
index 916fde97e35..6e6f60b29dd 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/ConfigServerBootstrap.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/ConfigServerBootstrap.java
@@ -31,12 +31,17 @@ public class ConfigServerBootstrap extends AbstractComponent implements Runnable
private static final ExecutorService rpcServerExecutor = Executors.newSingleThreadExecutor(new DaemonThreadFactory("config server RPC server"));
private static final String vipStatusClusterIdentifier = "configserver";
+ enum MainThread {START, DO_NOT_START}
+ enum RedeployingApplicationsFails {EXIT_JVM, CONTINUE}
+
private final ApplicationRepository applicationRepository;
private final RpcServer server;
private final Thread serverThread;
private final VersionState versionState;
private final StateMonitor stateMonitor;
private final VipStatus vipStatus;
+ private final Duration maxDurationOfRedeployment;
+ private final RedeployingApplicationsFails exitIfRedeployingApplicationsFails;
// The tenants object is injected so that all initial requests handlers are
// added to the rpc server before it starts answering rpc requests.
@@ -44,20 +49,23 @@ public class ConfigServerBootstrap extends AbstractComponent implements Runnable
@Inject
public ConfigServerBootstrap(ApplicationRepository applicationRepository, RpcServer server,
VersionState versionState, StateMonitor stateMonitor, VipStatus vipStatus) {
- this(applicationRepository, server, versionState, stateMonitor, vipStatus, true);
+ this(applicationRepository, server, versionState, stateMonitor, vipStatus, MainThread.START, RedeployingApplicationsFails.EXIT_JVM);
}
// For testing only
ConfigServerBootstrap(ApplicationRepository applicationRepository, RpcServer server, VersionState versionState,
- StateMonitor stateMonitor, VipStatus vipStatus, boolean startMainThread) {
+ StateMonitor stateMonitor, VipStatus vipStatus, MainThread mainThread,
+ RedeployingApplicationsFails exitIfRedeployingApplicationsFails) {
this.applicationRepository = applicationRepository;
this.server = server;
this.versionState = versionState;
this.stateMonitor = stateMonitor;
this.serverThread = new Thread(this, "configserver main");
this.vipStatus = vipStatus;
+ this.maxDurationOfRedeployment = Duration.ofSeconds(applicationRepository.configserverConfig().maxDurationOfBootstrap());
+ this.exitIfRedeployingApplicationsFails = exitIfRedeployingApplicationsFails;
initializing(); // Initially take server out of rotation
- if (startMainThread)
+ if (mainThread == MainThread.START)
start();
}
@@ -80,11 +88,15 @@ public class ConfigServerBootstrap extends AbstractComponent implements Runnable
log.log(LogLevel.INFO, "Configserver upgrading from " + versionState.storedVersion() + " to "
+ versionState.currentVersion() + ". Redeploying all applications");
try {
- applicationRepository.redeployAllApplications();
+ if ( ! applicationRepository.redeployAllApplications(maxDurationOfRedeployment)) {
+ redeployingApplicationsFailed();
+ return; // Status will not be set to 'up' since we return here
+ }
versionState.saveNewVersion();
log.log(LogLevel.INFO, "All applications redeployed successfully");
} catch (Exception e) {
log.log(LogLevel.ERROR, "Redeployment of applications failed", e);
+ redeployingApplicationsFailed();
return; // Status will not be set to 'up' since we return here
}
}
@@ -144,5 +156,9 @@ public class ConfigServerBootstrap extends AbstractComponent implements Runnable
log.log(LogLevel.INFO, "RPC server started");
}
+ private void redeployingApplicationsFailed() {
+ if (exitIfRedeployingApplicationsFails == RedeployingApplicationsFails.EXIT_JVM) System.exit(1);
+ }
+
}
diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/ConfigServerBootstrapTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/ConfigServerBootstrapTest.java
index 992d46d3115..082c3058598 100644
--- a/configserver/src/test/java/com/yahoo/vespa/config/server/ConfigServerBootstrapTest.java
+++ b/configserver/src/test/java/com/yahoo/vespa/config/server/ConfigServerBootstrapTest.java
@@ -76,7 +76,9 @@ public class ConfigServerBootstrapTest {
RpcServer rpcServer = createRpcServer(configserverConfig);
VipStatus vipStatus = new VipStatus();
ConfigServerBootstrap bootstrap = new ConfigServerBootstrap(tester.applicationRepository(), rpcServer, versionState,
- createStateMonitor(), vipStatus, false /* do not call run method */);
+ createStateMonitor(), vipStatus,
+ ConfigServerBootstrap.MainThread.DO_NOT_START,
+ ConfigServerBootstrap.RedeployingApplicationsFails.CONTINUE);
assertFalse(vipStatus.isInRotation());
// Call method directly, to be sure that it is finished redeploying all applications and we can check status
bootstrap.run();
@@ -112,7 +114,8 @@ public class ConfigServerBootstrapTest {
.configServerDBDir(temporaryFolder.newFolder("serverdb").getAbsolutePath())
.configDefinitionsDir(temporaryFolder.newFolder("configdefinitions").getAbsolutePath())
.hostedVespa(true)
- .multitenant(true));
+ .multitenant(true)
+ .maxDurationOfBootstrap(1) /* seconds */);
}
public static class MockRpc extends com.yahoo.vespa.config.server.rpc.MockRpc {
diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/DeployTester.java b/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/DeployTester.java
index ce53451ae2e..a4f5679aa39 100644
--- a/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/DeployTester.java
+++ b/configserver/src/test/java/com/yahoo/vespa/config/server/deploy/DeployTester.java
@@ -112,7 +112,7 @@ public class DeployTester {
catch (Exception e) {
throw new IllegalArgumentException(e);
}
- applicationRepository = new ApplicationRepository(tenantRepository, new ProvisionerAdapter(provisioner), clock);
+ applicationRepository = new ApplicationRepository(tenantRepository, new ProvisionerAdapter(provisioner), clock, configserverConfig);
}
public Tenant tenant() {
diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandlerTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandlerTest.java
index d8c5e33ca65..6680907e1c0 100644
--- a/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandlerTest.java
+++ b/configserver/src/test/java/com/yahoo/vespa/config/server/http/v2/ApplicationHandlerTest.java
@@ -69,7 +69,6 @@ public class ApplicationHandlerTest {
tenantRepository.addTenant(TenantBuilder.create(componentRegistry, foobar));
provisioner = new SessionHandlerTest.MockProvisioner();
applicationRepository = new ApplicationRepository(tenantRepository,
- new ConfigConvergenceChecker(stateApiFactory),
provisioner, Clock.systemUTC());
listApplicationsHandler = new ListApplicationsHandler(ListApplicationsHandler.testOnlyContext(),
tenantRepository,