aboutsummaryrefslogtreecommitdiffstats
path: root/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance
diff options
context:
space:
mode:
author Håkon Hallingstad <hakon@yahooinc.com> 2022-12-21 14:27:40 +0100
committer Håkon Hallingstad <hakon@yahooinc.com> 2022-12-21 14:27:40 +0100
commit 43e57094436eac53198f5f1f7933ee2d208c0a97 (patch)
tree 00378bc9be8d8f560221ff3534ced09c46243733 /node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance
parent 925370f4cfa32f1ace7e3ec70d7817277a1db00a (diff)
Reduce NodeFailer activate timeout
Diffstat (limited to 'node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java 6
1 files changed, 4 insertions, 2 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
index 203bb664c1c..84a45de39d7 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
@@ -1,6 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;
+import com.yahoo.concurrent.UncheckedTimeoutException;
import com.yahoo.config.provision.Deployer;
import com.yahoo.config.provision.Deployment;
import com.yahoo.config.provision.NodeType;
@@ -186,7 +187,7 @@ public class NodeFailer extends NodeRepositoryMaintainer {
*/
private boolean failActive(FailingNode failing) {
Optional<Deployment> deployment =
- deployer.deployFromLocalActive(failing.node().allocation().get().owner(), Duration.ofMinutes(30));
+ deployer.deployFromLocalActive(failing.node().allocation().get().owner(), Duration.ofMinutes(5));
if (deployment.isEmpty()) return false;
// If the active node that we are trying to fail is of type host, we need to successfully fail all
@@ -214,11 +215,12 @@ public class NodeFailer extends NodeRepositoryMaintainer {
}
if (activeChildrenToFail.isEmpty()) {
+ log.log(Level.INFO, "Failing out " + failing.node + ": " + failing.reason);
wantToFail(failing.node(), true, lock);
try {
deployment.get().activate();
return true;
- } catch (TransientException e) {
+ } catch (TransientException | UncheckedTimeoutException e) {
log.log(Level.INFO, "Failed to redeploy " + failing.node().allocation().get().owner() +
" with a transient error, will be retried by application maintainer: " +
Exceptions.toMessageString(e));