diff options
author | Valerij Fredriksen <valerij92@gmail.com> | 2019-06-29 14:28:32 +0200 |
---|---|---|
committer | Valerij Fredriksen <valerij92@gmail.com> | 2019-06-29 14:28:32 +0200 |
commit | 6dcee1d4ed7999c81d098eeb8c299aa7441dd0a7 (patch) | |
tree | fa2dd3d60eaaacf6287998434203a134b3223275 /node-repository | |
parent | 35680fa4af2061d57c886624b422749219b77b52 (diff) |
Ignore TransientException in NodeFailer and RetiredExpirer
Diffstat (limited to 'node-repository')
2 files changed, 14 insertions, 2 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java index b7e8395cc92..a7b750c4e46 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java @@ -5,7 +5,9 @@ import com.yahoo.config.provision.Deployer; import com.yahoo.config.provision.Deployment; import com.yahoo.config.provision.HostLivenessTracker; import com.yahoo.config.provision.NodeType; +import com.yahoo.config.provision.TransientException; import com.yahoo.jdisc.Metric; +import com.yahoo.log.LogLevel; import com.yahoo.transaction.Mutex; import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.applicationmodel.ServiceInstance; @@ -21,6 +23,7 @@ import com.yahoo.vespa.orchestrator.Orchestrator; import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus; import com.yahoo.vespa.orchestrator.status.HostStatus; import com.yahoo.vespa.service.monitor.ServiceMonitor; +import com.yahoo.yolean.Exceptions; import java.time.Clock; import java.time.Duration; @@ -368,8 +371,11 @@ public class NodeFailer extends Maintainer { try { deployment.get().activate(); return true; - } - catch (RuntimeException e) { + } catch (TransientException e) { + log.log(LogLevel.INFO, "Failed to redeploy " + node.allocation().get().owner() + + " with a transient error, will be retried by application maintainer: " + Exceptions.toMessageString(e)); + return true; + } catch (RuntimeException e) { // The expected reason for deployment to fail here is that there is no capacity available to redeploy. // In that case we should leave the node in the active state to avoid failing additional nodes. nodeRepository().reactivate(node.hostname(), Agent.system, diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java index c27989cb852..dea0b8c19d0 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java @@ -4,12 +4,15 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.Deployer; import com.yahoo.config.provision.Deployment; +import com.yahoo.config.provision.TransientException; +import com.yahoo.log.LogLevel; import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.History; import com.yahoo.vespa.orchestrator.OrchestrationException; import com.yahoo.vespa.orchestrator.Orchestrator; +import com.yahoo.yolean.Exceptions; import java.time.Clock; import java.time.Duration; @@ -73,6 +76,9 @@ public class RetiredExpirer extends Maintainer { String nodeList = nodesToRemove.stream().map(Node::hostname).collect(Collectors.joining(", ")); log.info("Redeployed " + application + " to deactivate retired nodes: " + nodeList); + } catch (TransientException e) { + log.log(LogLevel.INFO, "Failed to redeploy " + application + + " with a transient error, will be retried by application maintainer: " + Exceptions.toMessageString(e)); } catch (RuntimeException e) { String nodeList = retiredNodes.stream().map(Node::hostname).collect(Collectors.joining(", ")); log.log(Level.WARNING, "Exception trying to deactivate retired nodes from " + application |