aboutsummaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
authorValerij Fredriksen <valerij92@gmail.com>2019-06-29 14:28:32 +0200
committerValerij Fredriksen <valerij92@gmail.com>2019-06-29 14:28:32 +0200
commit6dcee1d4ed7999c81d098eeb8c299aa7441dd0a7 (patch)
treefa2dd3d60eaaacf6287998434203a134b3223275 /node-repository
parent35680fa4af2061d57c886624b422749219b77b52 (diff)
Ignore TransientException in NodeFailer and RetiredExpirer
Diffstat (limited to 'node-repository')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java10
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java6
2 files changed, 14 insertions, 2 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
index b7e8395cc92..a7b750c4e46 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
@@ -5,7 +5,9 @@ import com.yahoo.config.provision.Deployer;
import com.yahoo.config.provision.Deployment;
import com.yahoo.config.provision.HostLivenessTracker;
import com.yahoo.config.provision.NodeType;
+import com.yahoo.config.provision.TransientException;
import com.yahoo.jdisc.Metric;
+import com.yahoo.log.LogLevel;
import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.applicationmodel.ServiceInstance;
@@ -21,6 +23,7 @@ import com.yahoo.vespa.orchestrator.Orchestrator;
import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus;
import com.yahoo.vespa.orchestrator.status.HostStatus;
import com.yahoo.vespa.service.monitor.ServiceMonitor;
+import com.yahoo.yolean.Exceptions;
import java.time.Clock;
import java.time.Duration;
@@ -368,8 +371,11 @@ public class NodeFailer extends Maintainer {
try {
deployment.get().activate();
return true;
- }
- catch (RuntimeException e) {
+ } catch (TransientException e) {
+ log.log(LogLevel.INFO, "Failed to redeploy " + node.allocation().get().owner() +
+ " with a transient error, will be retried by application maintainer: " + Exceptions.toMessageString(e));
+ return true;
+ } catch (RuntimeException e) {
// The expected reason for deployment to fail here is that there is no capacity available to redeploy.
// In that case we should leave the node in the active state to avoid failing additional nodes.
nodeRepository().reactivate(node.hostname(), Agent.system,
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
index c27989cb852..dea0b8c19d0 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
@@ -4,12 +4,15 @@ package com.yahoo.vespa.hosted.provision.maintenance;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.Deployer;
import com.yahoo.config.provision.Deployment;
+import com.yahoo.config.provision.TransientException;
+import com.yahoo.log.LogLevel;
import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.History;
import com.yahoo.vespa.orchestrator.OrchestrationException;
import com.yahoo.vespa.orchestrator.Orchestrator;
+import com.yahoo.yolean.Exceptions;
import java.time.Clock;
import java.time.Duration;
@@ -73,6 +76,9 @@ public class RetiredExpirer extends Maintainer {
String nodeList = nodesToRemove.stream().map(Node::hostname).collect(Collectors.joining(", "));
log.info("Redeployed " + application + " to deactivate retired nodes: " + nodeList);
+ } catch (TransientException e) {
+ log.log(LogLevel.INFO, "Failed to redeploy " + application +
+ " with a transient error, will be retried by application maintainer: " + Exceptions.toMessageString(e));
} catch (RuntimeException e) {
String nodeList = retiredNodes.stream().map(Node::hostname).collect(Collectors.joining(", "));
log.log(Level.WARNING, "Exception trying to deactivate retired nodes from " + application