about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Valerij Fredriksen <freva@users.noreply.github.com> 2022-01-19 22:59:12 +0100
committer GitHub <noreply@github.com> 2022-01-19 22:59:12 +0100
commit 15040f5d9099587e2a766032e73ff100c934ba8f (patch)
tree 8f185a532ee61fa4f3506abb50c23b77802c9419
parent b76f5147a898570ff5944b29b18d9bc18a831bd4 (diff)
parent 0db0da43c891e1869247f6c2738bbbd4fcee2835 (diff)
Merge pull request #20878 from vespa-engine/jonmv/deployment-orchestration-for-long-pipelines (tag: v7.531.17)
Let deployment run for some time before giving up nodes which are down
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/documentmodel/DocumentModel.java 10
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java 14
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Run.java 2
3 files changed, 12 insertions, 14 deletions
diff --git a/config-model/src/main/java/com/yahoo/vespa/documentmodel/DocumentModel.java b/config-model/src/main/java/com/yahoo/vespa/documentmodel/DocumentModel.java
index bea7c95d412..112385e1801 100644
--- a/config-model/src/main/java/com/yahoo/vespa/documentmodel/DocumentModel.java
+++ b/config-model/src/main/java/com/yahoo/vespa/documentmodel/DocumentModel.java
@@ -9,12 +9,12 @@ import com.yahoo.documentmodel.DocumentTypeRepo;
* It contains a search manager managing all specified search definitions.
* It contains a storage manager managing all specified storage definitions.
*
- * @author baldersheim
- * @since 2010-02-19
+ * @author baldersheim
*/
public class DocumentModel {
- private DocumentTypeRepo documentMan = new DocumentTypeRepo();
- private SearchManager searchMan = new SearchManager();
+
+ private final DocumentTypeRepo documentMan = new DocumentTypeRepo();
+ private final SearchManager searchMan = new SearchManager();
/**
*
@@ -26,6 +26,6 @@ public class DocumentModel {
*
* @return Returns the SearchManager
*/
- public SearchManager getSearchManager() { return searchMan; }
+ public SearchManager getSearchManager() { return searchMan; }
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
index 28f48bd1cab..7b1cc1fd84d 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java
@@ -350,14 +350,12 @@ public class InternalStepRunner implements StepRunner {
String failureReason = null;
- NodeList suspendedTooLong = nodeList
- .isStateful()
- .suspendedSince(controller.clock().instant().minus(timeouts.statefulNodesDown()))
- .and(nodeList
- .not().isStateful()
- .suspendedSince(controller.clock().instant().minus(timeouts.statelessNodesDown()))
- );
- if ( ! suspendedTooLong.isEmpty()) {
+ NodeList suspendedTooLong = nodeList.isStateful()
+ .suspendedSince(controller.clock().instant().minus(timeouts.statefulNodesDown()))
+ .and(nodeList.not().isStateful()
+ .suspendedSince(controller.clock().instant().minus(timeouts.statelessNodesDown()))
+ );
+ if ( ! suspendedTooLong.isEmpty() && deployment.get().at().plus(timeouts.statelessNodesDown()).isBefore(controller.clock().instant())) {
failureReason = "Some nodes have been suspended for more than the allowed threshold:\n" +
suspendedTooLong.asList().stream().map(node -> node.node().hostname().value()).collect(joining("\n"));
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Run.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Run.java
index 2b9e3dd0733..4b38306f905 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Run.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/Run.java
@@ -210,7 +210,7 @@ public class Run {
return lastVespaLogTimestamp;
}
- /** Returns the timestamp of the last time no nodes were allowed to be down. */
+ /** Returns since when no nodes have been allowed to be down. */
public Optional<Instant> noNodesDownSince() {
return noNodesDownSince;
}