summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tor Brede Vekterli <vekterli@verizonmedia.com> 2019-07-08 10:38:41 +0200
committer GitHub <noreply@github.com> 2019-07-08 10:38:41 +0200
commit 32bc35e1aaa884b9da3ce00154b127f6fb931939 (patch)
tree a911c056d2638bfaeff160229520060c0de56f96
parent c5329679d1398949254b00321e80e4f607c1db1b (diff)
parent e363dc115148782e882ae9789344f2fa4997c120 (diff)
Merge pull request #9979 from vespa-engine/vekterli/guard-state-version-publishing-with-pending-zk-writes
Do not allow states to be published when they have pending ZK writes
-rw-r--r-- clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java 6
-rw-r--r-- clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java 9
2 files changed, 15 insertions, 0 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
index ba35243c14d..364184331a8 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
@@ -661,6 +661,12 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
}
private boolean broadcastClusterStateToEligibleNodes() {
+ // If there's a pending DB store we have not yet been able to store the
+ // current state bundle to ZK and must therefore _not_ allow it to be published.
+ if (database.hasPendingClusterStateMetaDataStore()) {
+ log.log(LogLevel.DEBUG, "Can't publish current cluster state as it has one or more pending ZooKeeper stores");
+ return false;
+ }
boolean sentAny = false;
// Give nodes a fair chance to respond first time to state gathering requests, so we don't
// disturb system when we take over. Allow anyways if we have states from all nodes.
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java
index f2b1b523aba..f30b86130c2 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java
@@ -352,6 +352,15 @@ public class DatabaseHandler {
doNextZooKeeperTask(context);
}
+ // Returns true when zooKeeperAddress is non-null and pendingStore holds a non-null clusterStateBundle or lastSystemStateVersion. TODO should we expand this to cover _any_ pending ZK write?
+ public boolean hasPendingClusterStateMetaDataStore() {
+ synchronized (databaseMonitor) { // all fields below are read while holding databaseMonitor
+ return ((zooKeeperAddress != null) && // a null zooKeeperAddress always yields false
+ ((pendingStore.clusterStateBundle != null) || // presumably a bundle still waiting to be written; verify against the store path
+ (pendingStore.lastSystemStateVersion != null))); // presumably a version still waiting to be written; verify likewise
+ }
+ }
+
public ClusterStateBundle getLatestClusterStateBundle() throws InterruptedException {
log.log(LogLevel.DEBUG, () -> String.format("Fleetcontroller %d: Retrieving latest cluster state bundle from ZooKeeper", nodeIndex));
synchronized (databaseMonitor) {