summaryrefslogtreecommitdiffstats
path: root/clustercontroller-core
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@yahoo-inc.com>2017-05-22 12:36:02 +0200
committerTor Brede Vekterli <vekterli@yahoo-inc.com>2017-05-22 12:36:02 +0200
commit5912c3c2683c343ffc15146032ae0ebb04a811e8 (patch)
treedd8688306cbf4464f07523a9fa39bf802d1486df /clustercontroller-core
parentacb59b485f4df8f3ced1fedb277532b8db17d743 (diff)
Write to ZooKeeper must be timing invariant
Previously, the state transition grace period could cause the write to ZooKeeper to be elided if state changes happened within the previous grace period.
Diffstat (limited to 'clustercontroller-core')
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java17
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java4
2 files changed, 17 insertions, 4 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
index 2a9c1e0e9b3..9ea067db650 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
@@ -348,6 +348,23 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
newStates.add(state);
metricUpdater.updateClusterStateMetrics(cluster, state);
systemStateBroadcaster.handleNewSystemState(state);
+ // Iff master, always store new version in ZooKeeper _before_ publishing to any
+ // nodes so that a cluster controller crash after publishing but before a successful
+ // ZK store will not risk reusing the same version number.
+ if (masterElectionHandler.isMaster()) {
+ storeClusterStateVersionToZooKeeper(state);
+ }
+ }
+
+ private void storeClusterStateVersionToZooKeeper(ClusterState state) {
+ try {
+ database.saveLatestSystemStateVersion(databaseContext, state.getVersion());
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ // Rethrow as RuntimeException to propagate exception up to main thread method.
+ // Don't want to hide failures to write cluster state version.
+ throw new RuntimeException("ZooKeeper write interrupted", e);
+ }
}
/**
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java
index ec07a83c65e..33d8ad27738 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java
@@ -110,11 +110,7 @@ public class SystemStateBroadcaster {
if (systemState == null) return false;
List<NodeInfo> recipients = resolveStateVersionSendSet(dbContext);
- // Store new version in ZooKeeper _before_ publishing to any nodes so that a
- // cluster controller crash after publishing but before a successful ZK store
- // will not risk reusing the same version number.
if (!systemState.isOfficial()) {
- database.saveLatestSystemStateVersion(dbContext, systemState.getVersion());
systemState.setOfficial(true);
}