diff options
author | Tor Brede Vekterli <vekterli@yahoo-inc.com> | 2017-05-22 12:36:02 +0200 |
---|---|---|
committer | Tor Brede Vekterli <vekterli@yahoo-inc.com> | 2017-05-22 12:36:02 +0200 |
commit | 5912c3c2683c343ffc15146032ae0ebb04a811e8 (patch) | |
tree | dd8688306cbf4464f07523a9fa39bf802d1486df /clustercontroller-core | |
parent | acb59b485f4df8f3ced1fedb277532b8db17d743 (diff) |
Write to ZooKeeper must be timing invariant
Previously, the state transition grace period could cause the write to
ZooKeeper to be elided if state changes happened within the previous
grace period.
Diffstat (limited to 'clustercontroller-core')
2 files changed, 17 insertions, 4 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index 2a9c1e0e9b3..9ea067db650 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -348,6 +348,23 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd newStates.add(state); metricUpdater.updateClusterStateMetrics(cluster, state); systemStateBroadcaster.handleNewSystemState(state); + // Iff master, always store new version in ZooKeeper _before_ publishing to any + // nodes so that a cluster controller crash after publishing but before a successful + // ZK store will not risk reusing the same version number. + if (masterElectionHandler.isMaster()) { + storeClusterStateVersionToZooKeeper(state); + } + } + + private void storeClusterStateVersionToZooKeeper(ClusterState state) { + try { + database.saveLatestSystemStateVersion(databaseContext, state.getVersion()); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + // Rethrow as RuntimeException to propagate exception up to main thread method. + // Don't want to hide failures to write cluster state version. 
+ throw new RuntimeException("ZooKeeper write interrupted", e); + } } /** diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java index ec07a83c65e..33d8ad27738 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/SystemStateBroadcaster.java @@ -110,11 +110,7 @@ public class SystemStateBroadcaster { if (systemState == null) return false; List<NodeInfo> recipients = resolveStateVersionSendSet(dbContext); - // Store new version in ZooKeeper _before_ publishing to any nodes so that a - // cluster controller crash after publishing but before a successful ZK store - // will not risk reusing the same version number. if (!systemState.isOfficial()) { - database.saveLatestSystemStateVersion(dbContext, systemState.getVersion()); systemState.setOfficial(true); } |