aboutsummaryrefslogtreecommitdiffstats
path: root/clustercontroller-core
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@verizonmedia.com>2021-03-08 13:11:46 +0100
committerTor Brede Vekterli <vekterli@verizonmedia.com>2021-03-08 14:37:20 +0100
commitaf8915becf15db9d2c2c1dbb3408e89d54dd074f (patch)
treed42e517235f0bbfdc3f8c753bc643b6f1cc2e6b3 /clustercontroller-core
parentb583805ecb68e347776097b6f4ac3622b4d53004 (diff)
Better handling of ZK connectivity issues concurrent with elections
Adds the following safeguards/improvements: - Do not clear pending (non-persisted) writes over a `connect()` edge. Avoids having the controller eternally wait for a doomed pending write to be completed when it has no other events that can trigger a new write. - Trigger `lostDatabaseConnection()` whenever ZK is reconfigured to ensure we reload the newest state before trying to compute/publish any new states. - Explicitly drop leadership in `lostDatabaseConnection()` to immediately prevent controller from trying any funny leader-related business since it no longer can depend on ZK watches triggering. - When falling back to default state/cluster bundle, ensure that any subsequent dependent znode write is predicated on the pre-existing znode version being 0, i.e. did not previously exist.
Diffstat (limited to 'clustercontroller-core')
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java30
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java51
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/ZooKeeperDatabase.java6
3 files changed, 59 insertions, 28 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
index e8222d73a63..bafd7168f03 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
@@ -284,7 +284,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
}
log.log(Level.INFO, "Fleetcontroller done shutting down event thread.");
controllerThreadId = Thread.currentThread().getId();
- database.shutdown(this);
+ database.shutdown(databaseContext);
if (statusPageServer != null) {
statusPageServer.shutdown();
@@ -436,7 +436,13 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
*/
public void lostDatabaseConnection() {
verifyInControllerThread();
+ boolean wasMaster = masterElectionHandler.isMaster();
masterElectionHandler.lostDatabaseConnection();
+ if (wasMaster) {
+ // Enforce that we re-fetch all state information from ZooKeeper upon the next tick if we're still master.
+ dropLeadershipState();
+ metricUpdater.updateMasterState(false);
+ }
}
private void failAllVersionDependentTasks() {
@@ -501,8 +507,8 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
cluster.setPollingFrequency(options.statePollingFrequency);
cluster.setDistribution(options.storageDistribution);
cluster.setNodes(options.nodes);
- database.setZooKeeperAddress(options.zooKeeperServerAddress);
- database.setZooKeeperSessionTimeout(options.zooKeeperSessionTimeout);
+ database.setZooKeeperAddress(options.zooKeeperServerAddress, databaseContext);
+ database.setZooKeeperSessionTimeout(options.zooKeeperSessionTimeout, databaseContext);
stateGatherer.setMaxSlobrokDisconnectGracePeriod(options.maxSlobrokDisconnectGracePeriod);
stateGatherer.setNodeStateRequestTimeout(options.nodeStateRequestTimeoutMS);
@@ -1066,18 +1072,22 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
wantedStateChanged = false;
}
} else {
- if (isMaster) {
- eventLog.add(new ClusterEvent(ClusterEvent.Type.MASTER_ELECTION, "This node is no longer fleetcontroller master.", timer.getCurrentTimeInMillis()));
- firstAllowedStateBroadcast = Long.MAX_VALUE;
- failAllVersionDependentTasks();
- }
- wantedStateChanged = false;
- isMaster = false;
+ dropLeadershipState();
}
metricUpdater.updateMasterState(isMaster);
return didWork;
}
+ private void dropLeadershipState() {
+ if (isMaster) {
+ eventLog.add(new ClusterEvent(ClusterEvent.Type.MASTER_ELECTION, "This node is no longer fleetcontroller master.", timer.getCurrentTimeInMillis()));
+ firstAllowedStateBroadcast = Long.MAX_VALUE;
+ failAllVersionDependentTasks();
+ }
+ wantedStateChanged = false;
+ isMaster = false;
+ }
+
public void run() {
controllerThreadId = Thread.currentThread().getId();
try {
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java
index 5d5bb674d4f..94a76763f34 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java
@@ -43,11 +43,15 @@ public class DatabaseHandler {
ClusterStateBundle clusterStateBundle;
void clear() {
- masterVote = null;
+ clearNonClusterStateFields();
lastSystemStateVersion = null;
+ clusterStateBundle = null;
+ }
+
+ void clearNonClusterStateFields() {
+ masterVote = null;
wantedStates = null;
startTimestamps = null;
- clusterStateBundle = null;
}
}
private class DatabaseListener implements Database.DatabaseListener {
@@ -96,7 +100,7 @@ public class DatabaseHandler {
this.nodeIndex = ourIndex;
pendingStore.masterVote = ourIndex; // To begin with we'll vote for ourselves.
this.monitor = monitor;
- // TODO: Require non-null, not possible now since at least ClusterFeedBlockTest usese null address
+ // TODO: Require non-null, not possible now since at least ClusterFeedBlockTest uses null address
this.zooKeeperAddress = zooKeeperAddress;
}
@@ -106,8 +110,8 @@ public class DatabaseHandler {
}
}
- public void shutdown(FleetController fleetController) {
- relinquishDatabaseConnectivity(fleetController);
+ public void shutdown(Context context) {
+ relinquishDatabaseConnectivity(context);
}
public boolean isClosed() { return database == null || database.isClosed(); }
@@ -116,7 +120,7 @@ public class DatabaseHandler {
return lastKnownStateBundleVersionWrittenBySelf;
}
- public void reset() {
+ public void reset(Context context) {
final boolean wasRunning;
synchronized (databaseMonitor) {
wasRunning = database != null;
@@ -126,37 +130,46 @@ public class DatabaseHandler {
database = null;
}
}
- clearSessionMetaData();
+ clearSessionMetaData(true);
+ context.getFleetController().lostDatabaseConnection();
if (wasRunning) {
log.log(Level.INFO, "Fleetcontroller " + nodeIndex + ": Done resetting database state");
}
}
- private void clearSessionMetaData() {
+ private void clearSessionMetaData(boolean clearPendingStateWrites) {
// Preserve who we want to vote for
Integer currentVote = (pendingStore.masterVote != null ? pendingStore.masterVote : currentlyStored.masterVote);
currentlyStored.clear();
- pendingStore.clear();
+ if (clearPendingStateWrites) {
+ pendingStore.clear();
+ } else {
+ // If we have pending cluster state writes we cannot drop these on the floor, as otherwise the
+ // core CC logic may keep thinking it has persisted writes it really has not. Clearing pending
+ // state writes woudl also prevent the controller from detecting itself being out of sync by
+ // triggering CaS violations upon znode writes.
+ pendingStore.clearNonClusterStateFields();
+ }
pendingStore.masterVote = currentVote;
log.log(Level.FINE, "Cleared session metadata. Pending master vote is now " + pendingStore.masterVote);
}
- public void setZooKeeperAddress(String address) {
+ public void setZooKeeperAddress(String address, Context context) {
if (address == null && zooKeeperAddress == null) return;
if (address != null && address.equals(zooKeeperAddress)) return;
if (zooKeeperAddress != null) {
log.log(Level.INFO, "Fleetcontroller " + nodeIndex + ": " + (address == null ? "Stopped using ZooKeeper." : "Got new ZooKeeper address to use: " + address));
}
zooKeeperAddress = address;
- reset();
+ reset(context);
}
- public void setZooKeeperSessionTimeout(int timeout) {
+ public void setZooKeeperSessionTimeout(int timeout, Context context) {
if (timeout == zooKeeperSessionTimeout) return;
log.log(Level.FINE, "Fleetcontroller " + nodeIndex + ": Got new ZooKeeper session timeout of " + timeout + " milliseconds.");
zooKeeperSessionTimeout = timeout;
- reset();
+ reset(context);
}
private boolean usingZooKeeper() { return (zooKeeperAddress != null); }
@@ -169,7 +182,9 @@ public class DatabaseHandler {
database.close();
}
// We still hold the database lock while calling this, we want to block callers.
- clearSessionMetaData();
+ // Don't clear pending state writes in case they were attempted prior to connect()
+ // being called, but after receiving a database loss event.
+ clearSessionMetaData(false);
log.log(Level.INFO,
"Fleetcontroller " + nodeIndex + ": Setting up new ZooKeeper session at " + zooKeeperAddress);
DatabaseFactory.Params params = new DatabaseFactory.Params()
@@ -247,7 +262,7 @@ public class DatabaseHandler {
"has likely taken over ownership: %s", e.getMessage()));
// Clear DB and master election state. This shall trigger a full re-fetch of all
// version and election-related metadata.
- relinquishDatabaseConnectivity(context.getFleetController());
+ relinquishDatabaseConnectivity(context);
}
return didWork;
}
@@ -257,9 +272,9 @@ public class DatabaseHandler {
return zooKeeperAddress != null;
}
- private void relinquishDatabaseConnectivity(FleetController fleetController) {
- reset();
- fleetController.lostDatabaseConnection();
+ private void relinquishDatabaseConnectivity(Context context) {
+ // reset() will handle both session clearing and trigger a database loss callback into the CC.
+ reset(context);
}
private boolean performZooKeeperWrites() throws InterruptedException {
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/ZooKeeperDatabase.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/ZooKeeperDatabase.java
index 60835d5bb4f..4cdbb49dedc 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/ZooKeeperDatabase.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/ZooKeeperDatabase.java
@@ -238,6 +238,9 @@ public class ZooKeeperDatabase extends Database {
} catch (InterruptedException e) {
throw (InterruptedException) new InterruptedException("Interrupted").initCause(e);
} catch (Exception e) {
+ // If we return a default, empty version, writes dependent on this bundle should only
+ // succeed if the previous znode version is 0, i.e. not yet created.
+ lastKnownStateVersionZNodeVersion = 0;
maybeLogExceptionWarning(e, "Failed to retrieve latest system state version used. Returning null");
return null;
}
@@ -390,6 +393,9 @@ public class ZooKeeperDatabase extends Database {
maybeLogExceptionWarning(e, "Failed to retrieve last published cluster state bundle from " +
"ZooKeeper, will use an empty state as baseline");
}
+ // If we return a default, empty bundle, writes dependent on this bundle should only
+ // succeed if the previous znode version is 0, i.e. not yet created.
+ lastKnownStateBundleZNodeVersion = 0;
return ClusterStateBundle.ofBaselineOnly(AnnotatedClusterState.emptyState());
}