diff options
Diffstat (limited to 'clustercontroller-core')
7 files changed, 50 insertions, 12 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index ed8e39347e5..2a07f9ac300 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -209,7 +209,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd public boolean isMaster() { synchronized (monitor) { - return masterElectionHandler.isMaster(); + return isMaster; } } @@ -387,7 +387,9 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd // Iff master, always store new version in ZooKeeper _before_ publishing to any // nodes so that a cluster controller crash after publishing but before a successful // ZK store will not risk reusing the same version number. - if (masterElectionHandler.isMaster()) { + // Use isMaster instead of election handler state, as isMaster is set _after_ we have + // completed a leadership event edge, so we know we have read from ZooKeeper. + if (isMaster) { storeClusterStateMetaDataToZooKeeper(stateBundle); } } @@ -439,7 +441,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd */ public void lostDatabaseConnection() { verifyInControllerThread(); - boolean wasMaster = masterElectionHandler.isMaster(); + boolean wasMaster = isMaster; masterElectionHandler.lostDatabaseConnection(); if (wasMaster) { // Enforce that we re-fetch all state information from ZooKeeper upon the next tick if we're still master. @@ -522,6 +524,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd masterElectionHandler.setFleetControllerCount(options.fleetControllerCount); masterElectionHandler.setMasterZooKeeperCooldownPeriod(options.masterZooKeeperCooldownPeriod); + masterElectionHandler.setUsingZooKeeper(options.zooKeeperServerAddress != null && !options.zooKeeperServerAddress.isEmpty()); if (rpcServer != null) { rpcServer.setMasterElectionHandler(masterElectionHandler); @@ -618,7 +621,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd if ( ! isRunning()) { return; } didWork |= systemStateBroadcaster.processResponses(); if ( ! isRunning()) { return; } - if (masterElectionHandler.isMaster()) { + if (isMaster) { didWork |= broadcastClusterStateToEligibleNodes(); systemStateBroadcaster.checkIfClusterStateIsAckedByAllDistributors(database, databaseContext, this); } @@ -776,7 +779,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd private boolean taskMayBeCompletedImmediately(RemoteClusterControllerTask task) { // We cannot introduce a version barrier for tasks when we're not the master (and therefore will not publish new versions). - return (!task.hasVersionAckDependency() || task.isFailed() || !masterElectionHandler.isMaster()); + return (!task.hasVersionAckDependency() || task.isFailed() || !isMaster); } private RemoteClusterControllerTask.Context createRemoteTaskProcessingContext() { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java index 6e968fef7ce..2c03520ec01 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java @@ -25,6 +25,7 @@ public class MasterElectionHandler implements MasterInterface { private Map<Integer, Integer> nextMasterData; private long masterGoneFromZooKeeperTime; // Set to time master fleet controller disappears from zookeeper private long masterZooKeeperCooldownPeriod; // The period in ms that we won't take over unless master come back. + private boolean usingZooKeeper = false; // Unit tests may not use ZooKeeper at all. public MasterElectionHandler(int index, int totalCount, Object monitor, Timer timer) { this.monitor = monitor; @@ -42,7 +43,7 @@ public class MasterElectionHandler implements MasterInterface { public void setFleetControllerCount(int count) { totalCount = count; - if (count == 1) { + if (count == 1 && !usingZooKeeper) { masterCandidate = 0; followers = 1; nextInLineCount = 0; @@ -53,6 +54,14 @@ public class MasterElectionHandler implements MasterInterface { masterZooKeeperCooldownPeriod = period; } + public void setUsingZooKeeper(boolean usingZK) { + if (!usingZooKeeper && usingZK) { + // Reset any shortcuts taken by non-ZK election logic. + resetElectionProgress(); + } + usingZooKeeper = usingZK; + } + @Override public boolean isMaster() { Integer master = getMaster(); @@ -111,7 +120,9 @@ public class MasterElectionHandler implements MasterInterface { public boolean watchMasterElection(DatabaseHandler database, DatabaseHandler.Context dbContext) throws InterruptedException { - if (totalCount == 1) return false; // No point in doing master election with only one node configured to be cluster controller + if (totalCount == 1 && !usingZooKeeper) { + return false; // Allow single configured node to become master implicitly if no ZK configured + } if (nextMasterData == null) { if (masterCandidate == null) { log.log(Level.FINEST, "Cluster controller " + index + ": No current master candidate. Waiting for data to do master election."); @@ -222,15 +233,19 @@ public class MasterElectionHandler implements MasterInterface { } public void lostDatabaseConnection() { - if (totalCount > 1) { + if (totalCount > 1 || usingZooKeeper) { log.log(Level.INFO, "Cluster controller " + index + ": Clearing master data as we lost connection on node " + index); - masterData = null; - masterCandidate = null; - followers = 0; - nextMasterData = null; + resetElectionProgress(); } } + private void resetElectionProgress() { + masterData = null; + masterCandidate = null; + followers = 0; + nextMasterData = null; + } + public void writeHtmlState(StringBuilder sb, int stateGatherCount) { sb.append("<h2>Master state</h2>\n"); Integer master = getMaster(); diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeLookup.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeLookup.java index ceb81e91b7d..65b97a3ae82 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeLookup.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeLookup.java @@ -12,4 +12,11 @@ public interface NodeLookup { boolean updateCluster(ContentCluster cluster, NodeAddedOrRemovedListener listener); + /** + * Returns whether the lookup instance has been able to bootstrap itself with information about nodes. + * + * Calling updateCluster() _before_ isReady has returned true may not provide any useful data. + */ + boolean isReady(); + } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java index 3f04bbd9200..d19425a7c95 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java @@ -91,6 +91,7 @@ public class DatabaseHandler { private long lastZooKeeperConnectionAttempt = 0; private static final int minimumWaitBetweenFailedConnectionAttempts = 10000; private boolean lostZooKeeperConnectionEvent = false; + private boolean connectionEstablishmentIsAllowed = false; private Map<Integer, Integer> masterDataEvent = null; public DatabaseHandler(DatabaseFactory databaseFactory, Timer timer, String zooKeeperAddress, int ourIndex, Object monitor) throws InterruptedException diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java index b3bb458ed74..8649e7cc11a 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java @@ -59,6 +59,7 @@ public class SlobrokClient implements NodeLookup { freshMirror = true; } + @Override public void shutdown() { if (supervisor != null) { supervisor.transport().shutdown().join(); @@ -67,6 +68,12 @@ public class SlobrokClient implements NodeLookup { public Mirror getMirror() { return mirror; } + @Override + public boolean isReady() { + return mirror != null && mirror.ready(); + } + + @Override public boolean updateCluster(ContentCluster cluster, NodeAddedOrRemovedListener listener) { if (mirror == null) return false; int mirrorVersion = mirror.updates(); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseHandlerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseHandlerTest.java index 9c0a94309a5..f6add77423a 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseHandlerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseHandlerTest.java @@ -10,6 +10,7 @@ import org.junit.Test; import static org.hamcrest.Matchers.equalTo; import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.eq; diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java index b322c62967a..d7bca47026f 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java @@ -154,4 +154,8 @@ public class DummyCommunicator implements Communicator, NodeLookup { return false; } + @Override + public boolean isReady() { + return true; + } } |