about | summary | refs | log | tree | commit | diff | stats
path: root/clustercontroller-core
diff options
context:
space:
mode:
Diffstat (limited to 'clustercontroller-core')
-rw-r--r-- clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java | 13
-rw-r--r-- clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java | 29
-rw-r--r-- clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeLookup.java | 7
-rw-r--r-- clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java | 1
-rw-r--r-- clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java | 7
-rw-r--r-- clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseHandlerTest.java | 1
-rw-r--r-- clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java | 4
7 files changed, 50 insertions, 12 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
index ed8e39347e5..2a07f9ac300 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
@@ -209,7 +209,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
public boolean isMaster() {
synchronized (monitor) {
- return masterElectionHandler.isMaster();
+ return isMaster;
}
}
@@ -387,7 +387,9 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
// Iff master, always store new version in ZooKeeper _before_ publishing to any
// nodes so that a cluster controller crash after publishing but before a successful
// ZK store will not risk reusing the same version number.
- if (masterElectionHandler.isMaster()) {
+ // Use isMaster instead of election handler state, as isMaster is set _after_ we have
+ // completed a leadership event edge, so we know we have read from ZooKeeper.
+ if (isMaster) {
storeClusterStateMetaDataToZooKeeper(stateBundle);
}
}
@@ -439,7 +441,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
*/
public void lostDatabaseConnection() {
verifyInControllerThread();
- boolean wasMaster = masterElectionHandler.isMaster();
+ boolean wasMaster = isMaster;
masterElectionHandler.lostDatabaseConnection();
if (wasMaster) {
// Enforce that we re-fetch all state information from ZooKeeper upon the next tick if we're still master.
@@ -522,6 +524,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
masterElectionHandler.setFleetControllerCount(options.fleetControllerCount);
masterElectionHandler.setMasterZooKeeperCooldownPeriod(options.masterZooKeeperCooldownPeriod);
+ masterElectionHandler.setUsingZooKeeper(options.zooKeeperServerAddress != null && !options.zooKeeperServerAddress.isEmpty());
if (rpcServer != null) {
rpcServer.setMasterElectionHandler(masterElectionHandler);
@@ -618,7 +621,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
if ( ! isRunning()) { return; }
didWork |= systemStateBroadcaster.processResponses();
if ( ! isRunning()) { return; }
- if (masterElectionHandler.isMaster()) {
+ if (isMaster) {
didWork |= broadcastClusterStateToEligibleNodes();
systemStateBroadcaster.checkIfClusterStateIsAckedByAllDistributors(database, databaseContext, this);
}
@@ -776,7 +779,7 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
private boolean taskMayBeCompletedImmediately(RemoteClusterControllerTask task) {
// We cannot introduce a version barrier for tasks when we're not the master (and therefore will not publish new versions).
- return (!task.hasVersionAckDependency() || task.isFailed() || !masterElectionHandler.isMaster());
+ return (!task.hasVersionAckDependency() || task.isFailed() || !isMaster);
}
private RemoteClusterControllerTask.Context createRemoteTaskProcessingContext() {
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java
index 6e968fef7ce..2c03520ec01 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java
@@ -25,6 +25,7 @@ public class MasterElectionHandler implements MasterInterface {
private Map<Integer, Integer> nextMasterData;
private long masterGoneFromZooKeeperTime; // Set to time master fleet controller disappears from zookeeper
private long masterZooKeeperCooldownPeriod; // The period in ms that we won't take over unless master come back.
+ private boolean usingZooKeeper = false; // Unit tests may not use ZooKeeper at all.
public MasterElectionHandler(int index, int totalCount, Object monitor, Timer timer) {
this.monitor = monitor;
@@ -42,7 +43,7 @@ public class MasterElectionHandler implements MasterInterface {
public void setFleetControllerCount(int count) {
totalCount = count;
- if (count == 1) {
+ if (count == 1 && !usingZooKeeper) {
masterCandidate = 0;
followers = 1;
nextInLineCount = 0;
@@ -53,6 +54,14 @@ public class MasterElectionHandler implements MasterInterface {
masterZooKeeperCooldownPeriod = period;
}
+ public void setUsingZooKeeper(boolean usingZK) {
+ if (!usingZooKeeper && usingZK) {
+ // Reset any shortcuts taken by non-ZK election logic.
+ resetElectionProgress();
+ }
+ usingZooKeeper = usingZK;
+ }
+
@Override
public boolean isMaster() {
Integer master = getMaster();
@@ -111,7 +120,9 @@ public class MasterElectionHandler implements MasterInterface {
public boolean watchMasterElection(DatabaseHandler database,
DatabaseHandler.Context dbContext) throws InterruptedException {
- if (totalCount == 1) return false; // No point in doing master election with only one node configured to be cluster controller
+ if (totalCount == 1 && !usingZooKeeper) {
+ return false; // Allow single configured node to become master implicitly if no ZK configured
+ }
if (nextMasterData == null) {
if (masterCandidate == null) {
log.log(Level.FINEST, "Cluster controller " + index + ": No current master candidate. Waiting for data to do master election.");
@@ -222,15 +233,19 @@ public class MasterElectionHandler implements MasterInterface {
}
public void lostDatabaseConnection() {
- if (totalCount > 1) {
+ if (totalCount > 1 || usingZooKeeper) {
log.log(Level.INFO, "Cluster controller " + index + ": Clearing master data as we lost connection on node " + index);
- masterData = null;
- masterCandidate = null;
- followers = 0;
- nextMasterData = null;
+ resetElectionProgress();
}
}
+ private void resetElectionProgress() {
+ masterData = null;
+ masterCandidate = null;
+ followers = 0;
+ nextMasterData = null;
+ }
+
public void writeHtmlState(StringBuilder sb, int stateGatherCount) {
sb.append("<h2>Master state</h2>\n");
Integer master = getMaster();
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeLookup.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeLookup.java
index ceb81e91b7d..65b97a3ae82 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeLookup.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeLookup.java
@@ -12,4 +12,11 @@ public interface NodeLookup {
boolean updateCluster(ContentCluster cluster, NodeAddedOrRemovedListener listener);
+ /**
+ * Returns whether the lookup instance has been able to bootstrap itself with information about nodes.
+ *
+ * Calling updateCluster() _before_ isReady has returned true may not provide any useful data.
+ */
+ boolean isReady();
+
}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java
index 3f04bbd9200..d19425a7c95 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java
@@ -91,6 +91,7 @@ public class DatabaseHandler {
private long lastZooKeeperConnectionAttempt = 0;
private static final int minimumWaitBetweenFailedConnectionAttempts = 10000;
private boolean lostZooKeeperConnectionEvent = false;
+ private boolean connectionEstablishmentIsAllowed = false;
private Map<Integer, Integer> masterDataEvent = null;
public DatabaseHandler(DatabaseFactory databaseFactory, Timer timer, String zooKeeperAddress, int ourIndex, Object monitor) throws InterruptedException
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java
index b3bb458ed74..8649e7cc11a 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java
@@ -59,6 +59,7 @@ public class SlobrokClient implements NodeLookup {
freshMirror = true;
}
+ @Override
public void shutdown() {
if (supervisor != null) {
supervisor.transport().shutdown().join();
@@ -67,6 +68,12 @@ public class SlobrokClient implements NodeLookup {
public Mirror getMirror() { return mirror; }
+ @Override
+ public boolean isReady() {
+ return mirror != null && mirror.ready();
+ }
+
+ @Override
public boolean updateCluster(ContentCluster cluster, NodeAddedOrRemovedListener listener) {
if (mirror == null) return false;
int mirrorVersion = mirror.updates();
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseHandlerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseHandlerTest.java
index 9c0a94309a5..f6add77423a 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseHandlerTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseHandlerTest.java
@@ -10,6 +10,7 @@ import org.junit.Test;
import static org.hamcrest.Matchers.equalTo;
import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyInt;
import static org.mockito.ArgumentMatchers.eq;
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java
index b322c62967a..d7bca47026f 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java
@@ -154,4 +154,8 @@ public class DummyCommunicator implements Communicator, NodeLookup {
return false;
}
+ @Override
+ public boolean isReady() {
+ return true;
+ }
}