aboutsummaryrefslogtreecommitdiffstats
path: root/clustercontroller-core
diff options
context:
space:
mode:
authorTor Brede Vekterli <vekterli@verizonmedia.com>2021-03-18 12:07:45 +0100
committerTor Brede Vekterli <vekterli@verizonmedia.com>2021-03-19 13:30:31 +0100
commita831f99cb087121e74e4ed6adb496db78909d2c3 (patch)
tree0f6d4f4a8a8b811bc915f88647d8573c8e82db11 /clustercontroller-core
parent0b563fff5e00b45df8550e197961b6c5d868c0ff (diff)
Inhibit ZooKeeper connections until our local Slobrok mirror is ready.
Otherwise, if there are transient Slobrok issues during CC startup and we end up winning the election, we risk publishing a cluster state where the entire cluster appears down (since we do not have any knowledge of Slobrok node mapping state). This will adversely affect availability for all the obvious reasons.
Diffstat (limited to 'clustercontroller-core')
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java4
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeLookup.java7
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java7
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java7
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseHandlerTest.java14
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java4
6 files changed, 41 insertions, 2 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
index 5d5ffb917d2..898ebe243b0 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
@@ -597,6 +597,10 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd
public void tick() throws Exception {
synchronized (monitor) {
boolean didWork;
+ // We defer ZooKeeper connections until our local Slobrok mirror is ready.
+ // Otherwise we risk winning an election without having any clue about the state of
+ // the nodes in the cluster, causing us to spuriously publish cluster down-states.
+ database.setConnectionEstablishmentIsAllowed(nodeLookup.isReady());
didWork = database.doNextZooKeeperTask(databaseContext);
didWork |= updateMasterElectionState();
didWork |= handleLeadershipEdgeTransitions();
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeLookup.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeLookup.java
index ceb81e91b7d..65b97a3ae82 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeLookup.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeLookup.java
@@ -12,4 +12,11 @@ public interface NodeLookup {
boolean updateCluster(ContentCluster cluster, NodeAddedOrRemovedListener listener);
+ /**
+ * Returns whether the lookup instance has been able to bootstrap itself with information about nodes.
+ *
+ * Calling updateCluster() _before_ isReady has returned true may not provide any useful data.
+ */
+ boolean isReady();
+
}
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java
index 3f04bbd9200..54dc4cc2f31 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java
@@ -91,6 +91,7 @@ public class DatabaseHandler {
private long lastZooKeeperConnectionAttempt = 0;
private static final int minimumWaitBetweenFailedConnectionAttempts = 10000;
private boolean lostZooKeeperConnectionEvent = false;
+ private boolean connectionEstablishmentIsAllowed = false;
private Map<Integer, Integer> masterDataEvent = null;
public DatabaseHandler(DatabaseFactory databaseFactory, Timer timer, String zooKeeperAddress, int ourIndex, Object monitor) throws InterruptedException
@@ -242,7 +243,7 @@ public class DatabaseHandler {
didWork = true;
}
}
- if (isDatabaseClosedSafe() && zooKeeperIsConfigured()) {
+ if (isDatabaseClosedSafe() && zooKeeperIsConfigured() && connectionEstablishmentIsAllowed) {
long currentTime = timer.getCurrentTimeInMillis();
if (currentTime - lastZooKeeperConnectionAttempt < minimumWaitBetweenFailedConnectionAttempts) {
return false; // Not time to attempt connection yet.
@@ -267,6 +268,10 @@ public class DatabaseHandler {
return didWork;
}
+ public void setConnectionEstablishmentIsAllowed(boolean allowed) {
+ connectionEstablishmentIsAllowed = allowed;
+ }
+
private boolean zooKeeperIsConfigured() {
// This should only ever be null during unit testing.
return zooKeeperAddress != null;
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java
index b3bb458ed74..8649e7cc11a 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java
@@ -59,6 +59,7 @@ public class SlobrokClient implements NodeLookup {
freshMirror = true;
}
+ @Override
public void shutdown() {
if (supervisor != null) {
supervisor.transport().shutdown().join();
@@ -67,6 +68,12 @@ public class SlobrokClient implements NodeLookup {
public Mirror getMirror() { return mirror; }
+ @Override
+ public boolean isReady() {
+ return mirror != null && mirror.ready();
+ }
+
+ @Override
public boolean updateCluster(ContentCluster cluster, NodeAddedOrRemovedListener listener) {
if (mirror == null) return false;
int mirrorVersion = mirror.updates();
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseHandlerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseHandlerTest.java
index 9c0a94309a5..7f664e65dc0 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseHandlerTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseHandlerTest.java
@@ -10,6 +10,7 @@ import org.junit.Test;
import static org.hamcrest.Matchers.equalTo;
import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyInt;
import static org.mockito.ArgumentMatchers.eq;
@@ -65,11 +66,22 @@ public class DatabaseHandlerTest {
}
DatabaseHandler createHandler() throws Exception {
- return new DatabaseHandler(mockDbFactory, mockTimer, databaseAddress, 0, monitor);
+ var handler = new DatabaseHandler(mockDbFactory, mockTimer, databaseAddress, 0, monitor);
+ handler.setConnectionEstablishmentIsAllowed(true);
+ return handler;
}
}
@Test
+ public void can_not_connect_to_database_if_connectivity_is_not_allowed() throws Exception {
+ Fixture f = new Fixture();
+ DatabaseHandler handler = f.createHandler();
+ handler.setConnectionEstablishmentIsAllowed(false);
+ handler.doNextZooKeeperTask(f.createMockContext());
+ assertTrue(handler.isClosed()); // No connectivity allowed yet
+ }
+
+ @Test
public void can_store_latest_cluster_state_bundle() throws Exception {
Fixture f = new Fixture();
DatabaseHandler handler = f.createHandler();
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java
index b322c62967a..d7bca47026f 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java
@@ -154,4 +154,8 @@ public class DummyCommunicator implements Communicator, NodeLookup {
return false;
}
+ @Override
+ public boolean isReady() {
+ return true;
+ }
}