diff options
author | Tor Brede Vekterli <vekterli@verizonmedia.com> | 2021-03-18 13:31:50 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-18 13:31:50 +0100 |
commit | 85ad07ca7b5a1625048155ee3d55dffcd4da38c5 (patch) | |
tree | 4a38a87ee532c4a091d2834e9581943a65242d48 /clustercontroller-core | |
parent | 97cab75cbccc5bacbf0ad1fcb4795180801277cd (diff) | |
parent | 5b135fb388bd6e5c98b10383cc79692d370646da (diff) |
Merge pull request #17029 from vespa-engine/vekterli/inhibit-db-connectivity-until-slobrok-is-ready
Inhibit ZooKeeper connections until our local Slobrok mirror is ready.
Diffstat (limited to 'clustercontroller-core')
6 files changed, 41 insertions, 2 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index 5d5ffb917d2..898ebe243b0 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -597,6 +597,10 @@ public class FleetController implements NodeStateOrHostInfoChangeHandler, NodeAd public void tick() throws Exception { synchronized (monitor) { boolean didWork; + // We defer ZooKeeper connections until our local Slobrok mirror is ready. + // Otherwise we risk winning an election without having any clue about the state of + // the nodes in the cluster, causing us to spuriously publish cluster down-states. + database.setConnectionEstablishmentIsAllowed(nodeLookup.isReady()); didWork = database.doNextZooKeeperTask(databaseContext); didWork |= updateMasterElectionState(); didWork |= handleLeadershipEdgeTransitions(); diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeLookup.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeLookup.java index ceb81e91b7d..65b97a3ae82 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeLookup.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/NodeLookup.java @@ -12,4 +12,11 @@ public interface NodeLookup { boolean updateCluster(ContentCluster cluster, NodeAddedOrRemovedListener listener); + /** + * Returns whether the lookup instance has been able to bootstrap itself with information about nodes. + * + * Calling updateCluster() _before_ isReady has returned true may not provide any useful data. + */ + boolean isReady(); + } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java index 4f02e31b426..654a2aaae7f 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java @@ -91,6 +91,7 @@ public class DatabaseHandler { private long lastZooKeeperConnectionAttempt = 0; private static final int minimumWaitBetweenFailedConnectionAttempts = 10000; private boolean lostZooKeeperConnectionEvent = false; + private boolean connectionEstablishmentIsAllowed = false; private Map<Integer, Integer> masterDataEvent = null; public DatabaseHandler(DatabaseFactory databaseFactory, Timer timer, String zooKeeperAddress, int ourIndex, Object monitor) throws InterruptedException @@ -242,7 +243,7 @@ public class DatabaseHandler { didWork = true; } } - if (isDatabaseClosedSafe() && zooKeeperIsConfigured()) { + if (isDatabaseClosedSafe() && zooKeeperIsConfigured() && connectionEstablishmentIsAllowed) { long currentTime = timer.getCurrentTimeInMillis(); if (currentTime - lastZooKeeperConnectionAttempt < minimumWaitBetweenFailedConnectionAttempts) { return false; // Not time to attempt connection yet. @@ -267,6 +268,10 @@ public class DatabaseHandler { return didWork; } + public void setConnectionEstablishmentIsAllowed(boolean allowed) { + connectionEstablishmentIsAllowed = allowed; + } + private boolean zooKeeperIsConfigured() { // This should only ever be null during unit testing. return zooKeeperAddress != null; diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java index b3bb458ed74..8649e7cc11a 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/rpc/SlobrokClient.java @@ -59,6 +59,7 @@ public class SlobrokClient implements NodeLookup { freshMirror = true; } + @Override public void shutdown() { if (supervisor != null) { supervisor.transport().shutdown().join(); @@ -67,6 +68,12 @@ public class SlobrokClient implements NodeLookup { public Mirror getMirror() { return mirror; } + @Override + public boolean isReady() { + return mirror != null && mirror.ready(); + } + + @Override public boolean updateCluster(ContentCluster cluster, NodeAddedOrRemovedListener listener) { if (mirror == null) return false; int mirrorVersion = mirror.updates(); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseHandlerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseHandlerTest.java index 9c0a94309a5..7f664e65dc0 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseHandlerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseHandlerTest.java @@ -10,6 +10,7 @@ import org.junit.Test; import static org.hamcrest.Matchers.equalTo; import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.eq; @@ -65,11 +66,22 @@ public class DatabaseHandlerTest { } DatabaseHandler createHandler() throws Exception { - return new DatabaseHandler(mockDbFactory, mockTimer, databaseAddress, 0, monitor); + var handler = new DatabaseHandler(mockDbFactory, mockTimer, databaseAddress, 0, monitor); + handler.setConnectionEstablishmentIsAllowed(true); + return handler; } } @Test + public void can_not_connect_to_database_if_connectivity_is_not_allowed() throws Exception { + Fixture f = new Fixture(); + DatabaseHandler handler = f.createHandler(); + handler.setConnectionEstablishmentIsAllowed(false); + handler.doNextZooKeeperTask(f.createMockContext()); + assertTrue(handler.isClosed()); // No connectivity allowed yet + } + + @Test public void can_store_latest_cluster_state_bundle() throws Exception { Fixture f = new Fixture(); DatabaseHandler handler = f.createHandler(); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java index b322c62967a..d7bca47026f 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyCommunicator.java @@ -154,4 +154,8 @@ public class DummyCommunicator implements Communicator, NodeLookup { return false; } + @Override + public boolean isReady() { + return true; + } } |