summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHarald Musum <musum@yahooinc.com>2023-06-01 23:38:17 +0200
committerHarald Musum <musum@yahooinc.com>2023-06-01 23:38:17 +0200
commitc6745b5664382049528febda569400930f245475 (patch)
treec1a51b9ab26126cd253ca44a310559dbc17d01c6
parentb681924943ac81a2e183a097ba5b0735a9ff632d (diff)
ZooKeeper is always used, simplify
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java3
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java25
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java50
-rw-r--r--clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java2
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java16
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java2
-rw-r--r--clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java26
7 files changed, 28 insertions, 96 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
index 01e9a173d64..d996253627f 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
@@ -488,7 +488,6 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta
masterElectionHandler.setFleetControllerCount(options.fleetControllerCount());
masterElectionHandler.setMasterZooKeeperCooldownPeriod(options.masterZooKeeperCooldownPeriod());
- masterElectionHandler.setUsingZooKeeper(options.zooKeeperServerAddress() != null && !options.zooKeeperServerAddress().isEmpty());
if (rpcServer != null) {
rpcServer.setMasterElectionHandler(masterElectionHandler);
@@ -560,7 +559,7 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta
if ( ! isRunning()) { return; }
- if (masterElectionHandler.isFirstInLine()) {
+ if (masterElectionHandler.isFirstInLine() || options.fleetControllerCount() == 1) {
didWork |= resyncLocallyCachedState(); // Calls to metricUpdate.forWork inside method
} else {
stepDownAsStateGatherer();
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java
index fa303533355..df95b4c8fe5 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java
@@ -26,7 +26,6 @@ public class MasterElectionHandler implements MasterInterface {
private Map<Integer, Integer> nextMasterData;
private long masterGoneFromZooKeeperTime; // Set to time master fleet controller disappears from zookeeper
private long masterZooKeeperCooldownPeriod; // The period in ms that we won't take over unless master come back.
- private boolean usingZooKeeper = false; // Unit tests may not use ZooKeeper at all.
public MasterElectionHandler(FleetControllerContext context, int index, int totalCount, Object monitor, Timer timer) {
this.context = context;
@@ -43,25 +42,12 @@ public class MasterElectionHandler implements MasterInterface {
public void setFleetControllerCount(int count) {
totalCount = count;
- if (count == 1 && !usingZooKeeper) {
- masterCandidate = 0;
- followers = 1;
- nextInLineCount = 0;
- }
}
public void setMasterZooKeeperCooldownPeriod(int period) {
masterZooKeeperCooldownPeriod = period;
}
- public void setUsingZooKeeper(boolean usingZK) {
- if (!usingZooKeeper && usingZK) {
- // Reset any shortcuts taken by non-ZK election logic.
- resetElectionProgress();
- }
- usingZooKeeper = usingZK;
- }
-
@Override
public boolean isMaster() {
Integer master = getMaster();
@@ -121,15 +107,13 @@ public class MasterElectionHandler implements MasterInterface {
public boolean isFirstInLine() { return (nextInLineCount < 1); }
public boolean watchMasterElection(DatabaseHandler database, DatabaseHandler.DatabaseContext dbContext) {
- if (totalCount == 1 && !usingZooKeeper) {
- return false; // Allow single configured node to become master implicitly if no ZK configured
- }
if (nextMasterData == null) {
if (masterCandidate == null) {
context.log(logger, Level.FINEST, () -> "No current master candidate. Waiting for data to do master election.");
}
return false; // Nothing have happened since last time.
}
+
// Move next data to temporary, such that we don't need to keep lock, and such that we don't retry
// if we happen to fail processing the data.
Map<Integer, Integer> state;
@@ -140,6 +124,7 @@ public class MasterElectionHandler implements MasterInterface {
}
context.log(logger, Level.INFO, "Got master election state " + toString(state) + ".");
if (state.isEmpty()) throw new IllegalStateException("Database has no master data. We should at least have data for ourselves.");
+
Map.Entry<Integer, Integer> first = state.entrySet().iterator().next();
Integer currentMaster = getMaster();
if (currentMaster != null && first.getKey().intValue() != currentMaster.intValue()) {
@@ -238,10 +223,8 @@ public class MasterElectionHandler implements MasterInterface {
}
public void lostDatabaseConnection() {
- if (totalCount > 1 || usingZooKeeper) {
- context.log(logger, Level.INFO, "Clearing master data as we lost connection on node " + index);
- resetElectionProgress();
- }
+ context.log(logger, Level.INFO, "Clearing master data as we lost connection on node " + index);
+ resetElectionProgress();
}
private void resetElectionProgress() {
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java
index efb97a4a69e..ed194776d78 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java
@@ -15,6 +15,7 @@ import org.apache.zookeeper.KeeperException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.Map;
+import java.util.Objects;
import java.util.TreeMap;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -101,8 +102,7 @@ public class DatabaseHandler {
this.timer = timer;
pendingStore.masterVote = fleetControllerContext.id().index(); // To begin with we'll vote for ourselves.
this.monitor = monitor;
- // TODO: Require non-null, not possible now since at least ClusterFeedBlockTest uses null address
- this.zooKeeperAddress = zooKeeperAddress;
+ this.zooKeeperAddress = Objects.requireNonNull(zooKeeperAddress, "zooKeeperAddress cannot be null");
}
private boolean isDatabaseClosedSafe() {
@@ -161,11 +161,9 @@ public class DatabaseHandler {
}
public void setZooKeeperAddress(String address, DatabaseContext databaseContext) {
- if (address == null && zooKeeperAddress == null) return;
- if (address != null && address.equals(zooKeeperAddress)) return;
- if (zooKeeperAddress != null) {
- fleetControllerContext.log(logger, Level.INFO, "" + (address == null ? "Stopped using ZooKeeper." : "Got new ZooKeeper address to use: " + address));
- }
+ Objects.requireNonNull(address, "address cannot be null");
+ if (address.equals(zooKeeperAddress)) return;
+ fleetControllerContext.log(logger, Level.INFO, "Got new ZooKeeper address to use: " + address);
zooKeeperAddress = address;
reset(databaseContext);
}
@@ -177,8 +175,6 @@ public class DatabaseHandler {
reset(databaseContext);
}
- private boolean usingZooKeeper() { return (zooKeeperAddress != null); }
-
private void connect(long currentTime) {
try {
lastZooKeeperConnectionAttempt = currentTime;
@@ -245,7 +241,7 @@ public class DatabaseHandler {
didWork = true;
}
}
- if (isDatabaseClosedSafe() && zooKeeperIsConfigured()) {
+ if (isDatabaseClosedSafe()) {
long currentTime = timer.getCurrentTimeInMillis();
if (currentTime - lastZooKeeperConnectionAttempt < minimumWaitBetweenFailedConnectionAttempts) {
return false; // Not time to attempt connection yet.
@@ -270,11 +266,6 @@ public class DatabaseHandler {
return didWork;
}
- private boolean zooKeeperIsConfigured() {
- // This should only ever be null during unit testing.
- return zooKeeperAddress != null;
- }
-
private void relinquishDatabaseConnectivity(DatabaseContext databaseContext) {
// reset() will handle both session clearing and trigger a database loss callback into the CC.
reset(databaseContext);
@@ -383,9 +374,7 @@ public class DatabaseHandler {
}
Integer version = currentlyStored.lastSystemStateVersion;
if (version == null) {
- if (usingZooKeeper()) {
- fleetControllerContext.log(logger, Level.WARNING, "Failed to retrieve latest system state version from ZooKeeper. Returning version 0.");
- }
+ fleetControllerContext.log(logger, Level.WARNING, "Failed to retrieve latest system state version from ZooKeeper. Returning version 0.");
return 0; // FIXME "fail-oblivious" is not a good error handling mode for such a critical component!
}
return version;
@@ -395,22 +384,13 @@ public class DatabaseHandler {
fleetControllerContext.log(logger, Level.FINE, () -> "Scheduling bundle " + clusterStateBundle + " to be saved to ZooKeeper");
pendingStore.clusterStateBundle = clusterStateBundle;
doNextZooKeeperTask(databaseContext);
- // FIXME this is a nasty hack to get around the fact that a massive amount of unit tests
- // set up the system with a null ZooKeeper server address. If we don't fake that we have
- // written the state version, the tests will never progress past waiting for state broadcasts.
- if (zooKeeperAddress == null) {
- logger.warning(() -> "Simulating ZK write of version " + clusterStateBundle.getVersion() +
- ". This should not happen in production!");
- lastKnownStateBundleVersionWrittenBySelf = clusterStateBundle.getVersion();
- }
}
// TODO should we expand this to cover _any_ pending ZK write?
public boolean hasPendingClusterStateMetaDataStore() {
synchronized (databaseMonitor) {
- return ((zooKeeperAddress != null) &&
- ((pendingStore.clusterStateBundle != null) ||
- (pendingStore.lastSystemStateVersion != null)));
+ return ((pendingStore.clusterStateBundle != null) ||
+ (pendingStore.lastSystemStateVersion != null));
}
}
@@ -458,11 +438,9 @@ public class DatabaseHandler {
}
Map<Node, NodeState> wantedStates = currentlyStored.wantedStates;
if (wantedStates == null) {
- if (usingZooKeeper()) {
- // We get here if the ZooKeeper client has lost the connection to ZooKeeper.
- // TODO: Should instead fail the tick until connected!?
- fleetControllerContext.log(logger, Level.FINE, () -> "Failed to retrieve wanted states from ZooKeeper. Assuming UP for all nodes.");
- }
+ // We get here if the ZooKeeper client has lost connection to ZooKeeper.
+ // TODO: Should instead fail the tick until connected!?
+ fleetControllerContext.log(logger, Level.FINE, () -> "Failed to retrieve wanted states from ZooKeeper. Assuming UP for all nodes.");
wantedStates = new TreeMap<>();
}
boolean altered = false;
@@ -510,9 +488,7 @@ public class DatabaseHandler {
}
Map<Node, Long> startTimestamps = currentlyStored.startTimestamps;
if (startTimestamps == null) {
- if (usingZooKeeper()) {
- fleetControllerContext.log(logger, Level.WARNING, "Failed to retrieve start timestamps from ZooKeeper. Cluster state will be bloated with timestamps until we get them set.");
- }
+ fleetControllerContext.log(logger, Level.WARNING, "Failed to retrieve start timestamps from ZooKeeper. Cluster state will be bloated with timestamps until we get them set.");
startTimestamps = new TreeMap<>();
}
for (Map.Entry<Node, Long> e : startTimestamps.entrySet()) {
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java
index 5aae401e157..7a9bea91b9c 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java
@@ -228,7 +228,7 @@ public class LegacyIndexPageRequestHandler implements StatusPageServer.RequestHa
sb.append("<tr><td><nobr>RPC port</nobr></td><td align=\"right\">").append(options.rpcPort() == 0 ? "Pick random available" : options.rpcPort()).append("</td></tr>");
sb.append("<tr><td><nobr>HTTP port</nobr></td><td align=\"right\">").append(options.httpPort() == 0 ? "Pick random available" : options.httpPort()).append("</td></tr>");
sb.append("<tr><td><nobr>Master cooldown period</nobr></td><td align=\"right\">").append(RealTimer.printDuration(options.masterZooKeeperCooldownPeriod())).append("</td></tr>");
- String zooKeeperAddress = (options.zooKeeperServerAddress() == null ? "Not using Zookeeper" : splitZooKeeperAddress(options.zooKeeperServerAddress()));
+ String zooKeeperAddress = splitZooKeeperAddress(options.zooKeeperServerAddress());
sb.append("<tr><td><nobr>Zookeeper server address</nobr></td><td align=\"right\">").append(zooKeeperAddress).append("</td></tr>");
sb.append("<tr><td><nobr>Zookeeper session timeout</nobr></td><td align=\"right\">").append(RealTimer.printDuration(options.zooKeeperSessionTimeout())).append("</td></tr>");
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java
index d4eea261767..55e256cf89c 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java
@@ -32,15 +32,17 @@ public class ClusterFeedBlockTest extends FleetControllerTest {
private FleetController ctrl;
private DummyCommunicator communicator;
- private void initialize(FleetControllerOptions options) throws Exception {
+ private void initialize(FleetControllerOptions.Builder builder) throws Exception {
List<Node> nodes = new ArrayList<>();
- for (int i = 0; i < options.nodes().size(); ++i) {
+ for (int i = 0; i < builder.nodes().size(); ++i) {
nodes.add(new Node(NodeType.STORAGE, i));
nodes.add(new Node(NodeType.DISTRIBUTOR, i));
}
- var context = new TestFleetControllerContext(options);
communicator = new DummyCommunicator(nodes, timer);
+ setUpZooKeeperServer(builder);
+ options = builder.build();
+ var context = new TestFleetControllerContext(options);
boolean start = false;
ctrl = createFleetController(timer, options, context, communicator, communicator, null, start);
@@ -57,16 +59,16 @@ public class ClusterFeedBlockTest extends FleetControllerTest {
ctrl.tick();
}
- private static FleetControllerOptions createOptions(Map<String, Double> feedBlockLimits, double clusterFeedBlockNoiseLevel) {
+ private static FleetControllerOptions.Builder createOptions(Map<String, Double> feedBlockLimits, double clusterFeedBlockNoiseLevel) {
return defaultOptions()
.setStorageDistribution(DistributionBuilder.forFlatCluster(NODE_COUNT))
.setNodes(new HashSet<>(DistributionBuilder.buildConfiguredNodes(NODE_COUNT)))
.setClusterFeedBlockEnabled(true)
.setClusterFeedBlockLimit(feedBlockLimits)
- .setClusterFeedBlockNoiseLevel(clusterFeedBlockNoiseLevel).build();
+ .setClusterFeedBlockNoiseLevel(clusterFeedBlockNoiseLevel);
}
- private static FleetControllerOptions createOptions(Map<String, Double> feedBlockLimits) {
+ private static FleetControllerOptions.Builder createOptions(Map<String, Double> feedBlockLimits) {
return createOptions(feedBlockLimits, 0.0);
}
@@ -109,7 +111,7 @@ public class ClusterFeedBlockTest extends FleetControllerTest {
assertTrue(ctrl.getClusterStateBundle().clusterFeedIsBlocked());
// Increase cheese allowance. Should now automatically unblock since reported usage is lower.
- ctrl.updateOptions(createOptions(mapOf(usage("cheese", 0.9), usage("wine", 0.4))));
+ ctrl.updateOptions(createOptions(mapOf(usage("cheese", 0.9), usage("wine", 0.4))).build());
ctrl.tick(); // Options propagation
ctrl.tick(); // State recomputation
assertFalse(ctrl.getClusterStateBundle().clusterFeedIsBlocked());
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java
index 77c89d77ba5..93a96be71a0 100644
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java
+++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java
@@ -6,7 +6,6 @@ import com.yahoo.jrt.Spec;
import com.yahoo.jrt.Supervisor;
import com.yahoo.jrt.Target;
import com.yahoo.jrt.Transport;
-import com.yahoo.jrt.slobrok.server.Slobrok;
import com.yahoo.vdslib.state.ClusterState;
import com.yahoo.vdslib.state.NodeState;
import com.yahoo.vdslib.state.NodeType;
@@ -53,7 +52,6 @@ public class MasterElectionTest extends FleetControllerTest {
if (zooKeeperServer == null) {
zooKeeperServer = new ZooKeeperTestServer();
}
- slobrok = new Slobrok();
builder.setZooKeeperSessionTimeout(defaultZkSessionTimeoutInMillis())
.setZooKeeperServerAddress(zooKeeperServer.getAddress())
.setSlobrokConnectionSpecs(getSlobrokConnectionSpecs(slobrok))
diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java
deleted file mode 100644
index 3d3a38aacd4..00000000000
--- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.vespa.clustercontroller.core;
-
-import org.junit.jupiter.api.Test;
-
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-public class NoZooKeeperTest extends FleetControllerTest {
-
- @Test
- void testWantedStatesInZooKeeper() throws Exception {
- // Null is the default for zooKeeperServerAddress
- FleetControllerOptions.Builder builder = defaultOptions();
- Timer timer = new FakeTimer();
- setUpFleetController(timer, builder);
- setUpVdsNodes(timer);
- waitForStableSystem();
-
- assertTrue(nodes.get(0).isDistributor());
- nodes.get(0).disconnect();
- waitForState("version:\\d+ distributor:10 .0.s:d storage:10");
-
- nodes.get(0).connect();
- waitForState("version:\\d+ distributor:10 storage:10");
- }
-}