about | summary | refs | log | tree | commit | diff | stats
path: root/clustercontroller-core/src/main/java/com
diff options
context:
space:
mode:
author Harald Musum <musum@yahooinc.com> 2023-06-01 23:38:17 +0200
committer Harald Musum <musum@yahooinc.com> 2023-06-01 23:38:17 +0200
commit c6745b5664382049528febda569400930f245475 (patch)
tree c1a51b9ab26126cd253ca44a310559dbc17d01c6 /clustercontroller-core/src/main/java/com
parent b681924943ac81a2e183a097ba5b0735a9ff632d (diff)
ZooKeeper is always used, simplify
Diffstat (limited to 'clustercontroller-core/src/main/java/com')
-rw-r--r-- clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java 3
-rw-r--r-- clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java 25
-rw-r--r-- clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java 50
-rw-r--r-- clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java 2
4 files changed, 19 insertions, 61 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
index 01e9a173d64..d996253627f 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java
@@ -488,7 +488,6 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta
masterElectionHandler.setFleetControllerCount(options.fleetControllerCount());
masterElectionHandler.setMasterZooKeeperCooldownPeriod(options.masterZooKeeperCooldownPeriod());
- masterElectionHandler.setUsingZooKeeper(options.zooKeeperServerAddress() != null && !options.zooKeeperServerAddress().isEmpty());
if (rpcServer != null) {
rpcServer.setMasterElectionHandler(masterElectionHandler);
@@ -560,7 +559,7 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta
if ( ! isRunning()) { return; }
- if (masterElectionHandler.isFirstInLine()) {
+ if (masterElectionHandler.isFirstInLine() || options.fleetControllerCount() == 1) {
didWork |= resyncLocallyCachedState(); // Calls to metricUpdate.forWork inside method
} else {
stepDownAsStateGatherer();
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java
index fa303533355..df95b4c8fe5 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java
@@ -26,7 +26,6 @@ public class MasterElectionHandler implements MasterInterface {
private Map<Integer, Integer> nextMasterData;
private long masterGoneFromZooKeeperTime; // Set to time master fleet controller disappears from zookeeper
private long masterZooKeeperCooldownPeriod; // The period in ms that we won't take over unless master come back.
- private boolean usingZooKeeper = false; // Unit tests may not use ZooKeeper at all.
public MasterElectionHandler(FleetControllerContext context, int index, int totalCount, Object monitor, Timer timer) {
this.context = context;
@@ -43,25 +42,12 @@ public class MasterElectionHandler implements MasterInterface {
public void setFleetControllerCount(int count) {
totalCount = count;
- if (count == 1 && !usingZooKeeper) {
- masterCandidate = 0;
- followers = 1;
- nextInLineCount = 0;
- }
}
public void setMasterZooKeeperCooldownPeriod(int period) {
masterZooKeeperCooldownPeriod = period;
}
- public void setUsingZooKeeper(boolean usingZK) {
- if (!usingZooKeeper && usingZK) {
- // Reset any shortcuts taken by non-ZK election logic.
- resetElectionProgress();
- }
- usingZooKeeper = usingZK;
- }
-
@Override
public boolean isMaster() {
Integer master = getMaster();
@@ -121,15 +107,13 @@ public class MasterElectionHandler implements MasterInterface {
public boolean isFirstInLine() { return (nextInLineCount < 1); }
public boolean watchMasterElection(DatabaseHandler database, DatabaseHandler.DatabaseContext dbContext) {
- if (totalCount == 1 && !usingZooKeeper) {
- return false; // Allow single configured node to become master implicitly if no ZK configured
- }
if (nextMasterData == null) {
if (masterCandidate == null) {
context.log(logger, Level.FINEST, () -> "No current master candidate. Waiting for data to do master election.");
}
return false; // Nothing have happened since last time.
}
+
// Move next data to temporary, such that we don't need to keep lock, and such that we don't retry
// if we happen to fail processing the data.
Map<Integer, Integer> state;
@@ -140,6 +124,7 @@ public class MasterElectionHandler implements MasterInterface {
}
context.log(logger, Level.INFO, "Got master election state " + toString(state) + ".");
if (state.isEmpty()) throw new IllegalStateException("Database has no master data. We should at least have data for ourselves.");
+
Map.Entry<Integer, Integer> first = state.entrySet().iterator().next();
Integer currentMaster = getMaster();
if (currentMaster != null && first.getKey().intValue() != currentMaster.intValue()) {
@@ -238,10 +223,8 @@ public class MasterElectionHandler implements MasterInterface {
}
public void lostDatabaseConnection() {
- if (totalCount > 1 || usingZooKeeper) {
- context.log(logger, Level.INFO, "Clearing master data as we lost connection on node " + index);
- resetElectionProgress();
- }
+ context.log(logger, Level.INFO, "Clearing master data as we lost connection on node " + index);
+ resetElectionProgress();
}
private void resetElectionProgress() {
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java
index efb97a4a69e..ed194776d78 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java
@@ -15,6 +15,7 @@ import org.apache.zookeeper.KeeperException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.Map;
+import java.util.Objects;
import java.util.TreeMap;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -101,8 +102,7 @@ public class DatabaseHandler {
this.timer = timer;
pendingStore.masterVote = fleetControllerContext.id().index(); // To begin with we'll vote for ourselves.
this.monitor = monitor;
- // TODO: Require non-null, not possible now since at least ClusterFeedBlockTest uses null address
- this.zooKeeperAddress = zooKeeperAddress;
+ this.zooKeeperAddress = Objects.requireNonNull(zooKeeperAddress, "zooKeeperAddress cannot be null");
}
private boolean isDatabaseClosedSafe() {
@@ -161,11 +161,9 @@ public class DatabaseHandler {
}
public void setZooKeeperAddress(String address, DatabaseContext databaseContext) {
- if (address == null && zooKeeperAddress == null) return;
- if (address != null && address.equals(zooKeeperAddress)) return;
- if (zooKeeperAddress != null) {
- fleetControllerContext.log(logger, Level.INFO, "" + (address == null ? "Stopped using ZooKeeper." : "Got new ZooKeeper address to use: " + address));
- }
+ Objects.requireNonNull(address, "address cannot be null");
+ if (address.equals(zooKeeperAddress)) return;
+ fleetControllerContext.log(logger, Level.INFO, "Got new ZooKeeper address to use: " + address);
zooKeeperAddress = address;
reset(databaseContext);
}
@@ -177,8 +175,6 @@ public class DatabaseHandler {
reset(databaseContext);
}
- private boolean usingZooKeeper() { return (zooKeeperAddress != null); }
-
private void connect(long currentTime) {
try {
lastZooKeeperConnectionAttempt = currentTime;
@@ -245,7 +241,7 @@ public class DatabaseHandler {
didWork = true;
}
}
- if (isDatabaseClosedSafe() && zooKeeperIsConfigured()) {
+ if (isDatabaseClosedSafe()) {
long currentTime = timer.getCurrentTimeInMillis();
if (currentTime - lastZooKeeperConnectionAttempt < minimumWaitBetweenFailedConnectionAttempts) {
return false; // Not time to attempt connection yet.
@@ -270,11 +266,6 @@ public class DatabaseHandler {
return didWork;
}
- private boolean zooKeeperIsConfigured() {
- // This should only ever be null during unit testing.
- return zooKeeperAddress != null;
- }
-
private void relinquishDatabaseConnectivity(DatabaseContext databaseContext) {
// reset() will handle both session clearing and trigger a database loss callback into the CC.
reset(databaseContext);
@@ -383,9 +374,7 @@ public class DatabaseHandler {
}
Integer version = currentlyStored.lastSystemStateVersion;
if (version == null) {
- if (usingZooKeeper()) {
- fleetControllerContext.log(logger, Level.WARNING, "Failed to retrieve latest system state version from ZooKeeper. Returning version 0.");
- }
+ fleetControllerContext.log(logger, Level.WARNING, "Failed to retrieve latest system state version from ZooKeeper. Returning version 0.");
return 0; // FIXME "fail-oblivious" is not a good error handling mode for such a critical component!
}
return version;
@@ -395,22 +384,13 @@ public class DatabaseHandler {
fleetControllerContext.log(logger, Level.FINE, () -> "Scheduling bundle " + clusterStateBundle + " to be saved to ZooKeeper");
pendingStore.clusterStateBundle = clusterStateBundle;
doNextZooKeeperTask(databaseContext);
- // FIXME this is a nasty hack to get around the fact that a massive amount of unit tests
- // set up the system with a null ZooKeeper server address. If we don't fake that we have
- // written the state version, the tests will never progress past waiting for state broadcasts.
- if (zooKeeperAddress == null) {
- logger.warning(() -> "Simulating ZK write of version " + clusterStateBundle.getVersion() +
- ". This should not happen in production!");
- lastKnownStateBundleVersionWrittenBySelf = clusterStateBundle.getVersion();
- }
}
// TODO should we expand this to cover _any_ pending ZK write?
public boolean hasPendingClusterStateMetaDataStore() {
synchronized (databaseMonitor) {
- return ((zooKeeperAddress != null) &&
- ((pendingStore.clusterStateBundle != null) ||
- (pendingStore.lastSystemStateVersion != null)));
+ return ((pendingStore.clusterStateBundle != null) ||
+ (pendingStore.lastSystemStateVersion != null));
}
}
@@ -458,11 +438,9 @@ public class DatabaseHandler {
}
Map<Node, NodeState> wantedStates = currentlyStored.wantedStates;
if (wantedStates == null) {
- if (usingZooKeeper()) {
- // We get here if the ZooKeeper client has lost the connection to ZooKeeper.
- // TODO: Should instead fail the tick until connected!?
- fleetControllerContext.log(logger, Level.FINE, () -> "Failed to retrieve wanted states from ZooKeeper. Assuming UP for all nodes.");
- }
+ // We get here if the ZooKeeper client has lost connection to ZooKeeper.
+ // TODO: Should instead fail the tick until connected!?
+ fleetControllerContext.log(logger, Level.FINE, () -> "Failed to retrieve wanted states from ZooKeeper. Assuming UP for all nodes.");
wantedStates = new TreeMap<>();
}
boolean altered = false;
@@ -510,9 +488,7 @@ public class DatabaseHandler {
}
Map<Node, Long> startTimestamps = currentlyStored.startTimestamps;
if (startTimestamps == null) {
- if (usingZooKeeper()) {
- fleetControllerContext.log(logger, Level.WARNING, "Failed to retrieve start timestamps from ZooKeeper. Cluster state will be bloated with timestamps until we get them set.");
- }
+ fleetControllerContext.log(logger, Level.WARNING, "Failed to retrieve start timestamps from ZooKeeper. Cluster state will be bloated with timestamps until we get them set.");
startTimestamps = new TreeMap<>();
}
for (Map.Entry<Node, Long> e : startTimestamps.entrySet()) {
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java
index 5aae401e157..7a9bea91b9c 100644
--- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java
+++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java
@@ -228,7 +228,7 @@ public class LegacyIndexPageRequestHandler implements StatusPageServer.RequestHa
sb.append("<tr><td><nobr>RPC port</nobr></td><td align=\"right\">").append(options.rpcPort() == 0 ? "Pick random available" : options.rpcPort()).append("</td></tr>");
sb.append("<tr><td><nobr>HTTP port</nobr></td><td align=\"right\">").append(options.httpPort() == 0 ? "Pick random available" : options.httpPort()).append("</td></tr>");
sb.append("<tr><td><nobr>Master cooldown period</nobr></td><td align=\"right\">").append(RealTimer.printDuration(options.masterZooKeeperCooldownPeriod())).append("</td></tr>");
- String zooKeeperAddress = (options.zooKeeperServerAddress() == null ? "Not using Zookeeper" : splitZooKeeperAddress(options.zooKeeperServerAddress()));
+ String zooKeeperAddress = splitZooKeeperAddress(options.zooKeeperServerAddress());
sb.append("<tr><td><nobr>Zookeeper server address</nobr></td><td align=\"right\">").append(zooKeeperAddress).append("</td></tr>");
sb.append("<tr><td><nobr>Zookeeper session timeout</nobr></td><td align=\"right\">").append(RealTimer.printDuration(options.zooKeeperSessionTimeout())).append("</td></tr>");