diff options
author | Harald Musum <musum@yahooinc.com> | 2023-06-01 23:38:17 +0200 |
---|---|---|
committer | Harald Musum <musum@yahooinc.com> | 2023-06-01 23:38:17 +0200 |
commit | c6745b5664382049528febda569400930f245475 (patch) | |
tree | c1a51b9ab26126cd253ca44a310559dbc17d01c6 /clustercontroller-core/src/main/java/com | |
parent | b681924943ac81a2e183a097ba5b0735a9ff632d (diff) |
ZooKeeper is always used, simplify
Diffstat (limited to 'clustercontroller-core/src/main/java/com')
4 files changed, 19 insertions, 61 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index 01e9a173d64..d996253627f 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -488,7 +488,6 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta masterElectionHandler.setFleetControllerCount(options.fleetControllerCount()); masterElectionHandler.setMasterZooKeeperCooldownPeriod(options.masterZooKeeperCooldownPeriod()); - masterElectionHandler.setUsingZooKeeper(options.zooKeeperServerAddress() != null && !options.zooKeeperServerAddress().isEmpty()); if (rpcServer != null) { rpcServer.setMasterElectionHandler(masterElectionHandler); @@ -560,7 +559,7 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta if ( ! isRunning()) { return; } - if (masterElectionHandler.isFirstInLine()) { + if (masterElectionHandler.isFirstInLine() || options.fleetControllerCount() == 1) { didWork |= resyncLocallyCachedState(); // Calls to metricUpdate.forWork inside method } else { stepDownAsStateGatherer(); diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java index fa303533355..df95b4c8fe5 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java @@ -26,7 +26,6 @@ public class MasterElectionHandler implements MasterInterface { private Map<Integer, Integer> nextMasterData; private long masterGoneFromZooKeeperTime; // Set to time master fleet controller disappears from zookeeper private long masterZooKeeperCooldownPeriod; // The period in ms that we won't take over unless master come back. - private boolean usingZooKeeper = false; // Unit tests may not use ZooKeeper at all. public MasterElectionHandler(FleetControllerContext context, int index, int totalCount, Object monitor, Timer timer) { this.context = context; @@ -43,25 +42,12 @@ public class MasterElectionHandler implements MasterInterface { public void setFleetControllerCount(int count) { totalCount = count; - if (count == 1 && !usingZooKeeper) { - masterCandidate = 0; - followers = 1; - nextInLineCount = 0; - } } public void setMasterZooKeeperCooldownPeriod(int period) { masterZooKeeperCooldownPeriod = period; } - public void setUsingZooKeeper(boolean usingZK) { - if (!usingZooKeeper && usingZK) { - // Reset any shortcuts taken by non-ZK election logic. - resetElectionProgress(); - } - usingZooKeeper = usingZK; - } - @Override public boolean isMaster() { Integer master = getMaster(); @@ -121,15 +107,13 @@ public class MasterElectionHandler implements MasterInterface { public boolean isFirstInLine() { return (nextInLineCount < 1); } public boolean watchMasterElection(DatabaseHandler database, DatabaseHandler.DatabaseContext dbContext) { - if (totalCount == 1 && !usingZooKeeper) { - return false; // Allow single configured node to become master implicitly if no ZK configured - } if (nextMasterData == null) { if (masterCandidate == null) { context.log(logger, Level.FINEST, () -> "No current master candidate. Waiting for data to do master election."); } return false; // Nothing have happened since last time. } + // Move next data to temporary, such that we don't need to keep lock, and such that we don't retry // if we happen to fail processing the data. Map<Integer, Integer> state; @@ -140,6 +124,7 @@ public class MasterElectionHandler implements MasterInterface { } context.log(logger, Level.INFO, "Got master election state " + toString(state) + "."); if (state.isEmpty()) throw new IllegalStateException("Database has no master data. We should at least have data for ourselves."); + Map.Entry<Integer, Integer> first = state.entrySet().iterator().next(); Integer currentMaster = getMaster(); if (currentMaster != null && first.getKey().intValue() != currentMaster.intValue()) { @@ -238,10 +223,8 @@ public class MasterElectionHandler implements MasterInterface { } public void lostDatabaseConnection() { - if (totalCount > 1 || usingZooKeeper) { - context.log(logger, Level.INFO, "Clearing master data as we lost connection on node " + index); - resetElectionProgress(); - } + context.log(logger, Level.INFO, "Clearing master data as we lost connection on node " + index); + resetElectionProgress(); } private void resetElectionProgress() { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java index efb97a4a69e..ed194776d78 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java @@ -15,6 +15,7 @@ import org.apache.zookeeper.KeeperException; import java.io.PrintWriter; import java.io.StringWriter; import java.util.Map; +import java.util.Objects; import java.util.TreeMap; import java.util.logging.Level; import java.util.logging.Logger; @@ -101,8 +102,7 @@ public class DatabaseHandler { this.timer = timer; pendingStore.masterVote = fleetControllerContext.id().index(); // To begin with we'll vote for ourselves. this.monitor = monitor; - // TODO: Require non-null, not possible now since at least ClusterFeedBlockTest uses null address - this.zooKeeperAddress = zooKeeperAddress; + this.zooKeeperAddress = Objects.requireNonNull(zooKeeperAddress, "zooKeeperAddress cannot be null"); } private boolean isDatabaseClosedSafe() { @@ -161,11 +161,9 @@ public class DatabaseHandler { } public void setZooKeeperAddress(String address, DatabaseContext databaseContext) { - if (address == null && zooKeeperAddress == null) return; - if (address != null && address.equals(zooKeeperAddress)) return; - if (zooKeeperAddress != null) { - fleetControllerContext.log(logger, Level.INFO, "" + (address == null ? "Stopped using ZooKeeper." : "Got new ZooKeeper address to use: " + address)); - } + Objects.requireNonNull(address, "address cannot be null"); + if (address.equals(zooKeeperAddress)) return; + fleetControllerContext.log(logger, Level.INFO, "Got new ZooKeeper address to use: " + address); zooKeeperAddress = address; reset(databaseContext); } @@ -177,8 +175,6 @@ public class DatabaseHandler { reset(databaseContext); } - private boolean usingZooKeeper() { return (zooKeeperAddress != null); } - private void connect(long currentTime) { try { lastZooKeeperConnectionAttempt = currentTime; @@ -245,7 +241,7 @@ public class DatabaseHandler { didWork = true; } } - if (isDatabaseClosedSafe() && zooKeeperIsConfigured()) { + if (isDatabaseClosedSafe()) { long currentTime = timer.getCurrentTimeInMillis(); if (currentTime - lastZooKeeperConnectionAttempt < minimumWaitBetweenFailedConnectionAttempts) { return false; // Not time to attempt connection yet. @@ -270,11 +266,6 @@ public class DatabaseHandler { return didWork; } - private boolean zooKeeperIsConfigured() { - // This should only ever be null during unit testing. - return zooKeeperAddress != null; - } - private void relinquishDatabaseConnectivity(DatabaseContext databaseContext) { // reset() will handle both session clearing and trigger a database loss callback into the CC. reset(databaseContext); @@ -383,9 +374,7 @@ public class DatabaseHandler { } Integer version = currentlyStored.lastSystemStateVersion; if (version == null) { - if (usingZooKeeper()) { - fleetControllerContext.log(logger, Level.WARNING, "Failed to retrieve latest system state version from ZooKeeper. Returning version 0."); - } + fleetControllerContext.log(logger, Level.WARNING, "Failed to retrieve latest system state version from ZooKeeper. Returning version 0."); return 0; // FIXME "fail-oblivious" is not a good error handling mode for such a critical component! } return version; @@ -395,22 +384,13 @@ public class DatabaseHandler { fleetControllerContext.log(logger, Level.FINE, () -> "Scheduling bundle " + clusterStateBundle + " to be saved to ZooKeeper"); pendingStore.clusterStateBundle = clusterStateBundle; doNextZooKeeperTask(databaseContext); - // FIXME this is a nasty hack to get around the fact that a massive amount of unit tests - // set up the system with a null ZooKeeper server address. If we don't fake that we have - // written the state version, the tests will never progress past waiting for state broadcasts. - if (zooKeeperAddress == null) { - logger.warning(() -> "Simulating ZK write of version " + clusterStateBundle.getVersion() + - ". This should not happen in production!"); - lastKnownStateBundleVersionWrittenBySelf = clusterStateBundle.getVersion(); - } } // TODO should we expand this to cover _any_ pending ZK write? public boolean hasPendingClusterStateMetaDataStore() { synchronized (databaseMonitor) { - return ((zooKeeperAddress != null) && - ((pendingStore.clusterStateBundle != null) || - (pendingStore.lastSystemStateVersion != null))); + return ((pendingStore.clusterStateBundle != null) || + (pendingStore.lastSystemStateVersion != null)); } } @@ -458,11 +438,9 @@ public class DatabaseHandler { } Map<Node, NodeState> wantedStates = currentlyStored.wantedStates; if (wantedStates == null) { - if (usingZooKeeper()) { - // We get here if the ZooKeeper client has lost the connection to ZooKeeper. - // TODO: Should instead fail the tick until connected!? - fleetControllerContext.log(logger, Level.FINE, () -> "Failed to retrieve wanted states from ZooKeeper. Assuming UP for all nodes."); - } + // We get here if the ZooKeeper client has lost connection to ZooKeeper. + // TODO: Should instead fail the tick until connected!? + fleetControllerContext.log(logger, Level.FINE, () -> "Failed to retrieve wanted states from ZooKeeper. Assuming UP for all nodes."); wantedStates = new TreeMap<>(); } boolean altered = false; @@ -510,9 +488,7 @@ public class DatabaseHandler { } Map<Node, Long> startTimestamps = currentlyStored.startTimestamps; if (startTimestamps == null) { - if (usingZooKeeper()) { - fleetControllerContext.log(logger, Level.WARNING, "Failed to retrieve start timestamps from ZooKeeper. Cluster state will be bloated with timestamps until we get them set."); - } + fleetControllerContext.log(logger, Level.WARNING, "Failed to retrieve start timestamps from ZooKeeper. Cluster state will be bloated with timestamps until we get them set."); startTimestamps = new TreeMap<>(); } for (Map.Entry<Node, Long> e : startTimestamps.entrySet()) { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java index 5aae401e157..7a9bea91b9c 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java @@ -228,7 +228,7 @@ public class LegacyIndexPageRequestHandler implements StatusPageServer.RequestHa sb.append("<tr><td><nobr>RPC port</nobr></td><td align=\"right\">").append(options.rpcPort() == 0 ? "Pick random available" : options.rpcPort()).append("</td></tr>"); sb.append("<tr><td><nobr>HTTP port</nobr></td><td align=\"right\">").append(options.httpPort() == 0 ? "Pick random available" : options.httpPort()).append("</td></tr>"); sb.append("<tr><td><nobr>Master cooldown period</nobr></td><td align=\"right\">").append(RealTimer.printDuration(options.masterZooKeeperCooldownPeriod())).append("</td></tr>"); - String zooKeeperAddress = (options.zooKeeperServerAddress() == null ? "Not using Zookeeper" : splitZooKeeperAddress(options.zooKeeperServerAddress())); + String zooKeeperAddress = splitZooKeeperAddress(options.zooKeeperServerAddress()); sb.append("<tr><td><nobr>Zookeeper server address</nobr></td><td align=\"right\">").append(zooKeeperAddress).append("</td></tr>"); sb.append("<tr><td><nobr>Zookeeper session timeout</nobr></td><td align=\"right\">").append(RealTimer.printDuration(options.zooKeeperSessionTimeout())).append("</td></tr>"); |