diff options
Diffstat (limited to 'clustercontroller-core/src')
7 files changed, 28 insertions, 96 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index 01e9a173d64..d996253627f 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -488,7 +488,6 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta masterElectionHandler.setFleetControllerCount(options.fleetControllerCount()); masterElectionHandler.setMasterZooKeeperCooldownPeriod(options.masterZooKeeperCooldownPeriod()); - masterElectionHandler.setUsingZooKeeper(options.zooKeeperServerAddress() != null && !options.zooKeeperServerAddress().isEmpty()); if (rpcServer != null) { rpcServer.setMasterElectionHandler(masterElectionHandler); @@ -560,7 +559,7 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta if ( ! isRunning()) { return; } - if (masterElectionHandler.isFirstInLine()) { + if (masterElectionHandler.isFirstInLine() || options.fleetControllerCount() == 1) { didWork |= resyncLocallyCachedState(); // Calls to metricUpdate.forWork inside method } else { stepDownAsStateGatherer(); diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java index fa303533355..df95b4c8fe5 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/MasterElectionHandler.java @@ -26,7 +26,6 @@ public class MasterElectionHandler implements MasterInterface { private Map<Integer, Integer> nextMasterData; private long masterGoneFromZooKeeperTime; // Set to time master fleet controller disappears from zookeeper private long masterZooKeeperCooldownPeriod; // The period in ms that we won't take over unless master come back. - private boolean usingZooKeeper = false; // Unit tests may not use ZooKeeper at all. public MasterElectionHandler(FleetControllerContext context, int index, int totalCount, Object monitor, Timer timer) { this.context = context; @@ -43,25 +42,12 @@ public class MasterElectionHandler implements MasterInterface { public void setFleetControllerCount(int count) { totalCount = count; - if (count == 1 && !usingZooKeeper) { - masterCandidate = 0; - followers = 1; - nextInLineCount = 0; - } } public void setMasterZooKeeperCooldownPeriod(int period) { masterZooKeeperCooldownPeriod = period; } - public void setUsingZooKeeper(boolean usingZK) { - if (!usingZooKeeper && usingZK) { - // Reset any shortcuts taken by non-ZK election logic. - resetElectionProgress(); - } - usingZooKeeper = usingZK; - } - @Override public boolean isMaster() { Integer master = getMaster(); @@ -121,15 +107,13 @@ public class MasterElectionHandler implements MasterInterface { public boolean isFirstInLine() { return (nextInLineCount < 1); } public boolean watchMasterElection(DatabaseHandler database, DatabaseHandler.DatabaseContext dbContext) { - if (totalCount == 1 && !usingZooKeeper) { - return false; // Allow single configured node to become master implicitly if no ZK configured - } if (nextMasterData == null) { if (masterCandidate == null) { context.log(logger, Level.FINEST, () -> "No current master candidate. Waiting for data to do master election."); } return false; // Nothing have happened since last time. } + // Move next data to temporary, such that we don't need to keep lock, and such that we don't retry // if we happen to fail processing the data. Map<Integer, Integer> state; @@ -140,6 +124,7 @@ public class MasterElectionHandler implements MasterInterface { } context.log(logger, Level.INFO, "Got master election state " + toString(state) + "."); if (state.isEmpty()) throw new IllegalStateException("Database has no master data. We should at least have data for ourselves."); + Map.Entry<Integer, Integer> first = state.entrySet().iterator().next(); Integer currentMaster = getMaster(); if (currentMaster != null && first.getKey().intValue() != currentMaster.intValue()) { @@ -238,10 +223,8 @@ public class MasterElectionHandler implements MasterInterface { } public void lostDatabaseConnection() { - if (totalCount > 1 || usingZooKeeper) { - context.log(logger, Level.INFO, "Clearing master data as we lost connection on node " + index); - resetElectionProgress(); - } + context.log(logger, Level.INFO, "Clearing master data as we lost connection on node " + index); + resetElectionProgress(); } private void resetElectionProgress() { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java index efb97a4a69e..ed194776d78 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/database/DatabaseHandler.java @@ -15,6 +15,7 @@ import org.apache.zookeeper.KeeperException; import java.io.PrintWriter; import java.io.StringWriter; import java.util.Map; +import java.util.Objects; import java.util.TreeMap; import java.util.logging.Level; import java.util.logging.Logger; @@ -101,8 +102,7 @@ public class DatabaseHandler { this.timer = timer; pendingStore.masterVote = fleetControllerContext.id().index(); // To begin with we'll vote for ourselves. this.monitor = monitor; - // TODO: Require non-null, not possible now since at least ClusterFeedBlockTest uses null address - this.zooKeeperAddress = zooKeeperAddress; + this.zooKeeperAddress = Objects.requireNonNull(zooKeeperAddress, "zooKeeperAddress cannot be null"); } private boolean isDatabaseClosedSafe() { @@ -161,11 +161,9 @@ public class DatabaseHandler { } public void setZooKeeperAddress(String address, DatabaseContext databaseContext) { - if (address == null && zooKeeperAddress == null) return; - if (address != null && address.equals(zooKeeperAddress)) return; - if (zooKeeperAddress != null) { - fleetControllerContext.log(logger, Level.INFO, "" + (address == null ? "Stopped using ZooKeeper." : "Got new ZooKeeper address to use: " + address)); - } + Objects.requireNonNull(address, "address cannot be null"); + if (address.equals(zooKeeperAddress)) return; + fleetControllerContext.log(logger, Level.INFO, "Got new ZooKeeper address to use: " + address); zooKeeperAddress = address; reset(databaseContext); } @@ -177,8 +175,6 @@ public class DatabaseHandler { reset(databaseContext); } - private boolean usingZooKeeper() { return (zooKeeperAddress != null); } - private void connect(long currentTime) { try { lastZooKeeperConnectionAttempt = currentTime; @@ -245,7 +241,7 @@ public class DatabaseHandler { didWork = true; } } - if (isDatabaseClosedSafe() && zooKeeperIsConfigured()) { + if (isDatabaseClosedSafe()) { long currentTime = timer.getCurrentTimeInMillis(); if (currentTime - lastZooKeeperConnectionAttempt < minimumWaitBetweenFailedConnectionAttempts) { return false; // Not time to attempt connection yet. @@ -270,11 +266,6 @@ public class DatabaseHandler { return didWork; } - private boolean zooKeeperIsConfigured() { - // This should only ever be null during unit testing. - return zooKeeperAddress != null; - } - private void relinquishDatabaseConnectivity(DatabaseContext databaseContext) { // reset() will handle both session clearing and trigger a database loss callback into the CC. reset(databaseContext); @@ -383,9 +374,7 @@ public class DatabaseHandler { } Integer version = currentlyStored.lastSystemStateVersion; if (version == null) { - if (usingZooKeeper()) { - fleetControllerContext.log(logger, Level.WARNING, "Failed to retrieve latest system state version from ZooKeeper. Returning version 0."); - } + fleetControllerContext.log(logger, Level.WARNING, "Failed to retrieve latest system state version from ZooKeeper. Returning version 0."); return 0; // FIXME "fail-oblivious" is not a good error handling mode for such a critical component! } return version; @@ -395,22 +384,13 @@ public class DatabaseHandler { fleetControllerContext.log(logger, Level.FINE, () -> "Scheduling bundle " + clusterStateBundle + " to be saved to ZooKeeper"); pendingStore.clusterStateBundle = clusterStateBundle; doNextZooKeeperTask(databaseContext); - // FIXME this is a nasty hack to get around the fact that a massive amount of unit tests - // set up the system with a null ZooKeeper server address. If we don't fake that we have - // written the state version, the tests will never progress past waiting for state broadcasts. - if (zooKeeperAddress == null) { - logger.warning(() -> "Simulating ZK write of version " + clusterStateBundle.getVersion() + - ". This should not happen in production!"); - lastKnownStateBundleVersionWrittenBySelf = clusterStateBundle.getVersion(); - } } // TODO should we expand this to cover _any_ pending ZK write? public boolean hasPendingClusterStateMetaDataStore() { synchronized (databaseMonitor) { - return ((zooKeeperAddress != null) && - ((pendingStore.clusterStateBundle != null) || - (pendingStore.lastSystemStateVersion != null))); + return ((pendingStore.clusterStateBundle != null) || + (pendingStore.lastSystemStateVersion != null)); } } @@ -458,11 +438,9 @@ public class DatabaseHandler { } Map<Node, NodeState> wantedStates = currentlyStored.wantedStates; if (wantedStates == null) { - if (usingZooKeeper()) { - // We get here if the ZooKeeper client has lost the connection to ZooKeeper. - // TODO: Should instead fail the tick until connected!? - fleetControllerContext.log(logger, Level.FINE, () -> "Failed to retrieve wanted states from ZooKeeper. Assuming UP for all nodes."); - } + // We get here if the ZooKeeper client has lost connection to ZooKeeper. + // TODO: Should instead fail the tick until connected!? + fleetControllerContext.log(logger, Level.FINE, () -> "Failed to retrieve wanted states from ZooKeeper. Assuming UP for all nodes."); wantedStates = new TreeMap<>(); } boolean altered = false; @@ -510,9 +488,7 @@ public class DatabaseHandler { } Map<Node, Long> startTimestamps = currentlyStored.startTimestamps; if (startTimestamps == null) { - if (usingZooKeeper()) { - fleetControllerContext.log(logger, Level.WARNING, "Failed to retrieve start timestamps from ZooKeeper. Cluster state will be bloated with timestamps until we get them set."); - } + fleetControllerContext.log(logger, Level.WARNING, "Failed to retrieve start timestamps from ZooKeeper. Cluster state will be bloated with timestamps until we get them set."); startTimestamps = new TreeMap<>(); } for (Map.Entry<Node, Long> e : startTimestamps.entrySet()) { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java index 5aae401e157..7a9bea91b9c 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java @@ -228,7 +228,7 @@ public class LegacyIndexPageRequestHandler implements StatusPageServer.RequestHa sb.append("<tr><td><nobr>RPC port</nobr></td><td align=\"right\">").append(options.rpcPort() == 0 ? "Pick random available" : options.rpcPort()).append("</td></tr>"); sb.append("<tr><td><nobr>HTTP port</nobr></td><td align=\"right\">").append(options.httpPort() == 0 ? "Pick random available" : options.httpPort()).append("</td></tr>"); sb.append("<tr><td><nobr>Master cooldown period</nobr></td><td align=\"right\">").append(RealTimer.printDuration(options.masterZooKeeperCooldownPeriod())).append("</td></tr>"); - String zooKeeperAddress = (options.zooKeeperServerAddress() == null ? "Not using Zookeeper" : splitZooKeeperAddress(options.zooKeeperServerAddress())); + String zooKeeperAddress = splitZooKeeperAddress(options.zooKeeperServerAddress()); sb.append("<tr><td><nobr>Zookeeper server address</nobr></td><td align=\"right\">").append(zooKeeperAddress).append("</td></tr>"); sb.append("<tr><td><nobr>Zookeeper session timeout</nobr></td><td align=\"right\">").append(RealTimer.printDuration(options.zooKeeperSessionTimeout())).append("</td></tr>"); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java index d4eea261767..55e256cf89c 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java @@ -32,15 +32,17 @@ public class ClusterFeedBlockTest extends FleetControllerTest { private FleetController ctrl; private DummyCommunicator communicator; - private void initialize(FleetControllerOptions options) throws Exception { + private void initialize(FleetControllerOptions.Builder builder) throws Exception { List<Node> nodes = new ArrayList<>(); - for (int i = 0; i < options.nodes().size(); ++i) { + for (int i = 0; i < builder.nodes().size(); ++i) { nodes.add(new Node(NodeType.STORAGE, i)); nodes.add(new Node(NodeType.DISTRIBUTOR, i)); } - var context = new TestFleetControllerContext(options); communicator = new DummyCommunicator(nodes, timer); + setUpZooKeeperServer(builder); + options = builder.build(); + var context = new TestFleetControllerContext(options); boolean start = false; ctrl = createFleetController(timer, options, context, communicator, communicator, null, start); @@ -57,16 +59,16 @@ public class ClusterFeedBlockTest extends FleetControllerTest { ctrl.tick(); } - private static FleetControllerOptions createOptions(Map<String, Double> feedBlockLimits, double clusterFeedBlockNoiseLevel) { + private static FleetControllerOptions.Builder createOptions(Map<String, Double> feedBlockLimits, double clusterFeedBlockNoiseLevel) { return defaultOptions() .setStorageDistribution(DistributionBuilder.forFlatCluster(NODE_COUNT)) .setNodes(new HashSet<>(DistributionBuilder.buildConfiguredNodes(NODE_COUNT))) .setClusterFeedBlockEnabled(true) .setClusterFeedBlockLimit(feedBlockLimits) - .setClusterFeedBlockNoiseLevel(clusterFeedBlockNoiseLevel).build(); + .setClusterFeedBlockNoiseLevel(clusterFeedBlockNoiseLevel); } - private static FleetControllerOptions createOptions(Map<String, Double> feedBlockLimits) { + private static FleetControllerOptions.Builder createOptions(Map<String, Double> feedBlockLimits) { return createOptions(feedBlockLimits, 0.0); } @@ -109,7 +111,7 @@ public class ClusterFeedBlockTest extends FleetControllerTest { assertTrue(ctrl.getClusterStateBundle().clusterFeedIsBlocked()); // Increase cheese allowance. Should now automatically unblock since reported usage is lower. - ctrl.updateOptions(createOptions(mapOf(usage("cheese", 0.9), usage("wine", 0.4)))); + ctrl.updateOptions(createOptions(mapOf(usage("cheese", 0.9), usage("wine", 0.4))).build()); ctrl.tick(); // Options propagation ctrl.tick(); // State recomputation assertFalse(ctrl.getClusterStateBundle().clusterFeedIsBlocked()); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java index 77c89d77ba5..93a96be71a0 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java @@ -6,7 +6,6 @@ import com.yahoo.jrt.Spec; import com.yahoo.jrt.Supervisor; import com.yahoo.jrt.Target; import com.yahoo.jrt.Transport; -import com.yahoo.jrt.slobrok.server.Slobrok; import com.yahoo.vdslib.state.ClusterState; import com.yahoo.vdslib.state.NodeState; import com.yahoo.vdslib.state.NodeType; @@ -53,7 +52,6 @@ public class MasterElectionTest extends FleetControllerTest { if (zooKeeperServer == null) { zooKeeperServer = new ZooKeeperTestServer(); } - slobrok = new Slobrok(); builder.setZooKeeperSessionTimeout(defaultZkSessionTimeoutInMillis()) .setZooKeeperServerAddress(zooKeeperServer.getAddress()) .setSlobrokConnectionSpecs(getSlobrokConnectionSpecs(slobrok)) diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java deleted file mode 100644 index 3d3a38aacd4..00000000000 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.clustercontroller.core; - -import org.junit.jupiter.api.Test; - -import static org.junit.jupiter.api.Assertions.assertTrue; - -public class NoZooKeeperTest extends FleetControllerTest { - - @Test - void testWantedStatesInZooKeeper() throws Exception { - // Null is the default for zooKeeperServerAddress - FleetControllerOptions.Builder builder = defaultOptions(); - Timer timer = new FakeTimer(); - setUpFleetController(timer, builder); - setUpVdsNodes(timer); - waitForStableSystem(); - - assertTrue(nodes.get(0).isDistributor()); - nodes.get(0).disconnect(); - waitForState("version:\\d+ distributor:10 .0.s:d storage:10"); - - nodes.get(0).connect(); - waitForState("version:\\d+ distributor:10 storage:10"); - } -} |