diff options
author | Håkon Hallingstad <hakon.hallingstad@gmail.com> | 2023-05-15 10:59:25 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-15 10:59:25 +0200 |
commit | 7e39f8036dbeb3722fca3a50fdc9c86129011cf0 (patch) | |
tree | 8890478b5436815b10e7fd55827511420a15a41b | |
parent | fb6b55dd2a28ec87f63ecea0b68b380ddb89cafd (diff) | |
parent | a2074370aab00d3de6387e0397e8dd7aaf94ac20 (diff) |
Merge pull request #27104 from vespa-engine/hmusum/cluster-controller-cleanup-2a
Hmusum/cluster controller cleanup 2
22 files changed, 387 insertions, 437 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index 7a4934fe175..5f2a6daf39e 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -113,7 +113,7 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta MasterElectionHandler masterElectionHandler, MetricUpdater metricUpdater, FleetControllerOptions options) { - context.log(logger, Level.INFO, "Created"); + context.log(logger, Level.FINE, "Created"); this.context = context; this.timer = timer; this.monitor = timer; @@ -131,19 +131,11 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta this.statusPageServer = new StatusHandler.ContainerStatusPageServer(); this.rpcServer = server; this.masterElectionHandler = masterElectionHandler; - this.statusRequestRouter.addHandler( - "^/node=([a-z]+)\\.(\\d+)$", - new LegacyNodePageRequestHandler(timer, eventLog, cluster)); - this.statusRequestRouter.addHandler( - "^/state.*", - new NodeHealthRequestHandler()); - this.statusRequestRouter.addHandler( - "^/clusterstate", - new ClusterStateRequestHandler(stateVersionTracker)); + this.statusRequestRouter.addHandler(new LegacyNodePageRequestHandler(timer, eventLog, cluster)); + this.statusRequestRouter.addHandler(new NodeHealthRequestHandler()); + this.statusRequestRouter.addHandler(new ClusterStateRequestHandler(stateVersionTracker)); this.indexPageRequestHandler = new LegacyIndexPageRequestHandler(timer, cluster, masterElectionHandler, stateVersionTracker, eventLog, options); - this.statusRequestRouter.addHandler( - "^/$", - indexPageRequestHandler); + this.statusRequestRouter.addHandler(indexPageRequestHandler); propagateOptions(); } @@ -378,6 +370,8 @@ 
public class FleetController implements NodeListener, SlobrokListener, SystemSta } } + public EventLog getEventLog() { return eventLog; } + private boolean maybePublishOldMetrics() { verifyInControllerThread(); if (isMaster() && cycleCount > 300 + lastMetricUpdateCycleCount) { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/ClusterStateRequestHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/ClusterStateRequestHandler.java index 0d5cd9d68cf..340e6726c6b 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/ClusterStateRequestHandler.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/ClusterStateRequestHandler.java @@ -23,4 +23,7 @@ public class ClusterStateRequestHandler implements StatusPageServer.RequestHandl return response; } + @Override + public String pattern() { return "^/clusterstate"; } + } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java index 61aa7a79e24..3297d511469 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyIndexPageRequestHandler.java @@ -102,6 +102,9 @@ public class LegacyIndexPageRequestHandler implements StatusPageServer.RequestHa return response; } + @Override + public String pattern() { return "^/$"; } + public void writeHtmlState(StateVersionTracker stateVersionTracker, StringBuilder sb) { sb.append("<h2 id=\"clusterstates\">Cluster states</h2>\n"); writeClusterStates(sb, stateVersionTracker.getVersionedClusterStateBundle()); diff --git 
a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyNodePageRequestHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyNodePageRequestHandler.java index 2e401feb8a8..ae29f50e097 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyNodePageRequestHandler.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/LegacyNodePageRequestHandler.java @@ -59,4 +59,7 @@ public class LegacyNodePageRequestHandler implements StatusPageServer.RequestHan return response; } + @Override + public String pattern() { return "^/node=([a-z]+)\\.(\\d+)$"; } + } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/NodeHealthRequestHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/NodeHealthRequestHandler.java index 6a683ce6c04..aad67e87914 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/NodeHealthRequestHandler.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/NodeHealthRequestHandler.java @@ -23,4 +23,7 @@ public class NodeHealthRequestHandler implements StatusPageServer.RequestHandler return response; } + @Override + public String pattern() { return "^/state.*"; } + } diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/StatusHandler.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/StatusHandler.java index a8a858b5d31..65b06afb0c5 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/StatusHandler.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/StatusHandler.java @@ -98,7 +98,6 @@ public class StatusHandler implements HttpRequestHandler { fleetControllerPath = "/"; 
} StatusPageServer.HttpRequest req = new StatusPageServer.HttpRequest(fleetControllerPath); - req.setPathPrefix("/clustercontroller-status/v1"); StatusPageResponse response = statusServer.getStatus(req); HttpResult result = new HttpResult(); if (response.getResponseCode() != null) { diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/StatusPageServer.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/StatusPageServer.java index ec5760181e5..7070d754248 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/StatusPageServer.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/status/statuspage/StatusPageServer.java @@ -2,9 +2,7 @@ package com.yahoo.vespa.clustercontroller.core.status.statuspage; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -22,8 +20,6 @@ public class StatusPageServer { */ public static class HttpRequest { private final String request; - private String pathPrefix = ""; - private final Map<String, String> params = new HashMap<String, String>(); private final String path; static Pattern pathPattern; @@ -32,7 +28,7 @@ public class StatusPageServer { // status pages. // If you stare at it for long enough, this sorta looks like one of those // magic eye pictures. 
- pathPattern = Pattern.compile("^(/([\\w=\\./]+)?)(?:\\?((?:&?\\w+(?:=[\\w\\.]*)?)*))?$"); + pathPattern = Pattern.compile("^(/([\\w=./]+)?)(?:\\?((?:&?\\w+(?:=[\\w.]*)?)*))?$"); } public HttpRequest(String request) { @@ -42,18 +38,8 @@ public class StatusPageServer { throw new IllegalArgumentException("Illegal HTTP request path: " + request); } path = m.group(1); - if (m.group(3) != null) { - String[] rawParams = m.group(3).split("&"); - for (String param : rawParams) { - // Parameter values are optional. - String[] queryParts = param.split("="); - params.put(queryParts[0], queryParts.length > 1 ? queryParts[1] : null); - } - } } - public String getPathPrefix() { return pathPrefix; } - public String toString() { return "HttpRequest(" + request + ")"; } @@ -66,25 +52,11 @@ public class StatusPageServer { return path; } - public boolean hasQueryParameters() { - return !params.isEmpty(); - } - - public String getQueryParameter(String name) { - return params.get(name); - } - - public boolean hasQueryParameter(String name) { - return params.containsKey(name); - } - - public void setPathPrefix(String pathPrefix) { - this.pathPrefix = pathPrefix; - } } public interface RequestHandler { StatusPageResponse handle(HttpRequest request); + String pattern(); } public interface RequestRouter { @@ -114,12 +86,8 @@ public class StatusPageServer { private final List<PatternRouting> patterns = new ArrayList<>(); - public void addHandler(Pattern pattern, RequestHandler handler) { - patterns.add(new PatternRouting(pattern, handler)); - } - - public void addHandler(String pattern, RequestHandler handler) { - addHandler(Pattern.compile(pattern), handler); + public void addHandler(RequestHandler handler) { + patterns.add(new PatternRouting(Pattern.compile(handler.pattern()), handler)); } @Override diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java 
b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java index f167fbc7231..d4eea261767 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/ClusterFeedBlockTest.java @@ -5,11 +5,9 @@ import com.yahoo.vdslib.state.Node; import com.yahoo.vdslib.state.NodeState; import com.yahoo.vdslib.state.NodeType; import com.yahoo.vdslib.state.State; -import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler; -import com.yahoo.vespa.clustercontroller.core.database.ZooKeeperDatabaseFactory; -import com.yahoo.vespa.clustercontroller.utils.util.NoMetricReporter; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; + import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -29,7 +27,8 @@ public class ClusterFeedBlockTest extends FleetControllerTest { private static final int NODE_COUNT = 3; - // TODO dedupe fixture and setup stuff with other tests + private final Timer timer = new FakeTimer(); + private FleetController ctrl; private DummyCommunicator communicator; @@ -42,16 +41,8 @@ public class ClusterFeedBlockTest extends FleetControllerTest { var context = new TestFleetControllerContext(options); communicator = new DummyCommunicator(nodes, timer); - var metricUpdater = new MetricUpdater(new NoMetricReporter(), options.fleetControllerIndex(), options.clusterName()); - var eventLog = new EventLog(timer, metricUpdater); - var cluster = new ContentCluster(options); - var stateGatherer = new NodeStateGatherer(timer, timer, eventLog); - var database = new DatabaseHandler(context, new ZooKeeperDatabaseFactory(context), timer, options.zooKeeperServerAddress(), timer); - var stateGenerator = new StateChangeHandler(context, timer, eventLog); - var stateBroadcaster = new SystemStateBroadcaster(context, timer, timer); - var 
masterElectionHandler = new MasterElectionHandler(context, options.fleetControllerIndex(), options.fleetControllerCount(), timer, timer); - ctrl = new FleetController(context, timer, eventLog, cluster, stateGatherer, communicator, null, communicator, database, - stateGenerator, stateBroadcaster, masterElectionHandler, metricUpdater, options); + boolean start = false; + ctrl = createFleetController(timer, options, context, communicator, communicator, null, start); ctrl.tick(); markAllNodesAsUp(options); @@ -67,7 +58,7 @@ public class ClusterFeedBlockTest extends FleetControllerTest { } private static FleetControllerOptions createOptions(Map<String, Double> feedBlockLimits, double clusterFeedBlockNoiseLevel) { - return defaultOptions("mycluster") + return defaultOptions() .setStorageDistribution(DistributionBuilder.forFlatCluster(NODE_COUNT)) .setNodes(new HashSet<>(DistributionBuilder.buildConfiguredNodes(NODE_COUNT))) .setClusterFeedBlockEnabled(true) diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java index a66294851b8..a210f219b08 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java @@ -35,11 +35,11 @@ public class DatabaseTest extends FleetControllerTest { @Test void testWantedStatesInZooKeeper() throws Exception { - startingTest("DatabaseTest::testWantedStatesInZooKeeper"); - FleetControllerOptions.Builder builder = defaultOptions("mycluster"); + FleetControllerOptions.Builder builder = defaultOptions(); builder.setZooKeeperServerAddress("127.0.0.1"); - setUpFleetController(true, builder); - setUpVdsNodes(true); + Timer timer = new FakeTimer(); + setUpFleetController(timer, builder); + setUpVdsNodes(timer); log.info("WAITING FOR STABLE SYSTEM"); 
waitForStableSystem(); @@ -68,7 +68,8 @@ public class DatabaseTest extends FleetControllerTest { log.info("CHECK THAT WANTED STATES PERSIST FLEETCONTROLLER RESTART"); stopFleetController(); - startFleetController(false); + timer = new RealTimer(); + startFleetController(timer); waitForState("version:\\d+ distributor:10 .2.s:d storage:10 .3.s:m .7.s:r"); assertWantedStates(wantedStates); @@ -89,13 +90,13 @@ public class DatabaseTest extends FleetControllerTest { @Test void testWantedStateOfUnknownNode() throws Exception { - startingTest("DatabaseTest::testWantedStatesOfUnknownNode"); - FleetControllerOptions.Builder builder = defaultOptions("mycluster") + FleetControllerOptions.Builder builder = defaultOptions() .setMinRatioOfDistributorNodesUp(0) .setMinRatioOfStorageNodesUp(0) .setZooKeeperServerAddress("localhost"); - setUpFleetController(true, builder); - setUpVdsNodes(true); + Timer timer = new FakeTimer(); + setUpFleetController(timer, builder); + setUpVdsNodes(timer); waitForStableSystem(); // Populate map of wanted states we should have @@ -132,7 +133,8 @@ public class DatabaseTest extends FleetControllerTest { stopFleetController(); for (int i = 6; i < nodes.size(); ++i) nodes.get(i).disconnect(); - startFleetController(false); + timer = new RealTimer(); + startFleetController(timer); waitForState("version:\\d+ distributor:3 storage:7 .1.s:m .3.s:d .4.s:d .5.s:d .6.s:m"); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java index cec23930ea8..95b9d13cad5 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DistributionBitCountTest.java @@ -17,16 +17,16 @@ import static org.junit.jupiter.api.Assertions.assertEquals; 
@ExtendWith(CleanupZookeeperLogsOnSuccess.class) public class DistributionBitCountTest extends FleetControllerTest { - private FleetControllerOptions setUpSystem(String testName) throws Exception { + private FleetControllerOptions setUpSystem() throws Exception { List<ConfiguredNode> configuredNodes = new ArrayList<>(); for (int i = 0 ; i < 10; i++) { configuredNodes.add(new ConfiguredNode(i, false)); } var builder = defaultOptions("mycluster", configuredNodes); builder.setDistributionBits(17); - setUpFleetController(false, builder); - startingTest(testName); - List<DummyVdsNode> nodes = setUpVdsNodes(false, true, configuredNodes); + Timer timer = new RealTimer(); + setUpFleetController(timer, builder); + List<DummyVdsNode> nodes = setUpVdsNodes(timer, true, configuredNodes); for (DummyVdsNode node : nodes) { node.setNodeState(new NodeState(node.getType(), State.UP).setMinUsedBits(20)); node.connect(); @@ -41,7 +41,7 @@ public class DistributionBitCountTest extends FleetControllerTest { */ @Test void testDistributionBitCountConfigIncrease() throws Exception { - var options = setUpSystem("DistributionBitCountTest::testDistributionBitCountConfigIncrease"); + var options = setUpSystem(); var builder = FleetControllerOptions.Builder.copy(options); builder.setDistributionBits(20); fleetController().updateOptions(builder.build()); @@ -58,7 +58,7 @@ public class DistributionBitCountTest extends FleetControllerTest { */ @Test void testDistributionBitCountConfigDecrease() throws Exception { - FleetControllerOptions options = setUpSystem("DistributionBitCountTest::testDistributionBitCountConfigDecrease"); + FleetControllerOptions options = setUpSystem(); var builder = FleetControllerOptions.Builder.copy(options); builder.setDistributionBits(12); fleetController().updateOptions(builder.build()); @@ -68,13 +68,13 @@ public class DistributionBitCountTest extends FleetControllerTest { /** * Test that when storage node reports higher bit count, but another storage * node has 
equally low bitcount, the fleetcontroller does nothing. - * + * <p> * Test that when storage node reports higher bit count, but another storage * node now being lowest, the fleetcontroller adjusts to use that bit in system state. */ @Test void testStorageNodeReportingHigherBitCount() throws Exception { - setUpSystem("DistributionBitCountTest::testStorageNodeReportingHigherBitCount"); + setUpSystem(); nodes.get(1).setNodeState(new NodeState(NodeType.STORAGE, State.UP).setMinUsedBits(11)); nodes.get(3).setNodeState(new NodeState(NodeType.STORAGE, State.UP).setMinUsedBits(11)); @@ -97,7 +97,7 @@ public class DistributionBitCountTest extends FleetControllerTest { */ @Test void testStorageNodeReportingLowerBitCount() throws Exception { - setUpSystem("DistributionBitCountTest::testStorageNodeReportingLowerBitCount"); + setUpSystem(); nodes.get(1).setNodeState(new NodeState(NodeType.STORAGE, State.UP).setMinUsedBits(13)); ClusterState currentState = waitForState("version:\\d+ bits:13 distributor:10 storage:10"); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java index 6abd5dae23f..238dfd42da5 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.vespa.clustercontroller.core; +import com.yahoo.jrt.ListenFailedException; import com.yahoo.jrt.Request; import com.yahoo.jrt.Spec; import com.yahoo.jrt.StringValue; @@ -50,54 +51,44 @@ public abstract class FleetControllerTest implements Waiter { private static final int DEFAULT_NODE_COUNT = 10; private final Duration timeout = Duration.ofSeconds(30); - protected final FakeTimer timer = new FakeTimer(); - protected Slobrok slobrok; + protected FleetControllerOptions options; ZooKeeperTestServer zooKeeperServer; protected final List<FleetController> fleetControllers = new ArrayList<>(); protected List<DummyVdsNode> nodes = new ArrayList<>(); - private String testName; - - private final Waiter waiter = new Waiter.Impl(new DataRetriever() { - @Override - public Object getMonitor() { return timer; } - @Override - public FleetController getFleetController() { return fleetController(); } - @Override - public List<DummyVdsNode> getDummyNodes() { return nodes; } - @Override - public Duration getTimeout() { return timeout; } - }); + private Waiter waiter; - static { - LogSetup.initVespaLogging("fleetcontroller"); + FleetControllerTest() { + try { + slobrok = new Slobrok(); + } catch (ListenFailedException e) { + throw new RuntimeException(e); + } } - protected void startingTest(String name) { - System.err.println("STARTING TEST: " + name); - testName = name; + static { + LogSetup.initVespaLogging("fleetcontroller"); } - static protected FleetControllerOptions.Builder defaultOptions(String clusterName) { - return defaultOptions(clusterName, DEFAULT_NODE_COUNT); + protected static FleetControllerOptions.Builder defaultOptions() { + return defaultOptions(DEFAULT_NODE_COUNT); } - static protected FleetControllerOptions.Builder defaultOptions(String clusterName, int nodeCount) { - return defaultOptions(clusterName, IntStream.range(0, nodeCount) + protected static FleetControllerOptions.Builder defaultOptions(int nodeCount) { + return 
defaultOptions("mycluster", IntStream.range(0, nodeCount) .mapToObj(i -> new ConfiguredNode(i, false)) .collect(Collectors.toSet())); } - static protected FleetControllerOptions.Builder defaultOptions(String clusterName, Collection<ConfiguredNode> nodes) { + protected static FleetControllerOptions.Builder defaultOptions(String clusterName, Collection<ConfiguredNode> nodes) { var builder = new FleetControllerOptions.Builder(clusterName, nodes); builder.enableTwoPhaseClusterStateActivation(true); // Enable by default, tests can explicitly disable. return builder; } - void setUpSystem(FleetControllerOptions.Builder builder) throws Exception { + private void setUpSystem(FleetControllerOptions.Builder builder) throws Exception { log.log(Level.FINE, "Setting up system"); - slobrok = new Slobrok(); if (builder.zooKeeperServerAddress() != null) { zooKeeperServer = new ZooKeeperTestServer(); // Need to set zookeeper address again, as port number is not known until ZooKeeperTestServer has been created @@ -105,16 +96,10 @@ public abstract class FleetControllerTest implements Waiter { log.log(Level.FINE, "Set up new zookeeper server at " + zooKeeperServer.getAddress()); } builder.setSlobrokConnectionSpecs(getSlobrokConnectionSpecs(slobrok)); - this.options = builder.build(); } - FleetController createFleetController(boolean useFakeTimer, FleetControllerOptions options) throws Exception { + FleetController createFleetController(Timer timer, FleetControllerOptions options) { var context = new TestFleetControllerContext(options); - Timer timer = useFakeTimer ? 
this.timer : new RealTimer(); - var metricUpdater = new MetricUpdater(new NoMetricReporter(), options.fleetControllerIndex(), options.clusterName()); - var log = new EventLog(timer, metricUpdater); - var cluster = new ContentCluster(options.clusterName(), options.nodes(), options.storageDistribution()); - var stateGatherer = new NodeStateGatherer(timer, timer, log); var communicator = new RPCCommunicator( RPCCommunicator.createRealSupervisor(), timer, @@ -125,8 +110,22 @@ public abstract class FleetControllerTest implements Waiter { options.nodeStateRequestRoundTripTimeMaxSeconds()); var lookUp = new SlobrokClient(context, timer, new String[0]); var rpcServer = new RpcServer(timer, options.clusterName(), options.fleetControllerIndex()); - var database = new DatabaseHandler(context, new ZooKeeperDatabaseFactory(context), timer, options.zooKeeperServerAddress(), timer); + return createFleetController(timer, options, context, communicator, lookUp, rpcServer, true); + } + FleetController createFleetController(Timer timer, + FleetControllerOptions options, + TestFleetControllerContext context, + Communicator communicator, + NodeLookup nodeLookup, + RpcServer rpcServer, + boolean start) { + waiter = createWaiter(timer); + var metricUpdater = new MetricUpdater(new NoMetricReporter(), options.fleetControllerIndex(), options.clusterName()); + var log = new EventLog(timer, metricUpdater); + var cluster = new ContentCluster(options.clusterName(), options.nodes(), options.storageDistribution()); + var stateGatherer = new NodeStateGatherer(timer, timer, log); + var database = new DatabaseHandler(context, new ZooKeeperDatabaseFactory(context), timer, options.zooKeeperServerAddress(), timer); // Setting this <1000 ms causes ECONNREFUSED on socket trying to connect to ZK server, in ZooKeeper, // after creating a new ZooKeeper (session). This causes ~10s extra time to connect after connection loss. // Reasons unknown. 
Larger values like the default 10_000 causes that much additional running time for some tests. @@ -136,16 +135,17 @@ public abstract class FleetControllerTest implements Waiter { var stateBroadcaster = new SystemStateBroadcaster(context, timer, timer); var masterElectionHandler = new MasterElectionHandler(context, options.fleetControllerIndex(), options.fleetControllerCount(), timer, timer); - var controller = new FleetController(context, timer, log, cluster, stateGatherer, communicator, rpcServer, lookUp, + var controller = new FleetController(context, timer, log, cluster, stateGatherer, communicator, rpcServer, nodeLookup, database, stateGenerator, stateBroadcaster, masterElectionHandler, metricUpdater, options); - controller.start(); + if (start) + controller.start(); return controller; } - protected FleetControllerOptions setUpFleetController(boolean useFakeTimer, FleetControllerOptions.Builder builder) throws Exception { - if (slobrok == null) setUpSystem(builder); + protected FleetControllerOptions setUpFleetController(Timer timer, FleetControllerOptions.Builder builder) throws Exception { + setUpSystem(builder); options = builder.build(); - startFleetController(useFakeTimer); + startFleetController(timer); return options; } @@ -160,39 +160,38 @@ public abstract class FleetControllerTest implements Waiter { fleetControllers.clear(); } - void startFleetController(boolean useFakeTimer) throws Exception { + void startFleetController(Timer timer) { if ( ! 
fleetControllers.isEmpty()) throw new IllegalStateException("already started fleetcontroller, not starting another"); - fleetControllers.add(createFleetController(useFakeTimer, options)); + fleetControllers.add(createFleetController(timer, options)); } - protected void setUpVdsNodes(boolean useFakeTimer) throws Exception { - setUpVdsNodes(useFakeTimer, false); + protected void setUpVdsNodes(Timer timer) throws Exception { + setUpVdsNodes(timer, false); } - protected void setUpVdsNodes(boolean useFakeTimer, boolean startDisconnected) throws Exception { - setUpVdsNodes(useFakeTimer, startDisconnected, DEFAULT_NODE_COUNT); + protected void setUpVdsNodes(Timer timer, boolean startDisconnected) throws Exception { + setUpVdsNodes(timer, startDisconnected, DEFAULT_NODE_COUNT); } - protected void setUpVdsNodes(boolean useFakeTimer, boolean startDisconnected, int nodeCount) throws Exception { + protected void setUpVdsNodes(Timer timer, boolean startDisconnected, int nodeCount) throws Exception { TreeSet<Integer> nodeIndexes = new TreeSet<>(); for (int i = 0; i < nodeCount; ++i) nodeIndexes.add(this.nodes.size()/2 + i); // divide by 2 because there are 2 nodes (storage and distributor) per index - setUpVdsNodes(useFakeTimer, startDisconnected, nodeIndexes); + setUpVdsNodes(timer, startDisconnected, nodeIndexes); } - protected void setUpVdsNodes(boolean useFakeTimer, boolean startDisconnected, Set<Integer> nodeIndexes) throws Exception { + protected void setUpVdsNodes(Timer timer, boolean startDisconnected, Set<Integer> nodeIndexes) throws Exception { for (int nodeIndex : nodeIndexes) { - nodes.add(createNode(useFakeTimer, startDisconnected, DISTRIBUTOR, nodeIndex)); - nodes.add(createNode(useFakeTimer, startDisconnected, STORAGE, nodeIndex)); + nodes.add(createNode(timer, startDisconnected, DISTRIBUTOR, nodeIndex)); + nodes.add(createNode(timer, startDisconnected, STORAGE, nodeIndex)); } } - private DummyVdsNode createNode(boolean useFakeTimer, boolean startDisconnected, + 
private DummyVdsNode createNode(Timer timer, boolean startDisconnected, NodeType nodeType, int nodeIndex) throws Exception { String[] connectionSpecs = getSlobrokConnectionSpecs(slobrok); - DummyVdsNode node = new DummyVdsNode(useFakeTimer ? timer : new RealTimer(), connectionSpecs, - options.clusterName(), nodeType, nodeIndex); + DummyVdsNode node = new DummyVdsNode(timer, connectionSpecs, options.clusterName(), nodeType, nodeIndex); if ( ! startDisconnected) node.connect(); return node; @@ -205,11 +204,11 @@ public abstract class FleetControllerTest implements Waiter { * As two dummy nodes are created for each configured node - one distributor and one storage node - * the returned list is twice as large as configuredNodes. */ - protected List<DummyVdsNode> setUpVdsNodes(boolean useFakeTimer, boolean startDisconnected, List<ConfiguredNode> configuredNodes) throws Exception { + protected List<DummyVdsNode> setUpVdsNodes(Timer timer, boolean startDisconnected, List<ConfiguredNode> configuredNodes) throws Exception { nodes = new ArrayList<>(); for (ConfiguredNode configuredNode : configuredNodes) { - nodes.add(createNode(useFakeTimer, startDisconnected, DISTRIBUTOR, configuredNode.index())); - nodes.add(createNode(useFakeTimer, startDisconnected, STORAGE, configuredNode.index())); + nodes.add(createNode(timer, startDisconnected, DISTRIBUTOR, configuredNode.index())); + nodes.add(createNode(timer, startDisconnected, STORAGE, configuredNode.index())); } return nodes; } @@ -222,7 +221,7 @@ public abstract class FleetControllerTest implements Waiter { return indices.stream().map(idx -> new ConfiguredNode(idx, false)).collect(Collectors.toSet()); } - void waitForStateExcludingNodeSubset(String expectedState, Set<Integer> excludedNodes) throws Exception { + void waitForStateExcludingNodeSubset(String expectedState, Set<Integer> excludedNodes, Timer timer) throws Exception { // Due to the implementation details of the test base, this.waitForState() will always // wait until 
all nodes added in the test have received the latest cluster state. Since we // want to entirely ignore node #6, it won't get a cluster state at all and the test will @@ -248,11 +247,6 @@ public abstract class FleetControllerTest implements Waiter { @AfterEach public void tearDown() { - if (testName != null) { - //log.log(Level.INFO, "STOPPING TEST " + testName); - System.err.println("STOPPING TEST " + testName); - testName = null; - } fleetControllers.forEach(f -> { try { f.shutdown(); @@ -265,10 +259,7 @@ public abstract class FleetControllerTest implements Waiter { node.shutdown(); nodes = null; } - if (slobrok != null) { - slobrok.stop(); - slobrok = null; - } + slobrok.stop(); } public ClusterState waitForStableSystem() throws Exception { return waiter.waitForStableSystem(); } @@ -288,12 +279,6 @@ public abstract class FleetControllerTest implements Waiter { fleetController().waitForCompleteCycle(timeout); } - public static Set<ConfiguredNode> toNodes(Integer ... indexes) { - return Arrays.stream(indexes) - .map(i -> new ConfiguredNode(i, false)) - .collect(Collectors.toSet()); - } - void setWantedState(DummyVdsNode node, State state, String reason, Supervisor supervisor) { setWantedState(new NodeState(node.getType(), state), reason, node.getSlobrokName(), supervisor); } @@ -324,4 +309,21 @@ public abstract class FleetControllerTest implements Waiter { Duration timeout() { return timeout; } + // Note: This should use the same timer as the fleet controller as monitor + private Impl createWaiter(Timer timer) { + return new Impl(new DataRetriever() { + @Override + public Object getMonitor() { return timer; } + + @Override + public FleetController getFleetController() { return fleetController(); } + + @Override + public List<DummyVdsNode> getDummyNodes() { return nodes; } + + @Override + public Duration getTimeout() { return timeout; } + }); + } + } diff --git 
a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownLiveConfigTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownLiveConfigTest.java index a498f26fb7d..3a6dedafad0 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownLiveConfigTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/GroupAutoTakedownLiveConfigTest.java @@ -12,8 +12,10 @@ import static org.junit.jupiter.api.Assertions.assertFalse; @ExtendWith(CleanupZookeeperLogsOnSuccess.class) public class GroupAutoTakedownLiveConfigTest extends FleetControllerTest { + private final Timer timer = new FakeTimer(); + private static FleetControllerOptions.Builder createOptions(DistributionBuilder.GroupBuilder groupBuilder, double minNodeRatio) { - return defaultOptions("mycluster") + return defaultOptions() .setStorageDistribution(DistributionBuilder.forHierarchicCluster(groupBuilder)) .setNodes(new HashSet<>(DistributionBuilder.buildConfiguredNodes(groupBuilder.totalNodeCount()))) .setMinNodeRatioPerGroup(minNodeRatio) @@ -41,8 +43,8 @@ public class GroupAutoTakedownLiveConfigTest extends FleetControllerTest { private FleetControllerOptions setUp3x3ClusterWithMinNodeRatio(double minNodeRatio) throws Exception { FleetControllerOptions.Builder options = createOptions(DistributionBuilder.withGroups(3).eachWithNodeCount(3), minNodeRatio); - setUpFleetController(true, options); - setUpVdsNodes(true, false, 9); + setUpFleetController(timer, options); + setUpVdsNodes(timer, false, 9); waitForState("version:\\d+ distributor:9 storage:9"); return options.build(); } @@ -59,7 +61,7 @@ public class GroupAutoTakedownLiveConfigTest extends FleetControllerTest { void bootstrap_min_ratio_option_is_propagated_to_group_availability_logic() throws Exception { setUp3x3ClusterWithMinNodeRatio(0.67); takeDownContentNode(0); - 
waitForStateExcludingNodeSubset("version:\\d+ distributor:9 storage:9 .0.s:d .1.s:d .2.s:d", asIntSet(0)); + waitForStateExcludingNodeSubset("version:\\d+ distributor:9 storage:9 .0.s:d .1.s:d .2.s:d", asIntSet(0), timer); } @Test @@ -67,14 +69,14 @@ public class GroupAutoTakedownLiveConfigTest extends FleetControllerTest { // Initially, arbitrarily many nodes may be down in a group. var options = setUp3x3ClusterWithMinNodeRatio(0.0); takeDownContentNode(3); - waitForStateExcludingNodeSubset("version:\\d+ distributor:9 storage:9 .3.s:d", asIntSet(3)); + waitForStateExcludingNodeSubset("version:\\d+ distributor:9 storage:9 .3.s:d", asIntSet(3), timer); reconfigureWithMinNodeRatio(options, 0.67); - waitForStateExcludingNodeSubset("version:\\d+ distributor:9 storage:9 .3.s:d .4.s:d .5.s:d", asIntSet(3)); + waitForStateExcludingNodeSubset("version:\\d+ distributor:9 storage:9 .3.s:d .4.s:d .5.s:d", asIntSet(3), timer); reconfigureWithMinNodeRatio(options, 0.0); // Aaaand back up again! - waitForStateExcludingNodeSubset("version:\\d+ distributor:9 storage:9 .3.s:d", asIntSet(3)); + waitForStateExcludingNodeSubset("version:\\d+ distributor:9 storage:9 .3.s:d", asIntSet(3), timer); } @Test @@ -83,10 +85,10 @@ public class GroupAutoTakedownLiveConfigTest extends FleetControllerTest { takeDownContentNode(6); // Not enough nodes down to trigger group take-down yet - waitForStateExcludingNodeSubset("version:\\d+ distributor:9 storage:9 .6.s:d", asIntSet(6)); + waitForStateExcludingNodeSubset("version:\\d+ distributor:9 storage:9 .6.s:d", asIntSet(6), timer); // Removing a node from the same group as node 6 will dip it under the configured threshold, // taking down the entire group. In this case we configure out node 8. 
reconfigureWithDistribution(options, DistributionBuilder.withGroupNodes(3, 3, 2)); - waitForStateExcludingNodeSubset("version:\\d+ distributor:8 storage:6", asIntSet(6, 8)); + waitForStateExcludingNodeSubset("version:\\d+ distributor:8 storage:6", asIntSet(6, 8), timer); } } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java index 941ec6e23f3..77c89d77ba5 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java @@ -49,7 +49,7 @@ public class MasterElectionTest extends FleetControllerTest { supervisor.transport().shutdown().join(); } - protected void setUpFleetControllers(int count, boolean useFakeTimer, FleetControllerOptions.Builder builder) throws Exception { + protected void setUpFleetControllers(int count, Timer timer, FleetControllerOptions.Builder builder) throws Exception { if (zooKeeperServer == null) { zooKeeperServer = new ZooKeeperTestServer(); } @@ -62,7 +62,7 @@ public class MasterElectionTest extends FleetControllerTest { for (int i = 0; i < count; ++i) { FleetControllerOptions.Builder b = FleetControllerOptions.Builder.copy(options); b.setIndex(i); - fleetControllers.add(createFleetController(useFakeTimer, b.build())); + fleetControllers.add(createFleetController(timer, b.build())); } } @@ -100,12 +100,10 @@ public class MasterElectionTest extends FleetControllerTest { @Test void testMasterElection() throws Exception { - startingTest("MasterElectionTest::testMasterElection"); - log.log(Level.INFO, "STARTING TEST: MasterElectionTest::testMasterElection()"); - FleetControllerOptions.Builder builder = defaultOptions("mycluster"); + FleetControllerOptions.Builder builder = defaultOptions(); 
builder.setMasterZooKeeperCooldownPeriod(100); - boolean usingFakeTimer = false; - setUpFleetControllers(3, usingFakeTimer, builder); + Timer timer = new RealTimer(); + setUpFleetControllers(3, timer, builder); waitForMaster(0); log.log(Level.INFO, "SHUTTING DOWN FLEET CONTROLLER 0"); fleetControllers.get(0).shutdown(); @@ -120,20 +118,18 @@ public class MasterElectionTest extends FleetControllerTest { } log.log(Level.INFO, "STARTING FLEET CONTROLLER 1"); - fleetControllers.set(1, createFleetController(usingFakeTimer, fleetControllers.get(1).getOptions())); + fleetControllers.set(1, createFleetController(timer, fleetControllers.get(1).getOptions())); waitForMaster(1); log.log(Level.INFO, "STARTING FLEET CONTROLLER 0"); - fleetControllers.set(0, createFleetController(usingFakeTimer, fleetControllers.get(0).getOptions())); + fleetControllers.set(0, createFleetController(timer, fleetControllers.get(0).getOptions())); waitForMaster(0); } @Test void testMasterElectionWith5FleetControllers() throws Exception { - startingTest("MasterElectionTest::testMasterElectionWith5FleetControllers"); - log.log(Level.INFO, "STARTING TEST: MasterElectionTest::testMasterElectionWith5FleetControllers()"); - FleetControllerOptions.Builder builder = defaultOptions("mycluster"); - boolean usingFakeTimer = false; - setUpFleetControllers(5, usingFakeTimer, builder); + FleetControllerOptions.Builder builder = defaultOptions(); + RealTimer timer = new RealTimer(); + setUpFleetControllers(5, timer, builder); waitForMaster(0); } @@ -143,7 +139,6 @@ public class MasterElectionTest extends FleetControllerTest { for (int i = 0; i < timeout().toMillis(); i += 100) { if (!fleetControllers.get(master).isMaster()) { log.log(Level.INFO, "Node " + master + " is not master yet, sleeping more"); - timer.advanceTime(100); waitForCompleteCycle(master); } else { log.log(Level.INFO, "Node " + master + " is master. 
Checking that no one else is master"); @@ -195,13 +190,13 @@ public class MasterElectionTest extends FleetControllerTest { @Test void testClusterStateVersionIncreasesAcrossMasterElections() throws Exception { - startingTest("MasterElectionTest::testClusterStateVersionIncreasesAcrossMasterElections"); - FleetControllerOptions.Builder options = defaultOptions("mycluster"); + FleetControllerOptions.Builder options = defaultOptions(); options.setMasterZooKeeperCooldownPeriod(1); - setUpFleetControllers(3, false, options); + Timer timer = new RealTimer(); + setUpFleetControllers(3, timer, options); // Currently need to have content nodes present for the cluster controller to even bother // attempting to persisting its cluster state version to ZK. - setUpVdsNodes(false); + setUpVdsNodes(timer); waitForStableSystem(); waitForMaster(0); Stream.of(0, 1, 2).forEach(this::waitForCompleteCycle); @@ -215,12 +210,12 @@ public class MasterElectionTest extends FleetControllerTest { @Test void testVotingCorrectnessInFaceOfZKDisconnect() throws Exception { - startingTest("MasterElectionTest::testVotingCorrectnessInFaceOfZKDisconnect"); - FleetControllerOptions.Builder options = defaultOptions("mycluster"); + FleetControllerOptions.Builder options = defaultOptions(); // "Magic" port value is in range allocated to module for testing. 
zooKeeperServer = ZooKeeperTestServer.createWithFixedPort(18342); options.setMasterZooKeeperCooldownPeriod(100); - setUpFleetControllers(2, false, options); + Timer timer = new RealTimer(); + setUpFleetControllers(2, timer, options); waitForMaster(0); zooKeeperServer.shutdown(true); @@ -236,11 +231,11 @@ public class MasterElectionTest extends FleetControllerTest { @Test void testZooKeeperUnavailable() throws Exception { - startingTest("MasterElectionTest::testZooKeeperUnavailable"); - FleetControllerOptions.Builder builder = defaultOptions("mycluster") + FleetControllerOptions.Builder builder = defaultOptions() .setMasterZooKeeperCooldownPeriod(100) .setZooKeeperServerAddress("localhost"); - setUpFleetControllers(3, false, builder); + Timer timer = new RealTimer(); + setUpFleetControllers(3, timer, builder); waitForMaster(0); log.log(Level.INFO, "STOPPING ZOOKEEPER SERVER AT " + zooKeeperServer.getAddress()); @@ -269,10 +264,10 @@ public class MasterElectionTest extends FleetControllerTest { @Test @Disabled("Unstable, disable test, as functionality is not deemed critical") void testMasterZooKeeperCooldown() throws Exception { - startingTest("MasterElectionTest::testMasterZooKeeperCooldown"); - FleetControllerOptions.Builder options = defaultOptions("mycluster"); + FleetControllerOptions.Builder options = defaultOptions(); options.setMasterZooKeeperCooldownPeriod(3600 * 1000); // An hour - setUpFleetControllers(3, true, options); + FakeTimer timer = new FakeTimer(); + setUpFleetControllers(3, timer, options); waitForMaster(0); timer.advanceTime(24 * 3600 * 1000); // A day waitForCompleteCycle(1); @@ -317,10 +312,10 @@ public class MasterElectionTest extends FleetControllerTest { @Test void testGetMaster() throws Exception { - startingTest("MasterElectionTest::testGetMaster"); - FleetControllerOptions.Builder options = defaultOptions("mycluster"); + FleetControllerOptions.Builder options = defaultOptions(); options.setMasterZooKeeperCooldownPeriod(3600 * 1000); // 
An hour - setUpFleetControllers(3, true, options); + FakeTimer timer = new FakeTimer(); + setUpFleetControllers(3, timer, options); waitForMaster(0); List<Target> connections = new ArrayList<>(); @@ -398,10 +393,10 @@ public class MasterElectionTest extends FleetControllerTest { @Test void testReconfigure() throws Exception { - startingTest("MasterElectionTest::testReconfigure"); - FleetControllerOptions.Builder options = defaultOptions("mycluster"); + FleetControllerOptions.Builder options = defaultOptions(); options.setMasterZooKeeperCooldownPeriod(1); - setUpFleetControllers(3, false, options); + Timer timer = new RealTimer(); + setUpFleetControllers(3, timer, options); waitForMaster(0); FleetControllerOptions newOptions = FleetControllerOptions.Builder.copy(options.build()).build(); @@ -423,15 +418,15 @@ public class MasterElectionTest extends FleetControllerTest { */ @Test void cluster_state_version_written_to_zookeeper_even_with_empty_send_set() throws Exception { - startingTest("MasterElectionTest::cluster_state_version_written_to_zookeeper_even_with_empty_send_set"); - FleetControllerOptions.Builder builder = defaultOptions("mycluster") + FleetControllerOptions.Builder builder = defaultOptions() .setMasterZooKeeperCooldownPeriod(1) .setMinRatioOfDistributorNodesUp(0) .setMinRatioOfStorageNodesUp(0) .setMinDistributorNodesUp(0) .setMinStorageNodesUp(1); - setUpFleetControllers(3, false, builder); - setUpVdsNodes(false); + Timer timer = new RealTimer(); + setUpFleetControllers(3, timer, builder); + setUpVdsNodes(timer); waitForStableSystem(); waitForMaster(0); @@ -468,14 +463,13 @@ public class MasterElectionTest extends FleetControllerTest { @Test void previously_published_state_is_taken_into_account_for_default_space_when_controller_bootstraps() throws Exception { - startingTest("MasterElectionTest::previously_published_state_is_taken_into_account_for_default_space_when_controller_bootstraps"); - FleetControllerOptions.Builder builder = 
defaultOptions("mycluster") + FleetControllerOptions.Builder builder = defaultOptions() .setClusterHasGlobalDocumentTypes(true) .setMasterZooKeeperCooldownPeriod(1) .setMinTimeBeforeFirstSystemStateBroadcast(100000); - boolean useFakeTimer = false; - setUpFleetControllers(3, useFakeTimer, builder); - setUpVdsNodes(false); + Timer timer = new RealTimer(); + setUpFleetControllers(3, timer, builder); + setUpVdsNodes(timer); waitForMaster(0); waitForStableSystem(); log.info("Waiting for full maintenance mode in default space"); @@ -499,7 +493,7 @@ public class MasterElectionTest extends FleetControllerTest { waitForMaster(1); waitForCompleteCycle(1); - fleetControllers.set(0, createFleetController(useFakeTimer, fleetControllers.get(0).getOptions())); + fleetControllers.set(0, createFleetController(timer, fleetControllers.get(0).getOptions())); waitForMaster(0); waitForCompleteCycle(0); @@ -511,13 +505,13 @@ public class MasterElectionTest extends FleetControllerTest { @Test void default_space_nodes_not_marked_as_maintenance_when_cluster_has_no_global_document_types() throws Exception { - startingTest("MasterElectionTest::default_space_nodes_not_marked_as_maintenance_when_cluster_has_no_global_document_types"); - FleetControllerOptions.Builder builder = defaultOptions("mycluster") + FleetControllerOptions.Builder builder = defaultOptions() .setClusterHasGlobalDocumentTypes(false) .setMasterZooKeeperCooldownPeriod(1) .setMinTimeBeforeFirstSystemStateBroadcast(100000); - setUpFleetControllers(3, false, builder); - setUpVdsNodes(false); + Timer timer = new RealTimer(); + setUpFleetControllers(3, timer, builder); + setUpVdsNodes(timer); waitForMaster(0); waitForStableSystem(); waitForStateInAllSpaces("version:\\d+ distributor:10 storage:10"); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java index 
483c92d4861..3d3a38aacd4 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NoZooKeeperTest.java @@ -9,11 +9,11 @@ public class NoZooKeeperTest extends FleetControllerTest { @Test void testWantedStatesInZooKeeper() throws Exception { - startingTest("NoZooKeeperTest::testWantedStatesInZooKeeper"); // Null is the default for zooKeeperServerAddress - FleetControllerOptions.Builder builder = defaultOptions("mycluster"); - setUpFleetController(true, builder); - setUpVdsNodes(true); + FleetControllerOptions.Builder builder = defaultOptions(); + Timer timer = new FakeTimer(); + setUpFleetController(timer, builder); + setUpVdsNodes(timer); waitForStableSystem(); assertTrue(nodes.get(0).isDistributor()); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java index 4ca090b4947..2c77767d6b4 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/NodeSlobrokConfigurationMembershipTest.java @@ -15,16 +15,17 @@ import static org.junit.jupiter.api.Assertions.assertTrue; @Timeout(30) public class NodeSlobrokConfigurationMembershipTest extends FleetControllerTest { + private final Timer timer = new FakeTimer(); private final Set<Integer> nodeIndices = asIntSet(0, 1, 2, 3); private final int foreignNodeIndex = 6; private FleetControllerOptions setUpClusterWithForeignNode(Set<Integer> validIndices) throws Exception { Set<ConfiguredNode> configuredNodes = asConfiguredNodes(validIndices); FleetControllerOptions.Builder options = optionsForConfiguredNodes(configuredNodes); - 
setUpFleetController(true, options); + setUpFleetController(timer, options); Set<Integer> nodesWithStranger = new TreeSet<>(validIndices); nodesWithStranger.add(foreignNodeIndex); - setUpVdsNodes(true, false, nodesWithStranger); + setUpVdsNodes(timer, false, nodesWithStranger); return options.build(); } @@ -39,13 +40,13 @@ public class NodeSlobrokConfigurationMembershipTest extends FleetControllerTest @Test void testSlobrokNodeOutsideConfiguredIndexSetIsNotIncludedInCluster() throws Exception { setUpClusterWithForeignNode(nodeIndices); - waitForStateExcludingNodeSubset("version:\\d+ distributor:4 storage:4", asIntSet(foreignNodeIndex)); + waitForStateExcludingNodeSubset("version:\\d+ distributor:4 storage:4", asIntSet(foreignNodeIndex), timer); } @Test void testNodeSetReconfigurationForcesFreshSlobrokFetch() throws Exception { var options = setUpClusterWithForeignNode(nodeIndices); - waitForStateExcludingNodeSubset("version:\\d+ distributor:4 storage:4", asIntSet(foreignNodeIndex)); + waitForStateExcludingNodeSubset("version:\\d+ distributor:4 storage:4", asIntSet(foreignNodeIndex), timer); // If we get a configuration with the node present, we have to accept it into // cluster. 
If we do not re-fetch state from slobrok we risk racing @@ -63,8 +64,8 @@ public class NodeSlobrokConfigurationMembershipTest extends FleetControllerTest void test_removed_retired_node_is_not_included_in_state() throws Exception { Set<ConfiguredNode> configuredNodes = asConfiguredNodes(nodeIndices); FleetControllerOptions.Builder builder = optionsForConfiguredNodes(configuredNodes); - options = setUpFleetController(true, builder); - setUpVdsNodes(true, false, nodeIndices); + options = setUpFleetController(timer, builder); + setUpVdsNodes(timer, false, nodeIndices); waitForState("version:\\d+ distributor:4 storage:4"); @@ -89,11 +90,11 @@ public class NodeSlobrokConfigurationMembershipTest extends FleetControllerTest // The previously retired node should now be marked as down, as it no longer // exists from the point of view of the content cluster. We have to use a subset // state waiter, as the controller will not send the new state to node 0. - waitForStateExcludingNodeSubset("version:\\d+ distributor:4 .0.s:d storage:4 .0.s:d", asIntSet(0)); + waitForStateExcludingNodeSubset("version:\\d+ distributor:4 .0.s:d storage:4 .0.s:d", asIntSet(0), timer); // Ensure it remains down for subsequent cluster state versions as well. 
nodes.get(3).disconnect(); - waitForStateExcludingNodeSubset("version:\\d+ distributor:4 .0.s:d storage:4 .0.s:d .1.s:d", asIntSet(0, 1)); + waitForStateExcludingNodeSubset("version:\\d+ distributor:4 .0.s:d storage:4 .0.s:d .1.s:d", asIntSet(0, 1), timer); } } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java index 995ece46779..cc9c3f84de3 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java @@ -46,6 +46,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; public class RpcServerTest extends FleetControllerTest { public static Logger log = Logger.getLogger(RpcServerTest.class.getName()); + private final FakeTimer timer = new FakeTimer(); private Supervisor supervisor; @@ -62,10 +63,9 @@ public class RpcServerTest extends FleetControllerTest { @Test void testGetSystemState() throws Exception { LogFormatter.initializeLogging(); - startingTest("RpcServerTest::testGetSystemState"); - FleetControllerOptions.Builder options = defaultOptions("mycluster"); - setUpFleetController(true, options); - setUpVdsNodes(true); + FleetControllerOptions.Builder options = defaultOptions(); + setUpFleetController(timer, options); + setUpVdsNodes(timer); waitForStableSystem(); assertTrue(nodes.get(0).isDistributor()); @@ -128,7 +128,6 @@ public class RpcServerTest extends FleetControllerTest { @Test void testGetNodeState() throws Exception { - startingTest("RpcServerTest::testGetNodeState"); Set<ConfiguredNode> configuredNodes = new TreeSet<>(); for (int i = 0; i < 10; i++) configuredNodes.add(new ConfiguredNode(i, false)); @@ -136,8 +135,8 @@ public class RpcServerTest extends FleetControllerTest { builder.setMinRatioOfStorageNodesUp(0); 
builder.setMaxInitProgressTime(30000); builder.setStableStateTimePeriod(60000); - setUpFleetController(true, builder); - setUpVdsNodes(true); + setUpFleetController(timer, builder); + setUpVdsNodes(timer); waitForStableSystem(); setWantedNodeState(State.DOWN, NodeType.DISTRIBUTOR, 2); @@ -221,7 +220,6 @@ public class RpcServerTest extends FleetControllerTest { @Test void testGetNodeStateWithConfiguredRetired() throws Exception { - startingTest("RpcServerTest::testGetNodeStateWithConfiguredRetired"); List<ConfiguredNode> configuredNodes = new ArrayList<>(); for (int i = 0; i < 4; i++) configuredNodes.add(new ConfiguredNode(i, false)); @@ -230,8 +228,8 @@ public class RpcServerTest extends FleetControllerTest { .setMinRatioOfStorageNodesUp(0) .setMaxInitProgressTime(30000) .setStableStateTimePeriod(60000); - setUpFleetController(true, builder); - setUpVdsNodes(true, false, configuredNodes); + setUpFleetController(timer, builder); + setUpVdsNodes(timer, false, configuredNodes); waitForState("version:\\d+ distributor:5 storage:5 .4.s:r"); setWantedNodeState(State.DOWN, NodeType.DISTRIBUTOR, 2); @@ -255,8 +253,6 @@ public class RpcServerTest extends FleetControllerTest { @Test void testGetNodeStateWithConfigurationChangeToRetiredWhileNodeDown() throws Exception { - startingTest("RpcServerTest::testGetNodeStateWithConfigurationChangeToRetiredWhileNodeDown"); - { // Configuration: 5 nodes, all normal List<ConfiguredNode> configuredNodes = new ArrayList<>(); for (int i = 0; i < 5; i++) @@ -264,8 +260,8 @@ public class RpcServerTest extends FleetControllerTest { FleetControllerOptions.Builder builder = defaultOptions("mycluster", configuredNodes) .setMaxInitProgressTime(30000) .setStableStateTimePeriod(60000); - setUpFleetController(true, builder); - setUpVdsNodes(true, false, configuredNodes); + setUpFleetController(timer, builder); + setUpVdsNodes(timer, false, configuredNodes); waitForState("version:\\d+ distributor:5 storage:5"); } @@ -279,7 +275,7 @@ public class 
RpcServerTest extends FleetControllerTest { } { // Configuration change: Add 2 new nodes and retire the 5 existing ones - setUpVdsNodes(true, false, 2); + setUpVdsNodes(timer, false, 2); Set<ConfiguredNode> configuredNodes = new TreeSet<>(); for (int i = 0; i < 5; i++) configuredNodes.add(new ConfiguredNode(i, true)); @@ -336,8 +332,6 @@ public class RpcServerTest extends FleetControllerTest { @Test void testGetNodeStateWithConfigurationChangeToRetired() throws Exception { - startingTest("RpcServerTest::testGetNodeStateWithConfigurationChangeToRetired"); - { // Configuration: 5 nodes, all normal List<ConfiguredNode> configuredNodes = new ArrayList<>(); for (int i = 0; i < 5; i++) @@ -346,8 +340,8 @@ public class RpcServerTest extends FleetControllerTest { .setMaxInitProgressTime(30000) .setStableStateTimePeriod(60000); options = builder.build(); - setUpFleetController(true, builder); - setUpVdsNodes(true, false, configuredNodes); + setUpFleetController(timer, builder); + setUpVdsNodes(timer, false, configuredNodes); waitForState("version:\\d+ distributor:5 storage:5"); } @@ -364,7 +358,7 @@ public class RpcServerTest extends FleetControllerTest { } { // Configuration change: Add 2 new nodes and retire the 5 existing ones - setUpVdsNodes(true, false, 2); + setUpVdsNodes(timer, false, 2); Set<ConfiguredNode> configuredNodes = new TreeSet<>(); for (int i = 0; i < 5; i++) configuredNodes.add(new ConfiguredNode(i, true)); @@ -415,13 +409,12 @@ public class RpcServerTest extends FleetControllerTest { @Test void testSetNodeState() throws Exception { - startingTest("RpcServerTest::testSetNodeState"); Set<Integer> nodeIndexes = new TreeSet<>(List.of(4, 6, 9, 10, 14, 16, 21, 22, 23, 25)); Set<ConfiguredNode> configuredNodes = nodeIndexes.stream().map(i -> new ConfiguredNode(i, false)).collect(Collectors.toSet()); FleetControllerOptions.Builder options = defaultOptions("mycluster", configuredNodes); //options.setStorageDistribution(new 
Distribution(getDistConfig(nodeIndexes))); - setUpFleetController(true, options); - setUpVdsNodes(true, false, nodeIndexes); + setUpFleetController(timer, options); + setUpVdsNodes(timer, false, nodeIndexes); waitForState("version:\\d+ distributor:26 .0.s:d .1.s:d .2.s:d .3.s:d .5.s:d .7.s:d .8.s:d .11.s:d .12.s:d .13.s:d .15.s:d .17.s:d .18.s:d .19.s:d .20.s:d .24.s:d storage:26 .0.s:d .1.s:d .2.s:d .3.s:d .5.s:d .7.s:d .8.s:d .11.s:d .12.s:d .13.s:d .15.s:d .17.s:d .18.s:d .19.s:d .20.s:d .24.s:d"); int rpcPort = fleetController().getRpcPort(); @@ -455,11 +448,10 @@ public class RpcServerTest extends FleetControllerTest { @Test void testSetNodeStateOutOfRange() throws Exception { - startingTest("RpcServerTest::testSetNodeStateOutOfRange"); - FleetControllerOptions.Builder options = defaultOptions("mycluster"); + FleetControllerOptions.Builder options = defaultOptions(); options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(2, 10))); - setUpFleetController(true, options); - setUpVdsNodes(true); + setUpFleetController(timer, options); + setUpVdsNodes(timer); waitForStableSystem(); int rpcPort = fleetController().getRpcPort(); @@ -482,11 +474,10 @@ public class RpcServerTest extends FleetControllerTest { @Test void testGetMaster() throws Exception { - startingTest("RpcServerTest::testGetMaster"); - FleetControllerOptions.Builder options = defaultOptions("mycluster"); + FleetControllerOptions.Builder options = defaultOptions(); options.setStorageDistribution(new Distribution(Distribution.getDefaultDistributionConfig(2, 10))); - setUpFleetController(true, options); - setUpVdsNodes(true); + setUpFleetController(timer, options); + setUpVdsNodes(timer); waitForStableSystem(); int rpcPort = fleetController().getRpcPort(); @@ -503,10 +494,9 @@ public class RpcServerTest extends FleetControllerTest { @Test void testGetNodeList() throws Exception { - startingTest("RpcServerTest::testGetNodeList"); - setUpFleetController(true, 
defaultOptions("mycluster", 5)); + setUpFleetController(timer, defaultOptions(5)); final int nodeCount = 5; - setUpVdsNodes(true, false, nodeCount); + setUpVdsNodes(timer, false, nodeCount); waitForStableSystem(); assertTrue(nodes.get(0).isDistributor()); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java index 6d2a4fdeb24..65dd13ab8a6 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java @@ -15,14 +15,15 @@ public class SlobrokTest extends FleetControllerTest { private static final Logger log = Logger.getLogger(SlobrokTest.class.getName()); + private final FakeTimer timer = new FakeTimer(); + @Test void testSingleSlobrokRestart() throws Exception { - startingTest("SlobrokTest::testSingleSlobrokRestart"); - FleetControllerOptions.Builder builder = defaultOptions("mycluster") + FleetControllerOptions.Builder builder = defaultOptions() .setNodeStateRequestTimeoutMS(60 * 60 * 1000) .setMaxSlobrokDisconnectGracePeriod(60 * 60 * 1000); - setUpFleetController(true, builder); - setUpVdsNodes(true); + setUpFleetController(timer, builder); + setUpVdsNodes(timer); waitForStableSystem(); FleetController fleetController = fleetController(); @@ -72,12 +73,11 @@ public class SlobrokTest extends FleetControllerTest { @Test void testNodeTooLongOutOfSlobrok() throws Exception { - startingTest("SlobrokTest::testNodeTooLongOutOfSlobrok"); - FleetControllerOptions.Builder builder = defaultOptions("mycluster") + FleetControllerOptions.Builder builder = defaultOptions() .setMaxSlobrokDisconnectGracePeriod(60 * 1000) .setNodeStateRequestTimeoutMS(10000 * 60 * 1000); - setUpFleetController(true, builder); - setUpVdsNodes(true); + setUpFleetController(timer, builder); + setUpVdsNodes(timer); 
waitForStableSystem(); int version = fleetController().getSystemState().getVersion(); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java index 7d2cc9b8df2..c0e116ef5fe 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java @@ -7,18 +7,15 @@ import com.yahoo.vdslib.state.Node; import com.yahoo.vdslib.state.NodeState; import com.yahoo.vdslib.state.NodeType; import com.yahoo.vdslib.state.State; -import com.yahoo.vespa.clustercontroller.core.database.DatabaseHandler; -import com.yahoo.vespa.clustercontroller.core.database.ZooKeeperDatabaseFactory; import com.yahoo.vespa.clustercontroller.core.testutils.StateWaiter; -import com.yahoo.vespa.clustercontroller.utils.util.NoMetricReporter; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; + import java.time.Duration; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.logging.Logger; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.MatcherAssert.assertThat; @@ -29,10 +26,10 @@ import static org.junit.jupiter.api.Assertions.assertTrue; @ExtendWith(CleanupZookeeperLogsOnSuccess.class) public class StateChangeTest extends FleetControllerTest { - public static Logger log = Logger.getLogger(StateChangeTest.class.getName()); + private final FakeTimer timer = new FakeTimer(); + private FleetController ctrl; private DummyCommunicator communicator; - private EventLog eventLog; private void initialize(FleetControllerOptions options) throws Exception { List<Node> nodes = new ArrayList<>(); @@ -43,16 +40,8 @@ public class StateChangeTest extends FleetControllerTest { var context = new 
TestFleetControllerContext(options); communicator = new DummyCommunicator(nodes, timer); - var metricUpdater = new MetricUpdater(new NoMetricReporter(), options.fleetControllerIndex(), options.clusterName()); - eventLog = new EventLog(timer, metricUpdater); - var cluster = new ContentCluster(options.clusterName(), options.nodes(), options.storageDistribution()); - var stateGatherer = new NodeStateGatherer(timer, timer, eventLog); - var database = new DatabaseHandler(context, new ZooKeeperDatabaseFactory(context), timer, options.zooKeeperServerAddress(), timer); - var stateGenerator = new StateChangeHandler(context, timer, eventLog); - var stateBroadcaster = new SystemStateBroadcaster(context, timer, timer); - var masterElectionHandler = new MasterElectionHandler(context, options.fleetControllerIndex(), options.fleetControllerCount(), timer, timer); - ctrl = new FleetController(context, timer, eventLog, cluster, stateGatherer, communicator, null, communicator, database, - stateGenerator, stateBroadcaster, masterElectionHandler, metricUpdater, options); + boolean start = false; + ctrl = createFleetController(timer, options, context, communicator, communicator, null, start); ctrl.tick(); if (options.fleetControllerCount() == 1) { @@ -71,20 +60,13 @@ public class StateChangeTest extends FleetControllerTest { private void verifyNodeEvents(Node n, String correct) { String actual = ""; - for (NodeEvent e : eventLog.getNodeEvents(n)) { + for (NodeEvent e : ctrl.getEventLog().getNodeEvents(n)) { actual += e.toString() + "\n"; } assertEquals(correct, actual); } - private static List<ConfiguredNode> createNodes(int count) { - List<ConfiguredNode> nodes = new ArrayList<>(); - for (int i = 0; i < count; i++) - nodes.add(new ConfiguredNode(i, false)); - return nodes; - } - @Test void testNormalStartup() throws Exception { FleetControllerOptions.Builder options = defaultOptions(); @@ -139,22 +121,26 @@ public class StateChangeTest extends FleetControllerTest { 
assertEquals("version:8 distributor:10 storage:10", ctrl.getSystemState().toString()); verifyNodeEvents(new Node(NodeType.DISTRIBUTOR, 0), - "Event: distributor.0: Now reporting state U\n" + - "Event: distributor.0: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'\n" + - "Event: distributor.0: Now reporting state I, i 0.00\n" + - "Event: distributor.0: Altered node state in cluster state from 'U' to 'I, i 0.00'\n" + - "Event: distributor.0: Now reporting state U\n" + - "Event: distributor.0: Altered node state in cluster state from 'I, i 0.00' to 'U'\n"); + """ + Event: distributor.0: Now reporting state U + Event: distributor.0: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U' + Event: distributor.0: Now reporting state I, i 0.00 + Event: distributor.0: Altered node state in cluster state from 'U' to 'I, i 0.00' + Event: distributor.0: Now reporting state U + Event: distributor.0: Altered node state in cluster state from 'I, i 0.00' to 'U' + """); verifyNodeEvents(new Node(NodeType.STORAGE, 0), - "Event: storage.0: Now reporting state U\n" + - "Event: storage.0: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'\n" + - "Event: storage.0: Now reporting state I, i 0.00 (ls)\n" + - "Event: storage.0: Altered node state in cluster state from 'U' to 'D'\n" + - "Event: storage.0: Now reporting state I, i 0.100 (read)\n" + - "Event: storage.0: Altered node state in cluster state from 'D' to 'I, i 0.100 (read)'\n" + - "Event: storage.0: Now reporting state U\n" + - "Event: storage.0: Altered node state in cluster state from 'I, i 0.100 (read)' to 'U'\n"); + """ + Event: storage.0: Now reporting state U + Event: storage.0: Altered node state in cluster state from 'D: Node not seen in slobrok.' 
to 'U' + Event: storage.0: Now reporting state I, i 0.00 (ls) + Event: storage.0: Altered node state in cluster state from 'U' to 'D' + Event: storage.0: Now reporting state I, i 0.100 (read) + Event: storage.0: Altered node state in cluster state from 'D' to 'I, i 0.100 (read)' + Event: storage.0: Now reporting state U + Event: storage.0: Altered node state in cluster state from 'I, i 0.100 (read)' to 'U' + """); } @Test @@ -217,25 +203,29 @@ public class StateChangeTest extends FleetControllerTest { assert(!ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 0)).hasDescription()); verifyNodeEvents(new Node(NodeType.DISTRIBUTOR, 0), - "Event: distributor.0: Now reporting state U\n" + - "Event: distributor.0: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'\n" + - "Event: distributor.0: Failed to get node state: D: Closed at other end\n" + - "Event: distributor.0: Stopped or possibly crashed after 0 ms, which is before stable state time period. Premature crash count is now 1.\n" + - "Event: distributor.0: Altered node state in cluster state from 'U' to 'D: Closed at other end'\n" + - "Event: distributor.0: Now reporting state U, t 12345678\n" + - "Event: distributor.0: Altered node state in cluster state from 'D: Closed at other end' to 'U, t 12345678'\n" + - "Event: distributor.0: Altered node state in cluster state from 'U, t 12345678' to 'U'\n"); + """ + Event: distributor.0: Now reporting state U + Event: distributor.0: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U' + Event: distributor.0: Failed to get node state: D: Closed at other end + Event: distributor.0: Stopped or possibly crashed after 0 ms, which is before stable state time period. Premature crash count is now 1. 
+ Event: distributor.0: Altered node state in cluster state from 'U' to 'D: Closed at other end' + Event: distributor.0: Now reporting state U, t 12345678 + Event: distributor.0: Altered node state in cluster state from 'D: Closed at other end' to 'U, t 12345678' + Event: distributor.0: Altered node state in cluster state from 'U, t 12345678' to 'U' + """); verifyNodeEvents(new Node(NodeType.STORAGE, 0), - "Event: storage.0: Now reporting state U\n" + - "Event: storage.0: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'\n" + - "Event: storage.0: Failed to get node state: D: Closed at other end\n" + - "Event: storage.0: Stopped or possibly crashed after 1000 ms, which is before stable state time period. Premature crash count is now 1.\n" + - "Event: storage.0: Altered node state in cluster state from 'U' to 'M: Closed at other end'\n" + - "Event: storage.0: Exceeded implicit maintenance mode grace period of 5000 milliseconds. Marking node down.\n" + - "Event: storage.0: Altered node state in cluster state from 'M: Closed at other end' to 'D: Closed at other end'\n" + - "Event: storage.0: Now reporting state U, t 12345679\n" + - "Event: storage.0: Altered node state in cluster state from 'D: Closed at other end' to 'U, t 12345679'\n"); + """ + Event: storage.0: Now reporting state U + Event: storage.0: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U' + Event: storage.0: Failed to get node state: D: Closed at other end + Event: storage.0: Stopped or possibly crashed after 1000 ms, which is before stable state time period. Premature crash count is now 1. + Event: storage.0: Altered node state in cluster state from 'U' to 'M: Closed at other end' + Event: storage.0: Exceeded implicit maintenance mode grace period of 5000 milliseconds. Marking node down. 
+ Event: storage.0: Altered node state in cluster state from 'M: Closed at other end' to 'D: Closed at other end' + Event: storage.0: Now reporting state U, t 12345679 + Event: storage.0: Altered node state in cluster state from 'D: Closed at other end' to 'U, t 12345679' + """); assertEquals(1, ctrl.getCluster().getNodeInfo(new Node(NodeType.DISTRIBUTOR, 0)).getPrematureCrashCount()); assertEquals(1, ctrl.getCluster().getNodeInfo(new Node(NodeType.STORAGE, 0)).getPrematureCrashCount()); @@ -299,22 +289,26 @@ public class StateChangeTest extends FleetControllerTest { assertEquals(0, ctrl.getCluster().getNodeInfo(new Node(NodeType.STORAGE, 0)).getPrematureCrashCount()); verifyNodeEvents(new Node(NodeType.DISTRIBUTOR, 0), - "Event: distributor.0: Now reporting state U\n" + - "Event: distributor.0: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'\n" + - "Event: distributor.0: Failed to get node state: D: controlled shutdown\n" + - "Event: distributor.0: Altered node state in cluster state from 'U' to 'D: controlled shutdown'\n" + - "Event: distributor.0: Now reporting state U\n" + - "Event: distributor.0: Altered node state in cluster state from 'D: controlled shutdown' to 'U'\n"); + """ + Event: distributor.0: Now reporting state U + Event: distributor.0: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U' + Event: distributor.0: Failed to get node state: D: controlled shutdown + Event: distributor.0: Altered node state in cluster state from 'U' to 'D: controlled shutdown' + Event: distributor.0: Now reporting state U + Event: distributor.0: Altered node state in cluster state from 'D: controlled shutdown' to 'U' + """); verifyNodeEvents(new Node(NodeType.STORAGE, 0), - "Event: storage.0: Now reporting state U\n" + - "Event: storage.0: Altered node state in cluster state from 'D: Node not seen in slobrok.' 
to 'U'\n" + - "Event: storage.0: Failed to get node state: D: controlled shutdown\n" + - "Event: storage.0: Altered node state in cluster state from 'U' to 'M: controlled shutdown'\n" + - "Event: storage.0: Exceeded implicit maintenance mode grace period of 5000 milliseconds. Marking node down.\n" + - "Event: storage.0: Altered node state in cluster state from 'M: controlled shutdown' to 'D: controlled shutdown'\n" + - "Event: storage.0: Now reporting state U\n" + - "Event: storage.0: Altered node state in cluster state from 'D: controlled shutdown' to 'U'\n"); + """ + Event: storage.0: Now reporting state U + Event: storage.0: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U' + Event: storage.0: Failed to get node state: D: controlled shutdown + Event: storage.0: Altered node state in cluster state from 'U' to 'M: controlled shutdown' + Event: storage.0: Exceeded implicit maintenance mode grace period of 5000 milliseconds. Marking node down. + Event: storage.0: Altered node state in cluster state from 'M: controlled shutdown' to 'D: controlled shutdown' + Event: storage.0: Now reporting state U + Event: storage.0: Altered node state in cluster state from 'D: controlled shutdown' to 'U' + """); } @@ -354,9 +348,11 @@ public class StateChangeTest extends FleetControllerTest { assertEquals("version:3 distributor:10 storage:10", ctrl.getSystemState().toString()); verifyNodeEvents(new Node(NodeType.STORAGE, 0), - "Event: storage.0: Now reporting state U\n" + - "Event: storage.0: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'\n" + - "Event: storage.0: Node is no longer in slobrok, but we still have a pending state request.\n"); + """ + Event: storage.0: Now reporting state U + Event: storage.0: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U' + Event: storage.0: Node is no longer in slobrok, but we still have a pending state request. 
+ """); } @Test @@ -401,16 +397,18 @@ public class StateChangeTest extends FleetControllerTest { assert(!ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 6)).hasDescription()); verifyNodeEvents(new Node(NodeType.STORAGE, 6), - "Event: storage.6: Now reporting state U\n" + - "Event: storage.6: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'\n" + - "Event: storage.6: Failed to get node state: D: Connection error: Closed at other end\n" + - "Event: storage.6: Stopped or possibly crashed after 0 ms, which is before stable state time period. Premature crash count is now 1.\n" + - "Event: storage.6: Altered node state in cluster state from 'U' to 'M: Connection error: Closed at other end'\n" + - "Event: storage.6: Now reporting state I, i 0.00 (ls)\n" + - "Event: storage.6: Now reporting state I, i 0.600 (read)\n" + - "Event: storage.6: Altered node state in cluster state from 'M: Connection error: Closed at other end' to 'I, i 0.600 (read)'\n" + - "Event: storage.6: Now reporting state U\n" + - "Event: storage.6: Altered node state in cluster state from 'I, i 0.600 (read)' to 'U'\n"); + """ + Event: storage.6: Now reporting state U + Event: storage.6: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U' + Event: storage.6: Failed to get node state: D: Connection error: Closed at other end + Event: storage.6: Stopped or possibly crashed after 0 ms, which is before stable state time period. Premature crash count is now 1. 
+ Event: storage.6: Altered node state in cluster state from 'U' to 'M: Connection error: Closed at other end' + Event: storage.6: Now reporting state I, i 0.00 (ls) + Event: storage.6: Now reporting state I, i 0.600 (read) + Event: storage.6: Altered node state in cluster state from 'M: Connection error: Closed at other end' to 'I, i 0.600 (read)' + Event: storage.6: Now reporting state U + Event: storage.6: Altered node state in cluster state from 'I, i 0.600 (read)' to 'U' + """); } @Test @@ -461,15 +459,17 @@ public class StateChangeTest extends FleetControllerTest { assert(!ctrl.getReportedNodeState(new Node(NodeType.STORAGE, 6)).hasDescription()); verifyNodeEvents(new Node(NodeType.STORAGE, 6), - "Event: storage.6: Now reporting state U\n" + - "Event: storage.6: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'R'\n" + - "Event: storage.6: Failed to get node state: D: Connection error: Closed at other end\n" + - "Event: storage.6: Stopped or possibly crashed after 0 ms, which is before stable state time period. Premature crash count is now 1.\n" + - "Event: storage.6: Altered node state in cluster state from 'R' to 'M: Connection error: Closed at other end'\n" + - "Event: storage.6: Now reporting state I, i 0.00 (ls)\n" + - "Event: storage.6: Now reporting state I, i 0.600 (read)\n" + - "Event: storage.6: Now reporting state U\n" + - "Event: storage.6: Altered node state in cluster state from 'M: Connection error: Closed at other end' to 'R'\n"); + """ + Event: storage.6: Now reporting state U + Event: storage.6: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'R' + Event: storage.6: Failed to get node state: D: Connection error: Closed at other end + Event: storage.6: Stopped or possibly crashed after 0 ms, which is before stable state time period. Premature crash count is now 1. 
+ Event: storage.6: Altered node state in cluster state from 'R' to 'M: Connection error: Closed at other end' + Event: storage.6: Now reporting state I, i 0.00 (ls) + Event: storage.6: Now reporting state I, i 0.600 (read) + Event: storage.6: Now reporting state U + Event: storage.6: Altered node state in cluster state from 'M: Connection error: Closed at other end' to 'R' + """); } @Test @@ -549,17 +549,19 @@ public class StateChangeTest extends FleetControllerTest { assertEquals("version:7 distributor:10 storage:10", ctrl.getSystemState().toString()); verifyNodeEvents(new Node(NodeType.STORAGE, 6), - "Event: storage.6: Now reporting state U\n" + - "Event: storage.6: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'\n" + - "Event: storage.6: Failed to get node state: D: Connection error: Closed at other end\n" + - "Event: storage.6: Altered node state in cluster state from 'U' to 'M: Connection error: Closed at other end'\n" + - "Event: storage.6: Exceeded implicit maintenance mode grace period of 5000 milliseconds. Marking node down.\n" + - "Event: storage.6: Altered node state in cluster state from 'M: Connection error: Closed at other end' to 'D: Connection error: Closed at other end'\n" + - "Event: storage.6: Now reporting state I, i 0.00100 (ls)\n" + - "Event: storage.6: Now reporting state I, i 0.100 (read)\n" + - "Event: storage.6: Altered node state in cluster state from 'D: Connection error: Closed at other end' to 'I, i 0.100 (read)'\n" + - "Event: storage.6: Now reporting state U\n" + - "Event: storage.6: Altered node state in cluster state from 'I, i 0.100 (read)' to 'U'\n"); + """ + Event: storage.6: Now reporting state U + Event: storage.6: Altered node state in cluster state from 'D: Node not seen in slobrok.' 
to 'U' + Event: storage.6: Failed to get node state: D: Connection error: Closed at other end + Event: storage.6: Altered node state in cluster state from 'U' to 'M: Connection error: Closed at other end' + Event: storage.6: Exceeded implicit maintenance mode grace period of 5000 milliseconds. Marking node down. + Event: storage.6: Altered node state in cluster state from 'M: Connection error: Closed at other end' to 'D: Connection error: Closed at other end' + Event: storage.6: Now reporting state I, i 0.00100 (ls) + Event: storage.6: Now reporting state I, i 0.100 (read) + Event: storage.6: Altered node state in cluster state from 'D: Connection error: Closed at other end' to 'I, i 0.100 (read)' + Event: storage.6: Now reporting state U + Event: storage.6: Altered node state in cluster state from 'I, i 0.100 (read)' to 'U' + """); } @Test @@ -630,21 +632,23 @@ public class StateChangeTest extends FleetControllerTest { assertEquals("version:8 distributor:10 storage:10", ctrl.getSystemState().toString()); verifyNodeEvents(new Node(NodeType.STORAGE, 6), - "Event: storage.6: Now reporting state U\n" + - "Event: storage.6: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'\n" + - "Event: storage.6: Failed to get node state: D: Connection error: Closed at other end\n" + - "Event: storage.6: Altered node state in cluster state from 'U' to 'M: Connection error: Closed at other end'\n" + - "Event: storage.6: Exceeded implicit maintenance mode grace period of 5000 milliseconds. Marking node down.\n" + - "Event: storage.6: Altered node state in cluster state from 'M: Connection error: Closed at other end' to 'D: Connection error: Closed at other end'\n" + - "Event: storage.6: Now reporting state I, i 0.100 (read)\n" + - "Event: storage.6: Altered node state in cluster state from 'D: Connection error: Closed at other end' to 'I, i 0.100 (read)'\n" + - "Event: storage.6: 5001 milliseconds without initialize progress. Marking node down. 
Premature crash count is now 1.\n" + - "Event: storage.6: Altered node state in cluster state from 'I, i 0.100 (read)' to 'D'\n" + - "Event: storage.6: Failed to get node state: D: Connection error: Closed at other end\n" + - "Event: storage.6: Now reporting state I, i 0.00 (ls)\n" + - "Event: storage.6: Now reporting state I, i 0.100 (read)\n" + - "Event: storage.6: Now reporting state U\n" + - "Event: storage.6: Altered node state in cluster state from 'D' to 'U'\n"); + """ + Event: storage.6: Now reporting state U + Event: storage.6: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U' + Event: storage.6: Failed to get node state: D: Connection error: Closed at other end + Event: storage.6: Altered node state in cluster state from 'U' to 'M: Connection error: Closed at other end' + Event: storage.6: Exceeded implicit maintenance mode grace period of 5000 milliseconds. Marking node down. + Event: storage.6: Altered node state in cluster state from 'M: Connection error: Closed at other end' to 'D: Connection error: Closed at other end' + Event: storage.6: Now reporting state I, i 0.100 (read) + Event: storage.6: Altered node state in cluster state from 'D: Connection error: Closed at other end' to 'I, i 0.100 (read)' + Event: storage.6: 5001 milliseconds without initialize progress. Marking node down. Premature crash count is now 1. 
+ Event: storage.6: Altered node state in cluster state from 'I, i 0.100 (read)' to 'D' + Event: storage.6: Failed to get node state: D: Connection error: Closed at other end + Event: storage.6: Now reporting state I, i 0.00 (ls) + Event: storage.6: Now reporting state I, i 0.100 (read) + Event: storage.6: Now reporting state U + Event: storage.6: Altered node state in cluster state from 'D' to 'U' + """); } @@ -752,7 +756,6 @@ public class StateChangeTest extends FleetControllerTest { @Test void testContinuousCrashRightAfterInit() throws Exception { - startingTest("StateChangeTest::testContinuousCrashRightAfterInit"); // If node does this too many times, take it out of service FleetControllerOptions.Builder builder = defaultOptions() .setMaxTransitionTime(NodeType.STORAGE, 5000) @@ -806,7 +809,6 @@ public class StateChangeTest extends FleetControllerTest { @Test void testClusterStateMinNodes() throws Exception { - startingTest("StateChangeTest::testClusterStateMinNodes"); // If node does this too many times, take it out of service FleetControllerOptions.Builder builder = defaultOptions() .setMaxTransitionTime(NodeType.STORAGE, 0) @@ -861,7 +863,6 @@ public class StateChangeTest extends FleetControllerTest { @Test void testClusterStateMinFactor() throws Exception { - startingTest("StateChangeTest::testClusterStateMinFactor"); // If node does this too many times, take it out of service FleetControllerOptions.Builder options = defaultOptions(); options.setMaxTransitionTime(NodeType.STORAGE, 0); @@ -935,19 +936,16 @@ public class StateChangeTest extends FleetControllerTest { @Test void testNoSystemStateBeforeInitialTimePeriod() throws Exception { - startingTest("StateChangeTest::testNoSystemStateBeforeInitialTimePeriod()"); FleetControllerOptions.Builder builder = defaultOptions() .setMinTimeBeforeFirstSystemStateBroadcast(3 * 60 * 1000); - setUpSystem(builder); - boolean useFakeTimer = true; - setUpVdsNodes(useFakeTimer, true); + setUpFleetController(timer, builder); 
+ setUpVdsNodes(timer, true); // Leave one node down to avoid sending cluster state due to having seen all node states. for (int i = 0; i < nodes.size(); ++i) { if (i != 3) { nodes.get(i).connect(); } } - setUpFleetController(useFakeTimer, builder); StateWaiter waiter = new StateWaiter(timer); fleetController().addSystemStateListener(waiter); @@ -981,14 +979,10 @@ public class StateChangeTest extends FleetControllerTest { @Test void testSystemStateSentWhenNodesReplied() throws Exception { - startingTest("StateChangeTest::testSystemStateSentWhenNodesReplied()"); FleetControllerOptions.Builder builder = defaultOptions() .setMinTimeBeforeFirstSystemStateBroadcast(300 * 60 * 1000); - - boolean useFakeTimer = true; - setUpSystem(builder); - - setUpVdsNodes(useFakeTimer, true); + setUpFleetController(timer, builder); + setUpVdsNodes(timer, true); for (DummyVdsNode node : nodes) { node.connect(); @@ -996,8 +990,6 @@ public class StateChangeTest extends FleetControllerTest { // Marking one node as 'initializing' improves testing of state later on. 
nodes.get(3).setNodeState(State.INITIALIZING); - setUpFleetController(useFakeTimer, builder); - final StateWaiter waiter = new StateWaiter(timer); fleetController().addSystemStateListener(waiter); @@ -1019,10 +1011,9 @@ public class StateChangeTest extends FleetControllerTest { @Test void testDontTagFailingSetSystemStateOk() throws Exception { - startingTest("StateChangeTest::testDontTagFailingSetSystemStateOk()"); FleetControllerOptions.Builder options = defaultOptions(); - setUpFleetController(true, options); - setUpVdsNodes(true); + setUpFleetController(timer, options); + setUpVdsNodes(timer); waitForStableSystem(); StateWaiter waiter = new StateWaiter(timer); @@ -1050,7 +1041,6 @@ public class StateChangeTest extends FleetControllerTest { @Test void testAlteringDistributionSplitCount() throws Exception { - startingTest("StateChangeTest::testAlteringDistributionSplitCount"); FleetControllerOptions.Builder options = defaultOptions(); options.setDistributionBits(17); @@ -1097,10 +1087,9 @@ public class StateChangeTest extends FleetControllerTest { @Test void testSetAllTimestampsAfterDowntime() throws Exception { - startingTest("StateChangeTest::testSetAllTimestampsAfterDowntime"); FleetControllerOptions.Builder options = defaultOptions(); - setUpFleetController(true, options); - setUpVdsNodes(true); + setUpFleetController(timer, options); + setUpVdsNodes(timer); waitForStableSystem(); StateWaiter waiter = new StateWaiter(timer); @@ -1203,11 +1192,13 @@ public class StateChangeTest extends FleetControllerTest { } verifyNodeEvents(new Node(NodeType.STORAGE, 2), - "Event: storage.2: Now reporting state U\n" + - "Event: storage.2: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'\n" + - "Event: storage.2: Failed to get node state: D: foo\n" + - "Event: storage.2: Stopped or possibly crashed after 500 ms, which is before stable state time period. 
Premature crash count is now 1.\n" + - "Event: storage.2: Altered node state in cluster state from 'U' to 'M: foo'\n"); + """ + Event: storage.2: Now reporting state U + Event: storage.2: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U' + Event: storage.2: Failed to get node state: D: foo + Event: storage.2: Stopped or possibly crashed after 500 ms, which is before stable state time period. Premature crash count is now 1. + Event: storage.2: Altered node state in cluster state from 'U' to 'M: foo' + """); // Note: even though max transition time has passed, events are now emitted only on cluster state // publish edges. These are currently suppressed when the cluster state is down, as all cluster down // states are considered similar to other cluster down states. This is not necessarily optimal, but @@ -1247,11 +1238,13 @@ public class StateChangeTest extends FleetControllerTest { // We should only get "Altered min distribution bit count" event once, not 9 times. verifyNodeEvents(new Node(NodeType.STORAGE, 2), - "Event: storage.2: Now reporting state U\n" + - "Event: storage.2: Altered node state in cluster state from 'D: Node not seen in slobrok.' to 'U'\n" + - "Event: storage.2: Now reporting state I, i 0.100 (read)\n" + - "Event: storage.2: Altered node state in cluster state from 'U' to 'I, i 0.100 (read)'\n" + - "Event: storage.2: Altered min distribution bit count from 16 to 17\n"); + """ + Event: storage.2: Now reporting state U + Event: storage.2: Altered node state in cluster state from 'D: Node not seen in slobrok.' 
to 'U' + Event: storage.2: Now reporting state I, i 0.100 (read) + Event: storage.2: Altered node state in cluster state from 'U' to 'I, i 0.100 (read)' + Event: storage.2: Altered min distribution bit count from 16 to 17 + """); } @@ -1386,10 +1379,6 @@ public class StateChangeTest extends FleetControllerTest { } } - private static FleetControllerOptions.Builder defaultOptions() { - return defaultOptions("mycluster", createNodes(10)); - } - private static FleetControllerOptions.Builder optionsWithZeroTransitionTime() { FleetControllerOptions.Builder options = defaultOptions(); options.setMaxTransitionTime(NodeType.STORAGE, 0); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java index 796204989b9..f6b676cf421 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java @@ -6,7 +6,6 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import java.time.Instant; import java.util.concurrent.TimeoutException; -import java.util.logging.Level; import java.util.logging.Logger; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -16,6 +15,8 @@ public class StateGatherTest extends FleetControllerTest { public static Logger log = Logger.getLogger(StateGatherTest.class.getName()); + private final FakeTimer timer = new FakeTimer(); + private String getGetNodeStateReplyCounts(DummyVdsNode node) { StringBuilder sb = new StringBuilder(); sb.append("timedout ").append(node.timedOutStateReplies) @@ -28,14 +29,13 @@ public class StateGatherTest extends FleetControllerTest { @Test void testAlwaysHavePendingGetNodeStateRequestTowardsNodes() throws Exception { - Logger.getLogger(NodeStateGatherer.class.getName()).setLevel(Level.FINEST); 
- startingTest("StateGatherTest::testOverlappingGetNodeStateRequests"); - FleetControllerOptions.Builder builder = defaultOptions("mycluster") + //Logger.getLogger(NodeStateGatherer.class.getName()).setLevel(Level.FINEST); + FleetControllerOptions.Builder builder = defaultOptions() .setNodeStateRequestTimeoutMS(10 * 60 * 1000) // Force actual message timeout to be lower than request timeout. .setNodeStateRequestTimeoutEarliestPercentage(80) .setNodeStateRequestTimeoutLatestPercentage(80); - setUpFleetController(true, builder); + setUpFleetController(timer, builder); String[] connectionSpecs = getSlobrokConnectionSpecs(slobrok); DummyVdsNode dnode = new DummyVdsNode(timer, connectionSpecs, builder.clusterName(), NodeType.DISTRIBUTOR, 0); DummyVdsNode snode = new DummyVdsNode(timer, connectionSpecs, builder.clusterName(), NodeType.STORAGE, 0); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java index 909f5a1f784..17ed6ca7a7b 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/WantedStateTest.java @@ -15,6 +15,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; public class WantedStateTest extends FleetControllerTest { private Supervisor supervisor; + private final Timer timer = new FakeTimer(); @BeforeEach public void setup() { @@ -28,9 +29,8 @@ public class WantedStateTest extends FleetControllerTest { @Test void testSettingStorageNodeMaintenanceAndBack() throws Exception { - startingTest("WantedStateTest::testSettingStorageNodeMaintenanceAndBack()"); - setUpFleetController(true, defaultOptions("mycluster")); - setUpVdsNodes(true); + setUpFleetController(timer, defaultOptions()); + setUpVdsNodes(timer); waitForStableSystem(); setWantedState(nodes.get(1), 
State.MAINTENANCE, null, supervisor); @@ -42,9 +42,8 @@ public class WantedStateTest extends FleetControllerTest { @Test void testOverridingWantedStateOtherReason() throws Exception { - startingTest("WantedStateTest::testOverridingWantedStateOtherReason()"); - setUpFleetController(true, defaultOptions("mycluster")); - setUpVdsNodes(true); + setUpFleetController(timer, defaultOptions()); + setUpVdsNodes(timer); waitForStableSystem(); setWantedState(nodes.get(1), State.MAINTENANCE, "Foo", supervisor); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java index f7403b45707..205d5b05b29 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/restapiv2/StateRestApiTest.java @@ -11,13 +11,14 @@ import com.yahoo.vdslib.state.State; import com.yahoo.vespa.clustercontroller.core.AnnotatedClusterState; import com.yahoo.vespa.clustercontroller.core.ClusterStateBundle; import com.yahoo.vespa.clustercontroller.core.ContentCluster; -import com.yahoo.vespa.clustercontroller.core.FleetControllerTest; import com.yahoo.vespa.clustercontroller.core.NodeInfo; import com.yahoo.vespa.clustercontroller.core.RemoteClusterControllerTaskScheduler; import com.yahoo.vespa.clustercontroller.core.hostinfo.HostInfo; import com.yahoo.vespa.clustercontroller.utils.staterestapi.StateRestAPI; import com.yahoo.vespa.clustercontroller.utils.staterestapi.requests.UnitStateRequest; import com.yahoo.vespa.clustercontroller.utils.staterestapi.server.JsonWriter; + +import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.LinkedHashMap; @@ -53,7 +54,7 @@ public abstract class StateRestApiTest { Distribution distribution = new 
Distribution(getSimpleGroupConfig(2, 10)); jsonWriter.setDefaultPathPrefix("/cluster/v2"); { - Set<ConfiguredNode> nodes = FleetControllerTest.toNodes(0, 1, 2, 3); + Set<ConfiguredNode> nodes = toNodes(0, 1, 2, 3); ContentCluster cluster = new ContentCluster("books", nodes, distribution); initializeCluster(cluster, nodes); AnnotatedClusterState baselineState = AnnotatedClusterState.withoutAnnotations(ClusterState.stateFromString("distributor:4 storage:4")); @@ -64,8 +65,8 @@ public abstract class StateRestApiTest { ClusterStateBundle.of(baselineState, bucketSpaceStates), 0, 0); } { - Set<ConfiguredNode> nodes = FleetControllerTest.toNodes(1, 2, 3, 5, 7); - Set<ConfiguredNode> nodesInSlobrok = FleetControllerTest.toNodes(1, 3, 5, 7); + Set<ConfiguredNode> nodes = toNodes(1, 2, 3, 5, 7); + Set<ConfiguredNode> nodesInSlobrok = toNodes(1, 3, 5, 7); ContentCluster cluster = new ContentCluster("music", nodes, distribution); if (dontInitializeNode2) { @@ -202,4 +203,10 @@ public abstract class StateRestApiTest { return sb.toString(); } + private static Set<ConfiguredNode> toNodes(Integer ... 
indexes) { + return Arrays.stream(indexes) + .map(i -> new ConfiguredNode(i, false)) + .collect(Collectors.toSet()); + } + } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java index 4f79500e84d..7036f7d2c90 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java @@ -115,7 +115,7 @@ public interface Waiter { } Duration timeLeft = Duration.between(Instant.now(), endTime); if (timeLeft.isNegative() || timeLeft.isZero()) - throw new IllegalStateException("Timed out waiting max " + timeout + " ms for " + c + "\n with wait task " + wt + ",\n reason: " + reason); + throw new IllegalStateException("Timed out waiting max " + timeout + " for " + c + "\n with wait task " + wt + ",\n reason: " + reason); if (allowWait) data.getMonitor().wait(Math.min(wt.getWaitTaskFrequencyInMillis(), timeLeft.toMillis())); } catch (InterruptedException e) { |