diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2022-08-12 15:00:08 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-08-12 15:00:08 +0200 |
commit | 4b0f85b8cd3c9a2a498930eeee3205e92b435703 (patch) | |
tree | 765f505779a468bb92dd02bb62dade1aa129cda9 | |
parent | 910af64b854aa25689c4c8a2a610c219180f6f6b (diff) | |
parent | 35e3812e05be91f0b651498b47db4378280c9afa (diff) |
Merge pull request #23637 from vespa-engine/hmusum/cleanup-13
Cluster controller unit test cleanup, part 2 [run-systemtest]
12 files changed, 210 insertions, 246 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index 8b335e877cd..4097810b633 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -28,6 +28,8 @@ import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageServer import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageServerInterface; import com.yahoo.vespa.clustercontroller.utils.util.MetricReporter; import java.io.FileNotFoundException; +import java.time.Duration; +import java.time.Instant; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Collection; @@ -1156,16 +1158,18 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta public NodeListener getNodeStateUpdateListener() { return FleetController.this; } }; - public void waitForCompleteCycle(long timeoutMS) { - long endTime = System.currentTimeMillis() + timeoutMS; + public void waitForCompleteCycle(Duration timeout) { + Instant endTime = Instant.now().plus(timeout); synchronized (monitor) { // To wait at least one complete cycle, if a cycle is already running we need to wait for the next one beyond. long wantedCycle = cycleCount + (processingCycle ? 2 : 1); waitingForCycle = true; try{ while (cycleCount < wantedCycle) { - if (System.currentTimeMillis() > endTime) throw new IllegalStateException("Timed out waiting for cycle to complete. Not completed after " + timeoutMS + " ms."); - if ( !isRunning() ) throw new IllegalStateException("Fleetcontroller not running. Will never complete cycles"); + if (Instant.now().isAfter(endTime)) + throw new IllegalStateException("Timed out waiting for cycle to complete. Not completed after " + timeout); + if ( !isRunning() ) + throw new IllegalStateException("Fleetcontroller not running. Will never complete cycles"); try{ monitor.wait(100); } catch (InterruptedException e) {} } } finally { @@ -1179,8 +1183,8 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta * But it is only used in unit tests that should not trigger any thread issues. Don't want to add locks that reduce * live performance to remove a non-problem. */ - public void waitForNodesHavingSystemStateVersionEqualToOrAbove(int version, int nodeCount, int timeout) throws InterruptedException { - long maxTime = System.currentTimeMillis() + timeout; + public void waitForNodesHavingSystemStateVersionEqualToOrAbove(int version, int nodeCount, Duration timeout) throws InterruptedException { + Instant endTime = Instant.now().plus(timeout); synchronized (monitor) { while (true) { int ackedNodes = 0; @@ -1193,17 +1197,16 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta context.log(logger, Level.INFO, ackedNodes + " nodes now have acked system state " + version + " or higher."); return; } - long remainingTime = maxTime - System.currentTimeMillis(); - if (remainingTime <= 0) { - throw new IllegalStateException("Did not get " + nodeCount + " nodes to system state " + version + " within timeout of " + timeout + " milliseconds."); + if (Instant.now().isAfter(endTime)) { + throw new IllegalStateException("Did not get " + nodeCount + " nodes to system state " + version + " within timeout of " + timeout); } monitor.wait(10); } } } - public void waitForNodesInSlobrok(int distNodeCount, int storNodeCount, int timeoutMillis) throws InterruptedException { - long maxTime = System.currentTimeMillis() + timeoutMillis; + public void waitForNodesInSlobrok(int distNodeCount, int storNodeCount, Duration timeout) throws InterruptedException { + Instant endTime = Instant.now().plus(timeout); synchronized (monitor) { while (true) { int distCount = 0, storCount = 0; @@ -1215,10 +1218,9 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta } if (distCount == distNodeCount && storCount == storNodeCount) return; - long remainingTime = maxTime - System.currentTimeMillis(); - if (remainingTime <= 0) { + if (Instant.now().isAfter(endTime)) { throw new IllegalStateException("Did not get all " + distNodeCount + " distributors and " + storNodeCount - + " storage nodes registered in slobrok within timeout of " + timeoutMillis + " ms. (Got " + + " storage nodes registered in slobrok within timeout of " + timeout + ". (Got " + distCount + " distributors and " + storCount + " storage nodes)"); } monitor.wait(10); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java index dc297e7a549..42bcfaac477 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java @@ -60,7 +60,7 @@ public class DatabaseTest extends FleetControllerTest { log.info("CHECK THAT WANTED STATES PERSIST FLEETCONTROLLER RESTART"); stopFleetController(); - startFleetController(); + startFleetController(false); waitForState("version:\\d+ distributor:10 .2.s:d storage:10 .3.s:m .7.s:r"); assertWantedStates(wantedStates); @@ -124,7 +124,7 @@ public class DatabaseTest extends FleetControllerTest { stopFleetController(); for (int i = 6; i < nodes.size(); ++i) nodes.get(i).disconnect(); - startFleetController(); + startFleetController(false); waitForState("version:\\d+ distributor:3 storage:7 .1.s:m .3.s:d .4.s:d .5.s:d .6.s:m"); @@ -154,7 +154,7 @@ public class DatabaseTest extends FleetControllerTest { Request req = new Request("setNodeState"); req.parameters().add(new StringValue("storage/cluster.mycluster/" + n.getType().toString() + "/" + n.getIndex())); req.parameters().add(new StringValue(ns.serialize(true))); - connection.invokeSync(req, timeoutS); + connection.invokeSync(req, timeoutInSeconds()); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("s"), req.toString()); wantedStates.put(n, ns); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNode.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNode.java index af067cc394f..fd24966e26a 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNode.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNode.java @@ -21,6 +21,8 @@ import com.yahoo.vdslib.state.NodeType; import com.yahoo.vdslib.state.State; import com.yahoo.vespa.clustercontroller.core.rpc.RPCCommunicator; import com.yahoo.vespa.clustercontroller.core.rpc.RPCUtil; +import java.time.Duration; +import java.time.Instant; import java.util.Iterator; import java.util.LinkedList; import java.util.List; @@ -194,11 +196,11 @@ public class DummyVdsNode { public int getStateCommunicationVersion() { return stateCommunicationVersion; } - void waitForSystemStateVersion(int version) { + void waitForSystemStateVersion(int version, Duration timeout) { try { - long startTime = System.currentTimeMillis(); + Instant endTime = Instant.now().plus(timeout); while (getLatestSystemStateVersion().orElse(-1) < version) { - if ( (System.currentTimeMillis() - startTime) > (long) FleetControllerTest.timeoutMS) + if (Instant.now().isAfter(endTime)) throw new RuntimeException("Timed out waiting for state version " + version + " in " + this); Thread.sleep(10); } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java index 5e4b62e0462..41ae2b37411 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java @@ -22,7 +22,6 @@ import com.yahoo.vespa.clustercontroller.core.rpc.RPCCommunicator; import com.yahoo.vespa.clustercontroller.core.rpc.RpcServer; import com.yahoo.vespa.clustercontroller.core.rpc.SlobrokClient; import com.yahoo.vespa.clustercontroller.core.status.StatusHandler; -import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageServerInterface; import com.yahoo.vespa.clustercontroller.core.testutils.WaitCondition; import com.yahoo.vespa.clustercontroller.core.testutils.WaitTask; import com.yahoo.vespa.clustercontroller.core.testutils.Waiter; @@ -31,14 +30,13 @@ import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.extension.ExtendWith; import org.junit.jupiter.api.extension.ExtensionContext; import org.junit.jupiter.api.extension.TestWatcher; - +import java.time.Duration; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Objects; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; @@ -59,9 +57,10 @@ public abstract class FleetControllerTest implements Waiter { private static final Logger log = Logger.getLogger(FleetControllerTest.class.getName()); private static final int DEFAULT_NODE_COUNT = 10; - Supervisor supervisor; + private final Duration timeout = Duration.ofSeconds(30); protected final FakeTimer timer = new FakeTimer(); - boolean usingFakeTimer = false; + + Supervisor supervisor; protected Slobrok slobrok; protected FleetControllerOptions options; ZooKeeperTestServer zooKeeperServer; @@ -69,8 +68,6 @@ public abstract class FleetControllerTest implements Waiter { protected List<DummyVdsNode> nodes = new ArrayList<>(); private String testName; - final static int timeoutS; - final static int timeoutMS; private final Waiter waiter = new Waiter.Impl(new DataRetriever() { @Override public Object getMonitor() { return timer; } @@ -79,13 +76,11 @@ public abstract class FleetControllerTest implements Waiter { @Override public List<DummyVdsNode> getDummyNodes() { return nodes; } @Override - public int getTimeoutMS() { return timeoutMS; } + public Duration getTimeout() { return timeout; } }); static { LogSetup.initVespaLogging("fleetcontroller"); - timeoutS = 30; - timeoutMS = timeoutS * 1000; } static class BackOff implements BackOffPolicy { @@ -142,7 +137,7 @@ public abstract class FleetControllerTest implements Waiter { return opts; } - void setUpSystem(boolean useFakeTimer, FleetControllerOptions options) throws Exception { + void setUpSystem(FleetControllerOptions options) throws Exception { log.log(Level.FINE, "Setting up system"); slobrok = new Slobrok(); this.options = options; @@ -151,21 +146,15 @@ public abstract class FleetControllerTest implements Waiter { this.options.zooKeeperServerAddress = zooKeeperServer.getAddress(); log.log(Level.FINE, "Set up new zookeeper server at " + this.options.zooKeeperServerAddress); } - this.options.slobrokConnectionSpecs = new String[1]; - this.options.slobrokConnectionSpecs[0] = "tcp/localhost:" + slobrok.port(); - this.usingFakeTimer = useFakeTimer; + this.options.slobrokConnectionSpecs = getSlobrokConnectionSpecs(slobrok); } - FleetController createFleetController(boolean useFakeTimer, FleetControllerOptions options, boolean startThread, StatusPageServerInterface status) throws Exception { - Objects.requireNonNull(status, "status server cannot be null"); + FleetController createFleetController(boolean useFakeTimer, FleetControllerOptions options) throws Exception { var context = new TestFleetControllerContext(options); Timer timer = useFakeTimer ? this.timer : new RealTimer(); var metricUpdater = new MetricUpdater(new NoMetricReporter(), options.fleetControllerIndex, options.clusterName); var log = new EventLog(timer, metricUpdater); - var cluster = new ContentCluster( - options.clusterName, - options.nodes, - options.storageDistribution); + var cluster = new ContentCluster(options.clusterName, options.nodes, options.storageDistribution); var stateGatherer = new NodeStateGatherer(timer, timer, log); var communicator = new RPCCommunicator( RPCCommunicator.createRealSupervisor(), @@ -188,24 +177,18 @@ public abstract class FleetControllerTest implements Waiter { var stateGenerator = new StateChangeHandler(context, timer, log); var stateBroadcaster = new SystemStateBroadcaster(context, timer, timer); var masterElectionHandler = new MasterElectionHandler(context, options.fleetControllerIndex, options.fleetControllerCount, timer, timer); - var controller = new FleetController(context, timer, log, cluster, stateGatherer, communicator, status, rpcServer, lookUp, database, stateGenerator, stateBroadcaster, masterElectionHandler, metricUpdater, options); - if (startThread) { - controller.start(); - } + + var status = new StatusHandler.ContainerStatusPageServer(); + var controller = new FleetController(context, timer, log, cluster, stateGatherer, communicator, status, rpcServer, lookUp, + database, stateGenerator, stateBroadcaster, masterElectionHandler, metricUpdater, options); + controller.start(); return controller; } protected void setUpFleetController(boolean useFakeTimer, FleetControllerOptions options) throws Exception { - setUpFleetController(useFakeTimer, options, true); - } - - protected void setUpFleetController(boolean useFakeTimer, FleetControllerOptions options, boolean startThread) throws Exception { - setUpFleetController(useFakeTimer, options, startThread, new StatusHandler.ContainerStatusPageServer()); - } - protected void setUpFleetController(boolean useFakeTimer, FleetControllerOptions options, boolean startThread, StatusPageServerInterface status) throws Exception { - if (slobrok == null) setUpSystem(useFakeTimer, options); + if (slobrok == null) setUpSystem(options); if (fleetController == null) { - fleetController = createFleetController(useFakeTimer, options, startThread, status); + fleetController = createFleetController(useFakeTimer, options); } else { throw new Exception("called setUpFleetcontroller but it was already setup"); } @@ -218,9 +201,9 @@ public abstract class FleetControllerTest implements Waiter { } } - void startFleetController() throws Exception { + void startFleetController(boolean useFakeTimer) throws Exception { if (fleetController == null) { - fleetController = createFleetController(usingFakeTimer, options, true, new StatusHandler.ContainerStatusPageServer()); + fleetController = createFleetController(useFakeTimer, options); } else { log.log(Level.WARNING, "already started fleetcontroller, not starting another"); } @@ -239,8 +222,7 @@ public abstract class FleetControllerTest implements Waiter { setUpVdsNodes(useFakeTimer, options, startDisconnected, nodeIndexes); } protected void setUpVdsNodes(boolean useFakeTimer, DummyVdsNodeOptions options, boolean startDisconnected, Set<Integer> nodeIndexes) throws Exception { - String[] connectionSpecs = new String[1]; - connectionSpecs[0] = "tcp/localhost:" + slobrok.port(); + String[] connectionSpecs = getSlobrokConnectionSpecs(slobrok); for (int nodeIndex : nodeIndexes) { nodes.add(new DummyVdsNode(useFakeTimer ? timer : new RealTimer(), options, connectionSpecs, this.options.clusterName, true, nodeIndex)); if ( ! startDisconnected) nodes.get(nodes.size() - 1).connect(); @@ -256,8 +238,7 @@ public abstract class FleetControllerTest implements Waiter { * the returned list is twice as large as configuredNodes. */ protected List<DummyVdsNode> setUpVdsNodes(boolean useFakeTimer, DummyVdsNodeOptions options, boolean startDisconnected, List<ConfiguredNode> configuredNodes) throws Exception { - String[] connectionSpecs = new String[1]; - connectionSpecs[0] = "tcp/localhost:" + slobrok.port(); + String[] connectionSpecs = getSlobrokConnectionSpecs(slobrok); nodes = new ArrayList<>(); final boolean distributor = true; for (ConfiguredNode configuredNode : configuredNodes) { @@ -296,7 +277,7 @@ public abstract class FleetControllerTest implements Waiter { .collect(Collectors.toList()); } @Override - public int getTimeoutMS() { return timeoutMS; } + public Duration getTimeout() { return timeout; } }); subsetWaiter.waitForState(expectedState); } @@ -341,16 +322,16 @@ public abstract class FleetControllerTest implements Waiter { public ClusterState waitForState(String state) throws Exception { return waiter.waitForState(state); } public ClusterState waitForStateInAllSpaces(String state) throws Exception { return waiter.waitForStateInAllSpaces(state); } public ClusterState waitForStateInSpace(String space, String state) throws Exception { return waiter.waitForStateInSpace(space, state); } - public ClusterState waitForState(String state, int timeoutMS) throws Exception { return waiter.waitForState(state, timeoutMS); } + public ClusterState waitForState(String state, Duration timeout) throws Exception { return waiter.waitForState(state, timeout); } public ClusterState waitForInitProgressPassed(Node n, double progress) { return waiter.waitForInitProgressPassed(n, progress); } public ClusterState waitForClusterStateIncludingNodesWithMinUsedBits(int bitcount, int nodecount) { return waiter.waitForClusterStateIncludingNodesWithMinUsedBits(bitcount, nodecount); } - public void wait(WaitCondition c, WaitTask wt, int timeoutMS) { - waiter.wait(c, wt, timeoutMS); + public void wait(WaitCondition condition, WaitTask task, Duration timeout) { + waiter.wait(condition, task, timeout); } void waitForCompleteCycle() { - fleetController.waitForCompleteCycle(timeoutMS); + fleetController.waitForCompleteCycle(timeout); } private static class ExpectLine { @@ -479,7 +460,7 @@ public abstract class FleetControllerTest implements Waiter { Request req = new Request("setNodeState"); req.parameters().add(new StringValue(node.getSlobrokName())); req.parameters().add(new StringValue(ns.serialize())); - connection.invokeSync(req, timeoutS); + connection.invokeSync(req, timeoutInSeconds()); if (req.isError()) { fail("Failed to invoke setNodeState(): " + req.errorCode() + ": " + req.errorMessage()); } @@ -488,4 +469,14 @@ public abstract class FleetControllerTest implements Waiter { } } + static String[] getSlobrokConnectionSpecs(Slobrok slobrok) { + String[] connectionSpecs = new String[1]; + connectionSpecs[0] = "tcp/localhost:" + slobrok.port(); + return connectionSpecs; + } + + Duration timeout() { return timeout; } + + double timeoutInSeconds() { return (double) timeout.toMillis() / 1000; } + } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java index fb468ee4d5b..05a06a92e27 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java @@ -11,12 +11,11 @@ import com.yahoo.vdslib.state.ClusterState; import com.yahoo.vdslib.state.NodeState; import com.yahoo.vdslib.state.NodeType; import com.yahoo.vdslib.state.State; -import com.yahoo.vespa.clustercontroller.core.status.StatusHandler; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; import org.junit.jupiter.api.extension.ExtendWith; - +import java.time.Instant; import java.util.ArrayList; import java.util.List; import java.util.concurrent.TimeoutException; @@ -44,46 +43,42 @@ public class MasterElectionTest extends FleetControllerTest { zooKeeperServer = new ZooKeeperTestServer(); } slobrok = new Slobrok(); - usingFakeTimer = useFakeTimer; this.options = options; this.options.zooKeeperSessionTimeout = defaultZkSessionTimeoutInMillis(); this.options.zooKeeperServerAddress = zooKeeperServer.getAddress(); - this.options.slobrokConnectionSpecs = new String[1]; - this.options.slobrokConnectionSpecs[0] = "tcp/localhost:" + slobrok.port(); + this.options.slobrokConnectionSpecs = getSlobrokConnectionSpecs(slobrok); this.options.fleetControllerCount = count; for (int i=0; i<count; ++i) { FleetControllerOptions nodeOptions = options.clone(); nodeOptions.fleetControllerIndex = i; - fleetControllers.add(createFleetController(usingFakeTimer, nodeOptions, true, new StatusHandler.ContainerStatusPageServer())); + fleetControllers.add(createFleetController(useFakeTimer, nodeOptions)); } } - private FleetControllerOptions adjustConfig(FleetControllerOptions o, - int fleetControllerIndex, int fleetControllerCount) { + private FleetControllerOptions adjustConfig(FleetControllerOptions o, int fleetControllerIndex, int fleetControllerCount) { FleetControllerOptions options = o.clone(); options.zooKeeperSessionTimeout = defaultZkSessionTimeoutInMillis(); options.zooKeeperServerAddress = zooKeeperServer.getAddress(); - options.slobrokConnectionSpecs = new String[1]; - options.slobrokConnectionSpecs[0] = "tcp/localhost:" + slobrok.port(); // Spec.fromLocalHostName(slobrok.port()).toString(); + options.slobrokConnectionSpecs = getSlobrokConnectionSpecs(slobrok); options.fleetControllerIndex = fleetControllerIndex; options.fleetControllerCount = fleetControllerCount; return options; } private void waitForZookeeperDisconnected() throws TimeoutException { - long maxTime = System.currentTimeMillis() + timeoutMS; + Instant maxTime = Instant.now().plus(timeout()); for (FleetController f : fleetControllers) { while (f.hasZookeeperConnection()) { try { Thread.sleep(1); } catch (InterruptedException e) { /* ignore */ } - if (System.currentTimeMillis() > maxTime) - throw new TimeoutException("Failed to notice zookeeper down within timeout of " + timeoutMS + " ms"); + if (Instant.now().isAfter(maxTime)) + throw new TimeoutException("Failed to notice zookeeper down within timeout of " + timeout()); } } waitForCompleteCycles(); } private void waitForCompleteCycle(int findex) { - fleetControllers.get(findex).waitForCompleteCycle(timeoutMS); + fleetControllers.get(findex).waitForCompleteCycle(timeout()); } private void waitForCompleteCycles() { @@ -117,7 +112,8 @@ public class MasterElectionTest extends FleetControllerTest { log.log(Level.INFO, "STARTING TEST: MasterElectionTest::testMasterElection()"); FleetControllerOptions options = defaultOptions("mycluster"); options.masterZooKeeperCooldownPeriod = 100; - setUpFleetController(5, false, options); + boolean usingFakeTimer = false; + setUpFleetController(5, usingFakeTimer, options); waitForMaster(0); log.log(Level.INFO, "SHUTTING DOWN FLEET CONTROLLER 0"); fleetControllers.get(0).shutdown(); @@ -134,15 +130,14 @@ public class MasterElectionTest extends FleetControllerTest { assertFalse(fleetControllers.get(i).isMaster(), "Fleet controller " + i); } - StatusHandler.ContainerStatusPageServer statusPageServer = new StatusHandler.ContainerStatusPageServer(); log.log(Level.INFO, "STARTING FLEET CONTROLLER 2"); - fleetControllers.set(2, createFleetController(usingFakeTimer, fleetControllers.get(2).getOptions(), true, statusPageServer)); + fleetControllers.set(2, createFleetController(usingFakeTimer, fleetControllers.get(2).getOptions())); waitForMaster(2); log.log(Level.INFO, "STARTING FLEET CONTROLLER 0"); - fleetControllers.set(0, createFleetController(usingFakeTimer, fleetControllers.get(0).getOptions(), true, statusPageServer)); + fleetControllers.set(0, createFleetController(usingFakeTimer, fleetControllers.get(0).getOptions())); waitForMaster(0); log.log(Level.INFO, "STARTING FLEET CONTROLLER 1"); - fleetControllers.set(1, createFleetController(usingFakeTimer, fleetControllers.get(1).getOptions(), true, statusPageServer)); + fleetControllers.set(1, createFleetController(usingFakeTimer, fleetControllers.get(1).getOptions())); waitForMaster(0); log.log(Level.INFO, "SHUTTING DOWN FLEET CONTROLLER 4"); @@ -164,7 +159,7 @@ public class MasterElectionTest extends FleetControllerTest { private void waitForMaster(int master) { log.log(Level.INFO, "Entering waitForMaster"); boolean isOnlyMaster = false; - for (int i = 0; i < FleetControllerTest.timeoutMS; i += 100) { + for (int i = 0; i < timeout().toMillis(); i += 100) { if (!fleetControllers.get(master).isMaster()) { log.log(Level.INFO, "Node " + master + " is not master yet, sleeping more"); timer.advanceTime(100); @@ -183,7 +178,7 @@ public class MasterElectionTest extends FleetControllerTest { break; } } - // Have to wait to get zookeeper communication chance to happen. + // Have to wait to get zookeeper communication chance to happen. try { Thread.sleep(100); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } } @@ -312,13 +307,13 @@ public class MasterElectionTest extends FleetControllerTest { waitForMaster(1); } - private void waitForMasterReason(String reason, Integer master, List<Target> connections, int nodes[]) { - long endTime = System.currentTimeMillis() + timeoutMS; - while (System.currentTimeMillis() < endTime) { + private void waitForMasterReason(String reason, Integer master, List<Target> connections, int[] nodes) { + Instant endTime = Instant.now().plus(timeout()); + while (Instant.now().isBefore(endTime)) { boolean allOk = true; for (int node : nodes) { Request req = new Request("getMaster"); - connections.get(node).invokeSync(req, FleetControllerTest.timeoutS); + connections.get(node).invokeSync(req, timeoutInSeconds()); if (req.isError()) { allOk = false; break; @@ -335,8 +330,7 @@ public class MasterElectionTest extends FleetControllerTest { if (allOk) return; try{ Thread.sleep(100); } catch (InterruptedException e) { /* ignore */ } } - throw new IllegalStateException("Did not get master reason '" + reason - + "' within timeout of " + timeoutMS + " ms"); + throw new IllegalStateException("Did not get master reason '" + reason + "' within timeout of " + timeout()); } @Test @@ -362,9 +356,9 @@ public class MasterElectionTest extends FleetControllerTest { Request req = new Request("getMaster"); for (int nodeIndex = 0; nodeIndex < 3; ++nodeIndex) { - for (int retry = 0; retry < FleetControllerTest.timeoutS * 10; ++retry) { + for (int retry = 0; retry < timeoutInSeconds() * 10; ++retry) { req = new Request("getMaster"); - connections.get(nodeIndex).invokeSync(req, FleetControllerTest.timeoutS); + connections.get(nodeIndex).invokeSync(req, timeoutInSeconds()); assertFalse(req.isError(), req.errorMessage()); if (req.returnValues().get(0).asInt32() == 0 && req.returnValues().get(1).asString().equals("All 3 nodes agree that 0 is current master.")) { @@ -395,13 +389,13 @@ public class MasterElectionTest extends FleetControllerTest { waitForMaster(1); req = new Request("getMaster"); - connections.get(0).invokeSync(req, FleetControllerTest.timeoutS); + connections.get(0).invokeSync(req, timeoutInSeconds()); assertEquals(104, req.errorCode(), req.toString()); assertEquals("Connection error", req.errorMessage(), req.toString()); - for (int i = 0; i < FleetControllerTest.timeoutS * 10; ++i) { + for (int i = 0; i < timeoutInSeconds() * 10; ++i) { req = new Request("getMaster"); - connections.get(1).invokeSync(req, FleetControllerTest.timeoutS); + connections.get(1).invokeSync(req, timeoutInSeconds()); assertFalse(req.isError(), req.errorMessage()); if (req.returnValues().get(0).asInt32() != -1) break; // We may have bad timing causing node not to have realized it is master yet @@ -409,9 +403,9 @@ public class MasterElectionTest extends FleetControllerTest { assertEquals(1, req.returnValues().get(0).asInt32(), req.toString()); assertEquals("2 of 3 nodes agree 1 is master.", req.returnValues().get(1).asString(), req.toString()); - for (int i = 0; i < FleetControllerTest.timeoutS * 10; ++i) { + for (int i = 0; i < timeoutInSeconds() * 10; ++i) { req = new Request("getMaster"); - connections.get(2).invokeSync(req, FleetControllerTest.timeoutS); + connections.get(2).invokeSync(req, timeoutInSeconds()); assertFalse(req.isError(), req.errorMessage()); if (req.returnValues().get(0).asInt32() != -1) break; } @@ -497,7 +491,8 @@ public class MasterElectionTest extends FleetControllerTest { options.clusterHasGlobalDocumentTypes = true; options.masterZooKeeperCooldownPeriod = 1; options.minTimeBeforeFirstSystemStateBroadcast = 100000; - setUpFleetController(3, false, options); + boolean useFakeTimer = false; + setUpFleetController(3, useFakeTimer, options); setUpVdsNodes(false, new DummyVdsNodeOptions()); fleetController = fleetControllers.get(0); // Required to prevent waitForStableSystem from NPE'ing waitForMaster(0); @@ -523,7 +518,7 @@ public class MasterElectionTest extends FleetControllerTest { waitForMaster(1); waitForCompleteCycle(1); - fleetControllers.set(0, createFleetController(usingFakeTimer, fleetControllers.get(0).getOptions(), true, new StatusHandler.ContainerStatusPageServer())); + fleetControllers.set(0, createFleetController(useFakeTimer, fleetControllers.get(0).getOptions())); waitForMaster(0); waitForCompleteCycle(0); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java index 41d7465b602..594a681f4b5 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java @@ -52,8 +52,7 @@ public class RpcServerTest extends FleetControllerTest { void testRebinding() throws Exception { startingTest("RpcServerTest::testRebinding"); Slobrok slobrok = new Slobrok(); - String[] slobrokConnectionSpecs = new String[1]; - slobrokConnectionSpecs[0] = "tcp/localhost:" + slobrok.port(); + String[] slobrokConnectionSpecs = getSlobrokConnectionSpecs(slobrok); RpcServer server = new RpcServer(timer, new Object(), "mycluster", 0, new BackOff()); server.setSlobrokConnectionSpecs(slobrokConnectionSpecs, 18347); int portUsed = server.getPort(); @@ -101,7 +100,7 @@ public class RpcServerTest extends FleetControllerTest { log.log(Level.INFO, "Disconnecting distributor 0. Waiting for state to reflect change."); nodes.get(0).disconnect(); nodes.get(19).disconnect(); - fleetController.waitForNodesInSlobrok(9, 9, timeoutMS); + fleetController.waitForNodesInSlobrok(9, 9, timeout()); timer.advanceTime(options.nodeStateRequestTimeoutMS + options.maxSlobrokDisconnectGracePeriod); wait(new WaitCondition.StateWait(fleetController, fleetController.getMonitor()) { @@ -120,7 +119,7 @@ public class RpcServerTest extends FleetControllerTest { } return null; } - }, null, timeoutMS); + }, null, timeout()); int rpcPort = fleetController.getRpcPort(); supervisor = new Supervisor(new Transport()); @@ -128,7 +127,7 @@ public class RpcServerTest extends FleetControllerTest { assertTrue(connection.isValid()); Request req = new Request("getSystemState"); - connection.invokeSync(req, timeoutS); + connection.invokeSync(req, timeoutInSeconds()); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("ss"), req.toString()); String systemState = req.returnValues().get(1).asString(); @@ -148,10 +147,7 @@ public class RpcServerTest extends FleetControllerTest { Node node = new Node(nodeType, nodeIndex); NodeState newNodeState = new NodeState(nodeType, newState); - Request req = new Request("setNodeState"); - req.parameters().add(new StringValue("storage/cluster.mycluster/" + node.getType().toString() + "/" + node.getIndex())); - req.parameters().add(new StringValue(newNodeState.serialize(true))); - connection.invokeSync(req, timeoutS); + Request req = setNodeState("storage/cluster.mycluster/" + node.getType().toString() + "/" + node.getIndex(), newNodeState, connection); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("s"), req.toString()); } @@ -193,10 +189,7 @@ public class RpcServerTest extends FleetControllerTest { Target connection = supervisor.connect(new Spec("localhost", rpcPort)); assertTrue(connection.isValid()); - Request req = new Request("getNodeState"); - req.parameters().add(new StringValue("distributor")); - req.parameters().add(new Int32Value(0)); - connection.invokeSync(req, timeoutS); + Request req = getNodeState("distributor", 0, connection); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("ssss"), req.toString()); assertEquals(State.DOWN, NodeState.deserialize(NodeType.DISTRIBUTOR, req.returnValues().get(0).asString()).getState()); @@ -204,48 +197,33 @@ public class RpcServerTest extends FleetControllerTest { assertTrue(reported.getState().oneOf("d-"), req.returnValues().get(1).asString()); assertEquals("", req.returnValues().get(2).asString()); - req = new Request("getNodeState"); - req.parameters().add(new StringValue("distributor")); - req.parameters().add(new Int32Value(2)); - connection.invokeSync(req, timeoutS); + req = getNodeState("distributor",2, connection); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("ssss"), req.toString()); assertEquals(State.DOWN, NodeState.deserialize(NodeType.DISTRIBUTOR, req.returnValues().get(0).asString()).getState()); assertEquals("t:946080000", req.returnValues().get(1).asString()); assertEquals(State.DOWN, NodeState.deserialize(NodeType.DISTRIBUTOR, req.returnValues().get(2).asString()).getState()); - req = new Request("getNodeState"); - req.parameters().add(new StringValue("distributor")); - req.parameters().add(new Int32Value(4)); - connection.invokeSync(req, timeoutS); + req = getNodeState("distributor", 4, connection); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("ssss"), req.toString()); assertEquals("", req.returnValues().get(0).asString()); assertEquals("t:946080000", req.returnValues().get(1).asString()); assertEquals("", req.returnValues().get(2).asString()); - req = new Request("getNodeState"); - req.parameters().add(new StringValue("distributor")); - req.parameters().add(new Int32Value(15)); - connection.invokeSync(req, timeoutS); + req = getNodeState("distributor", 15, connection); assertEquals(ErrorCode.METHOD_FAILED, req.errorCode(), req.toString()); assertEquals("No node distributor.15 exists in cluster mycluster", req.errorMessage()); assertFalse(req.checkReturnTypes("ssss"), req.toString()); - req = new Request("getNodeState"); - req.parameters().add(new StringValue("storage")); - req.parameters().add(new Int32Value(1)); - connection.invokeSync(req, timeoutS); + req = getNodeState("storage", 1, connection); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("ssss"), req.toString()); assertEquals("s:i i:0.2", req.returnValues().get(0).asString()); assertEquals("s:i i:0.2", req.returnValues().get(1).asString()); assertEquals("", req.returnValues().get(2).asString()); - req = new Request("getNodeState"); - req.parameters().add(new StringValue("storage")); - req.parameters().add(new Int32Value(2)); - connection.invokeSync(req, timeoutS); + req = getNodeState("storage", 2, connection); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("ssss"), req.toString()); assertEquals(State.DOWN, NodeState.deserialize(NodeType.STORAGE, req.returnValues().get(0).asString()).getState()); @@ -253,20 +231,14 @@ public class RpcServerTest extends FleetControllerTest { assertTrue(reported.getState().oneOf("d-"), req.returnValues().get(1).asString()); assertEquals(State.RETIRED, NodeState.deserialize(NodeType.STORAGE, req.returnValues().get(2).asString()).getState()); - req = new Request("getNodeState"); - req.parameters().add(new StringValue("storage")); - req.parameters().add(new Int32Value(5)); - connection.invokeSync(req, timeoutS); + req = getNodeState("storage", 5, connection); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("ssss"), req.toString()); assertEquals("", req.returnValues().get(0).asString()); assertEquals("t:946080000", req.returnValues().get(1).asString()); assertEquals("", req.returnValues().get(2).asString()); - req = new Request("getNodeState"); - req.parameters().add(new StringValue("storage")); - req.parameters().add(new Int32Value(7)); - connection.invokeSync(req, timeoutS); + req = getNodeState("storage", 7, connection); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("ssss"), req.toString()); assertEquals(State.MAINTENANCE, NodeState.deserialize(NodeType.STORAGE, req.returnValues().get(0).asString()).getState()); @@ -483,19 +455,13 @@ public class RpcServerTest extends FleetControllerTest { Target connection = supervisor.connect(new Spec("localhost", rpcPort)); assertTrue(connection.isValid()); - Request req = new Request("setNodeState"); - req.parameters().add(new StringValue("storage/cluster.mycluster/storage/14")); - req.parameters().add(new StringValue("s:r")); - connection.invokeSync(req, timeoutS); + Request req = setNodeState("storage/cluster.mycluster/storage/14", "s:r", connection); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("s"), req.toString()); waitForState("version:\\d+ distributor:26 .* storage:26 .* .14.s:r .*"); - req = new Request("setNodeState"); - req.parameters().add(new StringValue("storage/cluster.mycluster/storage/16")); - req.parameters().add(new StringValue("s:m")); - connection.invokeSync(req, timeoutS); + req = setNodeState("storage/cluster.mycluster/storage/16", "s:m", connection); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("s"), req.toString()); @@ -528,24 +494,15 @@ public class RpcServerTest extends FleetControllerTest { Target connection = supervisor.connect(new Spec("localhost", rpcPort)); assertTrue(connection.isValid()); - Request req = new Request("setNodeState"); - req.parameters().add(new StringValue("storage/cluster.mycluster/storage/10")); - req.parameters().add(new StringValue("s:m")); - connection.invokeSync(req, timeoutS); + Request req = setNodeState("storage/cluster.mycluster/storage/10", "s:m", connection); assertEquals(ErrorCode.METHOD_FAILED, req.errorCode(), req.toString()); assertEquals("Cannot set wanted state of node storage.10. Index does not correspond to a configured node.", req.errorMessage(), req.toString()); - req = new Request("setNodeState"); - req.parameters().add(new StringValue("storage/cluster.mycluster/distributor/10")); - req.parameters().add(new StringValue("s:m")); - connection.invokeSync(req, timeoutS); + req = setNodeState("storage/cluster.mycluster/distributor/10", "s:m", connection); assertEquals(ErrorCode.METHOD_FAILED, req.errorCode(), req.toString()); assertEquals("Cannot set wanted state of node distributor.10. Index does not correspond to a configured node.", req.errorMessage(), req.toString()); - req = new Request("setNodeState"); - req.parameters().add(new StringValue("storage/cluster.mycluster/storage/9")); - req.parameters().add(new StringValue("s:m")); - connection.invokeSync(req, timeoutS); + req = setNodeState("storage/cluster.mycluster/storage/9", "s:m", connection); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); waitForState("version:\\d+ distributor:10 storage:10 .9.s:m"); @@ -566,7 +523,7 @@ public class RpcServerTest extends FleetControllerTest { assertTrue(connection.isValid()); Request req = new Request("getMaster"); - connection.invokeSync(req, timeoutS); + connection.invokeSync(req, timeoutInSeconds()); assertEquals(0, req.returnValues().get(0).asInt32(), req.toString()); assertEquals("All 1 nodes agree that 0 is current master.", req.returnValues().get(1).asString(), req.toString()); @@ -590,36 +547,52 @@ public class RpcServerTest extends FleetControllerTest { Target connection = supervisor.connect(new Spec("localhost", rpcPort)); assertTrue(connection.isValid()); - // Possibly do request multiple times if we haven't lost slobrok contact first times yet. - for (int j = 0; j <= nodeCount; ++j) { - Request req = new Request("getNodeList"); - connection.invokeSync(req, timeoutS); - assertEquals(ErrorCode.NONE, req.errorCode(), req.errorMessage()); - assertTrue(req.checkReturnTypes("SS"), req.toString()); - String[] slobrok = req.returnValues().get(0).asStringArray().clone(); - String[] rpc = req.returnValues().get(1).asStringArray().clone(); - - assertEquals(2 * nodeCount, slobrok.length); - assertEquals(2 * nodeCount, rpc.length); - - // Verify that we can connect to all addresses returned. - for (int i = 0; i < 2 * nodeCount; ++i) { - if (slobrok[i].equals("storage/cluster.mycluster/distributor/0")) { - if (i < nodeCount && !"".equals(rpc[i])) { - continue; - } - assertEquals("", rpc[i], slobrok[i]); + Request req = new Request("getNodeList"); + connection.invokeSync(req, timeoutInSeconds()); + assertEquals(ErrorCode.NONE, req.errorCode(), req.errorMessage()); + assertTrue(req.checkReturnTypes("SS"), req.toString()); + String[] slobrok = req.returnValues().get(0).asStringArray().clone(); + String[] rpc = req.returnValues().get(1).asStringArray().clone(); + + assertEquals(2 * nodeCount, slobrok.length); + assertEquals(2 * nodeCount, rpc.length); + + // Verify that we can connect to all addresses returned. + for (int i = 0; i < 2 * nodeCount; ++i) { + if (slobrok[i].equals("storage/cluster.mycluster/distributor/0")) { + if (i < nodeCount && !"".equals(rpc[i])) { continue; } - assertNotEquals("", rpc[i]); - Request req2 = new Request("getnodestate2"); - req2.parameters().add(new StringValue("unknown")); - Target connection2 = supervisor.connect(new Spec(rpc[i])); - connection2.invokeSync(req2, timeoutS); - assertEquals(ErrorCode.NONE, req.errorCode(), req2.toString()); + assertEquals("", rpc[i], slobrok[i]); + continue; } - break; + assertNotEquals("", rpc[i]); + Request req2 = new Request("getnodestate2"); + req2.parameters().add(new StringValue("unknown")); + Target connection2 = supervisor.connect(new Spec(rpc[i])); + connection2.invokeSync(req2, timeoutInSeconds()); + assertEquals(ErrorCode.NONE, req.errorCode(), req2.toString()); } } + private Request setNodeState(String node, NodeState newNodeState, Target connection) { + return setNodeState(node, newNodeState.serialize(true), connection); + } + + private Request setNodeState(String node, String newNodeState, Target connection) { + Request req = new Request("setNodeState"); + req.parameters().add(new StringValue(node)); + req.parameters().add(new StringValue(newNodeState)); + connection.invokeSync(req, timeoutInSeconds()); + return req; + } + + private Request getNodeState(String nodeType, int nodeIndex, Target connection) { + Request req = new Request("getNodeState"); + req.parameters().add(new StringValue(nodeType)); + req.parameters().add(new Int32Value(nodeIndex)); + connection.invokeSync(req, timeoutInSeconds()); + return req; + } + } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java index b59c90f4955..78576f9600c 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java @@ -2,14 +2,13 @@ package com.yahoo.vespa.clustercontroller.core; import com.yahoo.jrt.slobrok.server.Slobrok; -import java.util.logging.Level; import org.junit.jupiter.api.Test; +import java.util.logging.Level; +import java.util.logging.Logger; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; -import java.util.logging.Logger; - public class SlobrokTest extends FleetControllerTest { private static final Logger log = Logger.getLogger(SlobrokTest.class.getName()); @@ -50,10 +49,10 @@ public class SlobrokTest extends FleetControllerTest { } //fleetController.setFreshSlobrokMirror(); waitForCompleteCycle(); - fleetController.waitForNodesInSlobrok(10, 10, timeoutMS); + fleetController.waitForNodesInSlobrok(10, 10, timeout()); log.log(Level.INFO, "Waiting for cluster to be up and available again"); - for (int i = 0; i < timeoutMS; i += 10) { + for (int i = 0; i < timeout().toMillis(); i += 10) { if (clusterAvailable()) break; timer.advanceTime(1000); waitForCompleteCycle(); @@ -81,7 +80,7 @@ public class SlobrokTest extends FleetControllerTest { int version = fleetController.getSystemState().getVersion(); nodes.get(0).disconnectSlobrok(); log.log(Level.INFO, "DISCONNECTED NODE FROM SLOBROK. SHOULD BE IN COOLDOWN PERIOD"); - fleetController.waitForNodesInSlobrok(9, 10, timeoutMS); + fleetController.waitForNodesInSlobrok(9, 10, timeout()); synchronized(timer) { nodes.get(0).sendGetNodeStateReply(0); } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java index 24e65a89d2b..6ad0430b5c3 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java @@ -951,22 +951,23 @@ public class StateChangeTest extends FleetControllerTest { startingTest("StateChangeTest::testNoSystemStateBeforeInitialTimePeriod()"); FleetControllerOptions options = defaultOptions("mycluster", createNodes(10)); options.minTimeBeforeFirstSystemStateBroadcast = 3 * 60 * 1000; - setUpSystem(true, options); - setUpVdsNodes(true, new DummyVdsNodeOptions(), true); + setUpSystem(options); + boolean useFakeTimer = true; + setUpVdsNodes(useFakeTimer, new DummyVdsNodeOptions(), true); // Leave one node down to avoid sending cluster state due to having seen all node states. for (int i = 0; i < nodes.size(); ++i) { if (i != 3) { nodes.get(i).connect(); } } - setUpFleetController(true, options); + setUpFleetController(useFakeTimer, options); StateWaiter waiter = new StateWaiter(timer); fleetController.addSystemStateListener(waiter); // Ensure all nodes have been seen by fleetcontroller and that it has had enough time to possibly have sent a cluster state // Note: this is a candidate state and therefore NOT versioned yet - waiter.waitForState("^distributor:10 (\\.\\d+\\.t:\\d+ )*storage:10 (\\.\\d+\\.t:\\d+ )*.1.s:d( \\.\\d+\\.t:\\d+)*", timeoutMS); + waiter.waitForState("^distributor:10 (\\.\\d+\\.t:\\d+ )*storage:10 (\\.\\d+\\.t:\\d+ )*.1.s:d( \\.\\d+\\.t:\\d+)*", timeout()); waitForCompleteCycle(); new StateMessageChecker(nodes) { @Override @@ -977,10 +978,10 @@ public class StateChangeTest extends FleetControllerTest { // Pass time and see that the nodes get state timer.advanceTime(3 * 60 * 1000); - waiter.waitForState("version:\\d+ distributor:10 storage:10 .1.s:d", timeoutMS); + waiter.waitForState("version:\\d+ distributor:10 storage:10 .1.s:d", timeout()); int version = waiter.getCurrentSystemState().getVersion(); - fleetController.waitForNodesHavingSystemStateVersionEqualToOrAbove(version, 19, timeoutMS); + fleetController.waitForNodesHavingSystemStateVersionEqualToOrAbove(version, 19, timeout()); new StateMessageChecker(nodes) { @Override @@ -997,9 +998,10 @@ public class StateChangeTest extends FleetControllerTest { final FleetControllerOptions options = defaultOptions("mycluster", createNodes(10)); options.minTimeBeforeFirstSystemStateBroadcast = 300 * 60 * 1000; - setUpSystem(true, options); + boolean useFakeTimer = true; + setUpSystem(options); - setUpVdsNodes(true, new DummyVdsNodeOptions(), true); + setUpVdsNodes(useFakeTimer, new DummyVdsNodeOptions(), true); for (DummyVdsNode node : nodes) { node.connect(); @@ -1007,16 +1009,16 @@ public class StateChangeTest extends FleetControllerTest { // Marking one node as 'initializing' improves testing of state later on. nodes.get(3).setNodeState(State.INITIALIZING); - setUpFleetController(true, options); + setUpFleetController(useFakeTimer, options); final StateWaiter waiter = new StateWaiter(timer); fleetController.addSystemStateListener(waiter); - waiter.waitForState("version:\\d+ distributor:10 storage:10 .1.s:i .1.i:1.0", timeoutMS); + waiter.waitForState("version:\\d+ distributor:10 storage:10 .1.s:i .1.i:1.0", timeout()); waitForCompleteCycle(); final int version = waiter.getCurrentSystemState().getVersion(); - fleetController.waitForNodesHavingSystemStateVersionEqualToOrAbove(version, 20, timeoutMS); + fleetController.waitForNodesHavingSystemStateVersionEqualToOrAbove(version, 20, timeout()); // The last two versions of the cluster state should be seen (all nodes up, // zero out timestate) @@ -1042,10 +1044,10 @@ public class StateChangeTest extends FleetControllerTest { nodes.get(1).failSetSystemState(true); int versionBeforeChange = nodes.get(1).getSystemStatesReceived().get(0).getVersion(); nodes.get(2).disconnect(); // cause a new state - waiter.waitForState("version:\\d+ distributor:10 .1.s:d storage:10", timeoutMS); + waiter.waitForState("version:\\d+ distributor:10 .1.s:d storage:10", timeout()); int versionAfterChange = waiter.getCurrentSystemState().getVersion(); assertTrue(versionAfterChange > versionBeforeChange); - fleetController.waitForNodesHavingSystemStateVersionEqualToOrAbove(versionAfterChange, 18, timeoutMS); + fleetController.waitForNodesHavingSystemStateVersionEqualToOrAbove(versionAfterChange, 18, timeout()); // Assert that the failed node has not acknowledged the latest version. // (The version may still be larger than versionBeforeChange if the fleet controller sends a @@ -1120,7 +1122,7 @@ public class StateChangeTest extends FleetControllerTest { // Simulate netsplit. Take node down without node booting assertTrue(nodes.get(0).isDistributor()); nodes.get(0).disconnectImmediately(); - waiter.waitForState("version:\\d+ distributor:10 .0.s:d storage:10", timeoutMS); + waiter.waitForState("version:\\d+ distributor:10 .0.s:d storage:10", timeout()); // Add node back. nodes.get(0).connect(); @@ -1128,7 +1130,7 @@ public class StateChangeTest extends FleetControllerTest { // At this time, node taken down should have cluster states with all starting timestamps set. Others node should not. for (DummyVdsNode node : nodes) { - node.waitForSystemStateVersion(waiter.getCurrentSystemState().getVersion()); + node.waitForSystemStateVersion(waiter.getCurrentSystemState().getVersion(), timeout()); List<ClusterState> states = node.getSystemStatesReceived(); ClusterState lastState = states.get(0); StringBuilder stateHistory = new StringBuilder(); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java index 892aefbb865..656d5a187d1 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java @@ -1,10 +1,10 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.clustercontroller.core; -import java.util.logging.Level; import org.junit.jupiter.api.Test; - +import java.time.Instant; import java.util.concurrent.TimeoutException; +import java.util.logging.Level; import java.util.logging.Logger; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -33,8 +33,7 @@ public class StateGatherTest extends FleetControllerTest { options.nodeStateRequestTimeoutEarliestPercentage = 80; options.nodeStateRequestTimeoutLatestPercentage = 80; setUpFleetController(true, options); - String[] connectionSpecs = new String[1]; - connectionSpecs[0] = "tcp/localhost:" + slobrok.port(); + String[] connectionSpecs = getSlobrokConnectionSpecs(slobrok); DummyVdsNodeOptions dummyOptions = new DummyVdsNodeOptions(); DummyVdsNode dnode = new DummyVdsNode(timer, dummyOptions, connectionSpecs, this.options.clusterName, true, 0); DummyVdsNode snode = new DummyVdsNode(timer, dummyOptions, connectionSpecs, this.options.clusterName, false, 0); @@ -58,11 +57,11 @@ public class StateGatherTest extends FleetControllerTest { } private void waitUntilTimedOutGetNodeState(DummyVdsNode dnode, DummyVdsNode snode) throws TimeoutException { - long timeout = System.currentTimeMillis() + timeoutMS; + Instant endTime = Instant.now().plus(timeout()); synchronized (timer) { while (dnode.timedOutStateReplies != 1 || snode.timedOutStateReplies != 1) { - if (System.currentTimeMillis() > timeout) { - throw new TimeoutException("Did not get to have one timed out within timeout of " + timeoutMS + " ms" + if (Instant.now().isAfter(endTime)) { + throw new TimeoutException("Did not get to have one timed out within timeout of " + timeout() + ", " + getGetNodeStateReplyCounts(dnode) + ", " + getGetNodeStateReplyCounts(snode)); } try{ timer.wait(1); } catch (InterruptedException e) { /* ignore */ } @@ -71,9 +70,9 @@ public class StateGatherTest extends FleetControllerTest { } private void waitUntilPendingGetNodeState(DummyVdsNode dnode, DummyVdsNode snode) throws TimeoutException { - long timeout = System.currentTimeMillis() + timeoutMS; + Instant endTime = Instant.now().plus(timeout()); while (dnode.getPendingNodeStateCount() != 1 || snode.getPendingNodeStateCount() != 1) { - if (System.currentTimeMillis() > timeout) throw new TimeoutException("Did not get to have one pending within timeout of " + timeoutMS + " ms"); + if (Instant.now().isAfter(endTime)) throw new TimeoutException("Did not get to have one pending within timeout of " + timeout()); try{ Thread.sleep(1); } catch (InterruptedException e) { /* ignore */ } } } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/StateWaiter.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/StateWaiter.java index 4cfab796a4e..d131c330381 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/StateWaiter.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/StateWaiter.java @@ -5,7 +5,7 @@ import com.yahoo.vdslib.state.ClusterState; import com.yahoo.vespa.clustercontroller.core.ClusterStateBundle; import com.yahoo.vespa.clustercontroller.core.FakeTimer; import com.yahoo.vespa.clustercontroller.core.listeners.SystemStateListener; - +import java.time.Duration; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -57,8 +57,8 @@ public class StateWaiter implements SystemStateListener { } } - public void waitForState(String stateRegex, long timeout) { - waitForState(stateRegex, timeout, 0); + public void waitForState(String stateRegex, Duration timeout) { + waitForState(stateRegex, timeout.toMillis(), 0); } /** diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitCondition.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitCondition.java index d6c43f54259..d6d3e2876a5 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitCondition.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/WaitCondition.java @@ -156,9 +156,10 @@ public interface WaitCondition { @Override public String toString() { StringBuilder sb = new StringBuilder(); - sb.append("RegexStateMatcher(\n wanted: '").append(pattern.pattern()) - .append("'\n last checked: '").append(lastCheckedState).append("'") - .append("'\n current: '").append(currentState).append(")"); + sb.append("RegexStateMatcher:") + .append("\n wanted: '").append(pattern.pattern()).append("'") + .append("\n last checked: '").append(lastCheckedState).append("'") + .append("\n current: '").append(currentState).append("'"); return sb.toString(); } } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java index 6336de3d1a3..7960cd7c9d2 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java @@ -1,16 +1,17 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.clustercontroller.core.testutils; -import java.util.logging.Level; import com.yahoo.vdslib.state.ClusterState; import com.yahoo.vdslib.state.Node; import com.yahoo.vespa.clustercontroller.core.DummyVdsNode; import com.yahoo.vespa.clustercontroller.core.FleetController; - +import java.time.Duration; +import java.time.Instant; import java.util.Collections; import java.util.LinkedList; import java.util.List; import java.util.Set; +import java.util.logging.Level; import java.util.logging.Logger; public interface Waiter { @@ -19,18 +20,18 @@ public interface Waiter { Object getMonitor(); FleetController getFleetController(); List<DummyVdsNode> getDummyNodes(); - int getTimeoutMS(); + Duration getTimeout(); } ClusterState waitForState(String state) throws Exception; ClusterState waitForStateInSpace(String space, String state) throws Exception; ClusterState waitForStateInAllSpaces(String state) throws Exception; - ClusterState waitForState(String state, int timeoutMS) throws Exception; + ClusterState waitForState(String state, Duration timeoutMS) throws Exception; ClusterState waitForStableSystem() throws Exception; ClusterState waitForStableSystem(int nodeCount) throws Exception; ClusterState waitForInitProgressPassed(Node n, double progress); ClusterState waitForClusterStateIncludingNodesWithMinUsedBits(int bitcount, int nodecount); - void wait(WaitCondition c, WaitTask wt, int timeoutMS); + void wait(WaitCondition c, WaitTask wt, Duration timeout); class Impl implements Waiter { @@ -42,7 +43,7 @@ public interface Waiter { } // TODO refactor - private ClusterState waitForState(String state, int timeoutMS, boolean checkAllSpaces, Set<String> checkSpaces) { + private ClusterState waitForState(String state, Duration timeoutMS, boolean checkAllSpaces, Set<String> checkSpaces) { LinkedList<DummyVdsNode> nodesToCheck = new LinkedList<>(); for(DummyVdsNode node : data.getDummyNodes()) { if (node.isConnected()) nodesToCheck.add(node); @@ -57,15 +58,15 @@ public interface Waiter { } public ClusterState waitForState(String state) { - return waitForState(state, data.getTimeoutMS()); + return waitForState(state, data.getTimeout()); } public ClusterState waitForStateInAllSpaces(String state) { - return waitForState(state, data.getTimeoutMS(), true, Collections.emptySet()); + return waitForState(state, data.getTimeout(), true, Collections.emptySet()); } public ClusterState waitForStateInSpace(String space, String state) { - return waitForState(state, data.getTimeoutMS(), false, Collections.singleton(space)); + return waitForState(state, data.getTimeout(), false, Collections.singleton(space)); } - public ClusterState waitForState(String state, int timeoutMS) { + public ClusterState waitForState(String state, Duration timeoutMS) { return waitForState(state, timeoutMS, false, Collections.emptySet()); } public ClusterState waitForStableSystem() { @@ -73,24 +74,23 @@ public interface Waiter { } public ClusterState waitForStableSystem(int nodeCount) { WaitCondition.StateWait swc = new WaitCondition.RegexStateMatcher("version:\\d+ distributor:"+nodeCount+" storage:"+nodeCount, data.getFleetController(), data.getMonitor()).includeNotifyingNodes(data.getDummyNodes()); - wait(swc, new WaitTask.StateResender(data.getFleetController()), data.getTimeoutMS()); + wait(swc, new WaitTask.StateResender(data.getFleetController()), data.getTimeout()); return swc.getCurrentState(); } public ClusterState waitForInitProgressPassed(Node n, double progress) { WaitCondition.StateWait swc = new WaitCondition.InitProgressPassedMatcher(n, progress, data.getFleetController(), data.getMonitor()); - wait(swc, new WaitTask.StateResender(data.getFleetController()), data.getTimeoutMS()); + wait(swc, new WaitTask.StateResender(data.getFleetController()), data.getTimeout()); return swc.getCurrentState(); } public ClusterState waitForClusterStateIncludingNodesWithMinUsedBits(int bitcount, int nodecount) { WaitCondition.StateWait swc = new WaitCondition.MinUsedBitsMatcher(bitcount, nodecount, data.getFleetController(), data.getMonitor()); - wait(swc, new WaitTask.StateResender(data.getFleetController()), data.getTimeoutMS()); + wait(swc, new WaitTask.StateResender(data.getFleetController()), data.getTimeout()); return swc.getCurrentState(); } - public final void wait(WaitCondition c, WaitTask wt, int timeoutMS) { + public final void wait(WaitCondition c, WaitTask wt, Duration timeout) { log.log(Level.INFO, "Waiting for " + c + (wt == null ? "" : " with wait task " + wt)); - final long startTime = System.currentTimeMillis(); - final long endTime = startTime + timeoutMS; + Instant endTime = Instant.now().plus(timeout); String lastReason = null; while (true) { synchronized (data.getMonitor()) { @@ -111,11 +111,11 @@ public interface Waiter { allowWait = false; } } - final long timeLeft = endTime - System.currentTimeMillis(); - if (timeLeft <= 0) { - throw new IllegalStateException("Timed out waiting max " + timeoutMS + " ms for " + c + (wt == null ? "" : "\n with wait task " + wt) + ",\n reason: " + reason); - } - if (allowWait) data.getMonitor().wait(wt == null ? WaitTask.defaultTaskFrequencyMillis : Math.min(wt.getWaitTaskFrequencyInMillis(), timeLeft)); + Duration timeLeft = Duration.between(Instant.now(), endTime); + if (timeLeft.isNegative()) + throw new IllegalStateException("Timed out waiting max " + timeout + " ms for " + c + (wt == null ? "" : "\n with wait task " + wt) + ",\n reason: " + reason); + if (allowWait) + data.getMonitor().wait(wt == null ? WaitTask.defaultTaskFrequencyMillis : Math.min(wt.getWaitTaskFrequencyInMillis(), timeLeft.toMillis())); } catch (InterruptedException e) { } } |