From 35b6c171e787f3fa12b5bcbd84cd746ded29a41c Mon Sep 17 00:00:00 2001 From: Harald Musum Date: Fri, 12 Aug 2022 12:17:28 +0200 Subject: Make sure to get timeout in seconds as a double --- .../clustercontroller/core/FleetController.java | 6 ++--- .../vespa/clustercontroller/core/DatabaseTest.java | 2 +- .../core/FleetControllerTest.java | 9 +++++-- .../clustercontroller/core/MasterElectionTest.java | 28 +++++++++++----------- .../clustercontroller/core/RpcServerTest.java | 16 ++++++------- .../vespa/clustercontroller/core/SlobrokTest.java | 6 ++--- .../clustercontroller/core/StateChangeTest.java | 18 +++++++------- .../clustercontroller/core/StateGatherTest.java | 8 +++---- 8 files changed, 48 insertions(+), 45 deletions(-) diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index 2c4faaf47e6..4097810b633 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -1197,8 +1197,7 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta context.log(logger, Level.INFO, ackedNodes + " nodes now have acked system state " + version + " or higher."); return; } - Duration remainingTime = Duration.between(Instant.now(), endTime); - if (remainingTime.isNegative()) { + if (Instant.now().isAfter(endTime)) { throw new IllegalStateException("Did not get " + nodeCount + " nodes to system state " + version + " within timeout of " + timeout); } monitor.wait(10); @@ -1219,8 +1218,7 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta } if (distCount == distNodeCount && storCount == storNodeCount) return; - Duration remainingTime = Duration.between(Instant.now(), endTime); - if (remainingTime.isNegative()) { + if (Instant.now().isAfter(endTime)) { throw new IllegalStateException("Did not get all " + distNodeCount + " distributors and " + storNodeCount + " storage nodes registered in slobrok within timeout of " + timeout + ". (Got " + distCount + " distributors and " + storCount + " storage nodes)"); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java index b74835080d9..42bcfaac477 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java @@ -154,7 +154,7 @@ public class DatabaseTest extends FleetControllerTest { Request req = new Request("setNodeState"); req.parameters().add(new StringValue("storage/cluster.mycluster/" + n.getType().toString() + "/" + n.getIndex())); req.parameters().add(new StringValue(ns.serialize(true))); - connection.invokeSync(req, timeout.getSeconds()); + connection.invokeSync(req, timeoutInSeconds()); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("s"), req.toString()); wantedStates.put(n, ns); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java index fab0c790f76..9877a76a6f2 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java @@ -57,9 +57,10 @@ public abstract class FleetControllerTest implements Waiter { private static final Logger log = Logger.getLogger(FleetControllerTest.class.getName()); private static final int DEFAULT_NODE_COUNT = 10; - final Duration timeout = Duration.ofSeconds(30); - Supervisor supervisor; + private final Duration timeout = Duration.ofSeconds(30); protected final FakeTimer timer = new FakeTimer(); + + Supervisor supervisor; protected Slobrok slobrok; protected FleetControllerOptions options; ZooKeeperTestServer zooKeeperServer; @@ -474,4 +475,8 @@ public abstract class FleetControllerTest implements Waiter { return connectionSpecs; } + Duration timeout() { return timeout; } + + double timeoutInSeconds() { return (double) timeout.toMillis() / 1000; } + } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java index bd022e2675c..36884892895 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java @@ -66,19 +66,19 @@ public class MasterElectionTest extends FleetControllerTest { } private void waitForZookeeperDisconnected() throws TimeoutException { - Instant maxTime = Instant.now().plus(timeout); + Instant maxTime = Instant.now().plus(timeout()); for (FleetController f : fleetControllers) { while (f.hasZookeeperConnection()) { try { Thread.sleep(1); } catch (InterruptedException e) { /* ignore */ } if (Instant.now().isAfter(maxTime)) - throw new TimeoutException("Failed to notice zookeeper down within timeout of " + timeout); + throw new TimeoutException("Failed to notice zookeeper down within timeout of " + timeout()); } } waitForCompleteCycles(); } private void waitForCompleteCycle(int findex) { - fleetControllers.get(findex).waitForCompleteCycle(timeout); + fleetControllers.get(findex).waitForCompleteCycle(timeout()); } private void waitForCompleteCycles() { @@ -159,7 +159,7 @@ public class MasterElectionTest extends FleetControllerTest { private void waitForMaster(int master) { log.log(Level.INFO, "Entering waitForMaster"); boolean isOnlyMaster = false; - for (int i = 0; i < timeout.toMillis(); i += 100) { + for (int i = 0; i < timeout().toMillis(); i += 100) { if (!fleetControllers.get(master).isMaster()) { log.log(Level.INFO, "Node " + master + " is not master yet, sleeping more"); timer.advanceTime(100); @@ -308,12 +308,12 @@ public class MasterElectionTest extends FleetControllerTest { } private void waitForMasterReason(String reason, Integer master, List connections, int[] nodes) { - Instant endTime = Instant.now().plus(timeout); + Instant endTime = Instant.now().plus(timeout()); while (Instant.now().isBefore(endTime)) { boolean allOk = true; for (int node : nodes) { Request req = new Request("getMaster"); - connections.get(node).invokeSync(req, timeout.getSeconds()); + connections.get(node).invokeSync(req, timeout().getSeconds()); if (req.isError()) { allOk = false; break; @@ -330,7 +330,7 @@ public class MasterElectionTest extends FleetControllerTest { if (allOk) return; try{ Thread.sleep(100); } catch (InterruptedException e) { /* ignore */ } } - throw new IllegalStateException("Did not get master reason '" + reason + "' within timeout of " + timeout); + throw new IllegalStateException("Did not get master reason '" + reason + "' within timeout of " + timeout()); } @Test @@ -356,9 +356,9 @@ public class MasterElectionTest extends FleetControllerTest { Request req = new Request("getMaster"); for (int nodeIndex = 0; nodeIndex < 3; ++nodeIndex) { - for (int retry = 0; retry < timeout.getSeconds() * 10; ++retry) { + for (int retry = 0; retry < timeout().getSeconds() * 10; ++retry) { req = new Request("getMaster"); - connections.get(nodeIndex).invokeSync(req, timeout.getSeconds()); + connections.get(nodeIndex).invokeSync(req, timeoutInSeconds()); assertFalse(req.isError(), req.errorMessage()); if (req.returnValues().get(0).asInt32() == 0 && req.returnValues().get(1).asString().equals("All 3 nodes agree that 0 is current master.")) { @@ -389,13 +389,13 @@ public class MasterElectionTest extends FleetControllerTest { waitForMaster(1); req = new Request("getMaster"); - connections.get(0).invokeSync(req, timeout.getSeconds()); + connections.get(0).invokeSync(req, timeoutInSeconds()); assertEquals(104, req.errorCode(), req.toString()); assertEquals("Connection error", req.errorMessage(), req.toString()); - for (int i = 0; i < timeout.getSeconds() * 10; ++i) { + for (int i = 0; i < timeout().getSeconds() * 10; ++i) { req = new Request("getMaster"); - connections.get(1).invokeSync(req, timeout.getSeconds()); + connections.get(1).invokeSync(req, timeoutInSeconds()); assertFalse(req.isError(), req.errorMessage()); if (req.returnValues().get(0).asInt32() != -1) break; // We may have bad timing causing node not to have realized it is master yet @@ -403,9 +403,9 @@ public class MasterElectionTest extends FleetControllerTest { assertEquals(1, req.returnValues().get(0).asInt32(), req.toString()); assertEquals("2 of 3 nodes agree 1 is master.", req.returnValues().get(1).asString(), req.toString()); - for (int i = 0; i < timeout.getSeconds() * 10; ++i) { + for (int i = 0; i < timeout().getSeconds() * 10; ++i) { req = new Request("getMaster"); - connections.get(2).invokeSync(req, timeout.getSeconds()); + connections.get(2).invokeSync(req, timeoutInSeconds()); assertFalse(req.isError(), req.errorMessage()); if (req.returnValues().get(0).asInt32() != -1) break; } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java index 8c01c0d2671..fd56f0d0143 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java @@ -100,7 +100,7 @@ public class RpcServerTest extends FleetControllerTest { log.log(Level.INFO, "Disconnecting distributor 0. Waiting for state to reflect change."); nodes.get(0).disconnect(); nodes.get(19).disconnect(); - fleetController.waitForNodesInSlobrok(9, 9, timeout); + fleetController.waitForNodesInSlobrok(9, 9, timeout()); timer.advanceTime(options.nodeStateRequestTimeoutMS + options.maxSlobrokDisconnectGracePeriod); wait(new WaitCondition.StateWait(fleetController, fleetController.getMonitor()) { @@ -119,7 +119,7 @@ public class RpcServerTest extends FleetControllerTest { } return null; } - }, null, timeout); + }, null, timeout()); int rpcPort = fleetController.getRpcPort(); supervisor = new Supervisor(new Transport()); @@ -127,7 +127,7 @@ public class RpcServerTest extends FleetControllerTest { assertTrue(connection.isValid()); Request req = new Request("getSystemState"); - connection.invokeSync(req, timeout.getSeconds()); + connection.invokeSync(req, timeout().getSeconds()); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("ss"), req.toString()); String systemState = req.returnValues().get(1).asString(); @@ -523,7 +523,7 @@ public class RpcServerTest extends FleetControllerTest { assertTrue(connection.isValid()); Request req = new Request("getMaster"); - connection.invokeSync(req, timeout.getSeconds()); + connection.invokeSync(req, timeoutInSeconds()); assertEquals(0, req.returnValues().get(0).asInt32(), req.toString()); assertEquals("All 1 nodes agree that 0 is current master.", req.returnValues().get(1).asString(), req.toString()); @@ -548,7 +548,7 @@ public class RpcServerTest extends FleetControllerTest { assertTrue(connection.isValid()); Request req = new Request("getNodeList"); - connection.invokeSync(req, timeout.getSeconds()); + connection.invokeSync(req, timeoutInSeconds()); assertEquals(ErrorCode.NONE, req.errorCode(), req.errorMessage()); assertTrue(req.checkReturnTypes("SS"), req.toString()); String[] slobrok = req.returnValues().get(0).asStringArray().clone(); @@ -570,7 +570,7 @@ public class RpcServerTest extends FleetControllerTest { Request req2 = new Request("getnodestate2"); req2.parameters().add(new StringValue("unknown")); Target connection2 = supervisor.connect(new Spec(rpc[i])); - connection2.invokeSync(req2, timeout.getSeconds()); + connection2.invokeSync(req2, timeoutInSeconds()); assertEquals(ErrorCode.NONE, req.errorCode(), req2.toString()); } } @@ -583,7 +583,7 @@ public class RpcServerTest extends FleetControllerTest { Request req = new Request("setNodeState"); req.parameters().add(new StringValue(node)); req.parameters().add(new StringValue(newNodeState)); - connection.invokeSync(req, timeout.getSeconds()); + connection.invokeSync(req, timeoutInSeconds()); return req; } @@ -591,7 +591,7 @@ public class RpcServerTest extends FleetControllerTest { Request req = new Request("getNodeState"); req.parameters().add(new StringValue(nodeType)); req.parameters().add(new Int32Value(nodeIndex)); - connection.invokeSync(req, timeout.getSeconds()); + connection.invokeSync(req, timeoutInSeconds()); return req; } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java index d8a302731fe..78576f9600c 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java @@ -49,10 +49,10 @@ public class SlobrokTest extends FleetControllerTest { } //fleetController.setFreshSlobrokMirror(); waitForCompleteCycle(); - fleetController.waitForNodesInSlobrok(10, 10, timeout); + fleetController.waitForNodesInSlobrok(10, 10, timeout()); log.log(Level.INFO, "Waiting for cluster to be up and available again"); - for (int i = 0; i < timeout.toMillis(); i += 10) { + for (int i = 0; i < timeout().toMillis(); i += 10) { if (clusterAvailable()) break; timer.advanceTime(1000); waitForCompleteCycle(); @@ -80,7 +80,7 @@ public class SlobrokTest extends FleetControllerTest { int version = fleetController.getSystemState().getVersion(); nodes.get(0).disconnectSlobrok(); log.log(Level.INFO, "DISCONNECTED NODE FROM SLOBROK. SHOULD BE IN COOLDOWN PERIOD"); - fleetController.waitForNodesInSlobrok(9, 10, timeout); + fleetController.waitForNodesInSlobrok(9, 10, timeout()); synchronized(timer) { nodes.get(0).sendGetNodeStateReply(0); } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java index 0589fc4f7ff..6ad0430b5c3 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java @@ -967,7 +967,7 @@ public class StateChangeTest extends FleetControllerTest { // Ensure all nodes have been seen by fleetcontroller and that it has had enough time to possibly have sent a cluster state // Note: this is a candidate state and therefore NOT versioned yet - waiter.waitForState("^distributor:10 (\\.\\d+\\.t:\\d+ )*storage:10 (\\.\\d+\\.t:\\d+ )*.1.s:d( \\.\\d+\\.t:\\d+)*", timeout); + waiter.waitForState("^distributor:10 (\\.\\d+\\.t:\\d+ )*storage:10 (\\.\\d+\\.t:\\d+ )*.1.s:d( \\.\\d+\\.t:\\d+)*", timeout()); waitForCompleteCycle(); new StateMessageChecker(nodes) { @Override @@ -978,10 +978,10 @@ public class StateChangeTest extends FleetControllerTest { // Pass time and see that the nodes get state timer.advanceTime(3 * 60 * 1000); - waiter.waitForState("version:\\d+ distributor:10 storage:10 .1.s:d", timeout); + waiter.waitForState("version:\\d+ distributor:10 storage:10 .1.s:d", timeout()); int version = waiter.getCurrentSystemState().getVersion(); - fleetController.waitForNodesHavingSystemStateVersionEqualToOrAbove(version, 19, timeout); + fleetController.waitForNodesHavingSystemStateVersionEqualToOrAbove(version, 19, timeout()); new StateMessageChecker(nodes) { @Override @@ -1014,11 +1014,11 @@ public class StateChangeTest extends FleetControllerTest { final StateWaiter waiter = new StateWaiter(timer); fleetController.addSystemStateListener(waiter); - waiter.waitForState("version:\\d+ distributor:10 storage:10 .1.s:i .1.i:1.0", timeout); + waiter.waitForState("version:\\d+ distributor:10 storage:10 .1.s:i .1.i:1.0", timeout()); waitForCompleteCycle(); final int version = waiter.getCurrentSystemState().getVersion(); - fleetController.waitForNodesHavingSystemStateVersionEqualToOrAbove(version, 20, timeout); + fleetController.waitForNodesHavingSystemStateVersionEqualToOrAbove(version, 20, timeout()); // The last two versions of the cluster state should be seen (all nodes up, // zero out timestate) @@ -1044,10 +1044,10 @@ public class StateChangeTest extends FleetControllerTest { nodes.get(1).failSetSystemState(true); int versionBeforeChange = nodes.get(1).getSystemStatesReceived().get(0).getVersion(); nodes.get(2).disconnect(); // cause a new state - waiter.waitForState("version:\\d+ distributor:10 .1.s:d storage:10", timeout); + waiter.waitForState("version:\\d+ distributor:10 .1.s:d storage:10", timeout()); int versionAfterChange = waiter.getCurrentSystemState().getVersion(); assertTrue(versionAfterChange > versionBeforeChange); - fleetController.waitForNodesHavingSystemStateVersionEqualToOrAbove(versionAfterChange, 18, timeout); + fleetController.waitForNodesHavingSystemStateVersionEqualToOrAbove(versionAfterChange, 18, timeout()); // Assert that the failed node has not acknowledged the latest version. // (The version may still be larger than versionBeforeChange if the fleet controller sends a @@ -1122,7 +1122,7 @@ public class StateChangeTest extends FleetControllerTest { // Simulate netsplit. Take node down without node booting assertTrue(nodes.get(0).isDistributor()); nodes.get(0).disconnectImmediately(); - waiter.waitForState("version:\\d+ distributor:10 .0.s:d storage:10", timeout); + waiter.waitForState("version:\\d+ distributor:10 .0.s:d storage:10", timeout()); // Add node back. nodes.get(0).connect(); @@ -1130,7 +1130,7 @@ public class StateChangeTest extends FleetControllerTest { // At this time, node taken down should have cluster states with all starting timestamps set. Others node should not. for (DummyVdsNode node : nodes) { - node.waitForSystemStateVersion(waiter.getCurrentSystemState().getVersion(), timeout); + node.waitForSystemStateVersion(waiter.getCurrentSystemState().getVersion(), timeout()); List states = node.getSystemStatesReceived(); ClusterState lastState = states.get(0); StringBuilder stateHistory = new StringBuilder(); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java index e7dcf382464..656d5a187d1 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java @@ -57,11 +57,11 @@ public class StateGatherTest extends FleetControllerTest { } private void waitUntilTimedOutGetNodeState(DummyVdsNode dnode, DummyVdsNode snode) throws TimeoutException { - Instant endTime = Instant.now().plus(timeout); + Instant endTime = Instant.now().plus(timeout()); synchronized (timer) { while (dnode.timedOutStateReplies != 1 || snode.timedOutStateReplies != 1) { if (Instant.now().isAfter(endTime)) { - throw new TimeoutException("Did not get to have one timed out within timeout of " + timeout + throw new TimeoutException("Did not get to have one timed out within timeout of " + timeout() + ", " + getGetNodeStateReplyCounts(dnode) + ", " + getGetNodeStateReplyCounts(snode)); } try{ timer.wait(1); } catch (InterruptedException e) { /* ignore */ } @@ -70,9 +70,9 @@ public class StateGatherTest extends FleetControllerTest { } private void waitUntilPendingGetNodeState(DummyVdsNode dnode, DummyVdsNode snode) throws TimeoutException { - Instant endTime = Instant.now().plus(timeout); + Instant endTime = Instant.now().plus(timeout()); while (dnode.getPendingNodeStateCount() != 1 || snode.getPendingNodeStateCount() != 1) { - if (Instant.now().isAfter(endTime)) throw new TimeoutException("Did not get to have one pending within timeout of " + timeout); + if (Instant.now().isAfter(endTime)) throw new TimeoutException("Did not get to have one pending within timeout of " + timeout()); try{ Thread.sleep(1); } catch (InterruptedException e) { /* ignore */ } } } -- cgit v1.2.3