diff options
author | Harald Musum <musum@yahooinc.com> | 2022-08-11 18:24:21 +0200 |
---|---|---|
committer | Harald Musum <musum@yahooinc.com> | 2022-08-11 18:24:21 +0200 |
commit | 123c37ea7bc65f0d59de1f512a076c50eba1aa08 (patch) | |
tree | 5b60f04b0253c1ec44b8a780ad0df840a017acb4 | |
parent | 6fa6a0847973d4d4cc84fa40f48f796dcac3a995 (diff) |
Use one timeout and cleanup timeout usage a bit
11 files changed, 116 insertions, 112 deletions
diff --git a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java index 8b335e877cd..2c4faaf47e6 100644 --- a/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java +++ b/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/FleetController.java @@ -28,6 +28,8 @@ import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageServer import com.yahoo.vespa.clustercontroller.core.status.statuspage.StatusPageServerInterface; import com.yahoo.vespa.clustercontroller.utils.util.MetricReporter; import java.io.FileNotFoundException; +import java.time.Duration; +import java.time.Instant; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Collection; @@ -1156,16 +1158,18 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta public NodeListener getNodeStateUpdateListener() { return FleetController.this; } }; - public void waitForCompleteCycle(long timeoutMS) { - long endTime = System.currentTimeMillis() + timeoutMS; + public void waitForCompleteCycle(Duration timeout) { + Instant endTime = Instant.now().plus(timeout); synchronized (monitor) { // To wait at least one complete cycle, if a cycle is already running we need to wait for the next one beyond. long wantedCycle = cycleCount + (processingCycle ? 2 : 1); waitingForCycle = true; try{ while (cycleCount < wantedCycle) { - if (System.currentTimeMillis() > endTime) throw new IllegalStateException("Timed out waiting for cycle to complete. Not completed after " + timeoutMS + " ms."); - if ( !isRunning() ) throw new IllegalStateException("Fleetcontroller not running. Will never complete cycles"); + if (Instant.now().isAfter(endTime)) + throw new IllegalStateException("Timed out waiting for cycle to complete. Not completed after " + timeout); + if ( !isRunning() ) + throw new IllegalStateException("Fleetcontroller not running. Will never complete cycles"); try{ monitor.wait(100); } catch (InterruptedException e) {} } } finally { @@ -1179,8 +1183,8 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta * But it is only used in unit tests that should not trigger any thread issues. Don't want to add locks that reduce * live performance to remove a non-problem. */ - public void waitForNodesHavingSystemStateVersionEqualToOrAbove(int version, int nodeCount, int timeout) throws InterruptedException { - long maxTime = System.currentTimeMillis() + timeout; + public void waitForNodesHavingSystemStateVersionEqualToOrAbove(int version, int nodeCount, Duration timeout) throws InterruptedException { + Instant endTime = Instant.now().plus(timeout); synchronized (monitor) { while (true) { int ackedNodes = 0; @@ -1193,17 +1197,17 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta context.log(logger, Level.INFO, ackedNodes + " nodes now have acked system state " + version + " or higher."); return; } - long remainingTime = maxTime - System.currentTimeMillis(); - if (remainingTime <= 0) { - throw new IllegalStateException("Did not get " + nodeCount + " nodes to system state " + version + " within timeout of " + timeout + " milliseconds."); + Duration remainingTime = Duration.between(Instant.now(), endTime); + if (remainingTime.isNegative()) { + throw new IllegalStateException("Did not get " + nodeCount + " nodes to system state " + version + " within timeout of " + timeout); } monitor.wait(10); } } } - public void waitForNodesInSlobrok(int distNodeCount, int storNodeCount, int timeoutMillis) throws InterruptedException { - long maxTime = System.currentTimeMillis() + timeoutMillis; + public void waitForNodesInSlobrok(int distNodeCount, int storNodeCount, Duration timeout) throws InterruptedException { + Instant endTime = Instant.now().plus(timeout); synchronized (monitor) { while (true) { int distCount = 0, storCount = 0; @@ -1215,10 +1219,10 @@ public class FleetController implements NodeListener, SlobrokListener, SystemSta } if (distCount == distNodeCount && storCount == storNodeCount) return; - long remainingTime = maxTime - System.currentTimeMillis(); - if (remainingTime <= 0) { + Duration remainingTime = Duration.between(Instant.now(), endTime); + if (remainingTime.isNegative()) { throw new IllegalStateException("Did not get all " + distNodeCount + " distributors and " + storNodeCount - + " storage nodes registered in slobrok within timeout of " + timeoutMillis + " ms. (Got " + + " storage nodes registered in slobrok within timeout of " + timeout + ". (Got " + distCount + " distributors and " + storCount + " storage nodes)"); } monitor.wait(10); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java index cb87536314f..b74835080d9 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DatabaseTest.java @@ -154,7 +154,7 @@ public class DatabaseTest extends FleetControllerTest { Request req = new Request("setNodeState"); req.parameters().add(new StringValue("storage/cluster.mycluster/" + n.getType().toString() + "/" + n.getIndex())); req.parameters().add(new StringValue(ns.serialize(true))); - connection.invokeSync(req, timeoutS); + connection.invokeSync(req, timeout.getSeconds()); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("s"), req.toString()); wantedStates.put(n, ns); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNode.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNode.java index af067cc394f..fd24966e26a 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNode.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/DummyVdsNode.java @@ -21,6 +21,8 @@ import com.yahoo.vdslib.state.NodeType; import com.yahoo.vdslib.state.State; import com.yahoo.vespa.clustercontroller.core.rpc.RPCCommunicator; import com.yahoo.vespa.clustercontroller.core.rpc.RPCUtil; +import java.time.Duration; +import java.time.Instant; import java.util.Iterator; import java.util.LinkedList; import java.util.List; @@ -194,11 +196,11 @@ public class DummyVdsNode { public int getStateCommunicationVersion() { return stateCommunicationVersion; } - void waitForSystemStateVersion(int version) { + void waitForSystemStateVersion(int version, Duration timeout) { try { - long startTime = System.currentTimeMillis(); + Instant endTime = Instant.now().plus(timeout); while (getLatestSystemStateVersion().orElse(-1) < version) { - if ( (System.currentTimeMillis() - startTime) > (long) FleetControllerTest.timeoutMS) + if (Instant.now().isAfter(endTime)) throw new RuntimeException("Timed out waiting for state version " + version + " in " + this); Thread.sleep(10); } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java index d778518240e..fab0c790f76 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/FleetControllerTest.java @@ -30,6 +30,7 @@ import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.extension.ExtendWith; import org.junit.jupiter.api.extension.ExtensionContext; import org.junit.jupiter.api.extension.TestWatcher; +import java.time.Duration; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -56,6 +57,7 @@ public abstract class FleetControllerTest implements Waiter { private static final Logger log = Logger.getLogger(FleetControllerTest.class.getName()); private static final int DEFAULT_NODE_COUNT = 10; + final Duration timeout = Duration.ofSeconds(30); Supervisor supervisor; protected final FakeTimer timer = new FakeTimer(); protected Slobrok slobrok; @@ -65,8 +67,6 @@ public abstract class FleetControllerTest implements Waiter { protected List<DummyVdsNode> nodes = new ArrayList<>(); private String testName; - final static int timeoutS; - final static int timeoutMS; private final Waiter waiter = new Waiter.Impl(new DataRetriever() { @Override public Object getMonitor() { return timer; } @@ -75,13 +75,11 @@ public abstract class FleetControllerTest implements Waiter { @Override public List<DummyVdsNode> getDummyNodes() { return nodes; } @Override - public int getTimeoutMS() { return timeoutMS; } + public Duration getTimeout() { return timeout; } }); static { LogSetup.initVespaLogging("fleetcontroller"); - timeoutS = 30; - timeoutMS = timeoutS * 1000; } static class BackOff implements BackOffPolicy { @@ -278,7 +276,7 @@ public abstract class FleetControllerTest implements Waiter { .collect(Collectors.toList()); } @Override - public int getTimeoutMS() { return timeoutMS; } + public Duration getTimeout() { return timeout; } }); subsetWaiter.waitForState(expectedState); } @@ -323,16 +321,16 @@ public abstract class FleetControllerTest implements Waiter { public ClusterState waitForState(String state) throws Exception { return waiter.waitForState(state); } public ClusterState waitForStateInAllSpaces(String state) throws Exception { return waiter.waitForStateInAllSpaces(state); } public ClusterState waitForStateInSpace(String space, String state) throws Exception { return waiter.waitForStateInSpace(space, state); } - public ClusterState waitForState(String state, int timeoutMS) throws Exception { return waiter.waitForState(state, timeoutMS); } + public ClusterState waitForState(String state, Duration timeout) throws Exception { return waiter.waitForState(state, timeout); } public ClusterState waitForInitProgressPassed(Node n, double progress) { return waiter.waitForInitProgressPassed(n, progress); } public ClusterState waitForClusterStateIncludingNodesWithMinUsedBits(int bitcount, int nodecount) { return waiter.waitForClusterStateIncludingNodesWithMinUsedBits(bitcount, nodecount); } - public void wait(WaitCondition c, WaitTask wt, int timeoutMS) { - waiter.wait(c, wt, timeoutMS); + public void wait(WaitCondition condition, WaitTask task, Duration timeout) { + waiter.wait(condition, task, timeout); } void waitForCompleteCycle() { - fleetController.waitForCompleteCycle(timeoutMS); + fleetController.waitForCompleteCycle(timeout); } private static class ExpectLine { @@ -461,7 +459,7 @@ public abstract class FleetControllerTest implements Waiter { Request req = new Request("setNodeState"); req.parameters().add(new StringValue(node.getSlobrokName())); req.parameters().add(new StringValue(ns.serialize())); - connection.invokeSync(req, timeoutS); + connection.invokeSync(req, timeout.getSeconds()); if (req.isError()) { fail("Failed to invoke setNodeState(): " + req.errorCode() + ": " + req.errorMessage()); } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java index 9bdcbb9ce6c..bd022e2675c 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/MasterElectionTest.java @@ -15,6 +15,7 @@ import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Timeout; import org.junit.jupiter.api.extension.ExtendWith; +import java.time.Instant; import java.util.ArrayList; import java.util.List; import java.util.concurrent.TimeoutException; @@ -65,19 +66,19 @@ public class MasterElectionTest extends FleetControllerTest { } private void waitForZookeeperDisconnected() throws TimeoutException { - long maxTime = System.currentTimeMillis() + timeoutMS; + Instant maxTime = Instant.now().plus(timeout); for (FleetController f : fleetControllers) { while (f.hasZookeeperConnection()) { try { Thread.sleep(1); } catch (InterruptedException e) { /* ignore */ } - if (System.currentTimeMillis() > maxTime) - throw new TimeoutException("Failed to notice zookeeper down within timeout of " + timeoutMS + " ms"); + if (Instant.now().isAfter(maxTime)) + throw new TimeoutException("Failed to notice zookeeper down within timeout of " + timeout); } } waitForCompleteCycles(); } private void waitForCompleteCycle(int findex) { - fleetControllers.get(findex).waitForCompleteCycle(timeoutMS); + fleetControllers.get(findex).waitForCompleteCycle(timeout); } private void waitForCompleteCycles() { @@ -158,7 +159,7 @@ public class MasterElectionTest extends FleetControllerTest { private void waitForMaster(int master) { log.log(Level.INFO, "Entering waitForMaster"); boolean isOnlyMaster = false; - for (int i = 0; i < FleetControllerTest.timeoutMS; i += 100) { + for (int i = 0; i < timeout.toMillis(); i += 100) { if (!fleetControllers.get(master).isMaster()) { log.log(Level.INFO, "Node " + master + " is not master yet, sleeping more"); timer.advanceTime(100); @@ -177,7 +178,7 @@ public class MasterElectionTest extends FleetControllerTest { break; } } - // Have to wait to get zookeeper communication chance to happen. + // Have to wait to get zookeeper communication chance to happen. try { Thread.sleep(100); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } } @@ -306,13 +307,13 @@ public class MasterElectionTest extends FleetControllerTest { waitForMaster(1); } - private void waitForMasterReason(String reason, Integer master, List<Target> connections, int nodes[]) { - long endTime = System.currentTimeMillis() + timeoutMS; - while (System.currentTimeMillis() < endTime) { + private void waitForMasterReason(String reason, Integer master, List<Target> connections, int[] nodes) { + Instant endTime = Instant.now().plus(timeout); + while (Instant.now().isBefore(endTime)) { boolean allOk = true; for (int node : nodes) { Request req = new Request("getMaster"); - connections.get(node).invokeSync(req, FleetControllerTest.timeoutS); + connections.get(node).invokeSync(req, timeout.getSeconds()); if (req.isError()) { allOk = false; break; @@ -329,8 +330,7 @@ public class MasterElectionTest extends FleetControllerTest { if (allOk) return; try{ Thread.sleep(100); } catch (InterruptedException e) { /* ignore */ } } - throw new IllegalStateException("Did not get master reason '" + reason - + "' within timeout of " + timeoutMS + " ms"); + throw new IllegalStateException("Did not get master reason '" + reason + "' within timeout of " + timeout); } @Test @@ -356,9 +356,9 @@ public class MasterElectionTest extends FleetControllerTest { Request req = new Request("getMaster"); for (int nodeIndex = 0; nodeIndex < 3; ++nodeIndex) { - for (int retry = 0; retry < FleetControllerTest.timeoutS * 10; ++retry) { + for (int retry = 0; retry < timeout.getSeconds() * 10; ++retry) { req = new Request("getMaster"); - connections.get(nodeIndex).invokeSync(req, FleetControllerTest.timeoutS); + connections.get(nodeIndex).invokeSync(req, timeout.getSeconds()); assertFalse(req.isError(), req.errorMessage()); if (req.returnValues().get(0).asInt32() == 0 && req.returnValues().get(1).asString().equals("All 3 nodes agree that 0 is current master.")) { @@ -389,13 +389,13 @@ public class MasterElectionTest extends FleetControllerTest { waitForMaster(1); req = new Request("getMaster"); - connections.get(0).invokeSync(req, FleetControllerTest.timeoutS); + connections.get(0).invokeSync(req, timeout.getSeconds()); assertEquals(104, req.errorCode(), req.toString()); assertEquals("Connection error", req.errorMessage(), req.toString()); - for (int i = 0; i < FleetControllerTest.timeoutS * 10; ++i) { + for (int i = 0; i < timeout.getSeconds() * 10; ++i) { req = new Request("getMaster"); - connections.get(1).invokeSync(req, FleetControllerTest.timeoutS); + connections.get(1).invokeSync(req, timeout.getSeconds()); assertFalse(req.isError(), req.errorMessage()); if (req.returnValues().get(0).asInt32() != -1) break; // We may have bad timing causing node not to have realized it is master yet @@ -403,9 +403,9 @@ public class MasterElectionTest extends FleetControllerTest { assertEquals(1, req.returnValues().get(0).asInt32(), req.toString()); assertEquals("2 of 3 nodes agree 1 is master.", req.returnValues().get(1).asString(), req.toString()); - for (int i = 0; i < FleetControllerTest.timeoutS * 10; ++i) { + for (int i = 0; i < timeout.getSeconds() * 10; ++i) { req = new Request("getMaster"); - connections.get(2).invokeSync(req, FleetControllerTest.timeoutS); + connections.get(2).invokeSync(req, timeout.getSeconds()); assertFalse(req.isError(), req.errorMessage()); if (req.returnValues().get(0).asInt32() != -1) break; } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java index 677772e5c4e..bb4e85c032a 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/RpcServerTest.java @@ -100,7 +100,7 @@ public class RpcServerTest extends FleetControllerTest { log.log(Level.INFO, "Disconnecting distributor 0. Waiting for state to reflect change."); nodes.get(0).disconnect(); nodes.get(19).disconnect(); - fleetController.waitForNodesInSlobrok(9, 9, timeoutMS); + fleetController.waitForNodesInSlobrok(9, 9, timeout); timer.advanceTime(options.nodeStateRequestTimeoutMS + options.maxSlobrokDisconnectGracePeriod); wait(new WaitCondition.StateWait(fleetController, fleetController.getMonitor()) { @@ -119,7 +119,7 @@ public class RpcServerTest extends FleetControllerTest { } return null; } - }, null, timeoutMS); + }, null, timeout); int rpcPort = fleetController.getRpcPort(); supervisor = new Supervisor(new Transport()); @@ -127,7 +127,7 @@ public class RpcServerTest extends FleetControllerTest { assertTrue(connection.isValid()); Request req = new Request("getSystemState"); - connection.invokeSync(req, timeoutS); + connection.invokeSync(req, timeout.getSeconds()); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("ss"), req.toString()); String systemState = req.returnValues().get(1).asString(); @@ -150,7 +150,7 @@ public class RpcServerTest extends FleetControllerTest { Request req = new Request("setNodeState"); req.parameters().add(new StringValue("storage/cluster.mycluster/" + node.getType().toString() + "/" + node.getIndex())); req.parameters().add(new StringValue(newNodeState.serialize(true))); - connection.invokeSync(req, timeoutS); + connection.invokeSync(req, timeout.getSeconds()); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("s"), req.toString()); } @@ -195,7 +195,7 @@ public class RpcServerTest extends FleetControllerTest { Request req = new Request("getNodeState"); req.parameters().add(new StringValue("distributor")); req.parameters().add(new Int32Value(0)); - connection.invokeSync(req, timeoutS); + connection.invokeSync(req, timeout.getSeconds()); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("ssss"), req.toString()); assertEquals(State.DOWN, NodeState.deserialize(NodeType.DISTRIBUTOR, req.returnValues().get(0).asString()).getState()); @@ -206,7 +206,7 @@ public class RpcServerTest extends FleetControllerTest { req = new Request("getNodeState"); req.parameters().add(new StringValue("distributor")); req.parameters().add(new Int32Value(2)); - connection.invokeSync(req, timeoutS); + connection.invokeSync(req, timeout.getSeconds()); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("ssss"), req.toString()); assertEquals(State.DOWN, NodeState.deserialize(NodeType.DISTRIBUTOR, req.returnValues().get(0).asString()).getState()); @@ -216,7 +216,7 @@ public class RpcServerTest extends FleetControllerTest { req = new Request("getNodeState"); req.parameters().add(new StringValue("distributor")); req.parameters().add(new Int32Value(4)); - connection.invokeSync(req, timeoutS); + connection.invokeSync(req, timeout.getSeconds()); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("ssss"), req.toString()); assertEquals("", req.returnValues().get(0).asString()); @@ -226,7 +226,7 @@ public class RpcServerTest extends FleetControllerTest { req = new Request("getNodeState"); req.parameters().add(new StringValue("distributor")); req.parameters().add(new Int32Value(15)); - connection.invokeSync(req, timeoutS); + connection.invokeSync(req, timeout.getSeconds()); assertEquals(ErrorCode.METHOD_FAILED, req.errorCode(), req.toString()); assertEquals("No node distributor.15 exists in cluster mycluster", req.errorMessage()); assertFalse(req.checkReturnTypes("ssss"), req.toString()); @@ -234,7 +234,7 @@ public class RpcServerTest extends FleetControllerTest { req = new Request("getNodeState"); req.parameters().add(new StringValue("storage")); req.parameters().add(new Int32Value(1)); - connection.invokeSync(req, timeoutS); + connection.invokeSync(req, timeout.getSeconds()); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("ssss"), req.toString()); assertEquals("s:i i:0.2", req.returnValues().get(0).asString()); @@ -244,7 +244,7 @@ public class RpcServerTest extends FleetControllerTest { req = new Request("getNodeState"); req.parameters().add(new StringValue("storage")); req.parameters().add(new Int32Value(2)); - connection.invokeSync(req, timeoutS); + connection.invokeSync(req, timeout.getSeconds()); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("ssss"), req.toString()); assertEquals(State.DOWN, NodeState.deserialize(NodeType.STORAGE, req.returnValues().get(0).asString()).getState()); @@ -255,7 +255,7 @@ public class RpcServerTest extends FleetControllerTest { req = new Request("getNodeState"); req.parameters().add(new StringValue("storage")); req.parameters().add(new Int32Value(5)); - connection.invokeSync(req, timeoutS); + connection.invokeSync(req, timeout.getSeconds()); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("ssss"), req.toString()); assertEquals("", req.returnValues().get(0).asString()); @@ -265,7 +265,7 @@ public class RpcServerTest extends FleetControllerTest { req = new Request("getNodeState"); req.parameters().add(new StringValue("storage")); req.parameters().add(new Int32Value(7)); - connection.invokeSync(req, timeoutS); + connection.invokeSync(req, timeout.getSeconds()); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("ssss"), req.toString()); assertEquals(State.MAINTENANCE, NodeState.deserialize(NodeType.STORAGE, req.returnValues().get(0).asString()).getState()); @@ -485,7 +485,7 @@ public class RpcServerTest extends FleetControllerTest { Request req = new Request("setNodeState"); req.parameters().add(new StringValue("storage/cluster.mycluster/storage/14")); req.parameters().add(new StringValue("s:r")); - connection.invokeSync(req, timeoutS); + connection.invokeSync(req, timeout.getSeconds()); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("s"), req.toString()); @@ -494,7 +494,7 @@ public class RpcServerTest extends FleetControllerTest { req = new Request("setNodeState"); req.parameters().add(new StringValue("storage/cluster.mycluster/storage/16")); req.parameters().add(new StringValue("s:m")); - connection.invokeSync(req, timeoutS); + connection.invokeSync(req, timeout.getSeconds()); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); assertTrue(req.checkReturnTypes("s"), req.toString()); @@ -530,21 +530,21 @@ public class RpcServerTest extends FleetControllerTest { Request req = new Request("setNodeState"); req.parameters().add(new StringValue("storage/cluster.mycluster/storage/10")); req.parameters().add(new StringValue("s:m")); - connection.invokeSync(req, timeoutS); + connection.invokeSync(req, timeout.getSeconds()); assertEquals(ErrorCode.METHOD_FAILED, req.errorCode(), req.toString()); assertEquals("Cannot set wanted state of node storage.10. Index does not correspond to a configured node.", req.errorMessage(), req.toString()); req = new Request("setNodeState"); req.parameters().add(new StringValue("storage/cluster.mycluster/distributor/10")); req.parameters().add(new StringValue("s:m")); - connection.invokeSync(req, timeoutS); + connection.invokeSync(req, timeout.getSeconds()); assertEquals(ErrorCode.METHOD_FAILED, req.errorCode(), req.toString()); assertEquals("Cannot set wanted state of node distributor.10. Index does not correspond to a configured node.", req.errorMessage(), req.toString()); req = new Request("setNodeState"); req.parameters().add(new StringValue("storage/cluster.mycluster/storage/9")); req.parameters().add(new StringValue("s:m")); - connection.invokeSync(req, timeoutS); + connection.invokeSync(req, timeout.getSeconds()); assertEquals(ErrorCode.NONE, req.errorCode(), req.toString()); waitForState("version:\\d+ distributor:10 storage:10 .9.s:m"); @@ -565,7 +565,7 @@ public class RpcServerTest extends FleetControllerTest { assertTrue(connection.isValid()); Request req = new Request("getMaster"); - connection.invokeSync(req, timeoutS); + connection.invokeSync(req, timeout.getSeconds()); assertEquals(0, req.returnValues().get(0).asInt32(), req.toString()); assertEquals("All 1 nodes agree that 0 is current master.", req.returnValues().get(1).asString(), req.toString()); @@ -590,7 +590,7 @@ public class RpcServerTest extends FleetControllerTest { assertTrue(connection.isValid()); Request req = new Request("getNodeList"); - connection.invokeSync(req, timeoutS); + connection.invokeSync(req, timeout.getSeconds()); assertEquals(ErrorCode.NONE, req.errorCode(), req.errorMessage()); assertTrue(req.checkReturnTypes("SS"), req.toString()); String[] slobrok = req.returnValues().get(0).asStringArray().clone(); @@ -612,7 +612,7 @@ public class RpcServerTest extends FleetControllerTest { Request req2 = new Request("getnodestate2"); req2.parameters().add(new StringValue("unknown")); Target connection2 = supervisor.connect(new Spec(rpc[i])); - connection2.invokeSync(req2, timeoutS); + connection2.invokeSync(req2, timeout.getSeconds()); assertEquals(ErrorCode.NONE, req.errorCode(), req2.toString()); } } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java index b59c90f4955..d8a302731fe 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/SlobrokTest.java @@ -2,14 +2,13 @@ package com.yahoo.vespa.clustercontroller.core; import com.yahoo.jrt.slobrok.server.Slobrok; -import java.util.logging.Level; import org.junit.jupiter.api.Test; +import java.util.logging.Level; +import java.util.logging.Logger; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; -import java.util.logging.Logger; - public class SlobrokTest extends FleetControllerTest { private static final Logger log = Logger.getLogger(SlobrokTest.class.getName()); @@ -50,10 +49,10 @@ public class SlobrokTest extends FleetControllerTest { } //fleetController.setFreshSlobrokMirror(); waitForCompleteCycle(); - fleetController.waitForNodesInSlobrok(10, 10, timeoutMS); + fleetController.waitForNodesInSlobrok(10, 10, timeout); log.log(Level.INFO, "Waiting for cluster to be up and available again"); - for (int i = 0; i < timeoutMS; i += 10) { + for (int i = 0; i < timeout.toMillis(); i += 10) { if (clusterAvailable()) break; timer.advanceTime(1000); waitForCompleteCycle(); @@ -81,7 +80,7 @@ public class SlobrokTest extends FleetControllerTest { int version = fleetController.getSystemState().getVersion(); nodes.get(0).disconnectSlobrok(); log.log(Level.INFO, "DISCONNECTED NODE FROM SLOBROK. SHOULD BE IN COOLDOWN PERIOD"); - fleetController.waitForNodesInSlobrok(9, 10, timeoutMS); + fleetController.waitForNodesInSlobrok(9, 10, timeout); synchronized(timer) { nodes.get(0).sendGetNodeStateReply(0); } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java index af4984bdb26..0589fc4f7ff 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateChangeTest.java @@ -967,7 +967,7 @@ public class StateChangeTest extends FleetControllerTest { // Ensure all nodes have been seen by fleetcontroller and that it has had enough time to possibly have sent a cluster state // Note: this is a candidate state and therefore NOT versioned yet - waiter.waitForState("^distributor:10 (\\.\\d+\\.t:\\d+ )*storage:10 (\\.\\d+\\.t:\\d+ )*.1.s:d( \\.\\d+\\.t:\\d+)*", timeoutMS); + waiter.waitForState("^distributor:10 (\\.\\d+\\.t:\\d+ )*storage:10 (\\.\\d+\\.t:\\d+ )*.1.s:d( \\.\\d+\\.t:\\d+)*", timeout); waitForCompleteCycle(); new StateMessageChecker(nodes) { @Override @@ -978,10 +978,10 @@ public class StateChangeTest extends FleetControllerTest { // Pass time and see that the nodes get state timer.advanceTime(3 * 60 * 1000); - waiter.waitForState("version:\\d+ distributor:10 storage:10 .1.s:d", timeoutMS); + waiter.waitForState("version:\\d+ distributor:10 storage:10 .1.s:d", timeout); int version = waiter.getCurrentSystemState().getVersion(); - fleetController.waitForNodesHavingSystemStateVersionEqualToOrAbove(version, 19, timeoutMS); + fleetController.waitForNodesHavingSystemStateVersionEqualToOrAbove(version, 19, timeout); new StateMessageChecker(nodes) { @Override @@ -1014,11 +1014,11 @@ public class StateChangeTest extends FleetControllerTest { final StateWaiter waiter = new StateWaiter(timer); fleetController.addSystemStateListener(waiter); - waiter.waitForState("version:\\d+ distributor:10 storage:10 .1.s:i .1.i:1.0", timeoutMS); + waiter.waitForState("version:\\d+ distributor:10 storage:10 .1.s:i .1.i:1.0", timeout); waitForCompleteCycle(); final int version = waiter.getCurrentSystemState().getVersion(); - fleetController.waitForNodesHavingSystemStateVersionEqualToOrAbove(version, 20, timeoutMS); + fleetController.waitForNodesHavingSystemStateVersionEqualToOrAbove(version, 20, timeout); // The last two versions of the cluster state should be seen (all nodes up, // zero out timestate) @@ -1044,10 +1044,10 @@ public class StateChangeTest extends FleetControllerTest { nodes.get(1).failSetSystemState(true); int versionBeforeChange = nodes.get(1).getSystemStatesReceived().get(0).getVersion(); nodes.get(2).disconnect(); // cause a new state - waiter.waitForState("version:\\d+ distributor:10 .1.s:d storage:10", timeoutMS); + waiter.waitForState("version:\\d+ distributor:10 .1.s:d storage:10", timeout); int versionAfterChange = waiter.getCurrentSystemState().getVersion(); assertTrue(versionAfterChange > versionBeforeChange); - fleetController.waitForNodesHavingSystemStateVersionEqualToOrAbove(versionAfterChange, 18, timeoutMS); + fleetController.waitForNodesHavingSystemStateVersionEqualToOrAbove(versionAfterChange, 18, timeout); // Assert that the failed node has not acknowledged the latest version. // (The version may still be larger than versionBeforeChange if the fleet controller sends a @@ -1122,7 +1122,7 @@ public class StateChangeTest extends FleetControllerTest { // Simulate netsplit. Take node down without node booting assertTrue(nodes.get(0).isDistributor()); nodes.get(0).disconnectImmediately(); - waiter.waitForState("version:\\d+ distributor:10 .0.s:d storage:10", timeoutMS); + waiter.waitForState("version:\\d+ distributor:10 .0.s:d storage:10", timeout); // Add node back. nodes.get(0).connect(); @@ -1130,7 +1130,7 @@ public class StateChangeTest extends FleetControllerTest { // At this time, node taken down should have cluster states with all starting timestamps set. Others node should not. for (DummyVdsNode node : nodes) { - node.waitForSystemStateVersion(waiter.getCurrentSystemState().getVersion()); + node.waitForSystemStateVersion(waiter.getCurrentSystemState().getVersion(), timeout); List<ClusterState> states = node.getSystemStatesReceived(); ClusterState lastState = states.get(0); StringBuilder stateHistory = new StringBuilder(); diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java index 86b43f539fb..e7dcf382464 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/StateGatherTest.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.clustercontroller.core; import org.junit.jupiter.api.Test; +import java.time.Instant; import java.util.concurrent.TimeoutException; import java.util.logging.Level; import java.util.logging.Logger; @@ -56,11 +57,11 @@ public class StateGatherTest extends FleetControllerTest { } private void waitUntilTimedOutGetNodeState(DummyVdsNode dnode, DummyVdsNode snode) throws TimeoutException { - long timeout = System.currentTimeMillis() + timeoutMS; + Instant endTime = Instant.now().plus(timeout); synchronized (timer) { while (dnode.timedOutStateReplies != 1 || snode.timedOutStateReplies != 1) { - if (System.currentTimeMillis() > timeout) { - throw new TimeoutException("Did not get to have one timed out within timeout of " + timeoutMS + " ms" + if (Instant.now().isAfter(endTime)) { + throw new TimeoutException("Did not get to have one timed out within timeout of " + timeout + ", " + getGetNodeStateReplyCounts(dnode) + ", " + getGetNodeStateReplyCounts(snode)); } try{ timer.wait(1); } catch (InterruptedException e) { /* ignore */ } @@ -69,9 +70,9 @@ public class StateGatherTest extends FleetControllerTest { } private void waitUntilPendingGetNodeState(DummyVdsNode dnode, DummyVdsNode snode) throws TimeoutException { - long timeout = System.currentTimeMillis() + timeoutMS; + Instant endTime = Instant.now().plus(timeout); while (dnode.getPendingNodeStateCount() != 1 || snode.getPendingNodeStateCount() != 1) { - if (System.currentTimeMillis() > timeout) throw new TimeoutException("Did not get to have one pending within timeout of " + timeoutMS + " ms"); + if (Instant.now().isAfter(endTime)) throw new TimeoutException("Did not get to have one pending within timeout of " + timeout); try{ Thread.sleep(1); } catch (InterruptedException e) { /* ignore */ } } } diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/StateWaiter.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/StateWaiter.java index 4cfab796a4e..d131c330381 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/StateWaiter.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/StateWaiter.java @@ -5,7 +5,7 @@ import com.yahoo.vdslib.state.ClusterState; import com.yahoo.vespa.clustercontroller.core.ClusterStateBundle; import com.yahoo.vespa.clustercontroller.core.FakeTimer; import com.yahoo.vespa.clustercontroller.core.listeners.SystemStateListener; - +import java.time.Duration; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -57,8 +57,8 @@ public class StateWaiter implements SystemStateListener { } } - public void waitForState(String stateRegex, long timeout) { - waitForState(stateRegex, timeout, 0); + public void waitForState(String stateRegex, Duration timeout) { + waitForState(stateRegex, timeout.toMillis(), 0); } /** diff --git a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java index 6336de3d1a3..7960cd7c9d2 100644 --- a/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java +++ b/clustercontroller-core/src/test/java/com/yahoo/vespa/clustercontroller/core/testutils/Waiter.java @@ -1,16 +1,17 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.clustercontroller.core.testutils; -import java.util.logging.Level; import com.yahoo.vdslib.state.ClusterState; import com.yahoo.vdslib.state.Node; import com.yahoo.vespa.clustercontroller.core.DummyVdsNode; import com.yahoo.vespa.clustercontroller.core.FleetController; - +import java.time.Duration; +import java.time.Instant; import java.util.Collections; import java.util.LinkedList; import java.util.List; import java.util.Set; +import java.util.logging.Level; import java.util.logging.Logger; public interface Waiter { @@ -19,18 +20,18 @@ public interface Waiter { Object getMonitor(); FleetController getFleetController(); List<DummyVdsNode> getDummyNodes(); - int getTimeoutMS(); + Duration getTimeout(); } ClusterState waitForState(String state) throws Exception; ClusterState waitForStateInSpace(String space, String state) throws Exception; ClusterState waitForStateInAllSpaces(String state) throws Exception; - ClusterState waitForState(String state, int timeoutMS) throws Exception; + ClusterState waitForState(String state, Duration timeoutMS) throws Exception; ClusterState waitForStableSystem() throws Exception; ClusterState waitForStableSystem(int nodeCount) throws Exception; ClusterState waitForInitProgressPassed(Node n, double progress); ClusterState waitForClusterStateIncludingNodesWithMinUsedBits(int bitcount, int nodecount); - void wait(WaitCondition c, WaitTask wt, int timeoutMS); + void wait(WaitCondition c, WaitTask wt, Duration timeout); class Impl implements Waiter { @@ -42,7 +43,7 @@ public interface Waiter { } // TODO refactor - private ClusterState waitForState(String state, int timeoutMS, boolean checkAllSpaces, Set<String> checkSpaces) { + private ClusterState waitForState(String state, Duration timeoutMS, boolean checkAllSpaces, Set<String> checkSpaces) { LinkedList<DummyVdsNode> nodesToCheck = new LinkedList<>(); for(DummyVdsNode node : data.getDummyNodes()) { if (node.isConnected()) nodesToCheck.add(node); @@ -57,15 +58,15 @@ public interface Waiter { } public ClusterState waitForState(String state) { - return waitForState(state, data.getTimeoutMS()); + return waitForState(state, data.getTimeout()); } public ClusterState waitForStateInAllSpaces(String state) { - return waitForState(state, data.getTimeoutMS(), true, Collections.emptySet()); + return waitForState(state, data.getTimeout(), true, Collections.emptySet()); } public ClusterState waitForStateInSpace(String space, String state) { - return waitForState(state, data.getTimeoutMS(), false, Collections.singleton(space)); + return waitForState(state, data.getTimeout(), false, Collections.singleton(space)); } - public ClusterState waitForState(String state, int timeoutMS) { + public ClusterState waitForState(String state, Duration timeoutMS) { return waitForState(state, timeoutMS, false, Collections.emptySet()); } public ClusterState waitForStableSystem() { @@ -73,24 +74,23 @@ public interface Waiter { } public ClusterState waitForStableSystem(int nodeCount) { WaitCondition.StateWait swc = new WaitCondition.RegexStateMatcher("version:\\d+ distributor:"+nodeCount+" storage:"+nodeCount, data.getFleetController(), data.getMonitor()).includeNotifyingNodes(data.getDummyNodes()); - wait(swc, new WaitTask.StateResender(data.getFleetController()), data.getTimeoutMS()); + wait(swc, new WaitTask.StateResender(data.getFleetController()), data.getTimeout()); return swc.getCurrentState(); } public ClusterState waitForInitProgressPassed(Node n, double progress) { WaitCondition.StateWait swc = new WaitCondition.InitProgressPassedMatcher(n, progress, data.getFleetController(), data.getMonitor()); - wait(swc, new WaitTask.StateResender(data.getFleetController()), data.getTimeoutMS()); + wait(swc, new WaitTask.StateResender(data.getFleetController()), data.getTimeout()); return swc.getCurrentState(); } public ClusterState waitForClusterStateIncludingNodesWithMinUsedBits(int bitcount, int nodecount) { WaitCondition.StateWait swc = new WaitCondition.MinUsedBitsMatcher(bitcount, nodecount, data.getFleetController(), data.getMonitor()); - wait(swc, new WaitTask.StateResender(data.getFleetController()), data.getTimeoutMS()); + wait(swc, new WaitTask.StateResender(data.getFleetController()), data.getTimeout()); return swc.getCurrentState(); } - public final void wait(WaitCondition c, WaitTask wt, int timeoutMS) { + public final void wait(WaitCondition c, WaitTask wt, Duration timeout) { log.log(Level.INFO, "Waiting for " + c + (wt == null ? "" : " with wait task " + wt)); - final long startTime = System.currentTimeMillis(); - final long endTime = startTime + timeoutMS; + Instant endTime = Instant.now().plus(timeout); String lastReason = null; while (true) { synchronized (data.getMonitor()) { @@ -111,11 +111,11 @@ public interface Waiter { allowWait = false; } } - final long timeLeft = endTime - System.currentTimeMillis(); - if (timeLeft <= 0) { - throw new IllegalStateException("Timed out waiting max " + timeoutMS + " ms for " + c + (wt == null ? "" : "\n with wait task " + wt) + ",\n reason: " + reason); - } - if (allowWait) data.getMonitor().wait(wt == null ? WaitTask.defaultTaskFrequencyMillis : Math.min(wt.getWaitTaskFrequencyInMillis(), timeLeft)); + Duration timeLeft = Duration.between(Instant.now(), endTime); + if (timeLeft.isNegative()) + throw new IllegalStateException("Timed out waiting max " + timeout + " ms for " + c + (wt == null ? "" : "\n with wait task " + wt) + ",\n reason: " + reason); + if (allowWait) + data.getMonitor().wait(wt == null ? WaitTask.defaultTaskFrequencyMillis : Math.min(wt.getWaitTaskFrequencyInMillis(), timeLeft.toMillis())); } catch (InterruptedException e) { } } |