diff options
author | Valerij Fredriksen <valerij92@gmail.com> | 2019-02-09 10:31:58 +0100 |
---|---|---|
committer | Valerij Fredriksen <valerij92@gmail.com> | 2019-02-09 11:07:49 +0100 |
commit | 7bf67793bac909c047f994e922f19d647a464fec (patch) | |
tree | 1323115a21672e8efcb826b692b3c1b3c4402271 /node-admin | |
parent | fb8a826ee839d5f24fc18e4b36206e8c160386a4 (diff) |
Add delayed scheduling of NodeAgentContext
Diffstat (limited to 'node-admin')
6 files changed, 55 insertions, 21 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java index 2303f78217c..288003ade3c 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java @@ -38,6 +38,7 @@ import java.util.stream.Collectors; public class NodeAdminImpl implements NodeAdmin { private static final PrefixLogger logger = PrefixLogger.getNodeAdminLogger(NodeAdmin.class); private static final Duration NODE_AGENT_FREEZE_TIMEOUT = Duration.ofSeconds(5); + private static final Duration NODE_AGENT_SPREAD = Duration.ofSeconds(3); private final ScheduledExecutorService aclScheduler = Executors.newScheduledThreadPool(1, ThreadFactoryFactory.getDaemonThreadFactory("aclscheduler")); @@ -49,6 +50,8 @@ public class NodeAdminImpl implements NodeAdmin { private final Optional<AclMaintainer> aclMaintainer; private final Clock clock; + private final Duration freezeTimeout; + private final Duration spread; private boolean previousWantFrozen; private boolean isFrozen; private Instant startOfFreezeConvergence; @@ -64,19 +67,25 @@ public class NodeAdminImpl implements NodeAdmin { MetricReceiverWrapper metricReceiver, Clock clock) { this((NodeAgentWithSchedulerFactory) nodeAgentContext -> create(clock, nodeAgentFactory, nodeAgentContext), - nodeAgentContextFactory, aclMaintainer, metricReceiver, clock); + nodeAgentContextFactory, aclMaintainer, metricReceiver, clock, NODE_AGENT_FREEZE_TIMEOUT, NODE_AGENT_SPREAD); + } + + public NodeAdminImpl(NodeAgentFactory nodeAgentFactory, NodeAgentContextFactory nodeAgentContextFactory, + Optional<AclMaintainer> aclMaintainer, MetricReceiverWrapper metricReceiver, Clock clock, Duration freezeTimeout, Duration spread) { + this((NodeAgentWithSchedulerFactory) nodeAgentContext -> create(clock, nodeAgentFactory, nodeAgentContext), + nodeAgentContextFactory, aclMaintainer, metricReceiver, clock, freezeTimeout, spread); } NodeAdminImpl(NodeAgentWithSchedulerFactory nodeAgentWithSchedulerFactory, - NodeAgentContextFactory nodeAgentContextFactory, - Optional<AclMaintainer> aclMaintainer, - MetricReceiverWrapper metricReceiver, - Clock clock) { + NodeAgentContextFactory nodeAgentContextFactory, Optional<AclMaintainer> aclMaintainer, MetricReceiverWrapper metricReceiver, + Clock clock, Duration freezeTimeout, Duration spread) { this.nodeAgentWithSchedulerFactory = nodeAgentWithSchedulerFactory; this.nodeAgentContextFactory = nodeAgentContextFactory; this.aclMaintainer = aclMaintainer; this.clock = clock; + this.freezeTimeout = freezeTimeout; + this.spread = spread; this.previousWantFrozen = true; this.isFrozen = true; this.startOfFreezeConvergence = clock.instant(); @@ -102,10 +111,14 @@ public class NodeAdminImpl implements NodeAdmin { nodeAgentWithSchedulerByHostname.put(hostname, naws); }); + Duration timeBetweenNodeAgents = spread.dividedBy(Math.max(nodeAgentContextsByHostname.size() - 1, 1)); + Instant nextAgentStart = clock.instant(); // At this point, nodeAgentContextsByHostname and nodeAgentWithSchedulerByHostname should have the same keys - nodeAgentContextsByHostname.forEach((hostname, context) -> - nodeAgentWithSchedulerByHostname.get(hostname).scheduleTickWith(context) - ); + for (String hostname : nodeAgentContextsByHostname.keySet()) { + NodeAgentContext context = nodeAgentContextsByHostname.get(hostname); + nodeAgentWithSchedulerByHostname.get(hostname).scheduleTickWith(context, nextAgentStart); + nextAgentStart = nextAgentStart.plus(timeBetweenNodeAgents); + } } private void updateNodeAgentMetrics() { @@ -135,7 +148,7 @@ public class NodeAdminImpl implements NodeAdmin { // Use filter with count instead of allMatch() because allMatch() will short circuit on first non-match boolean allNodeAgentsConverged = nodeAgentWithSchedulerByHostname.values().parallelStream() - .filter(nodeAgentScheduler -> !nodeAgentScheduler.setFrozen(wantFrozen, NODE_AGENT_FREEZE_TIMEOUT)) + .filter(nodeAgentScheduler -> !nodeAgentScheduler.setFrozen(wantFrozen, freezeTimeout)) .count() == 0; if (wantFrozen) { @@ -232,7 +245,7 @@ public class NodeAdminImpl implements NodeAdmin { @Override public boolean isDownloadingImage() { return nodeAgent.isDownloadingImage(); } @Override public int getAndResetNumberOfUnhandledExceptions() { return nodeAgent.getAndResetNumberOfUnhandledExceptions(); } - @Override public void scheduleTickWith(NodeAgentContext context) { nodeAgentScheduler.scheduleTickWith(context); } + @Override public void scheduleTickWith(NodeAgentContext context, Instant at) { nodeAgentScheduler.scheduleTickWith(context, at); } @Override public boolean setFrozen(boolean frozen, Duration timeout) { return nodeAgentScheduler.setFrozen(frozen, timeout); } } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManager.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManager.java index 54f357d5f29..237b7d0daf7 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManager.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManager.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.node.admin.nodeagent; import java.time.Clock; import java.time.Duration; +import java.time.Instant; import java.util.Objects; /** @@ -17,6 +18,7 @@ public class NodeAgentContextManager implements NodeAgentContextSupplier, NodeAg private NodeAgentContext currentContext; private NodeAgentContext nextContext; + private Instant nextContextAt; private boolean wantFrozen = false; private boolean isFrozen = true; private boolean pendingInterrupt = false; @@ -27,9 +29,10 @@ public class NodeAgentContextManager implements NodeAgentContextSupplier, NodeAg } @Override - public void scheduleTickWith(NodeAgentContext context) { + public void scheduleTickWith(NodeAgentContext context, Instant at) { synchronized (monitor) { nextContext = Objects.requireNonNull(context); + nextContextAt = Objects.requireNonNull(at); monitor.notifyAll(); // Notify of new context } } @@ -58,14 +61,17 @@ public class NodeAgentContextManager implements NodeAgentContextSupplier, NodeAg @Override public NodeAgentContext nextContext() throws InterruptedException { synchronized (monitor) { - while (setAndGetIsFrozen(wantFrozen) || nextContext == null) { + Duration untilNextContext = Duration.ZERO; + while (setAndGetIsFrozen(wantFrozen) || + nextContext == null || + (untilNextContext = Duration.between(Instant.now(), nextContextAt)).toMillis() > 0) { if (pendingInterrupt) { pendingInterrupt = false; throw new InterruptedException("interrupt() was called before next context was scheduled"); } try { - monitor.wait(); // Wait until scheduler provides a new context + monitor.wait(Math.max(untilNextContext.toMillis(), 0L)); // Wait until scheduler provides a new context } catch (InterruptedException ignored) { } } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentScheduler.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentScheduler.java index 540601ffa4f..a5daab8dcfd 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentScheduler.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentScheduler.java @@ -2,14 +2,15 @@ package com.yahoo.vespa.hosted.node.admin.nodeagent; import java.time.Duration; +import java.time.Instant; /** * @author freva */ public interface NodeAgentScheduler { - /** Schedule a tick for NodeAgent to run with the given NodeAgentContext */ - void scheduleTickWith(NodeAgentContext context); + /** Schedule a tick for NodeAgent to run with the given NodeAgentContext, at no earlier than given instant */ + void scheduleTickWith(NodeAgentContext context, Instant at); /** * Will eventually freeze/unfreeze the node agent diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java index 88cc833f1f8..e475e9a53c2 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java @@ -98,7 +98,7 @@ public class DockerTester implements AutoCloseable { Optional.empty(), Optional.empty(), Optional.empty()); NodeAgentContextFactory nodeAgentContextFactory = nodeSpec -> new NodeAgentContextImpl.Builder(nodeSpec).fileSystem(fileSystem).build(); - nodeAdmin = new NodeAdminImpl(nodeAgentFactory, nodeAgentContextFactory, Optional.empty(), mr, Clock.systemUTC()); + nodeAdmin = new NodeAdminImpl(nodeAgentFactory, nodeAgentContextFactory, Optional.empty(), mr, Clock.systemUTC(), Duration.ofMillis(10), Duration.ZERO); nodeAdminStateUpdater = new NodeAdminStateUpdater(nodeRepository, orchestrator, nodeAdmin, HOST_HOSTNAME); diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java index 47e220a968b..f8e87ccef53 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java @@ -45,7 +45,7 @@ public class NodeAdminImplTest { private final ManualClock clock = new ManualClock(); private final NodeAdminImpl nodeAdmin = new NodeAdminImpl(nodeAgentWithSchedulerFactory, nodeAgentContextFactory, - Optional.empty(), new MetricReceiverWrapper(MetricReceiver.nullImplementation), clock); + Optional.empty(), new MetricReceiverWrapper(MetricReceiver.nullImplementation), clock, Duration.ZERO, Duration.ZERO); @Test public void nodeAgentsAreProperlyLifeCycleManaged() { diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManagerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManagerTest.java index f32e3d91e34..5aeccb4ab7d 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManagerTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManagerTest.java @@ -5,6 +5,7 @@ import org.junit.Test; import java.time.Clock; import java.time.Duration; +import java.time.Instant; import java.util.Optional; import static org.junit.Assert.assertEquals; @@ -26,7 +27,7 @@ public class NodeAgentContextManagerTest { @Test(timeout = TIMEOUT) public void returns_immediately_if_next_context_is_ready() throws InterruptedException { NodeAgentContext context1 = generateContext(); - manager.scheduleTickWith(context1); + manager.scheduleTickWith(context1, clock.instant()); assertSame(initialContext, manager.currentContext()); assertSame(context1, manager.nextContext()); @@ -34,6 +35,19 @@ public class NodeAgentContextManagerTest { } @Test(timeout = TIMEOUT) + public void returns_no_earlier_than_at_given_time() throws InterruptedException { + NodeAgentContext context1 = generateContext(); + Instant returnAt = clock.instant().plusMillis(500); + manager.scheduleTickWith(context1, returnAt); + + assertSame(initialContext, manager.currentContext()); + assertSame(context1, manager.nextContext()); + assertSame(context1, manager.currentContext()); + // Is accurate to a millisecond + assertFalse(clock.instant().plusMillis(1).isBefore(returnAt)); + } + + @Test(timeout = TIMEOUT) public void blocks_in_nextContext_until_one_is_scheduled() throws InterruptedException { AsyncExecutor<NodeAgentContext> async = new AsyncExecutor<>(manager::nextContext); assertFalse(async.response.isPresent()); @@ -41,7 +55,7 @@ public class NodeAgentContextManagerTest { assertFalse(async.response.isPresent()); NodeAgentContext context1 = generateContext(); - manager.scheduleTickWith(context1); + manager.scheduleTickWith(context1, clock.instant()); async.awaitResult(); assertEquals(Optional.of(context1), async.response); @@ -68,7 +82,7 @@ public class NodeAgentContextManagerTest { // Generate new context and get it from the supplier, this completes the unfreeze NodeAgentContext context1 = generateContext(); - manager.scheduleTickWith(context1); + manager.scheduleTickWith(context1, clock.instant()); assertSame(context1, manager.nextContext()); assertTrue(manager.setFrozen(false, Duration.ZERO)); @@ -91,7 +105,7 @@ public class NodeAgentContextManagerTest { assertFalse(async.response.isPresent()); NodeAgentContext context1 = generateContext(); - manager.scheduleTickWith(context1); + manager.scheduleTickWith(context1, clock.instant()); assertSame(context1, manager.nextContext()); async.awaitResult(); |