summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorValerij Fredriksen <valerij92@gmail.com>2019-02-09 10:31:58 +0100
committerValerij Fredriksen <valerij92@gmail.com>2019-02-09 11:07:49 +0100
commit7bf67793bac909c047f994e922f19d647a464fec (patch)
tree1323115a21672e8efcb826b692b3c1b3c4402271
parentfb8a826ee839d5f24fc18e4b36206e8c160386a4 (diff)
Add delayed scheduling of NodeAgentContext
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java33
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManager.java12
-rw-r--r--node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentScheduler.java5
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java2
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java2
-rw-r--r--node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManagerTest.java22
6 files changed, 55 insertions, 21 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
index 2303f78217c..288003ade3c 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java
@@ -38,6 +38,7 @@ import java.util.stream.Collectors;
public class NodeAdminImpl implements NodeAdmin {
private static final PrefixLogger logger = PrefixLogger.getNodeAdminLogger(NodeAdmin.class);
private static final Duration NODE_AGENT_FREEZE_TIMEOUT = Duration.ofSeconds(5);
+ private static final Duration NODE_AGENT_SPREAD = Duration.ofSeconds(3);
private final ScheduledExecutorService aclScheduler =
Executors.newScheduledThreadPool(1, ThreadFactoryFactory.getDaemonThreadFactory("aclscheduler"));
@@ -49,6 +50,8 @@ public class NodeAdminImpl implements NodeAdmin {
private final Optional<AclMaintainer> aclMaintainer;
private final Clock clock;
+ private final Duration freezeTimeout;
+ private final Duration spread;
private boolean previousWantFrozen;
private boolean isFrozen;
private Instant startOfFreezeConvergence;
@@ -64,19 +67,25 @@ public class NodeAdminImpl implements NodeAdmin {
MetricReceiverWrapper metricReceiver,
Clock clock) {
this((NodeAgentWithSchedulerFactory) nodeAgentContext -> create(clock, nodeAgentFactory, nodeAgentContext),
- nodeAgentContextFactory, aclMaintainer, metricReceiver, clock);
+ nodeAgentContextFactory, aclMaintainer, metricReceiver, clock, NODE_AGENT_FREEZE_TIMEOUT, NODE_AGENT_SPREAD);
+ }
+
+ public NodeAdminImpl(NodeAgentFactory nodeAgentFactory, NodeAgentContextFactory nodeAgentContextFactory,
+ Optional<AclMaintainer> aclMaintainer, MetricReceiverWrapper metricReceiver, Clock clock, Duration freezeTimeout, Duration spread) {
+ this((NodeAgentWithSchedulerFactory) nodeAgentContext -> create(clock, nodeAgentFactory, nodeAgentContext),
+ nodeAgentContextFactory, aclMaintainer, metricReceiver, clock, freezeTimeout, spread);
}
NodeAdminImpl(NodeAgentWithSchedulerFactory nodeAgentWithSchedulerFactory,
- NodeAgentContextFactory nodeAgentContextFactory,
- Optional<AclMaintainer> aclMaintainer,
- MetricReceiverWrapper metricReceiver,
- Clock clock) {
+ NodeAgentContextFactory nodeAgentContextFactory, Optional<AclMaintainer> aclMaintainer, MetricReceiverWrapper metricReceiver,
+ Clock clock, Duration freezeTimeout, Duration spread) {
this.nodeAgentWithSchedulerFactory = nodeAgentWithSchedulerFactory;
this.nodeAgentContextFactory = nodeAgentContextFactory;
this.aclMaintainer = aclMaintainer;
this.clock = clock;
+ this.freezeTimeout = freezeTimeout;
+ this.spread = spread;
this.previousWantFrozen = true;
this.isFrozen = true;
this.startOfFreezeConvergence = clock.instant();
@@ -102,10 +111,14 @@ public class NodeAdminImpl implements NodeAdmin {
nodeAgentWithSchedulerByHostname.put(hostname, naws);
});
+ Duration timeBetweenNodeAgents = spread.dividedBy(Math.max(nodeAgentContextsByHostname.size() - 1, 1));
+ Instant nextAgentStart = clock.instant();
// At this point, nodeAgentContextsByHostname and nodeAgentWithSchedulerByHostname should have the same keys
- nodeAgentContextsByHostname.forEach((hostname, context) ->
- nodeAgentWithSchedulerByHostname.get(hostname).scheduleTickWith(context)
- );
+ for (String hostname : nodeAgentContextsByHostname.keySet()) {
+ NodeAgentContext context = nodeAgentContextsByHostname.get(hostname);
+ nodeAgentWithSchedulerByHostname.get(hostname).scheduleTickWith(context, nextAgentStart);
+ nextAgentStart = nextAgentStart.plus(timeBetweenNodeAgents);
+ }
}
private void updateNodeAgentMetrics() {
@@ -135,7 +148,7 @@ public class NodeAdminImpl implements NodeAdmin {
// Use filter with count instead of allMatch() because allMatch() will short circuit on first non-match
boolean allNodeAgentsConverged = nodeAgentWithSchedulerByHostname.values().parallelStream()
- .filter(nodeAgentScheduler -> !nodeAgentScheduler.setFrozen(wantFrozen, NODE_AGENT_FREEZE_TIMEOUT))
+ .filter(nodeAgentScheduler -> !nodeAgentScheduler.setFrozen(wantFrozen, freezeTimeout))
.count() == 0;
if (wantFrozen) {
@@ -232,7 +245,7 @@ public class NodeAdminImpl implements NodeAdmin {
@Override public boolean isDownloadingImage() { return nodeAgent.isDownloadingImage(); }
@Override public int getAndResetNumberOfUnhandledExceptions() { return nodeAgent.getAndResetNumberOfUnhandledExceptions(); }
- @Override public void scheduleTickWith(NodeAgentContext context) { nodeAgentScheduler.scheduleTickWith(context); }
+ @Override public void scheduleTickWith(NodeAgentContext context, Instant at) { nodeAgentScheduler.scheduleTickWith(context, at); }
@Override public boolean setFrozen(boolean frozen, Duration timeout) { return nodeAgentScheduler.setFrozen(frozen, timeout); }
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManager.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManager.java
index 54f357d5f29..237b7d0daf7 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManager.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManager.java
@@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.node.admin.nodeagent;
import java.time.Clock;
import java.time.Duration;
+import java.time.Instant;
import java.util.Objects;
/**
@@ -17,6 +18,7 @@ public class NodeAgentContextManager implements NodeAgentContextSupplier, NodeAg
private NodeAgentContext currentContext;
private NodeAgentContext nextContext;
+ private Instant nextContextAt;
private boolean wantFrozen = false;
private boolean isFrozen = true;
private boolean pendingInterrupt = false;
@@ -27,9 +29,10 @@ public class NodeAgentContextManager implements NodeAgentContextSupplier, NodeAg
}
@Override
- public void scheduleTickWith(NodeAgentContext context) {
+ public void scheduleTickWith(NodeAgentContext context, Instant at) {
synchronized (monitor) {
nextContext = Objects.requireNonNull(context);
+ nextContextAt = Objects.requireNonNull(at);
monitor.notifyAll(); // Notify of new context
}
}
@@ -58,14 +61,17 @@ public class NodeAgentContextManager implements NodeAgentContextSupplier, NodeAg
@Override
public NodeAgentContext nextContext() throws InterruptedException {
synchronized (monitor) {
- while (setAndGetIsFrozen(wantFrozen) || nextContext == null) {
+ Duration untilNextContext = Duration.ZERO;
+ while (setAndGetIsFrozen(wantFrozen) ||
+ nextContext == null ||
+ (untilNextContext = Duration.between(Instant.now(), nextContextAt)).toMillis() > 0) {
if (pendingInterrupt) {
pendingInterrupt = false;
throw new InterruptedException("interrupt() was called before next context was scheduled");
}
try {
- monitor.wait(); // Wait until scheduler provides a new context
+ monitor.wait(Math.max(untilNextContext.toMillis(), 0L)); // Wait until scheduler provides a new context
} catch (InterruptedException ignored) { }
}
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentScheduler.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentScheduler.java
index 540601ffa4f..a5daab8dcfd 100644
--- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentScheduler.java
+++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentScheduler.java
@@ -2,14 +2,15 @@
package com.yahoo.vespa.hosted.node.admin.nodeagent;
import java.time.Duration;
+import java.time.Instant;
/**
* @author freva
*/
public interface NodeAgentScheduler {
- /** Schedule a tick for NodeAgent to run with the given NodeAgentContext */
- void scheduleTickWith(NodeAgentContext context);
+ /** Schedule a tick for NodeAgent to run with the given NodeAgentContext, at no earlier than given instant */
+ void scheduleTickWith(NodeAgentContext context, Instant at);
/**
* Will eventually freeze/unfreeze the node agent
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java
index 88cc833f1f8..e475e9a53c2 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerTester.java
@@ -98,7 +98,7 @@ public class DockerTester implements AutoCloseable {
Optional.empty(), Optional.empty(), Optional.empty());
NodeAgentContextFactory nodeAgentContextFactory = nodeSpec ->
new NodeAgentContextImpl.Builder(nodeSpec).fileSystem(fileSystem).build();
- nodeAdmin = new NodeAdminImpl(nodeAgentFactory, nodeAgentContextFactory, Optional.empty(), mr, Clock.systemUTC());
+ nodeAdmin = new NodeAdminImpl(nodeAgentFactory, nodeAgentContextFactory, Optional.empty(), mr, Clock.systemUTC(), Duration.ofMillis(10), Duration.ZERO);
nodeAdminStateUpdater = new NodeAdminStateUpdater(nodeRepository, orchestrator,
nodeAdmin, HOST_HOSTNAME);
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java
index 47e220a968b..f8e87ccef53 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java
@@ -45,7 +45,7 @@ public class NodeAdminImplTest {
private final ManualClock clock = new ManualClock();
private final NodeAdminImpl nodeAdmin = new NodeAdminImpl(nodeAgentWithSchedulerFactory, nodeAgentContextFactory,
- Optional.empty(), new MetricReceiverWrapper(MetricReceiver.nullImplementation), clock);
+ Optional.empty(), new MetricReceiverWrapper(MetricReceiver.nullImplementation), clock, Duration.ZERO, Duration.ZERO);
@Test
public void nodeAgentsAreProperlyLifeCycleManaged() {
diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManagerTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManagerTest.java
index f32e3d91e34..5aeccb4ab7d 100644
--- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManagerTest.java
+++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManagerTest.java
@@ -5,6 +5,7 @@ import org.junit.Test;
import java.time.Clock;
import java.time.Duration;
+import java.time.Instant;
import java.util.Optional;
import static org.junit.Assert.assertEquals;
@@ -26,7 +27,7 @@ public class NodeAgentContextManagerTest {
@Test(timeout = TIMEOUT)
public void returns_immediately_if_next_context_is_ready() throws InterruptedException {
NodeAgentContext context1 = generateContext();
- manager.scheduleTickWith(context1);
+ manager.scheduleTickWith(context1, clock.instant());
assertSame(initialContext, manager.currentContext());
assertSame(context1, manager.nextContext());
@@ -34,6 +35,19 @@ public class NodeAgentContextManagerTest {
}
@Test(timeout = TIMEOUT)
+ public void returns_no_earlier_than_at_given_time() throws InterruptedException {
+ NodeAgentContext context1 = generateContext();
+ Instant returnAt = clock.instant().plusMillis(500);
+ manager.scheduleTickWith(context1, returnAt);
+
+ assertSame(initialContext, manager.currentContext());
+ assertSame(context1, manager.nextContext());
+ assertSame(context1, manager.currentContext());
+ // Is accurate to a millisecond
+ assertFalse(clock.instant().plusMillis(1).isBefore(returnAt));
+ }
+
+ @Test(timeout = TIMEOUT)
public void blocks_in_nextContext_until_one_is_scheduled() throws InterruptedException {
AsyncExecutor<NodeAgentContext> async = new AsyncExecutor<>(manager::nextContext);
assertFalse(async.response.isPresent());
@@ -41,7 +55,7 @@ public class NodeAgentContextManagerTest {
assertFalse(async.response.isPresent());
NodeAgentContext context1 = generateContext();
- manager.scheduleTickWith(context1);
+ manager.scheduleTickWith(context1, clock.instant());
async.awaitResult();
assertEquals(Optional.of(context1), async.response);
@@ -68,7 +82,7 @@ public class NodeAgentContextManagerTest {
// Generate new context and get it from the supplier, this completes the unfreeze
NodeAgentContext context1 = generateContext();
- manager.scheduleTickWith(context1);
+ manager.scheduleTickWith(context1, clock.instant());
assertSame(context1, manager.nextContext());
assertTrue(manager.setFrozen(false, Duration.ZERO));
@@ -91,7 +105,7 @@ public class NodeAgentContextManagerTest {
assertFalse(async.response.isPresent());
NodeAgentContext context1 = generateContext();
- manager.scheduleTickWith(context1);
+ manager.scheduleTickWith(context1, clock.instant());
assertSame(context1, manager.nextContext());
async.awaitResult();