diff options
author | Valerij Fredriksen <valerijf@oath.com> | 2019-01-09 16:48:20 +0100 |
---|---|---|
committer | Valerij Fredriksen <valerijf@oath.com> | 2019-01-09 23:40:53 +0100 |
commit | ecae1271b5d13f53f3849561f1f8df6b585e084a (patch) | |
tree | 906f041dd7fcc8762b84483a678649f74b7b789d /node-admin | |
parent | 006571441a46020ecdef746cb12b76a056cd0948 (diff) |
Add timeout to setFrozen()
Diffstat (limited to 'node-admin')
4 files changed, 48 insertions, 18 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java index 801376c23c7..2303f78217c 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImpl.java @@ -37,6 +37,8 @@ import java.util.stream.Collectors; */ public class NodeAdminImpl implements NodeAdmin { private static final PrefixLogger logger = PrefixLogger.getNodeAdminLogger(NodeAdmin.class); + private static final Duration NODE_AGENT_FREEZE_TIMEOUT = Duration.ofSeconds(5); + private final ScheduledExecutorService aclScheduler = Executors.newScheduledThreadPool(1, ThreadFactoryFactory.getDaemonThreadFactory("aclscheduler")); private final ScheduledExecutorService metricsScheduler = @@ -61,7 +63,7 @@ public class NodeAdminImpl implements NodeAdmin { Optional<AclMaintainer> aclMaintainer, MetricReceiverWrapper metricReceiver, Clock clock) { - this((NodeAgentWithSchedulerFactory) nodeAgentContext -> create(nodeAgentFactory, nodeAgentContext), + this((NodeAgentWithSchedulerFactory) nodeAgentContext -> create(clock, nodeAgentFactory, nodeAgentContext), nodeAgentContextFactory, aclMaintainer, metricReceiver, clock); } @@ -132,8 +134,8 @@ public class NodeAdminImpl implements NodeAdmin { } // Use filter with count instead of allMatch() because allMatch() will short circuit on first non-match - boolean allNodeAgentsConverged = nodeAgentWithSchedulerByHostname.values().stream() - .filter(nodeAgentScheduler -> !nodeAgentScheduler.setFrozen(wantFrozen)) + boolean allNodeAgentsConverged = nodeAgentWithSchedulerByHostname.values().parallelStream() + .filter(nodeAgentScheduler -> !nodeAgentScheduler.setFrozen(wantFrozen, NODE_AGENT_FREEZE_TIMEOUT)) .count() == 0; if (wantFrozen) { @@ -231,7 +233,7 @@ public class NodeAdminImpl implements NodeAdmin { @Override 
public int getAndResetNumberOfUnhandledExceptions() { return nodeAgent.getAndResetNumberOfUnhandledExceptions(); } @Override public void scheduleTickWith(NodeAgentContext context) { nodeAgentScheduler.scheduleTickWith(context); } - @Override public boolean setFrozen(boolean frozen) { return nodeAgentScheduler.setFrozen(frozen); } + @Override public boolean setFrozen(boolean frozen, Duration timeout) { return nodeAgentScheduler.setFrozen(frozen, timeout); } } @FunctionalInterface @@ -239,8 +241,8 @@ public class NodeAdminImpl implements NodeAdmin { NodeAgentWithScheduler create(NodeAgentContext context); } - private static NodeAgentWithScheduler create(NodeAgentFactory nodeAgentFactory, NodeAgentContext context) { - NodeAgentContextManager contextManager = new NodeAgentContextManager(context); + private static NodeAgentWithScheduler create(Clock clock, NodeAgentFactory nodeAgentFactory, NodeAgentContext context) { + NodeAgentContextManager contextManager = new NodeAgentContextManager(clock, context); NodeAgent nodeAgent = nodeAgentFactory.create(contextManager); return new NodeAgentWithScheduler(nodeAgent, contextManager); } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManager.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManager.java index 245652ed927..54f357d5f29 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManager.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentContextManager.java @@ -1,58 +1,73 @@ // Copyright 2019 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.node.admin.nodeagent; +import java.time.Clock; +import java.time.Duration; import java.util.Objects; /** + * This class should be used by exactly 2 threads, 1 for each interface it implements. 
+ * * @author freva */ public class NodeAgentContextManager implements NodeAgentContextSupplier, NodeAgentScheduler { + private final Object monitor = new Object(); + private final Clock clock; + private NodeAgentContext currentContext; private NodeAgentContext nextContext; private boolean wantFrozen = false; private boolean isFrozen = true; private boolean pendingInterrupt = false; - public NodeAgentContextManager(NodeAgentContext context) { - currentContext = context; + public NodeAgentContextManager(Clock clock, NodeAgentContext context) { + this.clock = clock; + this.currentContext = context; } @Override public void scheduleTickWith(NodeAgentContext context) { synchronized (monitor) { nextContext = Objects.requireNonNull(context); - monitor.notifyAll(); + monitor.notifyAll(); // Notify of new context } } @Override - public boolean setFrozen(boolean frozen) { + public boolean setFrozen(boolean frozen, Duration timeout) { synchronized (monitor) { if (wantFrozen != frozen) { wantFrozen = frozen; - monitor.notifyAll(); + monitor.notifyAll(); // Notify the supplier of the wantFrozen change + } + + boolean successful; + long remainder; + long end = clock.instant().plus(timeout).toEpochMilli(); + while (!(successful = isFrozen == frozen) && (remainder = end - clock.millis()) > 0) { + try { + monitor.wait(remainder); // Wait with timeout until the supplier is has reached wanted frozen state + } catch (InterruptedException ignored) { } } - return isFrozen == frozen; + return successful; } } @Override public NodeAgentContext nextContext() throws InterruptedException { synchronized (monitor) { - isFrozen = true; - while (nextContext == null) { + while (setAndGetIsFrozen(wantFrozen) || nextContext == null) { if (pendingInterrupt) { pendingInterrupt = false; throw new InterruptedException("interrupt() was called before next context was scheduled"); } try { - monitor.wait(); + monitor.wait(); // Wait until scheduler provides a new context } catch (InterruptedException 
ignored) { } } - isFrozen = false; currentContext = nextContext; nextContext = null; @@ -74,4 +89,14 @@ public class NodeAgentContextManager implements NodeAgentContextSupplier, NodeAg monitor.notifyAll(); } } + + private boolean setAndGetIsFrozen(boolean isFrozen) { + synchronized (monitor) { + if (this.isFrozen != isFrozen) { + this.isFrozen = isFrozen; + monitor.notifyAll(); // Notify the scheduler of the isFrozen change + } + return this.isFrozen; + } + } }
\ No newline at end of file diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentScheduler.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentScheduler.java index 3806c3d6bf3..540601ffa4f 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentScheduler.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/nodeagent/NodeAgentScheduler.java @@ -1,6 +1,8 @@ // Copyright 2019 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.node.admin.nodeagent; +import java.time.Duration; + /** * @author freva */ @@ -12,7 +14,8 @@ public interface NodeAgentScheduler { /** * Will eventually freeze/unfreeze the node agent * @param frozen whether node agent should be frozen + * @param timeout maximum duration this method should block while waiting for NodeAgent to reach target state * @return True if node agent has converged to the desired state */ - boolean setFrozen(boolean frozen); + boolean setFrozen(boolean frozen, Duration timeout); } diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java index 766638b94cb..47e220a968b 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/nodeadmin/NodeAdminImplTest.java @@ -156,7 +156,7 @@ public class NodeAdminImplTest { private void mockNodeAgentSetFrozenResponse(List<NodeAgentWithScheduler> nodeAgents, boolean... responses) { for (int i = 0; i < nodeAgents.size(); i++) { NodeAgentWithScheduler nodeAgent = nodeAgents.get(i); - when(nodeAgent.setFrozen(anyBoolean())).thenReturn(responses[i]); + when(nodeAgent.setFrozen(anyBoolean(), any())).thenReturn(responses[i]); } } |