about | summary | refs | log | tree | commit | diff | stats
path: root/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InfraApplicationRedeployerTest.java
diff options
context:
space:
mode:
Diffstat (limited to 'node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InfraApplicationRedeployerTest.java')
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InfraApplicationRedeployerTest.java 172
1 files changed, 172 insertions, 0 deletions
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InfraApplicationRedeployerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InfraApplicationRedeployerTest.java
new file mode 100644
index 00000000000..7a8129ad275
--- /dev/null
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InfraApplicationRedeployerTest.java
@@ -0,0 +1,172 @@
+package com.yahoo.vespa.hosted.provision.maintenance;
+
+import com.yahoo.concurrent.UncheckedTimeoutException;
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.Deployment;
+import com.yahoo.config.provision.Flavor;
+import com.yahoo.config.provision.InfraDeployer;
+import com.yahoo.config.provision.NodeResources;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.transaction.Mutex;
+import com.yahoo.vespa.applicationmodel.InfrastructureApplication;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.Node.State;
+import com.yahoo.vespa.hosted.provision.NodeList;
+import com.yahoo.vespa.hosted.provision.node.IP;
+import org.junit.jupiter.api.Test;
+
+import java.util.List;
+import java.util.Optional;
+import java.util.Queue;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.Phaser;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.function.Supplier;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.fail;
+
+/**
+ * @author jonmv
+ */
+class InfraApplicationRedeployerTest {
+
+ // IDs of the infrastructure applications referenced by the expectations in these tests.
+ // Config server host application.
+ private static final ApplicationId cfghost = InfrastructureApplication.CONFIG_SERVER_HOST.id();
+ // Config server application.
+ private static final ApplicationId cfg = InfrastructureApplication.CONFIG_SERVER.id();
+ // Tenant host application.
+ private static final ApplicationId tenanthost = InfrastructureApplication.TENANT_HOST.id();
+
+ /**
+  * Triggers the redeployer many times, from both the test thread and from within a deployment,
+  * and checks that locks are taken and deployments are made in exactly the declared order.
+  *
+  * The expected lock acquisitions and deployments are declared up front through {@code locks} and
+  * {@code deployer}; the phaser and latches are only used to rendezvous with the redeployer.
+  * A timed-out rendezvous is reported as a failure, but only after {@code verify()} has had the
+  * chance to rethrow a failure recorded during an invocation, since that is the more precise diagnosis.
+  */
+ @Test
+ void testMultiTriggering() throws InterruptedException {
+     TestLocks locks = new TestLocks();
+     List<Node> nodes = new CopyOnWriteArrayList<>();
+     TestInfraDeployer deployer = new TestInfraDeployer();
+     InfraApplicationRedeployer redeployer = new InfraApplicationRedeployer(deployer, locks::get, () -> NodeList.copyOf(nodes));
+     Phaser intro = new Phaser(2);
+     CountDownLatch intermezzo = new CountDownLatch(1), outro = new CountDownLatch(1);
+
+     // First run does nothing, as no nodes are ready after all, but several new runs are triggered as this ends.
+     locks.expect(tenanthost, () -> () -> { intro.arriveAndAwaitAdvance(); intro.arriveAndAwaitAdvance(); });
+     redeployer.readied(NodeType.host);
+     intro.arriveAndAwaitAdvance(); // Wait for redeployer to start, before setting up more state.
+     // Before re-triggered events from first tenanthost run, we also trigger for confighost, which should then run before those.
+     locks.expect(cfghost, () -> () -> { });
+     redeployer.readied(NodeType.confighost);
+     for (int i = 0; i < 10000; i++) redeployer.readied(NodeType.host);
+     nodes.add(node("host", NodeType.host, State.ready));
+     // Re-run for tenanthost clears host from ready, and next run does nothing.
+     deployer.expect(tenanthost, () -> {
+         nodes.clear();
+         return Optional.empty();
+     });
+     locks.expect(tenanthost, () -> intermezzo::countDown);
+     intro.arriveAndAwaitAdvance(); // Let redeployer continue.
+     // Rendezvous with last, no-op tenanthost redeployment; remember whether it actually happened.
+     boolean intermezzoReached = intermezzo.await(10, TimeUnit.SECONDS);
+     locks.verify();
+     deployer.verify();
+     // Checked after verify(), so a recorded expectation failure is reported in preference to the timeout.
+     if ( ! intermezzoReached) fail("Timed out waiting for the last, no-op tenanthost redeployment");
+
+     // Confighost is triggered again with one ready host. Both applications deploy, and a new trigger redeploys neither.
+     locks.expect(cfghost, () -> () -> { });
+     locks.expect(cfg, () -> () -> { });
+     nodes.add(node("cfghost", NodeType.confighost, State.ready));
+     deployer.expect(cfghost, () -> {
+         nodes.clear();
+         return Optional.empty();
+     });
+     deployer.expect(cfg, () -> {
+         redeployer.readied(NodeType.confighost);
+         return Optional.empty();
+     });
+     locks.expect(cfghost, () -> outro::countDown);
+     redeployer.readied(NodeType.confighost);
+
+     boolean outroReached = outro.await(10, TimeUnit.SECONDS);
+     redeployer.close();
+     locks.verify();
+     deployer.verify();
+     if ( ! outroReached) fail("Timed out waiting for the final confighost run");
+ }
+
+ /**
+  * Checks how the redeployer reacts to failures when acquiring locks: a {@link RuntimeException}
+  * is expected to cause neither deployment nor retry, while an {@link UncheckedTimeoutException}
+  * is expected to cause a new attempt for the same application.
+  *
+  * A timed-out wait on the latch is reported as a failure, but only after {@code verify()} has had
+  * the chance to rethrow a failure recorded during an invocation.
+  */
+ @Test
+ void testRetries() throws InterruptedException {
+     TestLocks locks = new TestLocks();
+     List<Node> nodes = new CopyOnWriteArrayList<>();
+     TestInfraDeployer deployer = new TestInfraDeployer();
+     InfraApplicationRedeployer redeployer = new InfraApplicationRedeployer(deployer, locks::get, () -> NodeList.copyOf(nodes));
+
+     // Does nothing.
+     redeployer.readied(NodeType.tenant);
+
+     // Getting lock fails with runtime exception; no deployments, no retries.
+     locks.expect(tenanthost, () -> { throw new RuntimeException("Failed"); });
+     redeployer.readied(NodeType.host);
+
+     // Getting lock times out for configserver application; deployment of configserverapp is retried, but host is done.
+     CountDownLatch latch = new CountDownLatch(1);
+     locks.expect(cfghost, () -> () -> { });
+     locks.expect(cfg, () -> { throw new UncheckedTimeoutException("Timeout"); });
+     locks.expect(cfg, () -> latch::countDown);
+     nodes.add(node("cfghost", NodeType.confighost, State.ready));
+     deployer.expect(cfghost, () -> {
+         nodes.set(0, node("cfghost", NodeType.confighost, State.active));
+         return Optional.empty();
+     });
+     deployer.expect(cfg, Optional::empty);
+     redeployer.readied(NodeType.confighost);
+     // Remember whether the retried attempt actually counted down the latch within the time limit.
+     boolean retryCompleted = latch.await(10, TimeUnit.SECONDS);
+     redeployer.close();
+     locks.verify();
+     deployer.verify();
+     // Checked after verify(), so a recorded expectation failure is reported in preference to the timeout.
+     if ( ! retryCompleted) fail("Timed out waiting for the retried lock acquisition for " + cfg);
+ }
+
+ private static Node node(String name, NodeType type, State state) {
+ return Node.create(name, name, new Flavor(NodeResources.unspecified()), state, type)
+ .ipConfig(IP.Config.of(List.of("1.2.3.4"), List.of("1.2.3.4")))
+ .build();
+ }
+
+ /**
+  * An ordered list of expected invocations, each paired with the reaction to run when it occurs.
+  *
+  * Expectations are declared with {@link #expect}, consumed in order by {@link #get}, and checked
+  * with {@link #verify}. A mismatch is both thrown to the invoker and remembered, so that
+  * {@code verify()} can rethrow it later.
+  *
+  * @param <T> type of the identifier an invocation is made with
+  * @param <R> type of the value an invocation returns
+  */
+ private static class Expectations<T, R> {
+
+     final Queue<T> expected = new ConcurrentLinkedQueue<>();
+     final Queue<Throwable> stacks = new ConcurrentLinkedQueue<>();
+     final Queue<Supplier<R>> reactions = new ConcurrentLinkedQueue<>();
+     final AtomicReference<Throwable> failure = new AtomicReference<>();
+
+     /** Declares that the next, not yet declared, invocation should be made with {@code id}, and answered by {@code reaction}. */
+     void expect(T id, Supplier<R> reaction) {
+         // Created here so its stack trace points to where the expectation was declared.
+         Throwable declaredAt = new AssertionError("Failed expectation of " + id);
+         expected.add(id);
+         stacks.add(declaredAt);
+         reactions.add(reaction);
+     }
+
+     /**
+      * Consumes the next expectation, and returns the result of its reaction.
+      * If {@code id} does not equal the next expected identifier, or none is expected,
+      * the assertion failure is recorded and then rethrown.
+      */
+     R get(T id) {
+         Throwable declared = stacks.poll();
+         Throwable origin = declared != null ? declared
+                                             : new AssertionError("Unexpected invocation with " + id);
+         try {
+             assertEquals(expected.poll(), id);
+         }
+         catch (Throwable mismatch) {
+             recordFailure(mismatch, origin);
+             throw mismatch;
+         }
+         Supplier<R> reaction = reactions.poll();
+         return reaction.get();
+     }
+
+     /**
+      * Swaps the stack traces of {@code mismatch} and {@code origin}, attaches {@code origin} as
+      * suppressed by {@code mismatch}, and stores {@code mismatch} as the failure; if a failure is
+      * already stored, {@code mismatch} is instead added as suppressed by that one.
+      */
+     private void recordFailure(Throwable mismatch, Throwable origin) {
+         StackTraceElement[] invocationTrace = mismatch.getStackTrace();
+         mismatch.setStackTrace(origin.getStackTrace());
+         origin.setStackTrace(invocationTrace);
+         mismatch.addSuppressed(origin);
+         boolean isFirstFailure = failure.compareAndSet(null, mismatch);
+         if ( ! isFirstFailure) failure.get().addSuppressed(mismatch);
+     }
+
+     /**
+      * Rethrows the recorded failure, if any; otherwise asserts that no expectations remain.
+      * The type parameter and unchecked cast let the recorded {@link Throwable} be thrown as-is.
+      */
+     @SuppressWarnings("unchecked")
+     <E extends Throwable> void verify() throws E {
+         Throwable recorded = failure.get();
+         if (recorded != null) throw (E) recorded;
+         assertEquals(List.of(), List.copyOf(expected));
+     }
+
+ }
+
+ /** Expectations keyed by {@link ApplicationId}, producing the {@link Mutex} to return; handed to the redeployer as {@code locks::get}. */
+ private static class TestLocks extends Expectations<ApplicationId, Mutex> { }
+
+ private static class TestInfraDeployer extends Expectations<ApplicationId, Optional<Deployment>> implements InfraDeployer {
+ @Override public Optional<Deployment> getDeployment(ApplicationId application) { return get(application); }
+ @Override public void activateAllSupportedInfraApplications(boolean propagateException) { fail(); }
+ }
+
+}