diff options
author | Harald Musum <musum@yahooinc.com> | 2023-08-30 12:24:07 +0200 |
---|---|---|
committer | Harald Musum <musum@yahooinc.com> | 2023-08-30 12:24:07 +0200 |
commit | 5445d9f3b1b91b073dac2d609037b534e526c477 (patch) | |
tree | c187ff43b0ed1da976a1960ac5a93da8e4bea674 | |
parent | 306dce28ed3cfa0d2f4679fd2519db3d1ac780c7 (diff) |
Use deploy time (prepare time) when considering applications for maintenance deployment
10 files changed, 130 insertions, 69 deletions
diff --git a/config-provisioning/src/main/java/com/yahoo/config/provision/Deployer.java b/config-provisioning/src/main/java/com/yahoo/config/provision/Deployer.java index f184519e928..e9ceb34570e 100644 --- a/config-provisioning/src/main/java/com/yahoo/config/provision/Deployer.java +++ b/config-provisioning/src/main/java/com/yahoo/config/provision/Deployer.java @@ -57,6 +57,9 @@ public interface Deployer { /** Returns the time the active session was activated, or empty if there is no active session */ Optional<Instant> activationTime(ApplicationId application); + /** Returns the time of last deployed session for this application or empty if there are no deployments */ + Optional<Instant> deployTime(ApplicationId application); + /** Whether the deployer is bootstrapping, some users of the deployer will want to hold off with deployments in that case. */ default boolean bootstrapping() { return false; } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java index 138d963b250..acc195729e0 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java @@ -43,6 +43,7 @@ import com.yahoo.transaction.Transaction; import com.yahoo.vespa.applicationmodel.InfrastructureApplication; import com.yahoo.vespa.config.server.application.Application; import com.yahoo.vespa.config.server.application.ApplicationCuratorDatabase; +import com.yahoo.vespa.config.server.application.ApplicationData; import com.yahoo.vespa.config.server.application.ApplicationReindexing; import com.yahoo.vespa.config.server.application.ApplicationVersions; import com.yahoo.vespa.config.server.application.ClusterReindexing; @@ -445,11 +446,28 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye public Optional<Instant> activationTime(ApplicationId application) { Tenant tenant = tenantRepository.getTenant(application.tenant()); if (tenant == null) return Optional.empty(); + Optional<Instant> activatedTime = getActiveSession(tenant, application).map(Session::getActivatedTime); log.log(Level.FINEST, application + " last activated " + activatedTime.orElse(Instant.EPOCH)); return activatedTime; } + @Override + public Optional<Instant> deployTime(ApplicationId application) { + Tenant tenant = tenantRepository.getTenant(application.tenant()); + if (tenant == null) return Optional.empty(); + + // TODO: Fallback to empty instead if no deploy time (in Vespa 9) + Optional<Long> lastDeployedSession = tenant.getApplicationRepo().applicationData(application) + .flatMap(ApplicationData::lastDeployedSession); + if (lastDeployedSession.isEmpty()) return activationTime(application); + + Instant createTime = getRemoteSession(tenant, lastDeployedSession.get()).getCreateTime(); + log.log(Level.FINEST, application + " last deployed " + createTime); + + return Optional.of(createTime); + } + public ApplicationId activate(Tenant tenant, long sessionId, TimeoutBudget timeoutBudget, diff --git a/configserver/src/test/java/com/yahoo/vespa/config/server/ApplicationRepositoryTest.java b/configserver/src/test/java/com/yahoo/vespa/config/server/ApplicationRepositoryTest.java index c355be5090a..2a79e2c03aa 100644 --- a/configserver/src/test/java/com/yahoo/vespa/config/server/ApplicationRepositoryTest.java +++ b/configserver/src/test/java/com/yahoo/vespa/config/server/ApplicationRepositoryTest.java @@ -97,7 +97,6 @@ public class ApplicationRepositoryTest { private ApplicationRepository applicationRepository; private TenantRepository tenantRepository; - private MockProvisioner provisioner; private OrchestratorMock orchestrator; private TimeoutBudget timeoutBudget; private Curator curator; @@ -123,7 +122,7 @@ public class ApplicationRepositoryTest { .build(); flagSource = new InMemoryFlagSource(); fileDirectory = new FileDirectory(configserverConfig); - provisioner = new MockProvisioner(); + MockProvisioner provisioner = new MockProvisioner(); tenantRepository = new TestTenantRepository.Builder() .withClock(clock) .withConfigserverConfig(configserverConfig) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java index 28679b504aa..ba71bbf0d42 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainer.java @@ -16,7 +16,8 @@ import java.time.Duration; import java.time.Instant; import java.util.Map; import java.util.function.Function; -import java.util.stream.Collectors; + +import static java.util.stream.Collectors.toMap; /** * The application maintainer regularly redeploys all applications to make sure the node repo and application @@ -39,7 +40,7 @@ public class PeriodicApplicationMaintainer extends ApplicationMaintainer { @Override protected boolean canDeployNow(ApplicationId application) { - return deployer().activationTime(application) + return deployer().deployTime(application) // Don't deploy if a regular deploy just happened .map(lastDeployTime -> lastDeployTime.isBefore(nodeRepository().clock().instant().minus(minTimeBetweenRedeployments))) // We only know last deploy time for applications that were deployed on this config server, @@ -57,13 +58,17 @@ public class PeriodicApplicationMaintainer extends ApplicationMaintainer { .map(node -> node.allocation().get().owner()) .distinct() .filter(this::canDeployNow) - .collect(Collectors.toMap(Function.identity(), this::activationTime)); + .collect(toMap(Function.identity(), this::deployTime)); return deploymentTimes.entrySet().stream() .sorted(Map.Entry.comparingByValue()) .map(Map.Entry::getKey) .filter(this::shouldMaintain) - .collect(Collectors.toMap(applicationId -> applicationId, applicationId -> "current deployment being too old")); + .collect(toMap(applicationId -> applicationId, applicationId -> "current deployment being too old")); + } + + private Instant deployTime(ApplicationId applicationId) { + return deployer().deployTime(applicationId).orElse(Instant.EPOCH); } private boolean shouldMaintain(ApplicationId id) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java index 3ed01e00ee6..4b4e9121be4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDeployer.java @@ -40,10 +40,11 @@ public class MockDeployer implements Deployer { // For mock deploy anything, changing wantToRetire to retired only private final NodeRepository nodeRepository; - /** The number of redeployments done to this, which is also the config generation */ - public int redeployments = 0; + /** The number of activations done to this, which is also the config generation */ + public int activations = 0; - private final Map<ApplicationId, Instant> lastDeployTimes = new HashMap<>(); + private final Map<ApplicationId, Instant> lastPrepareTimes = new HashMap<>(); + private final Map<ApplicationId, Instant> lastActivationTimes = new HashMap<>(); private final Clock clock; private final ReentrantLock lock = new ReentrantLock(); @@ -121,7 +122,12 @@ public class MockDeployer implements Deployer { @Override public Optional<Instant> activationTime(ApplicationId application) { - return Optional.ofNullable(lastDeployTimes.get(application)); + return Optional.ofNullable(lastActivationTimes.get(application)); + } + + @Override + public Optional<Instant> deployTime(ApplicationId application) { + return Optional.ofNullable(lastPrepareTimes.get(application)); } @Override @@ -136,7 +142,8 @@ public class MockDeployer implements Deployer { new MockDeployment(provisioner, new ApplicationContext(applicationId, List.of())).activate(); applications.remove(applicationId); - lastDeployTimes.remove(applicationId); + lastPrepareTimes.remove(applicationId); + lastActivationTimes.remove(applicationId); } public class MockDeployment implements Deployment { @@ -155,6 +162,7 @@ public class MockDeployer implements Deployer { @Override public void prepare() { preparedHosts = application.prepare(provisioner); + lastPrepareTimes.put(application.id, clock.instant()); } @Override @@ -164,15 +172,15 @@ public class MockDeployer implements Deployer { if (failActivate) throw new IllegalStateException("failActivate is true"); - redeployments++; + activations++; try (var lock = provisioner.lock(application.id)) { try (NestedTransaction t = new NestedTransaction()) { - provisioner.activate(preparedHosts, new ActivationContext(redeployments), new ApplicationTransaction(lock, t)); + provisioner.activate(preparedHosts, new ActivationContext(activations), new ApplicationTransaction(lock, t)); t.commit(); - lastDeployTimes.put(application.id, clock.instant()); + lastActivationTimes.put(application.id, clock.instant()); } } - return redeployments; + return activations; } @Override @@ -191,18 +199,20 @@ public class MockDeployer implements Deployer { } @Override - public void prepare() { } + public void prepare() { + lastPrepareTimes.put(applicationId, clock.instant()); + } @Override public long activate() { - lastDeployTimes.put(applicationId, clock.instant()); + lastActivationTimes.put(applicationId, clock.instant()); for (Node node : nodeRepository.nodes().list().owner(applicationId).state(Node.State.active).retiring()) { try (NodeMutex lock = nodeRepository.nodes().lockAndGetRequired(node)) { nodeRepository.nodes().write(lock.node().retire(nodeRepository.clock().instant()), lock); } } - return redeployments++; + return activations++; } @Override diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ExpeditedChangeApplicationMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ExpeditedChangeApplicationMaintainerTest.java index e1e2ff3db15..d1eaac8258e 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ExpeditedChangeApplicationMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ExpeditedChangeApplicationMaintainerTest.java @@ -43,7 +43,7 @@ public class ExpeditedChangeApplicationMaintainerTest { // Create applications fixture.activate(); - assertEquals("Initial applications are deployed", 3, fixture.deployer.redeployments); + assertEquals("Initial applications are deployed", 3, fixture.deployer.activations); ExpeditedChangeApplicationMaintainer maintainer = new ExpeditedChangeApplicationMaintainer(fixture.deployer, new TestMetric(), nodeRepository, @@ -51,34 +51,34 @@ public class ExpeditedChangeApplicationMaintainerTest { clock.advance(Duration.ofMinutes(2)); maintainer.maintain(); - assertEquals("No changes -> no redeployments", 3, fixture.deployer.redeployments); + assertEquals("No changes -> no redeployments", 3, fixture.deployer.activations); nodeRepository.nodes().fail(nodeRepository.nodes().list().owner(fixture.app1).asList().get(3).hostname(), Agent.system, "Failing to unit test"); clock.advance(Duration.ofMinutes(2)); maintainer.maintain(); - assertEquals("System change -> no redeployments", 3, fixture.deployer.redeployments); + assertEquals("System change -> no redeployments", 3, fixture.deployer.activations); clock.advance(Duration.ofSeconds(1)); nodeRepository.nodes().fail(nodeRepository.nodes().list().owner(fixture.app2).asList().get(4).hostname(), Agent.operator, "Manual node failing"); clock.advance(Duration.ofMinutes(2)); maintainer.maintain(); - assertEquals("Operator change -> redeployment", 4, fixture.deployer.redeployments); + assertEquals("Operator change -> redeployment", 4, fixture.deployer.activations); clock.advance(Duration.ofSeconds(1)); nodeRepository.nodes().fail(nodeRepository.nodes().list().owner(fixture.app3).asList().get(1).hostname(), Agent.operator, "Manual node failing"); clock.advance(Duration.ofMinutes(2)); maintainer.maintain(); - assertEquals("Operator change -> redeployment", 5, fixture.deployer.redeployments); + assertEquals("Operator change -> redeployment", 5, fixture.deployer.activations); clock.advance(Duration.ofSeconds(1)); fixture.tester.makeReadyNodes(1, fixture.nodeResources, NodeType.proxy); clock.advance(Duration.ofMinutes(2)); maintainer.maintain(); - assertEquals("Ready proxy node -> redeployment", 6, fixture.deployer.redeployments); + assertEquals("Ready proxy node -> redeployment", 6, fixture.deployer.activations); clock.advance(Duration.ofMinutes(2)); maintainer.maintain(); - assertEquals("No further operator changes -> no (new) redeployments", 6, fixture.deployer.redeployments); + assertEquals("No further operator changes -> no (new) redeployments", 6, fixture.deployer.activations); } private static class Fixture { diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java index c63be6d5dc5..12f10e434e6 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java @@ -211,7 +211,7 @@ public class NodeFailerTest { tester.runMaintainers(); tester.clock.advance(Duration.ofMinutes(5)); - assertEquals(0, tester.deployer.redeployments); + assertEquals(0, tester.deployer.activations); assertEquals(8, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.tenant).size()); assertEquals(0, tester.nodeRepository.nodes().list(Node.State.failed).nodeType(NodeType.tenant).size()); assertFalse(tester.nodeRepository.nodes().node(downHost1).get().isDown()); @@ -224,7 +224,7 @@ public class NodeFailerTest { for (int minutes = 0; minutes < 45; minutes +=5 ) { tester.runMaintainers(); tester.clock.advance(Duration.ofMinutes(5)); - assertEquals(0, tester.deployer.redeployments); + assertEquals(0, tester.deployer.activations); assertEquals(8, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.tenant).size()); assertEquals(0, tester.nodeRepository.nodes().list(Node.State.failed).nodeType(NodeType.tenant).size()); assertTrue(tester.nodeRepository.nodes().node(downHost1).get().isDown()); @@ -237,7 +237,7 @@ public class NodeFailerTest { tester.runMaintainers(); assertFalse(tester.nodeRepository.nodes().node(downHost1).get().isDown()); assertTrue(tester.nodeRepository.nodes().node(downHost1).get().isUp()); - assertEquals(1, tester.deployer.redeployments); + assertEquals(1, tester.deployer.activations); assertEquals(8, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.tenant).size()); assertEquals(1, tester.nodeRepository.nodes().list(Node.State.failed).nodeType(NodeType.tenant).size()); assertEquals(downHost2, tester.nodeRepository.nodes().list(Node.State.failed).nodeType(NodeType.tenant).asList().get(0).hostname()); @@ -253,7 +253,7 @@ public class NodeFailerTest { // the host is still down and fails tester.clock.advance(Duration.ofMinutes(5)); tester.runMaintainers(); - assertEquals(2, tester.deployer.redeployments); + assertEquals(2, tester.deployer.activations); assertEquals(8, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.tenant).size()); assertEquals(2, tester.nodeRepository.nodes().list(Node.State.failed).nodeType(NodeType.tenant).size()); @@ -264,7 +264,7 @@ public class NodeFailerTest { for (int minutes = 0; minutes < 75; minutes +=5 ) { tester.runMaintainers(); tester.clock.advance(Duration.ofMinutes(5)); - assertEquals(2, tester.deployer.redeployments); + assertEquals(2, tester.deployer.activations); assertEquals(8, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.tenant).size()); assertEquals(2, tester.nodeRepository.nodes().list(Node.State.failed).nodeType(NodeType.tenant).size()); } @@ -274,7 +274,7 @@ public class NodeFailerTest { tester.clock.advance(Duration.ofDays(1)); tester.runMaintainers(); // The node is now failed - assertEquals(3, tester.deployer.redeployments); + assertEquals(3, tester.deployer.activations); assertEquals(8, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.tenant).size()); assertEquals(3, tester.nodeRepository.nodes().list(Node.State.failed).nodeType(NodeType.tenant).size()); assertTrue("The index of the last failed node is not reused", @@ -330,7 +330,7 @@ public class NodeFailerTest { for (int minutes = 0; minutes < 45; minutes +=5 ) { tester.runMaintainers(); tester.clock.advance(Duration.ofMinutes(5)); - assertEquals(0, tester.deployer.redeployments); + assertEquals(0, tester.deployer.activations); assertEquals(3, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.tenant).size()); assertEquals(0, tester.nodeRepository.nodes().list(Node.State.failed).nodeType(NodeType.tenant).size()); } @@ -338,7 +338,7 @@ public class NodeFailerTest { // downHost should now be failed and replaced tester.clock.advance(Duration.ofDays(1)); tester.runMaintainers(); - assertEquals(1, tester.deployer.redeployments); + assertEquals(1, tester.deployer.activations); assertEquals(1, tester.nodeRepository.nodes().list(Node.State.failed).nodeType(NodeType.tenant).size()); assertEquals(3, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.tenant).size()); assertEquals(downHost, tester.nodeRepository.nodes().list(Node.State.failed).nodeType(NodeType.tenant).asList().get(0).hostname()); @@ -364,7 +364,7 @@ public class NodeFailerTest { // Node is failed and replaced tester.runMaintainers(); - assertEquals(1, tester.deployer.redeployments); + assertEquals(1, tester.deployer.activations); NodeList failedOrActive = tester.nodeRepository.nodes().list(Node.State.active, Node.State.failed); assertEquals(4, failedOrActive.state(Node.State.active).nodeType(NodeType.tenant).size()); assertEquals(Set.of(downNode.hostname()), failedOrActive.state(Node.State.failed).nodeType(NodeType.tenant).hostnames()); @@ -392,7 +392,7 @@ public class NodeFailerTest { for (int minutes = 0; minutes < 45; minutes += 5 ) { tester.runMaintainers(); tester.clock.advance(Duration.ofMinutes(5)); - assertEquals(0, tester.deployer.redeployments); + assertEquals(0, tester.deployer.activations); assertEquals(8, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.tenant).size()); assertEquals(7, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.host).size()); } @@ -400,7 +400,7 @@ public class NodeFailerTest { tester.clock.advance(Duration.ofMinutes(30)); tester.runMaintainers(); - assertEquals(2, tester.deployer.redeployments); + assertEquals(2, tester.deployer.activations); assertEquals(2, tester.nodeRepository.nodes().list(Node.State.failed).nodeType(NodeType.tenant).size()); assertEquals(8, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.tenant).size()); assertEquals(7, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.host).size()); @@ -409,7 +409,7 @@ public class NodeFailerTest { // The failing of the host is deferred to the next maintain tester.runMaintainers(); - assertEquals(2 + 1, tester.deployer.redeployments); + assertEquals(2 + 1, tester.deployer.activations); assertEquals(2, tester.nodeRepository.nodes().list(Node.State.failed).nodeType(NodeType.tenant).size()); assertEquals(8, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.tenant).size()); assertEquals(6, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.host).size()); @@ -430,7 +430,7 @@ public class NodeFailerTest { tester.clock.advance(Duration.ofMinutes(30)); tester.runMaintainers(); - assertEquals(3 + 1, tester.deployer.redeployments); + assertEquals(3 + 1, tester.deployer.activations); assertEquals(3, tester.nodeRepository.nodes().list(Node.State.failed).nodeType(NodeType.tenant).size()); assertEquals(8, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.tenant).size()); assertEquals(6, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.host).size()); @@ -444,7 +444,7 @@ public class NodeFailerTest { tester.runMaintainers(); tester.runMaintainers(); // The host is failed in the 2. maintain() - assertEquals(5 + 2, tester.deployer.redeployments); + assertEquals(5 + 2, tester.deployer.activations); assertEquals(5, tester.nodeRepository.nodes().list(Node.State.failed).nodeType(NodeType.tenant).size()); assertEquals(8, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.tenant).size()); assertEquals(5, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.host).size()); @@ -458,7 +458,7 @@ public class NodeFailerTest { tester.clock.advance(Duration.ofDays(1)); tester.runMaintainers(); - assertEquals(6 + 2, tester.deployer.redeployments); + assertEquals(6 + 2, tester.deployer.activations); assertEquals(6, tester.nodeRepository.nodes().list(Node.State.failed).nodeType(NodeType.tenant).size()); assertEquals(8, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.tenant).size()); assertEquals(5, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.host).size()); @@ -493,7 +493,7 @@ public class NodeFailerTest { for (int minutes = 0; minutes < 45; minutes +=5 ) { tester.runMaintainers(); tester.clock.advance(Duration.ofMinutes(5)); - assertEquals( 0, tester.deployer.redeployments); + assertEquals( 0, tester.deployer.activations); assertEquals(count, tester.nodeRepository.nodes().list(Node.State.active).nodeType(nodeType).size()); } @@ -501,7 +501,7 @@ public class NodeFailerTest { tester.runMaintainers(); // one down host should now be failed, but not two as we are only allowed to fail one proxy - assertEquals(expectedFailCount, tester.deployer.redeployments); + assertEquals(expectedFailCount, tester.deployer.activations); assertEquals(count - expectedFailCount, tester.nodeRepository.nodes().list(Node.State.active).nodeType(nodeType).size()); assertEquals(expectedFailCount, tester.nodeRepository.nodes().list(Node.State.failed).nodeType(nodeType).size()); tester.nodeRepository.nodes().list(Node.State.failed).nodeType(nodeType) diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java index 900d77fcb26..9fea27a2cb0 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java @@ -124,40 +124,61 @@ public class PeriodicApplicationMaintainerTest { public void application_deploy_inhibits_redeploy_for_a_while() { fixture.activate(); - assertEquals("No deployment expected", 2, fixture.deployer.redeployments); + assertEquals("No deployment expected", 2, fixture.deployer.activations); // Holds off on deployments a while after starting fixture.runApplicationMaintainer(); - assertEquals("No deployment expected", 2, fixture.deployer.redeployments); + assertEquals("No deployment expected", 2, fixture.deployer.activations); // Exhaust initial wait period clock.advance(Duration.ofMinutes(30).plus(Duration.ofSeconds(1))); // Will not do any deployments, as bootstrapping is still in progress fixture.runApplicationMaintainer(); - assertEquals("No deployment expected", 2, fixture.deployer.redeployments); + assertEquals("No deployment expected", 2, fixture.deployer.activations); // First deployment of applications will happen now, as bootstrapping is done fixture.setBootstrapping(false); fixture.runApplicationMaintainer(); - assertEquals("No deployment expected", 4, fixture.deployer.redeployments); + assertEquals("No deployment expected", 4, fixture.deployer.activations); Instant firstDeployTime = clock.instant(); + assertEquals(firstDeployTime, fixture.deployer.deployTime(fixture.app1).get()); + assertEquals(firstDeployTime, fixture.deployer.deployTime(fixture.app2).get()); assertEquals(firstDeployTime, fixture.deployer.activationTime(fixture.app1).get()); assertEquals(firstDeployTime, fixture.deployer.activationTime(fixture.app2).get()); clock.advance(Duration.ofMinutes(5)); fixture.runApplicationMaintainer(); // Too soon: Not redeployed: - assertEquals("No deployment expected", 4, fixture.deployer.redeployments); + assertEquals("No deployment expected", 4, fixture.deployer.activations); + assertEquals(firstDeployTime, fixture.deployer.deployTime(fixture.app1).get()); + assertEquals(firstDeployTime, fixture.deployer.deployTime(fixture.app2).get()); assertEquals(firstDeployTime, fixture.deployer.activationTime(fixture.app1).get()); assertEquals(firstDeployTime, fixture.deployer.activationTime(fixture.app2).get()); clock.advance(Duration.ofMinutes(30)); + Instant instant1 = clock.instant(); fixture.runApplicationMaintainer(); // Redeployed: - assertEquals("No deployment expected", 6, fixture.deployer.redeployments); - assertEquals(clock.instant(), fixture.deployer.activationTime(fixture.app1).get()); - assertEquals(clock.instant(), fixture.deployer.activationTime(fixture.app2).get()); + assertEquals("No deployment expected", 6, fixture.deployer.activations); + assertEquals(instant1, fixture.deployer.deployTime(fixture.app1).get()); + assertEquals(instant1, fixture.deployer.deployTime(fixture.app2).get()); + assertEquals(instant1, fixture.deployer.activationTime(fixture.app1).get()); + assertEquals(instant1, fixture.deployer.activationTime(fixture.app2).get()); + + clock.advance(Duration.ofMinutes(30)); + // Prepare (simulate that activation failed) + fixture.prepare(); + Instant secondDeployTime = clock.instant(); + fixture.runApplicationMaintainer(); + // Too soon: Not redeployed, since a deployment (prepare) was done less than 30 minutes ago: + assertEquals("No deployment expected", 6, fixture.deployer.activations); + assertEquals(secondDeployTime, fixture.deployer.deployTime(fixture.app1).get()); + assertEquals(secondDeployTime, fixture.deployer.deployTime(fixture.app2).get()); + assertEquals(instant1, fixture.deployer.activationTime(fixture.app1).get()); + assertEquals(instant1, fixture.deployer.activationTime(fixture.app2).get()); + + clock.advance(Duration.ofMinutes(30)); } @Test(timeout = 60_000) @@ -188,8 +209,8 @@ public class PeriodicApplicationMaintainerTest { fixture.deployer.lock().unlock(); fixture.runApplicationMaintainer(); Instant deployTime = clock.instant(); - assertEquals(deployTime, fixture.deployer.activationTime(fixture.app1).get()); - assertEquals(deployTime, fixture.deployer.activationTime(fixture.app2).get()); + assertEquals(deployTime, fixture.deployer.deployTime(fixture.app1).get()); + assertEquals(deployTime, fixture.deployer.deployTime(fixture.app2).get()); // Too soon: Already deployed recently clock.advance(Duration.ofMinutes(5)); @@ -228,6 +249,11 @@ public class PeriodicApplicationMaintainerTest { Duration.ofMinutes(30)); } + void prepare() { + deployer.deployFromLocalActive(app1, false).get().prepare(); + deployer.deployFromLocalActive(app2, false).get().prepare(); + } + void activate() { deployer.deployFromLocalActive(app1, false).get().activate(); deployer.deployFromLocalActive(app2, false).get().activate(); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java index f42044c2944..d0ac59b1e15 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java @@ -104,7 +104,7 @@ public class RetiredExpirerTest { createRetiredExpirer(deployer).run(); assertEquals(3, nodeRepository.nodes().list(Node.State.active).owner(applicationId).size()); assertEquals(4, nodeRepository.nodes().list(Node.State.inactive).owner(applicationId).size()); - assertEquals(1, deployer.redeployments); + assertEquals(1, deployer.activations); // inactivated nodes are not retired for (Node node : nodeRepository.nodes().list(Node.State.inactive).owner(applicationId)) @@ -147,14 +147,14 @@ public class RetiredExpirerTest { retiredExpirer.run(); assertEquals(5, nodeRepository.nodes().list(Node.State.active).owner(applicationId).size()); assertEquals(2, nodeRepository.nodes().list(Node.State.dirty).owner(applicationId).size()); - assertEquals(1, deployer.redeployments); + assertEquals(1, deployer.activations); verify(orchestrator, times(4)).acquirePermissionToRemove(any()); // Running it again has no effect retiredExpirer.run(); assertEquals(5, nodeRepository.nodes().list(Node.State.active).owner(applicationId).size()); assertEquals(2, nodeRepository.nodes().list(Node.State.dirty).owner(applicationId).size()); - assertEquals(1, deployer.redeployments); + assertEquals(1, deployer.activations); verify(orchestrator, times(6)).acquirePermissionToRemove(any()); // Running it again deactivates nodes that have exceeded max retirement period @@ -163,7 +163,7 @@ public class RetiredExpirerTest { assertEquals(3, nodeRepository.nodes().list(Node.State.active).owner(applicationId).size()); assertEquals(2, nodeRepository.nodes().list(Node.State.dirty).owner(applicationId).size()); assertEquals(2, nodeRepository.nodes().list(Node.State.inactive).owner(applicationId).size()); - assertEquals(2, deployer.redeployments); + assertEquals(2, deployer.activations); verify(orchestrator, times(6)).acquirePermissionToRemove(any()); // Removed nodes are not retired diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java index 6d67f39d9bb..da64fa2fd64 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java @@ -51,7 +51,7 @@ public class SpareCapacityMaintainerTest { public void testEmpty() { var tester = new SpareCapacityMaintainerTester(); tester.maintainer.maintain(); - assertEquals(0, tester.deployer.redeployments); + assertEquals(0, tester.deployer.activations); assertEquals(0, tester.nodeRepository.nodes().list().retired().size()); } @@ -61,7 +61,7 @@ public class SpareCapacityMaintainerTest { tester.addHosts(2, new NodeResources(10, 100, 1000, 1)); tester.addNodes(0, 1, new NodeResources(10, 100, 1000, 1), 0); tester.maintainer.maintain(); - assertEquals(0, tester.deployer.redeployments); + assertEquals(0, tester.deployer.activations); assertEquals(0, tester.nodeRepository.nodes().list().retired().size()); assertEquals(1, tester.metric.values.get("spareHostCapacity")); } @@ -72,7 +72,7 @@ public class SpareCapacityMaintainerTest { tester.addHosts(3, new NodeResources(10, 100, 1000, 1)); tester.addNodes(0, 1, new NodeResources(10, 100, 1000, 1), 0); tester.maintainer.maintain(); - assertEquals(0, tester.deployer.redeployments); + assertEquals(0, tester.deployer.activations); assertEquals(0, tester.nodeRepository.nodes().list().retired().size()); assertEquals(2, tester.metric.values.get("spareHostCapacity")); } @@ -83,7 +83,7 @@ public class SpareCapacityMaintainerTest { tester.addHosts(2, new NodeResources(10, 100, 1000, 1)); tester.addNodes(0, 2, new NodeResources(10, 100, 1000, 1), 0); tester.maintainer.maintain(); - assertEquals(0, tester.deployer.redeployments); + assertEquals(0, tester.deployer.activations); assertEquals(0, tester.nodeRepository.nodes().list().retired().size()); assertEquals(0, tester.metric.values.get("spareHostCapacity")); } @@ -95,7 +95,7 @@ public class SpareCapacityMaintainerTest { tester.addNodes(0, 2, new NodeResources(5, 50, 500, 0.5), 0); tester.addNodes(1, 2, new NodeResources(5, 50, 500, 0.5), 2); tester.maintainer.maintain(); - assertEquals(0, tester.deployer.redeployments); + assertEquals(0, tester.deployer.activations); assertEquals(0, tester.nodeRepository.nodes().list().retired().size()); assertEquals(2, tester.metric.values.get("spareHostCapacity")); } @@ -109,13 +109,13 @@ public class SpareCapacityMaintainerTest { tester.addNodes(1, 2, new NodeResources(5, 50, 500, 0.5), 2); tester.addNodes(2, 2, new NodeResources(5, 50, 500, 0.5), 4); tester.maintainer.maintain(); - assertEquals(1, tester.deployer.redeployments); + assertEquals(1, tester.deployer.activations); assertEquals(1, tester.nodeRepository.nodes().list().retired().size()); assertEquals(1, tester.metric.values.get("spareHostCapacity")); // Maintaining again is a no-op since the node to move is already retired tester.maintainer.maintain(); - assertEquals(1, tester.deployer.redeployments); + assertEquals(1, tester.deployer.activations); assertEquals(1, tester.nodeRepository.nodes().list().retired().size()); assertEquals(1, tester.metric.values.get("spareHostCapacity")); } @@ -132,7 +132,7 @@ public class SpareCapacityMaintainerTest { tester.addNodes(2, 2, new NodeResources(5, 50, 500, 0.5), 4); tester.addNodes(3, 2, new NodeResources(5, 50, 500, 0.5), 6); tester.maintainer.maintain(); - assertEquals(1, tester.deployer.redeployments); + assertEquals(1, tester.deployer.activations); assertEquals(1, tester.nodeRepository.nodes().list().retired().size()); assertEquals(1, tester.metric.values.get("spareHostCapacity")); } @@ -145,7 +145,7 @@ public class SpareCapacityMaintainerTest { setupMultipleHosts(tester, 4); tester.maintainer.maintain(); - assertEquals(1, tester.deployer.redeployments); + assertEquals(1, tester.deployer.activations); assertEquals(1, tester.nodeRepository.nodes().list().retired().size()); assertEquals(1, tester.metric.values.get("spareHostCapacity")); } @@ -156,7 +156,7 @@ public class SpareCapacityMaintainerTest { setupMultipleHosts(tester, 3); tester.maintainer.maintain(); - assertEquals(0, tester.deployer.redeployments); + assertEquals(0, tester.deployer.activations); assertEquals(0, tester.nodeRepository.nodes().list().retired().size()); assertEquals(0, tester.metric.values.get("spareHostCapacity")); } @@ -199,7 +199,7 @@ public class SpareCapacityMaintainerTest { tester.addNodes(6, 1, new NodeResources( 4, 40, 400, 0.4), 6); tester.maintainer.maintain(); - assertEquals(0, tester.deployer.redeployments); + assertEquals(0, tester.deployer.activations); assertEquals(0, tester.nodeRepository.nodes().list().retired().size()); assertEquals(0, tester.metric.values.get("spareHostCapacity")); } @@ -216,7 +216,7 @@ public class SpareCapacityMaintainerTest { tester.maintainer.maintain(); assertEquals(2, tester.metric.values.get("overcommittedHosts")); - assertEquals(1, tester.deployer.redeployments); + assertEquals(1, tester.deployer.activations); assertEquals(List.of(new NodeResources( 1.1, 10, 100, 0.1)), tester.nodeRepository.nodes().list().retired().mapToList(Node::resources)); } @@ -239,7 +239,7 @@ public class SpareCapacityMaintainerTest { tester.maintainer.maintain(); long totalTime = System.currentTimeMillis() - startTime; System.out.println("Complete in " + ( totalTime / 1000) + " seconds"); - assertEquals(0, tester.deployer.redeployments); + assertEquals(0, tester.deployer.activations); assertEquals(0, tester.nodeRepository.nodes().list().retired().size()); assertEquals(0, tester.metric.values.get("spareHostCapacity")); } |