diff options
author | Martin Polden <mpolden@mpolden.no> | 2021-03-01 18:05:17 +0100 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2021-03-02 20:12:27 +0100 |
commit | df5d09f858e23d80ab24bfbd76ae532be9a2f0f8 (patch) | |
tree | 2618a42568a1ad0fdfb75b3e28ed2033526a88a8 | |
parent | 594d6e523598b96ef39e3449bb886d89a52922e6 (diff) |
Use custom inactive expiry for config server and controller
5 files changed, 66 insertions, 12 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java index 5b9cd6a69e1..b720bf004ff 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java @@ -55,6 +55,10 @@ public abstract class Expirer extends NodeRepositoryMaintainer { } protected boolean isExpired(Node node) { + return isExpired(node, expiryTime); + } + + protected final boolean isExpired(Node node, Duration expiryTime) { return node.history().hasEventBefore(eventType, clock().instant().minus(expiryTime)); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java index ae6e716bffe..238f89fc448 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.maintenance; +import com.yahoo.config.provision.NodeType; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; @@ -10,6 +11,7 @@ import com.yahoo.vespa.hosted.provision.node.Status; import java.time.Duration; import java.util.List; +import java.util.Map; /** * Maintenance job which moves inactive nodes to dirty or parked after timeout. @@ -30,10 +32,15 @@ import java.util.List; public class InactiveExpirer extends Expirer { private final NodeRepository nodeRepository; + private final Duration defaultTimeout; + private final Map<NodeType, Duration> inactiveTimeouts; - InactiveExpirer(NodeRepository nodeRepository, Duration inactiveTimeout, Metric metric) { - super(Node.State.inactive, History.Event.Type.deactivated, nodeRepository, inactiveTimeout, metric); + InactiveExpirer(NodeRepository nodeRepository, Duration defaultTimeout, Map<NodeType, Duration> inactiveTimeouts, + Metric metric) { + super(Node.State.inactive, History.Event.Type.deactivated, nodeRepository, defaultTimeout, metric); this.nodeRepository = nodeRepository; + this.defaultTimeout = defaultTimeout; + this.inactiveTimeouts = Map.copyOf(inactiveTimeouts); } @Override @@ -45,8 +52,12 @@ public class InactiveExpirer extends Expirer { @Override protected boolean isExpired(Node node) { - return super.isExpired(node) - || node.allocation().get().owner().instance().isTester(); + return super.isExpired(node, timeout(node)) || + node.allocation().get().owner().instance().isTester(); + } + + private Duration timeout(Node node) { + return inactiveTimeouts.getOrDefault(node.type(), defaultTimeout); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index 0154b030baa..f2ef0168e2f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -8,18 +8,20 @@ import com.yahoo.config.provision.Deployer; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.HostLivenessTracker; import com.yahoo.config.provision.InfraDeployer; +import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.Zone; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.flags.FlagSource; import com.yahoo.vespa.hosted.provision.NodeRepository; -import com.yahoo.vespa.hosted.provision.autoscale.MetricsFetcher; import com.yahoo.vespa.hosted.provision.autoscale.MetricsDb; +import com.yahoo.vespa.hosted.provision.autoscale.MetricsFetcher; import com.yahoo.vespa.hosted.provision.provisioning.ProvisionServiceProvider; import com.yahoo.vespa.orchestrator.Orchestrator; import com.yahoo.vespa.service.monitor.ServiceMonitor; import java.time.Duration; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.concurrent.CopyOnWriteArrayList; @@ -52,7 +54,9 @@ public class NodeRepositoryMaintenance extends AbstractComponent { maintainers.add(new OperatorChangeApplicationMaintainer(deployer, metric, nodeRepository, defaults.operatorChangeRedeployInterval)); maintainers.add(new ReservationExpirer(nodeRepository, defaults.reservationExpiry, metric)); maintainers.add(new RetiredExpirer(nodeRepository, orchestrator, deployer, metric, defaults.retiredInterval, defaults.retiredExpiry)); - maintainers.add(new InactiveExpirer(nodeRepository, defaults.inactiveExpiry, metric)); + maintainers.add(new InactiveExpirer(nodeRepository, defaults.inactiveExpiry, Map.of(NodeType.config, defaults.inactiveConfigServerExpiry, + NodeType.controller, defaults.inactiveControllerExpiry), + metric)); maintainers.add(new FailedExpirer(nodeRepository, zone, defaults.failedExpirerInterval, metric)); maintainers.add(new DirtyExpirer(nodeRepository, defaults.dirtyExpiry, metric)); maintainers.add(new ProvisionedExpirer(nodeRepository, defaults.provisionedExpiry, metric)); @@ -99,6 +103,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent { private final Duration reservationExpiry; private final Duration inactiveExpiry; + private final Duration inactiveConfigServerExpiry; + private final Duration inactiveControllerExpiry; private final Duration retiredExpiry; private final Duration failedExpirerInterval; private final Duration dirtyExpiry; @@ -148,6 +154,8 @@ public class NodeRepositoryMaintenance extends AbstractComponent { retiredExpiry = Duration.ofDays(4); // give up migrating data after 4 days dedicatedClusterControllerMigratorInterval = zone.environment() == Environment.staging || zone.system().isCd() ? Duration.ofMinutes(3) : Duration.ofHours(2); + inactiveConfigServerExpiry = Duration.ofMinutes(5); + inactiveControllerExpiry = Duration.ofMinutes(5); if (zone.environment() == Environment.prod && ! zone.system().isCd()) { inactiveExpiry = Duration.ofHours(4); // enough time for the application owner to discover and redeploy diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java index eda744e9ee1..3bd40670631 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.maintenance; +import com.yahoo.component.Vtag; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ApplicationName; import com.yahoo.config.provision.Capacity; @@ -18,6 +19,7 @@ import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.History; +import com.yahoo.vespa.hosted.provision.node.filter.NodeListFilter; import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; import com.yahoo.vespa.hosted.provision.testutils.MockDeployer; import com.yahoo.vespa.orchestrator.OrchestrationException; @@ -28,9 +30,12 @@ import java.time.Duration; import java.util.Collections; import java.util.HashSet; import java.util.List; +import java.util.Map; +import java.util.function.Supplier; import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.doThrow; @@ -64,7 +69,7 @@ public class InactiveAndFailedExpirerTest { // Inactive times out tester.advanceTime(Duration.ofMinutes(14)); - new InactiveExpirer(tester.nodeRepository(), Duration.ofMinutes(10), new TestMetric()).run(); + new InactiveExpirer(tester.nodeRepository(), Duration.ofMinutes(10), Map.of(), new TestMetric()).run(); assertEquals(0, tester.nodeRepository().nodes().list(Node.State.inactive).size()); NodeList dirty = tester.nodeRepository().nodes().list(Node.State.dirty); assertEquals(2, dirty.size()); @@ -105,7 +110,7 @@ public class InactiveAndFailedExpirerTest { // Inactive times out and node is moved to dirty tester.advanceTime(Duration.ofMinutes(14)); - new InactiveExpirer(tester.nodeRepository(), Duration.ofMinutes(10), new TestMetric()).run(); + new InactiveExpirer(tester.nodeRepository(), Duration.ofMinutes(10), Map.of(), new TestMetric()).run(); NodeList dirty = tester.nodeRepository().nodes().list(Node.State.dirty); assertEquals(2, dirty.size()); @@ -156,7 +161,7 @@ public class InactiveAndFailedExpirerTest { // Inactive times out and one node is moved to parked tester.advanceTime(Duration.ofMinutes(11)); // Trigger InactiveExpirer - new InactiveExpirer(tester.nodeRepository(), Duration.ofMinutes(10), new TestMetric()).run(); + new InactiveExpirer(tester.nodeRepository(), Duration.ofMinutes(10), Map.of(), new TestMetric()).run(); assertEquals(1, tester.nodeRepository().nodes().list(Node.State.parked).size()); } @@ -178,7 +183,7 @@ public class InactiveAndFailedExpirerTest { assertEquals(1, inactiveNodes.size()); // See that nodes are moved to dirty immediately. - new InactiveExpirer(tester.nodeRepository(), Duration.ofMinutes(10), new TestMetric()).run(); + new InactiveExpirer(tester.nodeRepository(), Duration.ofMinutes(10), Map.of(), new TestMetric()).run(); assertEquals(0, tester.nodeRepository().nodes().list(Node.State.inactive).size()); NodeList dirty = tester.nodeRepository().nodes().list(Node.State.dirty); assertEquals(1, dirty.size()); @@ -202,8 +207,31 @@ public class InactiveAndFailedExpirerTest { // Nodes marked for deprovisioning are moved to parked tester.patchNodes(inactiveNodes, (node) -> node.withWantToRetire(true, true, Agent.system, tester.clock().instant())); tester.advanceTime(Duration.ofMinutes(11)); - new InactiveExpirer(tester.nodeRepository(), Duration.ofMinutes(10), new TestMetric()).run(); + new InactiveExpirer(tester.nodeRepository(), Duration.ofMinutes(10), Map.of(), new TestMetric()).run(); assertEquals(2, tester.nodeRepository().nodes().list(Node.State.parked).size()); } + @Test + public void inactive_config_server_expires_according_to_custom_timeout() { + ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).build(); + InactiveExpirer expirer = new InactiveExpirer(tester.nodeRepository(), Duration.ofHours(1), + Map.of(NodeType.config, Duration.ofMinutes(5)), + new TestMetric()); + NodeList nodes = tester.makeConfigServers(3, "default", Vtag.currentVersion); + Supplier<Node> firstNode = () -> tester.nodeRepository().nodes().node(nodes.first().get().hostname()).get(); + ApplicationId application = firstNode.get().allocation().get().owner(); + + // Retired config server is moved to inactive + tester.nodeRepository().nodes().retire(NodeListFilter.from(firstNode.get()), Agent.system, tester.clock().instant()); + tester.prepareAndActivateInfraApplication(application, NodeType.config); + assertSame(Node.State.inactive, firstNode.get().state()); + expirer.maintain(); + assertSame(Node.State.inactive, firstNode.get().state()); + + // Config server expires + tester.clock().advance(Duration.ofMinutes(5)); + expirer.maintain(); + assertSame(Node.State.dirty, firstNode.get().state()); + } + } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java index 97baddf93fa..c0306215f6d 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java @@ -227,7 +227,10 @@ public class ProvisioningTester { } public void prepareAndActivateInfraApplication(ApplicationId application, NodeType nodeType, Version version) { - ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from(nodeType.toString())).vespaVersion(version).build(); + ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from(nodeType.toString())) + .vespaVersion(version) + .stateful(nodeType == NodeType.config || nodeType == NodeType.controller) + .build(); Capacity capacity = Capacity.fromRequiredNodeType(nodeType); List<HostSpec> hostSpecs = prepare(application, cluster, capacity); activate(application, hostSpecs); |