summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJon Marius Venstad <jonmv@users.noreply.github.com>2020-11-04 18:15:06 +0100
committerGitHub <noreply@github.com>2020-11-04 18:15:06 +0100
commit2c61049b176fbbea20714973bd7a90aaa0bb91de (patch)
tree34432c45d2ec3779da3091040fedfe25d616660b
parent26487c3acc487947b367ad85f94b19487f101ff5 (diff)
parent1e16c0cb4119ae2c9f0bb3f490993ea9f26e0bbe (diff)
Merge pull request #15177 from vespa-engine/mpolden/enforce-cd-interval
Enforce maximum maintenance interval for CD systems
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java126
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java5
-rw-r--r--controller-server/src/main/resources/configdefinitions/vespa.hosted.controller.maintenance.config.maintainer.def4
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java9
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgraderTest.java2
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java6
6 files changed, 101 insertions, 51 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
index bc63c235027..82f37c9bc93 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
@@ -3,17 +3,22 @@ package com.yahoo.vespa.hosted.controller.maintenance;
import com.google.inject.Inject;
import com.yahoo.component.AbstractComponent;
+import com.yahoo.config.provision.SystemName;
import com.yahoo.config.provision.zone.ZoneApi;
import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.hosted.controller.Controller;
-import com.yahoo.vespa.hosted.controller.maintenance.config.MaintainerConfig;
-import com.yahoo.vespa.hosted.controller.persistence.CuratorDb;
import java.time.Duration;
+import java.time.temporal.TemporalUnit;
import java.util.Collections;
import java.util.List;
+import java.util.Objects;
import java.util.stream.Collectors;
+import static java.time.temporal.ChronoUnit.HOURS;
+import static java.time.temporal.ChronoUnit.MINUTES;
+import static java.time.temporal.ChronoUnit.SECONDS;
+
/**
* Maintenance jobs of the controller.
* Each maintenance job is a singleton instance of its implementing class, created and owned by this,
@@ -48,38 +53,34 @@ public class ControllerMaintenance extends AbstractComponent {
private final HostRepairMaintainer hostRepairMaintainer;
private final ContainerImageExpirer containerImageExpirer;
-
@Inject
@SuppressWarnings("unused") // instantiated by Dependency Injection
- public ControllerMaintenance(MaintainerConfig maintainerConfig,
- Controller controller,
- CuratorDb curator,
- Metric metric) {
- Duration maintenanceInterval = Duration.ofMinutes(maintainerConfig.intervalMinutes());
- deploymentExpirer = new DeploymentExpirer(controller, maintenanceInterval);
- deploymentIssueReporter = new DeploymentIssueReporter(controller, controller.serviceRegistry().deploymentIssues(), maintenanceInterval);
+ public ControllerMaintenance(Controller controller, Metric metric) {
+ Intervals intervals = new Intervals(controller.system());
+ deploymentExpirer = new DeploymentExpirer(controller, intervals.defaultInterval);
+ deploymentIssueReporter = new DeploymentIssueReporter(controller, controller.serviceRegistry().deploymentIssues(), intervals.defaultInterval);
metricsReporter = new MetricsReporter(controller, metric);
- outstandingChangeDeployer = new OutstandingChangeDeployer(controller, Duration.ofMinutes(3));
- versionStatusUpdater = new VersionStatusUpdater(controller, Duration.ofMinutes(3));
- upgrader = new Upgrader(controller, maintenanceInterval, curator);
- readyJobsTrigger = new ReadyJobsTrigger(controller, Duration.ofMinutes(1));
- deploymentMetricsMaintainer = new DeploymentMetricsMaintainer(controller, Duration.ofMinutes(5));
- applicationOwnershipConfirmer = new ApplicationOwnershipConfirmer(controller, Duration.ofHours(12), controller.serviceRegistry().ownershipIssues());
- systemUpgrader = new SystemUpgrader(controller, Duration.ofMinutes(1));
- jobRunner = new JobRunner(controller, Duration.ofSeconds(90));
- osUpgraders = osUpgraders(controller);
- osVersionStatusUpdater = new OsVersionStatusUpdater(controller, maintenanceInterval);
- contactInformationMaintainer = new ContactInformationMaintainer(controller, Duration.ofHours(12));
- nameServiceDispatcher = new NameServiceDispatcher(controller, Duration.ofSeconds(10));
- costReportMaintainer = new CostReportMaintainer(controller, Duration.ofHours(2), controller.serviceRegistry().costReportConsumer());
- resourceMeterMaintainer = new ResourceMeterMaintainer(controller, Duration.ofMinutes(1), metric, controller.serviceRegistry().meteringService());
- cloudEventReporter = new CloudEventReporter(controller, Duration.ofMinutes(30), metric);
- rotationStatusUpdater = new RotationStatusUpdater(controller, maintenanceInterval);
- resourceTagMaintainer = new ResourceTagMaintainer(controller, Duration.ofMinutes(30), controller.serviceRegistry().resourceTagger());
- systemRoutingPolicyMaintainer = new SystemRoutingPolicyMaintainer(controller, Duration.ofMinutes(10));
- applicationMetaDataGarbageCollector = new ApplicationMetaDataGarbageCollector(controller, Duration.ofHours(12));
- hostRepairMaintainer = new HostRepairMaintainer(controller, Duration.ofHours(12));
- containerImageExpirer = new ContainerImageExpirer(controller, Duration.ofHours(2));
+ outstandingChangeDeployer = new OutstandingChangeDeployer(controller, intervals.outstandingChangeDeployer);
+ versionStatusUpdater = new VersionStatusUpdater(controller, intervals.versionStatusUpdater);
+ upgrader = new Upgrader(controller, intervals.defaultInterval);
+ readyJobsTrigger = new ReadyJobsTrigger(controller, intervals.readyJobsTrigger);
+ deploymentMetricsMaintainer = new DeploymentMetricsMaintainer(controller, intervals.deploymentMetricsMaintainer);
+ applicationOwnershipConfirmer = new ApplicationOwnershipConfirmer(controller, intervals.applicationOwnershipConfirmer, controller.serviceRegistry().ownershipIssues());
+ systemUpgrader = new SystemUpgrader(controller, intervals.systemUpgrader);
+ jobRunner = new JobRunner(controller, intervals.jobRunner);
+ osUpgraders = osUpgraders(controller, intervals.osUpgrader);
+ osVersionStatusUpdater = new OsVersionStatusUpdater(controller, intervals.defaultInterval);
+ contactInformationMaintainer = new ContactInformationMaintainer(controller, intervals.contactInformationMaintainer);
+ nameServiceDispatcher = new NameServiceDispatcher(controller, intervals.nameServiceDispatcher);
+ costReportMaintainer = new CostReportMaintainer(controller, intervals.costReportMaintainer, controller.serviceRegistry().costReportConsumer());
+ resourceMeterMaintainer = new ResourceMeterMaintainer(controller, intervals.resourceMeterMaintainer, metric, controller.serviceRegistry().meteringService());
+ cloudEventReporter = new CloudEventReporter(controller, intervals.cloudEventReporter, metric);
+ rotationStatusUpdater = new RotationStatusUpdater(controller, intervals.defaultInterval);
+ resourceTagMaintainer = new ResourceTagMaintainer(controller, intervals.resourceTagMaintainer, controller.serviceRegistry().resourceTagger());
+ systemRoutingPolicyMaintainer = new SystemRoutingPolicyMaintainer(controller, intervals.systemRoutingPolicyMaintainer);
+ applicationMetaDataGarbageCollector = new ApplicationMetaDataGarbageCollector(controller, intervals.applicationMetaDataGarbageCollector);
+ hostRepairMaintainer = new HostRepairMaintainer(controller, intervals.hostRepairMaintainer);
+ containerImageExpirer = new ContainerImageExpirer(controller, intervals.containerImageExpirer);
}
public Upgrader upgrader() { return upgrader; }
@@ -113,13 +114,72 @@ public class ControllerMaintenance extends AbstractComponent {
}
/** Create one OS upgrader per cloud found in the zone registry of controller */
- private static List<OsUpgrader> osUpgraders(Controller controller) {
+ private static List<OsUpgrader> osUpgraders(Controller controller, Duration interval) {
return controller.zoneRegistry().zones().controllerUpgraded().zones().stream()
.map(ZoneApi::getCloudName)
.distinct()
.sorted()
- .map(cloud -> new OsUpgrader(controller, Duration.ofMinutes(1), cloud))
+ .map(cloud -> new OsUpgrader(controller, interval, cloud))
.collect(Collectors.collectingAndThen(Collectors.toList(), Collections::unmodifiableList));
}
+ private static class Intervals { // Holds the run interval handed to each maintainer, computed once from the given system
+
+ private static final Duration MAX_CD_INTERVAL = Duration.ofHours(1); // Upper bound applied by duration() when system.isCd()
+
+ private final SystemName system; // Assigned first in the constructor because duration() reads it
+
+ private final Duration defaultInterval; // Shared by maintainers without a dedicated interval, e.g. DeploymentExpirer and Upgrader
+ private final Duration outstandingChangeDeployer;
+ private final Duration versionStatusUpdater;
+ private final Duration readyJobsTrigger;
+ private final Duration deploymentMetricsMaintainer;
+ private final Duration applicationOwnershipConfirmer;
+ private final Duration systemUpgrader;
+ private final Duration jobRunner;
+ private final Duration osUpgrader; // Passed to every OsUpgrader created by osUpgraders(...)
+ private final Duration contactInformationMaintainer;
+ private final Duration nameServiceDispatcher;
+ private final Duration costReportMaintainer;
+ private final Duration resourceMeterMaintainer;
+ private final Duration cloudEventReporter;
+ private final Duration resourceTagMaintainer;
+ private final Duration systemRoutingPolicyMaintainer;
+ private final Duration applicationMetaDataGarbageCollector;
+ private final Duration hostRepairMaintainer;
+ private final Duration containerImageExpirer;
+
+ public Intervals(SystemName system) { // Throws NullPointerException if system is null
+ this.system = Objects.requireNonNull(system);
+ this.defaultInterval = duration(system.isCd() || system == SystemName.dev ? 1 : 5, MINUTES); // 1 minute in CD and dev systems, 5 minutes otherwise
+ this.outstandingChangeDeployer = duration(3, MINUTES);
+ this.versionStatusUpdater = duration(3, MINUTES);
+ this.readyJobsTrigger = duration(1, MINUTES);
+ this.deploymentMetricsMaintainer = duration(5, MINUTES);
+ this.applicationOwnershipConfirmer = duration(12, HOURS); // Exceeds MAX_CD_INTERVAL, so becomes 1 hour when system.isCd()
+ this.systemUpgrader = duration(1, MINUTES);
+ this.jobRunner = duration(90, SECONDS);
+ this.osUpgrader = duration(1, MINUTES);
+ this.contactInformationMaintainer = duration(12, HOURS); // Exceeds MAX_CD_INTERVAL, so becomes 1 hour when system.isCd()
+ this.nameServiceDispatcher = duration(10, SECONDS);
+ this.costReportMaintainer = duration(2, HOURS); // Exceeds MAX_CD_INTERVAL, so becomes 1 hour when system.isCd()
+ this.resourceMeterMaintainer = duration(1, MINUTES);
+ this.cloudEventReporter = duration(30, MINUTES);
+ this.resourceTagMaintainer = duration(30, MINUTES);
+ this.systemRoutingPolicyMaintainer = duration(10, MINUTES);
+ this.applicationMetaDataGarbageCollector = duration(12, HOURS); // Exceeds MAX_CD_INTERVAL, so becomes 1 hour when system.isCd()
+ this.hostRepairMaintainer = duration(12, HOURS); // Exceeds MAX_CD_INTERVAL, so becomes 1 hour when system.isCd()
+ this.containerImageExpirer = duration(2, HOURS); // Exceeds MAX_CD_INTERVAL, so becomes 1 hour when system.isCd()
+ }
+
+ private Duration duration(long amount, TemporalUnit unit) { // Returns Duration.of(amount, unit), clamped to MAX_CD_INTERVAL when system.isCd()
+ Duration duration = Duration.of(amount, unit);
+ if (system.isCd() && duration.compareTo(MAX_CD_INTERVAL) > 0) { // Only intervals strictly longer than the cap are shortened
+ return MAX_CD_INTERVAL; // Ensure that maintainer is given enough time to run in CD
+ }
+ return duration;
+ }
+
+ }
+
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
index 5ac89cc54be..2639f366a07 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
@@ -18,7 +18,6 @@ import java.time.Duration;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.Map;
-import java.util.Objects;
import java.util.Optional;
import java.util.Random;
import java.util.function.BinaryOperator;
@@ -42,9 +41,9 @@ public class Upgrader extends ControllerMaintainer {
private final CuratorDb curator;
private final Random random;
- public Upgrader(Controller controller, Duration interval, CuratorDb curator) {
+ public Upgrader(Controller controller, Duration interval) {
super(controller, interval);
- this.curator = Objects.requireNonNull(curator, "curator cannot be null");
+ this.curator = controller.curator();
this.random = new Random(controller.clock().instant().toEpochMilli()); // Seed with clock for test determinism
}
diff --git a/controller-server/src/main/resources/configdefinitions/vespa.hosted.controller.maintenance.config.maintainer.def b/controller-server/src/main/resources/configdefinitions/vespa.hosted.controller.maintenance.config.maintainer.def
deleted file mode 100644
index 7ec8860bef4..00000000000
--- a/controller-server/src/main/resources/configdefinitions/vespa.hosted.controller.maintenance.config.maintainer.def
+++ /dev/null
@@ -1,4 +0,0 @@
-# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-namespace=vespa.hosted.controller.maintenance.config
-
-intervalMinutes int default=30
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
index 7fd02a8e780..b939598c704 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java
@@ -1,7 +1,6 @@
// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller.deployment;
-import com.yahoo.concurrent.maintenance.JobControl;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.test.ManualClock;
import com.yahoo.vespa.hosted.controller.Application;
@@ -19,7 +18,6 @@ import com.yahoo.vespa.hosted.controller.maintenance.JobRunnerTest;
import com.yahoo.vespa.hosted.controller.maintenance.OutstandingChangeDeployer;
import com.yahoo.vespa.hosted.controller.maintenance.ReadyJobsTrigger;
import com.yahoo.vespa.hosted.controller.maintenance.Upgrader;
-import com.yahoo.vespa.hosted.controller.persistence.JobControlFlags;
import java.time.DayOfWeek;
import java.time.Duration;
@@ -77,10 +75,9 @@ public class DeploymentTester {
tester = controllerTester;
jobs = tester.controller().jobController();
cloud = (MockTesterCloud) tester.controller().jobController().cloud();
- var jobControl = new JobControl(new JobControlFlags(tester.controller().curator(), tester.controller().flagSource()));
- runner = new JobRunner(tester.controller(), Duration.ofDays(1),
- JobRunnerTest.inThreadExecutor(), new InternalStepRunner(tester.controller()));
- upgrader = new Upgrader(tester.controller(), maintenanceInterval, tester.curator());
+ runner = new JobRunner(tester.controller(), maintenanceInterval, JobRunnerTest.inThreadExecutor(),
+ new InternalStepRunner(tester.controller()));
+ upgrader = new Upgrader(tester.controller(), maintenanceInterval);
upgrader.setUpgradesPerMinute(1); // Anything that makes it at least one for any maintenance period is fine.
readyJobsTrigger = new ReadyJobsTrigger(tester.controller(), maintenanceInterval);
outstandingChangeDeployer = new OutstandingChangeDeployer(tester.controller(), maintenanceInterval);
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgraderTest.java
index ba6da2a02b8..6370cfedc41 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgraderTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgraderTest.java
@@ -266,7 +266,7 @@ public class SystemUpgraderTest {
convergeServices(SystemApplication.proxy, zone1);
// Confidence is reduced to broken and next zone is not scheduled for upgrade
- new Upgrader(tester.controller(), Duration.ofDays(1), tester.curator())
+ new Upgrader(tester.controller(), Duration.ofDays(1))
.overrideConfidence(version2, VespaVersion.Confidence.broken);
tester.computeVersionStatus();
systemUpgrader.maintain();
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
index eb8e154ee0c..a6acfce877a 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java
@@ -615,8 +615,7 @@ public class UpgraderTest {
tester.controllerTester().upgradeSystem(version);
// Setup our own upgrader as we need to control the interval
- Upgrader upgrader = new Upgrader(tester.controller(), Duration.ofMinutes(10),
- tester.controllerTester().curator());
+ Upgrader upgrader = new Upgrader(tester.controller(), Duration.ofMinutes(10));
upgrader.setUpgradesPerMinute(0.2);
// Setup applications
@@ -1084,8 +1083,7 @@ public class UpgraderTest {
// Throttle upgrades per run
((ManualClock) tester.controller().clock()).setInstant(Instant.ofEpochMilli(1589787109000L)); // Fixed random seed
- Upgrader upgrader = new Upgrader(tester.controller(), Duration.ofMinutes(10),
- tester.controllerTester().curator());
+ Upgrader upgrader = new Upgrader(tester.controller(), Duration.ofMinutes(10));
upgrader.setUpgradesPerMinute(0.1);
// Trigger some upgrades