diff options
author | Jon Marius Venstad <jonmv@users.noreply.github.com> | 2020-11-04 18:15:06 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-11-04 18:15:06 +0100 |
commit | 2c61049b176fbbea20714973bd7a90aaa0bb91de (patch) | |
tree | 34432c45d2ec3779da3091040fedfe25d616660b | |
parent | 26487c3acc487947b367ad85f94b19487f101ff5 (diff) | |
parent | 1e16c0cb4119ae2c9f0bb3f490993ea9f26e0bbe (diff) |
Merge pull request #15177 from vespa-engine/mpolden/enforce-cd-interval
Enforce maximum maintenance interval for CD systems
6 files changed, 101 insertions, 51 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java index bc63c235027..82f37c9bc93 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java @@ -3,17 +3,22 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.google.inject.Inject; import com.yahoo.component.AbstractComponent; +import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.zone.ZoneApi; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.hosted.controller.Controller; -import com.yahoo.vespa.hosted.controller.maintenance.config.MaintainerConfig; -import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; import java.time.Duration; +import java.time.temporal.TemporalUnit; import java.util.Collections; import java.util.List; +import java.util.Objects; import java.util.stream.Collectors; +import static java.time.temporal.ChronoUnit.HOURS; +import static java.time.temporal.ChronoUnit.MINUTES; +import static java.time.temporal.ChronoUnit.SECONDS; + /** * Maintenance jobs of the controller. * Each maintenance job is a singleton instance of its implementing class, created and owned by this, @@ -48,38 +53,34 @@ public class ControllerMaintenance extends AbstractComponent { private final HostRepairMaintainer hostRepairMaintainer; private final ContainerImageExpirer containerImageExpirer; - @Inject @SuppressWarnings("unused") // instantiated by Dependency Injection - public ControllerMaintenance(MaintainerConfig maintainerConfig, - Controller controller, - CuratorDb curator, - Metric metric) { - Duration maintenanceInterval = Duration.ofMinutes(maintainerConfig.intervalMinutes()); - deploymentExpirer = new DeploymentExpirer(controller, maintenanceInterval); - deploymentIssueReporter = new DeploymentIssueReporter(controller, controller.serviceRegistry().deploymentIssues(), maintenanceInterval); + public ControllerMaintenance(Controller controller, Metric metric) { + Intervals intervals = new Intervals(controller.system()); + deploymentExpirer = new DeploymentExpirer(controller, intervals.defaultInterval); + deploymentIssueReporter = new DeploymentIssueReporter(controller, controller.serviceRegistry().deploymentIssues(), intervals.defaultInterval); metricsReporter = new MetricsReporter(controller, metric); - outstandingChangeDeployer = new OutstandingChangeDeployer(controller, Duration.ofMinutes(3)); - versionStatusUpdater = new VersionStatusUpdater(controller, Duration.ofMinutes(3)); - upgrader = new Upgrader(controller, maintenanceInterval, curator); - readyJobsTrigger = new ReadyJobsTrigger(controller, Duration.ofMinutes(1)); - deploymentMetricsMaintainer = new DeploymentMetricsMaintainer(controller, Duration.ofMinutes(5)); - applicationOwnershipConfirmer = new ApplicationOwnershipConfirmer(controller, Duration.ofHours(12), controller.serviceRegistry().ownershipIssues()); - systemUpgrader = new SystemUpgrader(controller, Duration.ofMinutes(1)); - jobRunner = new JobRunner(controller, Duration.ofSeconds(90)); - osUpgraders = osUpgraders(controller); - osVersionStatusUpdater = new OsVersionStatusUpdater(controller, maintenanceInterval); - contactInformationMaintainer = new ContactInformationMaintainer(controller, Duration.ofHours(12)); - nameServiceDispatcher = new NameServiceDispatcher(controller, Duration.ofSeconds(10)); - costReportMaintainer = new CostReportMaintainer(controller, Duration.ofHours(2), controller.serviceRegistry().costReportConsumer()); - resourceMeterMaintainer = new ResourceMeterMaintainer(controller, Duration.ofMinutes(1), metric, controller.serviceRegistry().meteringService()); - cloudEventReporter = new CloudEventReporter(controller, Duration.ofMinutes(30), metric); - rotationStatusUpdater = new RotationStatusUpdater(controller, maintenanceInterval); - resourceTagMaintainer = new ResourceTagMaintainer(controller, Duration.ofMinutes(30), controller.serviceRegistry().resourceTagger()); - systemRoutingPolicyMaintainer = new SystemRoutingPolicyMaintainer(controller, Duration.ofMinutes(10)); - applicationMetaDataGarbageCollector = new ApplicationMetaDataGarbageCollector(controller, Duration.ofHours(12)); - hostRepairMaintainer = new HostRepairMaintainer(controller, Duration.ofHours(12)); - containerImageExpirer = new ContainerImageExpirer(controller, Duration.ofHours(2)); + outstandingChangeDeployer = new OutstandingChangeDeployer(controller, intervals.outstandingChangeDeployer); + versionStatusUpdater = new VersionStatusUpdater(controller, intervals.versionStatusUpdater); + upgrader = new Upgrader(controller, intervals.defaultInterval); + readyJobsTrigger = new ReadyJobsTrigger(controller, intervals.readyJobsTrigger); + deploymentMetricsMaintainer = new DeploymentMetricsMaintainer(controller, intervals.deploymentMetricsMaintainer); + applicationOwnershipConfirmer = new ApplicationOwnershipConfirmer(controller, intervals.applicationOwnershipConfirmer, controller.serviceRegistry().ownershipIssues()); + systemUpgrader = new SystemUpgrader(controller, intervals.systemUpgrader); + jobRunner = new JobRunner(controller, intervals.jobRunner); + osUpgraders = osUpgraders(controller, intervals.osUpgrader); + osVersionStatusUpdater = new OsVersionStatusUpdater(controller, intervals.defaultInterval); + contactInformationMaintainer = new ContactInformationMaintainer(controller, intervals.contactInformationMaintainer); + nameServiceDispatcher = new NameServiceDispatcher(controller, intervals.nameServiceDispatcher); + costReportMaintainer = new CostReportMaintainer(controller, intervals.costReportMaintainer, controller.serviceRegistry().costReportConsumer()); + resourceMeterMaintainer = new ResourceMeterMaintainer(controller, intervals.resourceMeterMaintainer, metric, controller.serviceRegistry().meteringService()); + cloudEventReporter = new CloudEventReporter(controller, intervals.cloudEventReporter, metric); + rotationStatusUpdater = new RotationStatusUpdater(controller, intervals.defaultInterval); + resourceTagMaintainer = new ResourceTagMaintainer(controller, intervals.resourceTagMaintainer, controller.serviceRegistry().resourceTagger()); + systemRoutingPolicyMaintainer = new SystemRoutingPolicyMaintainer(controller, intervals.systemRoutingPolicyMaintainer); + applicationMetaDataGarbageCollector = new ApplicationMetaDataGarbageCollector(controller, intervals.applicationMetaDataGarbageCollector); + hostRepairMaintainer = new HostRepairMaintainer(controller, intervals.hostRepairMaintainer); + containerImageExpirer = new ContainerImageExpirer(controller, intervals.containerImageExpirer); } public Upgrader upgrader() { return upgrader; } @@ -113,13 +114,72 @@ public class ControllerMaintenance extends AbstractComponent { } /** Create one OS upgrader per cloud found in the zone registry of controller */ - private static List<OsUpgrader> osUpgraders(Controller controller) { + private static List<OsUpgrader> osUpgraders(Controller controller, Duration interval) { return controller.zoneRegistry().zones().controllerUpgraded().zones().stream() .map(ZoneApi::getCloudName) .distinct() .sorted() - .map(cloud -> new OsUpgrader(controller, Duration.ofMinutes(1), cloud)) + .map(cloud -> new OsUpgrader(controller, interval, cloud)) .collect(Collectors.collectingAndThen(Collectors.toList(), Collections::unmodifiableList)); } + private static class Intervals { + + private static final Duration MAX_CD_INTERVAL = Duration.ofHours(1); + + private final SystemName system; + + private final Duration defaultInterval; + private final Duration outstandingChangeDeployer; + private final Duration versionStatusUpdater; + private final Duration readyJobsTrigger; + private final Duration deploymentMetricsMaintainer; + private final Duration applicationOwnershipConfirmer; + private final Duration systemUpgrader; + private final Duration jobRunner; + private final Duration osUpgrader; + private final Duration contactInformationMaintainer; + private final Duration nameServiceDispatcher; + private final Duration costReportMaintainer; + private final Duration resourceMeterMaintainer; + private final Duration cloudEventReporter; + private final Duration resourceTagMaintainer; + private final Duration systemRoutingPolicyMaintainer; + private final Duration applicationMetaDataGarbageCollector; + private final Duration hostRepairMaintainer; + private final Duration containerImageExpirer; + + public Intervals(SystemName system) { + this.system = Objects.requireNonNull(system); + this.defaultInterval = duration(system.isCd() || system == SystemName.dev ? 1 : 5, MINUTES); + this.outstandingChangeDeployer = duration(3, MINUTES); + this.versionStatusUpdater = duration(3, MINUTES); + this.readyJobsTrigger = duration(1, MINUTES); + this.deploymentMetricsMaintainer = duration(5, MINUTES); + this.applicationOwnershipConfirmer = duration(12, HOURS); + this.systemUpgrader = duration(1, MINUTES); + this.jobRunner = duration(90, SECONDS); + this.osUpgrader = duration(1, MINUTES); + this.contactInformationMaintainer = duration(12, HOURS); + this.nameServiceDispatcher = duration(10, SECONDS); + this.costReportMaintainer = duration(2, HOURS); + this.resourceMeterMaintainer = duration(1, MINUTES); + this.cloudEventReporter = duration(30, MINUTES); + this.resourceTagMaintainer = duration(30, MINUTES); + this.systemRoutingPolicyMaintainer = duration(10, MINUTES); + this.applicationMetaDataGarbageCollector = duration(12, HOURS); + this.hostRepairMaintainer = duration(12, HOURS); + this.containerImageExpirer = duration(2, HOURS); + } + + private Duration duration(long amount, TemporalUnit unit) { + Duration duration = Duration.of(amount, unit); + if (system.isCd() && duration.compareTo(MAX_CD_INTERVAL) > 0) { + return MAX_CD_INTERVAL; // Ensure that maintainer is given enough time to run in CD + } + return duration; + } + + } + } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java index 5ac89cc54be..2639f366a07 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java @@ -18,7 +18,6 @@ import java.time.Duration; import java.util.Collection; import java.util.LinkedHashMap; import java.util.Map; -import java.util.Objects; import java.util.Optional; import java.util.Random; import java.util.function.BinaryOperator; @@ -42,9 +41,9 @@ public class Upgrader extends ControllerMaintainer { private final CuratorDb curator; private final Random random; - public Upgrader(Controller controller, Duration interval, CuratorDb curator) { + public Upgrader(Controller controller, Duration interval) { super(controller, interval); - this.curator = Objects.requireNonNull(curator, "curator cannot be null"); + this.curator = controller.curator(); this.random = new Random(controller.clock().instant().toEpochMilli()); // Seed with clock for test determinism } diff --git a/controller-server/src/main/resources/configdefinitions/vespa.hosted.controller.maintenance.config.maintainer.def b/controller-server/src/main/resources/configdefinitions/vespa.hosted.controller.maintenance.config.maintainer.def deleted file mode 100644 index 7ec8860bef4..00000000000 --- a/controller-server/src/main/resources/configdefinitions/vespa.hosted.controller.maintenance.config.maintainer.def +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -namespace=vespa.hosted.controller.maintenance.config - -intervalMinutes int default=30 diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java index 7fd02a8e780..b939598c704 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentTester.java @@ -1,7 +1,6 @@ // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.deployment; -import com.yahoo.concurrent.maintenance.JobControl; import com.yahoo.config.provision.ApplicationId; import com.yahoo.test.ManualClock; import com.yahoo.vespa.hosted.controller.Application; @@ -19,7 +18,6 @@ import com.yahoo.vespa.hosted.controller.maintenance.JobRunnerTest; import com.yahoo.vespa.hosted.controller.maintenance.OutstandingChangeDeployer; import com.yahoo.vespa.hosted.controller.maintenance.ReadyJobsTrigger; import com.yahoo.vespa.hosted.controller.maintenance.Upgrader; -import com.yahoo.vespa.hosted.controller.persistence.JobControlFlags; import java.time.DayOfWeek; import java.time.Duration; @@ -77,10 +75,9 @@ public class DeploymentTester { tester = controllerTester; jobs = tester.controller().jobController(); cloud = (MockTesterCloud) tester.controller().jobController().cloud(); - var jobControl = new JobControl(new JobControlFlags(tester.controller().curator(), tester.controller().flagSource())); - runner = new JobRunner(tester.controller(), Duration.ofDays(1), - JobRunnerTest.inThreadExecutor(), new InternalStepRunner(tester.controller())); - upgrader = new Upgrader(tester.controller(), maintenanceInterval, tester.curator()); + runner = new JobRunner(tester.controller(), maintenanceInterval, JobRunnerTest.inThreadExecutor(), + new InternalStepRunner(tester.controller())); + upgrader = new Upgrader(tester.controller(), maintenanceInterval); upgrader.setUpgradesPerMinute(1); // Anything that makes it at least one for any maintenance period is fine. readyJobsTrigger = new ReadyJobsTrigger(tester.controller(), maintenanceInterval); outstandingChangeDeployer = new OutstandingChangeDeployer(tester.controller(), maintenanceInterval); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgraderTest.java index ba6da2a02b8..6370cfedc41 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgraderTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/SystemUpgraderTest.java @@ -266,7 +266,7 @@ public class SystemUpgraderTest { convergeServices(SystemApplication.proxy, zone1); // Confidence is reduced to broken and next zone is not scheduled for upgrade - new Upgrader(tester.controller(), Duration.ofDays(1), tester.curator()) + new Upgrader(tester.controller(), Duration.ofDays(1)) .overrideConfidence(version2, VespaVersion.Confidence.broken); tester.computeVersionStatus(); systemUpgrader.maintain(); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java index eb8e154ee0c..a6acfce877a 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/UpgraderTest.java @@ -615,8 +615,7 @@ public class UpgraderTest { tester.controllerTester().upgradeSystem(version); // Setup our own upgrader as we need to control the interval - Upgrader upgrader = new Upgrader(tester.controller(), Duration.ofMinutes(10), - tester.controllerTester().curator()); + Upgrader upgrader = new Upgrader(tester.controller(), Duration.ofMinutes(10)); upgrader.setUpgradesPerMinute(0.2); // Setup applications @@ -1084,8 +1083,7 @@ public class UpgraderTest { // Throttle upgrades per run ((ManualClock) tester.controller().clock()).setInstant(Instant.ofEpochMilli(1589787109000L)); // Fixed random seed - Upgrader upgrader = new Upgrader(tester.controller(), Duration.ofMinutes(10), - tester.controllerTester().curator()); + Upgrader upgrader = new Upgrader(tester.controller(), Duration.ofMinutes(10)); upgrader.setUpgradesPerMinute(0.1); // Trigger some upgrades |