diff options
author | Martin Polden <mpolden@mpolden.no> | 2020-11-04 13:40:56 +0100 |
---|---|---|
committer | Martin Polden <mpolden@mpolden.no> | 2020-11-04 14:24:42 +0100 |
commit | d618c2b1a6d50fd391a7a69ce39325485be35be6 (patch) | |
tree | cd53a516f1f7abd915e36b05b1aad070020a5cd4 | |
parent | 5c2e8e80d56db972d959b73f3cecf08e7b91f0de (diff) |
Enforce maximum maintenance interval for CD systems
2 files changed, 93 insertions, 36 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java index bc63c235027..0c4164b523f 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java @@ -3,17 +3,23 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.google.inject.Inject; import com.yahoo.component.AbstractComponent; +import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.zone.ZoneApi; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.hosted.controller.Controller; -import com.yahoo.vespa.hosted.controller.maintenance.config.MaintainerConfig; import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; import java.time.Duration; +import java.time.temporal.TemporalUnit; import java.util.Collections; import java.util.List; +import java.util.Objects; import java.util.stream.Collectors; +import static java.time.temporal.ChronoUnit.HOURS; +import static java.time.temporal.ChronoUnit.MINUTES; +import static java.time.temporal.ChronoUnit.SECONDS; + /** * Maintenance jobs of the controller. * Each maintenance job is a singleton instance of its implementing class, created and owned by this, @@ -48,38 +54,34 @@ public class ControllerMaintenance extends AbstractComponent { private final HostRepairMaintainer hostRepairMaintainer; private final ContainerImageExpirer containerImageExpirer; - @Inject @SuppressWarnings("unused") // instantiated by Dependency Injection - public ControllerMaintenance(MaintainerConfig maintainerConfig, - Controller controller, - CuratorDb curator, - Metric metric) { - Duration maintenanceInterval = Duration.ofMinutes(maintainerConfig.intervalMinutes()); - deploymentExpirer = new DeploymentExpirer(controller, maintenanceInterval); - deploymentIssueReporter = new DeploymentIssueReporter(controller, controller.serviceRegistry().deploymentIssues(), maintenanceInterval); + public ControllerMaintenance(Controller controller, CuratorDb curator, Metric metric) { + Intervals intervals = new Intervals(controller.system()); + deploymentExpirer = new DeploymentExpirer(controller, intervals.defaultInterval); + deploymentIssueReporter = new DeploymentIssueReporter(controller, controller.serviceRegistry().deploymentIssues(), intervals.defaultInterval); metricsReporter = new MetricsReporter(controller, metric); - outstandingChangeDeployer = new OutstandingChangeDeployer(controller, Duration.ofMinutes(3)); - versionStatusUpdater = new VersionStatusUpdater(controller, Duration.ofMinutes(3)); - upgrader = new Upgrader(controller, maintenanceInterval, curator); - readyJobsTrigger = new ReadyJobsTrigger(controller, Duration.ofMinutes(1)); - deploymentMetricsMaintainer = new DeploymentMetricsMaintainer(controller, Duration.ofMinutes(5)); - applicationOwnershipConfirmer = new ApplicationOwnershipConfirmer(controller, Duration.ofHours(12), controller.serviceRegistry().ownershipIssues()); - systemUpgrader = new SystemUpgrader(controller, Duration.ofMinutes(1)); - jobRunner = new JobRunner(controller, Duration.ofSeconds(90)); - osUpgraders = osUpgraders(controller); - osVersionStatusUpdater = new OsVersionStatusUpdater(controller, maintenanceInterval); - contactInformationMaintainer = new ContactInformationMaintainer(controller, Duration.ofHours(12)); - nameServiceDispatcher = new NameServiceDispatcher(controller, Duration.ofSeconds(10)); - costReportMaintainer = new CostReportMaintainer(controller, Duration.ofHours(2), controller.serviceRegistry().costReportConsumer()); - resourceMeterMaintainer = new ResourceMeterMaintainer(controller, Duration.ofMinutes(1), metric, controller.serviceRegistry().meteringService()); - cloudEventReporter = new CloudEventReporter(controller, Duration.ofMinutes(30), metric); - rotationStatusUpdater = new RotationStatusUpdater(controller, maintenanceInterval); - resourceTagMaintainer = new ResourceTagMaintainer(controller, Duration.ofMinutes(30), controller.serviceRegistry().resourceTagger()); - systemRoutingPolicyMaintainer = new SystemRoutingPolicyMaintainer(controller, Duration.ofMinutes(10)); - applicationMetaDataGarbageCollector = new ApplicationMetaDataGarbageCollector(controller, Duration.ofHours(12)); - hostRepairMaintainer = new HostRepairMaintainer(controller, Duration.ofHours(12)); - containerImageExpirer = new ContainerImageExpirer(controller, Duration.ofHours(2)); + outstandingChangeDeployer = new OutstandingChangeDeployer(controller, intervals.outstandingChangeDeployer); + versionStatusUpdater = new VersionStatusUpdater(controller, intervals.versionStatusUpdater); + upgrader = new Upgrader(controller, intervals.defaultInterval, curator); + readyJobsTrigger = new ReadyJobsTrigger(controller, intervals.readyJobsTrigger); + deploymentMetricsMaintainer = new DeploymentMetricsMaintainer(controller, intervals.deploymentMetricsMaintainer); + applicationOwnershipConfirmer = new ApplicationOwnershipConfirmer(controller, intervals.applicationOwnershipConfirmer, controller.serviceRegistry().ownershipIssues()); + systemUpgrader = new SystemUpgrader(controller, intervals.systemUpgrader); + jobRunner = new JobRunner(controller, intervals.jobRunner); + osUpgraders = osUpgraders(controller, intervals.osUpgrader); + osVersionStatusUpdater = new OsVersionStatusUpdater(controller, intervals.defaultInterval); + contactInformationMaintainer = new ContactInformationMaintainer(controller, intervals.contactInformationMaintainer); + nameServiceDispatcher = new NameServiceDispatcher(controller, intervals.nameServiceDispatcher); + costReportMaintainer = new CostReportMaintainer(controller, intervals.costReportMaintainer, controller.serviceRegistry().costReportConsumer()); + resourceMeterMaintainer = new ResourceMeterMaintainer(controller, intervals.resourceMeterMaintainer, metric, controller.serviceRegistry().meteringService()); + cloudEventReporter = new CloudEventReporter(controller, intervals.cloudEventReporter, metric); + rotationStatusUpdater = new RotationStatusUpdater(controller, intervals.defaultInterval); + resourceTagMaintainer = new ResourceTagMaintainer(controller, intervals.resourceTagMaintainer, controller.serviceRegistry().resourceTagger()); + systemRoutingPolicyMaintainer = new SystemRoutingPolicyMaintainer(controller, intervals.systemRoutingPolicyMaintainer); + applicationMetaDataGarbageCollector = new ApplicationMetaDataGarbageCollector(controller, intervals.applicationMetaDataGarbageCollector); + hostRepairMaintainer = new HostRepairMaintainer(controller, intervals.hostRepairMaintainer); + containerImageExpirer = new ContainerImageExpirer(controller, intervals.containerImageExpirer); } public Upgrader upgrader() { return upgrader; } @@ -113,13 +115,72 @@ public class ControllerMaintenance extends AbstractComponent { } /** Create one OS upgrader per cloud found in the zone registry of controller */ - private static List<OsUpgrader> osUpgraders(Controller controller) { + private static List<OsUpgrader> osUpgraders(Controller controller, Duration interval) { return controller.zoneRegistry().zones().controllerUpgraded().zones().stream() .map(ZoneApi::getCloudName) .distinct() .sorted() - .map(cloud -> new OsUpgrader(controller, Duration.ofMinutes(1), cloud)) + .map(cloud -> new OsUpgrader(controller, interval, cloud)) .collect(Collectors.collectingAndThen(Collectors.toList(), Collections::unmodifiableList)); } + private static class Intervals { + + private static final Duration MAX_CD_INTERVAL = Duration.ofHours(1); + + private final SystemName system; + + private final Duration defaultInterval; + private final Duration outstandingChangeDeployer; + private final Duration versionStatusUpdater; + private final Duration readyJobsTrigger; + private final Duration deploymentMetricsMaintainer; + private final Duration applicationOwnershipConfirmer; + private final Duration systemUpgrader; + private final Duration jobRunner; + private final Duration osUpgrader; + private final Duration contactInformationMaintainer; + private final Duration nameServiceDispatcher; + private final Duration costReportMaintainer; + private final Duration resourceMeterMaintainer; + private final Duration cloudEventReporter; + private final Duration resourceTagMaintainer; + private final Duration systemRoutingPolicyMaintainer; + private final Duration applicationMetaDataGarbageCollector; + private final Duration hostRepairMaintainer; + private final Duration containerImageExpirer; + + public Intervals(SystemName system) { + this.system = Objects.requireNonNull(system); + this.defaultInterval = duration(system.isCd() || system == SystemName.dev ? 1 : 5, MINUTES); + this.outstandingChangeDeployer = duration(3, MINUTES); + this.versionStatusUpdater = duration(3, MINUTES); + this.readyJobsTrigger = duration(1, MINUTES); + this.deploymentMetricsMaintainer = duration(5, MINUTES); + this.applicationOwnershipConfirmer = duration(12, HOURS); + this.systemUpgrader = duration(1, MINUTES); + this.jobRunner = duration(90, SECONDS); + this.osUpgrader = duration(1, MINUTES); + this.contactInformationMaintainer = duration(12, HOURS); + this.nameServiceDispatcher = duration(10, SECONDS); + this.costReportMaintainer = duration(2, HOURS); + this.resourceMeterMaintainer = duration(1, MINUTES); + this.cloudEventReporter = duration(30, MINUTES); + this.resourceTagMaintainer = duration(30, MINUTES); + this.systemRoutingPolicyMaintainer = duration(10, MINUTES); + this.applicationMetaDataGarbageCollector = duration(12, HOURS); + this.hostRepairMaintainer = duration(12, HOURS); + this.containerImageExpirer = duration(2, HOURS); + } + + private Duration duration(long amount, TemporalUnit unit) { + Duration duration = Duration.of(amount, unit); + if (system.isCd() && duration.compareTo(MAX_CD_INTERVAL) > 0) { + return MAX_CD_INTERVAL; // Ensure that maintainer is given enough time to run in CD + } + return duration; + } + + } + } diff --git a/controller-server/src/main/resources/configdefinitions/vespa.hosted.controller.maintenance.config.maintainer.def b/controller-server/src/main/resources/configdefinitions/vespa.hosted.controller.maintenance.config.maintainer.def deleted file mode 100644 index 7ec8860bef4..00000000000 --- a/controller-server/src/main/resources/configdefinitions/vespa.hosted.controller.maintenance.config.maintainer.def +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -namespace=vespa.hosted.controller.maintenance.config - -intervalMinutes int default=30 |