diff options
7 files changed, 77 insertions, 94 deletions
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintenance.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintenance.java index 21462bc4fec..fa82744de90 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintenance.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintenance.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.config.server.maintenance; import com.google.inject.Inject; import com.yahoo.cloud.config.ConfigserverConfig; import com.yahoo.component.AbstractComponent; +import com.yahoo.concurrent.maintenance.Maintainer; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.config.server.ApplicationRepository; import com.yahoo.vespa.config.server.ConfigServerBootstrap; @@ -13,6 +14,8 @@ import com.yahoo.vespa.flags.FlagSource; import java.time.Clock; import java.time.Duration; +import java.util.List; +import java.util.concurrent.CopyOnWriteArrayList; /** * Maintenance jobs of the config server. @@ -24,11 +27,7 @@ import java.time.Duration; */ public class ConfigServerMaintenance extends AbstractComponent { - private final TenantsMaintainer tenantsMaintainer; - private final FileDistributionMaintainer fileDistributionMaintainer; - private final SessionsMaintainer sessionsMaintainer; - private final ApplicationPackageMaintainer applicationPackageMaintainer; - private final ReindexingMaintainer reindexingMaintainer; + private final List<Maintainer> maintainers = new CopyOnWriteArrayList<>(); @Inject public ConfigServerMaintenance(ConfigServerBootstrap configServerBootstrap, @@ -38,20 +37,17 @@ public class ConfigServerMaintenance extends AbstractComponent { FlagSource flagSource, ConfigConvergenceChecker convergence) { DefaultTimes defaults = new DefaultTimes(configserverConfig); - tenantsMaintainer = new TenantsMaintainer(applicationRepository, curator, flagSource, defaults.defaultInterval, Clock.systemUTC()); - fileDistributionMaintainer = new FileDistributionMaintainer(applicationRepository, curator, defaults.defaultInterval, flagSource); - sessionsMaintainer = new SessionsMaintainer(applicationRepository, curator, Duration.ofSeconds(30), flagSource); - applicationPackageMaintainer = new ApplicationPackageMaintainer(applicationRepository, curator, Duration.ofSeconds(30), flagSource); - reindexingMaintainer = new ReindexingMaintainer(applicationRepository, curator, flagSource, Duration.ofMinutes(3), convergence, Clock.systemUTC()); + maintainers.add(new TenantsMaintainer(applicationRepository, curator, flagSource, defaults.defaultInterval, Clock.systemUTC())); + maintainers.add(new FileDistributionMaintainer(applicationRepository, curator, defaults.defaultInterval, flagSource)); + maintainers.add(new SessionsMaintainer(applicationRepository, curator, Duration.ofSeconds(30), flagSource)); + maintainers.add(new ApplicationPackageMaintainer(applicationRepository, curator, Duration.ofSeconds(30), flagSource)); + maintainers.add(new ReindexingMaintainer(applicationRepository, curator, flagSource, Duration.ofMinutes(3), convergence, Clock.systemUTC())); } @Override public void deconstruct() { - fileDistributionMaintainer.close(); - sessionsMaintainer.close(); - applicationPackageMaintainer.close(); - tenantsMaintainer.close(); - reindexingMaintainer.close(); + maintainers.forEach(Maintainer::shutdown); + maintainers.forEach(Maintainer::close); } /* diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java index 06e489c7f58..6266e8da57e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.google.inject.Inject; import com.yahoo.component.AbstractComponent; +import com.yahoo.concurrent.maintenance.Maintainer; import com.yahoo.config.provision.SystemName; import com.yahoo.config.provision.zone.ZoneApi; import com.yahoo.jdisc.Metric; @@ -14,6 +15,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Objects; +import java.util.concurrent.CopyOnWriteArrayList; import java.util.stream.Collectors; import static java.time.temporal.ChronoUnit.HOURS; @@ -30,7 +32,7 @@ import static java.time.temporal.ChronoUnit.SECONDS; public class ControllerMaintenance extends AbstractComponent { private final Upgrader upgrader; - private final List<ControllerMaintainer> maintainers = new ArrayList<>(); + private final List<Maintainer> maintainers = new CopyOnWriteArrayList<>(); @Inject @SuppressWarnings("unused") // instantiated by Dependency Injection @@ -68,7 +70,8 @@ public class ControllerMaintenance extends AbstractComponent { @Override public void deconstruct() { - maintainers.forEach(ControllerMaintainer::close); + maintainers.forEach(Maintainer::shutdown); + maintainers.forEach(Maintainer::close); } /** Create one OS upgrader per cloud found in the zone registry of controller */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java index f08a23ab8ed..8823959fb9b 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java @@ -56,9 +56,14 @@ public class JobRunner extends ControllerMaintainer { } @Override + public void shutdown() { + super.shutdown(); + executors.shutdown(); + } + + @Override public void close() { super.close(); - executors.shutdown(); try { if ( ! executors.awaitTermination(10, TimeUnit.SECONDS)) { executors.shutdownNow(); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java index 11d3962a7d6..96e9f087a67 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java @@ -65,7 +65,7 @@ public class ReindexingTriggerer extends ControllerMaintainer { static Duration offset(ApplicationId id, ZoneId zone) { double relativeOffset = ((id.serializedForm() + zone.value()).hashCode() & (-1 >>> 1)) / (double) (-1 >>> 1); - return Duration.ofMillis((long) (reindexingPeriod.toMillis() * (relativeOffset))); + return Duration.ofMillis((long) (reindexingPeriod.toMillis() * relativeOffset)); } static boolean reindexingIsReady(ApplicationReindexing reindexing, Instant now) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java index 5df45bbc1b1..2c3785114de 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java @@ -95,9 +95,14 @@ public abstract class ApplicationMaintainer extends NodeRepositoryMaintainer { } @Override + public void shutdown() { + super.shutdown(); + this.deploymentExecutor.shutdownNow(); + } + + @Override public void close() { super.close(); - this.deploymentExecutor.shutdownNow(); try { // Give deployments in progress some time to complete this.deploymentExecutor.awaitTermination(1, TimeUnit.MINUTES); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index ad835901ebf..b6e8bc145c5 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.google.inject.Inject; import com.yahoo.component.AbstractComponent; +import com.yahoo.concurrent.maintenance.Maintainer; import com.yahoo.config.provision.Deployer; import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.HostLivenessTracker; @@ -18,7 +19,8 @@ import com.yahoo.vespa.orchestrator.Orchestrator; import com.yahoo.vespa.service.monitor.ServiceMonitor; import java.time.Duration; -import java.util.Optional; +import java.util.List; +import java.util.concurrent.CopyOnWriteArrayList; /** * A component which sets up all the node repo maintenance jobs. @@ -27,28 +29,7 @@ import java.util.Optional; */ public class NodeRepositoryMaintenance extends AbstractComponent { - private final NodeFailer nodeFailer; - private final NodeHealthTracker nodeHealthTracker; - private final PeriodicApplicationMaintainer periodicApplicationMaintainer; - private final OperatorChangeApplicationMaintainer operatorChangeApplicationMaintainer; - private final ReservationExpirer reservationExpirer; - private final InactiveExpirer inactiveExpirer; - private final RetiredExpirer retiredExpirer; - private final FailedExpirer failedExpirer; - private final DirtyExpirer dirtyExpirer; - private final ProvisionedExpirer provisionedExpirer; - private final NodeRebooter nodeRebooter; - private final MetricsReporter metricsReporter; - private final InfrastructureProvisioner infrastructureProvisioner; - private final Optional<LoadBalancerExpirer> loadBalancerExpirer; - private final Optional<DynamicProvisioningMaintainer> dynamicProvisioningMaintainer; - private final SpareCapacityMaintainer spareCapacityMaintainer; - private final OsUpgradeActivator osUpgradeActivator; - private final Rebalancer rebalancer; - private final NodeMetricsDbMaintainer nodeMetricsDbMaintainer; - private final AutoscalingMaintainer autoscalingMaintainer; - private final ScalingSuggestionsMaintainer scalingSuggestionsMaintainer; - private final SwitchRebalancer switchRebalancer; + private final List<Maintainer> maintainers = new CopyOnWriteArrayList<>(); @SuppressWarnings("unused") @Inject @@ -59,60 +40,45 @@ public class NodeRepositoryMaintenance extends AbstractComponent { MetricsFetcher metricsFetcher, MetricsDb metricsDb) { DefaultTimes defaults = new DefaultTimes(zone, deployer); - nodeFailer = new NodeFailer(deployer, nodeRepository, defaults.failGrace, defaults.nodeFailerInterval, orchestrator, defaults.throttlePolicy, metric); - nodeHealthTracker = new NodeHealthTracker(hostLivenessTracker, serviceMonitor, nodeRepository, defaults.nodeFailureStatusUpdateInterval, metric); - periodicApplicationMaintainer = new PeriodicApplicationMaintainer(deployer, metric, nodeRepository, - defaults.redeployMaintainerInterval, defaults.periodicRedeployInterval, flagSource); - operatorChangeApplicationMaintainer = new OperatorChangeApplicationMaintainer(deployer, metric, nodeRepository, defaults.operatorChangeRedeployInterval); - reservationExpirer = new ReservationExpirer(nodeRepository, defaults.reservationExpiry, metric); - retiredExpirer = new RetiredExpirer(nodeRepository, orchestrator, deployer, metric, defaults.retiredInterval, defaults.retiredExpiry); - inactiveExpirer = new InactiveExpirer(nodeRepository, defaults.inactiveExpiry, metric); - failedExpirer = new FailedExpirer(nodeRepository, zone, defaults.failedExpirerInterval, metric); - dirtyExpirer = new DirtyExpirer(nodeRepository, defaults.dirtyExpiry, metric); - provisionedExpirer = new ProvisionedExpirer(nodeRepository, defaults.provisionedExpiry, metric); - nodeRebooter = new NodeRebooter(nodeRepository, flagSource, metric); - metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, serviceMonitor, periodicApplicationMaintainer::pendingDeployments, defaults.metricsInterval); - infrastructureProvisioner = new InfrastructureProvisioner(nodeRepository, infraDeployer, defaults.infrastructureProvisionInterval, metric); - loadBalancerExpirer = provisionServiceProvider.getLoadBalancerService(nodeRepository).map(lbService -> - new LoadBalancerExpirer(nodeRepository, defaults.loadBalancerExpirerInterval, lbService, metric)); - dynamicProvisioningMaintainer = provisionServiceProvider.getHostProvisioner().map(hostProvisioner -> - new DynamicProvisioningMaintainer(nodeRepository, defaults.dynamicProvisionerInterval, hostProvisioner, flagSource, metric)); - spareCapacityMaintainer = new SpareCapacityMaintainer(deployer, nodeRepository, metric, defaults.spareCapacityMaintenanceInterval); - osUpgradeActivator = new OsUpgradeActivator(nodeRepository, defaults.osUpgradeActivatorInterval, metric); - rebalancer = new Rebalancer(deployer, nodeRepository, metric, defaults.rebalancerInterval); - nodeMetricsDbMaintainer = new NodeMetricsDbMaintainer(nodeRepository, metricsFetcher, metricsDb, defaults.nodeMetricsCollectionInterval, metric); - autoscalingMaintainer = new AutoscalingMaintainer(nodeRepository, metricsDb, deployer, metric, defaults.autoscalingInterval); - scalingSuggestionsMaintainer = new ScalingSuggestionsMaintainer(nodeRepository, metricsDb, defaults.scalingSuggestionsInterval, metric); - switchRebalancer = new SwitchRebalancer(nodeRepository, defaults.switchRebalancerInterval, metric, deployer); - + PeriodicApplicationMaintainer periodicApplicationMaintainer = new PeriodicApplicationMaintainer(deployer, metric, nodeRepository, defaults.redeployMaintainerInterval, + defaults.periodicRedeployInterval, flagSource); + InfrastructureProvisioner infrastructureProvisioner = new InfrastructureProvisioner(nodeRepository, infraDeployer, defaults.infrastructureProvisionInterval, metric); + maintainers.add(periodicApplicationMaintainer); + maintainers.add(infrastructureProvisioner); + + maintainers.add(new NodeFailer(deployer, nodeRepository, defaults.failGrace, defaults.nodeFailerInterval, orchestrator, defaults.throttlePolicy, metric)); + maintainers.add(new NodeHealthTracker(hostLivenessTracker, serviceMonitor, nodeRepository, defaults.nodeFailureStatusUpdateInterval, metric)); + maintainers.add(new OperatorChangeApplicationMaintainer(deployer, metric, nodeRepository, defaults.operatorChangeRedeployInterval)); + maintainers.add(new ReservationExpirer(nodeRepository, defaults.reservationExpiry, metric)); + maintainers.add(new RetiredExpirer(nodeRepository, orchestrator, deployer, metric, defaults.retiredInterval, defaults.retiredExpiry)); + maintainers.add(new InactiveExpirer(nodeRepository, defaults.inactiveExpiry, metric)); + maintainers.add(new FailedExpirer(nodeRepository, zone, defaults.failedExpirerInterval, metric)); + maintainers.add(new DirtyExpirer(nodeRepository, defaults.dirtyExpiry, metric)); + maintainers.add(new ProvisionedExpirer(nodeRepository, defaults.provisionedExpiry, metric)); + maintainers.add(new NodeRebooter(nodeRepository, flagSource, metric)); + maintainers.add(new MetricsReporter(nodeRepository, metric, orchestrator, serviceMonitor, periodicApplicationMaintainer::pendingDeployments, defaults.metricsInterval)); + maintainers.add(new SpareCapacityMaintainer(deployer, nodeRepository, metric, defaults.spareCapacityMaintenanceInterval)); + maintainers.add(new OsUpgradeActivator(nodeRepository, defaults.osUpgradeActivatorInterval, metric)); + maintainers.add(new Rebalancer(deployer, nodeRepository, metric, defaults.rebalancerInterval)); + maintainers.add(new NodeMetricsDbMaintainer(nodeRepository, metricsFetcher, metricsDb, defaults.nodeMetricsCollectionInterval, metric)); + maintainers.add(new AutoscalingMaintainer(nodeRepository, metricsDb, deployer, metric, defaults.autoscalingInterval)); + maintainers.add(new ScalingSuggestionsMaintainer(nodeRepository, metricsDb, defaults.scalingSuggestionsInterval, metric)); + maintainers.add(new SwitchRebalancer(nodeRepository, defaults.switchRebalancerInterval, metric, deployer)); + + provisionServiceProvider.getLoadBalancerService(nodeRepository) + .map(lbService -> new LoadBalancerExpirer(nodeRepository, defaults.loadBalancerExpirerInterval, lbService, metric)) + .ifPresent(maintainers::add); + provisionServiceProvider.getHostProvisioner() + .map(hostProvisioner -> new DynamicProvisioningMaintainer(nodeRepository, defaults.dynamicProvisionerInterval, hostProvisioner, flagSource, metric)) + .ifPresent(maintainers::add); // The DuperModel is filled with infrastructure applications by the infrastructure provisioner, so explicitly run that now infrastructureProvisioner.maintainButThrowOnException(); } @Override public void deconstruct() { - nodeFailer.close(); - nodeHealthTracker.close(); - periodicApplicationMaintainer.close(); - operatorChangeApplicationMaintainer.close(); - reservationExpirer.close(); - inactiveExpirer.close(); - retiredExpirer.close(); - failedExpirer.close(); - dirtyExpirer.close(); - nodeRebooter.close(); - spareCapacityMaintainer.close(); - provisionedExpirer.close(); - metricsReporter.close(); - infrastructureProvisioner.close(); - loadBalancerExpirer.ifPresent(NodeRepositoryMaintainer::close); - dynamicProvisioningMaintainer.ifPresent(NodeRepositoryMaintainer::close); - osUpgradeActivator.close(); - rebalancer.close(); - nodeMetricsDbMaintainer.close(); - autoscalingMaintainer.close(); - scalingSuggestionsMaintainer.close(); - switchRebalancer.close(); + maintainers.forEach(Maintainer::shutdown); + maintainers.forEach(Maintainer::close); } private static class DefaultTimes { diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java index 2c123779a1e..2bf91775ecc 100644 --- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java +++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java @@ -11,6 +11,7 @@ import java.util.Objects; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.logging.Level; import java.util.logging.Logger; @@ -21,7 +22,7 @@ import java.util.logging.Logger; * @author mpolden * @author jonmv */ -public abstract class Maintainer implements Runnable, AutoCloseable { +public abstract class Maintainer implements Runnable { protected final Logger log = Logger.getLogger(this.getClass().getName()); @@ -30,6 +31,7 @@ public abstract class Maintainer implements Runnable, AutoCloseable { private final JobMetrics jobMetrics; private final Duration interval; private final ScheduledExecutorService service; + private AtomicBoolean shutDown = new AtomicBoolean(); public Maintainer(String name, Duration interval, Instant startedAt, JobControl jobControl, JobMetrics jobMetrics, List<String> clusterHostnames) { this(name, interval, staggeredDelay(interval, startedAt, HostName.getLocalhost(), clusterHostnames), jobControl, jobMetrics); @@ -60,10 +62,16 @@ public abstract class Maintainer implements Runnable, AutoCloseable { log.log(Level.FINE, () -> "Finished " + this.getClass().getSimpleName()); } - @Override + /** Starts shutdown of this, typically by shutting down executors. {@link #close()} waits for shutdown to complete. */ + public void shutdown() { + if ( ! shutDown.getAndSet(true)) + service.shutdown(); + } + + /** Waits for shutdown to complete, calling {@link #shutdown} if this hasn't been done already. */ public void close() { + shutdown(); var timeout = Duration.ofSeconds(30); - service.shutdown(); try { if (!service.awaitTermination(timeout.toMillis(), TimeUnit.MILLISECONDS)) { log.log(Level.WARNING, "Maintainer " + name() + " failed to shutdown " + |