summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintenance.java 26
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java 7
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java 7
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java 7
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java 108
-rw-r--r-- vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java 14
7 files changed, 77 insertions, 94 deletions
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintenance.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintenance.java
index 21462bc4fec..fa82744de90 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintenance.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintenance.java
@@ -4,6 +4,7 @@ package com.yahoo.vespa.config.server.maintenance;
import com.google.inject.Inject;
import com.yahoo.cloud.config.ConfigserverConfig;
import com.yahoo.component.AbstractComponent;
+import com.yahoo.concurrent.maintenance.Maintainer;
import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.config.server.ApplicationRepository;
import com.yahoo.vespa.config.server.ConfigServerBootstrap;
@@ -13,6 +14,8 @@ import com.yahoo.vespa.flags.FlagSource;
import java.time.Clock;
import java.time.Duration;
+import java.util.List;
+import java.util.concurrent.CopyOnWriteArrayList;
/**
* Maintenance jobs of the config server.
@@ -24,11 +27,7 @@ import java.time.Duration;
*/
public class ConfigServerMaintenance extends AbstractComponent {
- private final TenantsMaintainer tenantsMaintainer;
- private final FileDistributionMaintainer fileDistributionMaintainer;
- private final SessionsMaintainer sessionsMaintainer;
- private final ApplicationPackageMaintainer applicationPackageMaintainer;
- private final ReindexingMaintainer reindexingMaintainer;
+ private final List<Maintainer> maintainers = new CopyOnWriteArrayList<>();
@Inject
public ConfigServerMaintenance(ConfigServerBootstrap configServerBootstrap,
@@ -38,20 +37,17 @@ public class ConfigServerMaintenance extends AbstractComponent {
FlagSource flagSource,
ConfigConvergenceChecker convergence) {
DefaultTimes defaults = new DefaultTimes(configserverConfig);
- tenantsMaintainer = new TenantsMaintainer(applicationRepository, curator, flagSource, defaults.defaultInterval, Clock.systemUTC());
- fileDistributionMaintainer = new FileDistributionMaintainer(applicationRepository, curator, defaults.defaultInterval, flagSource);
- sessionsMaintainer = new SessionsMaintainer(applicationRepository, curator, Duration.ofSeconds(30), flagSource);
- applicationPackageMaintainer = new ApplicationPackageMaintainer(applicationRepository, curator, Duration.ofSeconds(30), flagSource);
- reindexingMaintainer = new ReindexingMaintainer(applicationRepository, curator, flagSource, Duration.ofMinutes(3), convergence, Clock.systemUTC());
+ maintainers.add(new TenantsMaintainer(applicationRepository, curator, flagSource, defaults.defaultInterval, Clock.systemUTC()));
+ maintainers.add(new FileDistributionMaintainer(applicationRepository, curator, defaults.defaultInterval, flagSource));
+ maintainers.add(new SessionsMaintainer(applicationRepository, curator, Duration.ofSeconds(30), flagSource));
+ maintainers.add(new ApplicationPackageMaintainer(applicationRepository, curator, Duration.ofSeconds(30), flagSource));
+ maintainers.add(new ReindexingMaintainer(applicationRepository, curator, flagSource, Duration.ofMinutes(3), convergence, Clock.systemUTC()));
}
@Override
public void deconstruct() {
- fileDistributionMaintainer.close();
- sessionsMaintainer.close();
- applicationPackageMaintainer.close();
- tenantsMaintainer.close();
- reindexingMaintainer.close();
+ maintainers.forEach(Maintainer::shutdown); // first pass: initiate shutdown of every maintainer before waiting on any of them
+ maintainers.forEach(Maintainer::close); // second pass: wait, one maintainer at a time, for each shutdown to complete
}
/*
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
index 06e489c7f58..6266e8da57e 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java
@@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.controller.maintenance;
import com.google.inject.Inject;
import com.yahoo.component.AbstractComponent;
+import com.yahoo.concurrent.maintenance.Maintainer;
import com.yahoo.config.provision.SystemName;
import com.yahoo.config.provision.zone.ZoneApi;
import com.yahoo.jdisc.Metric;
@@ -14,6 +15,7 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
+import java.util.concurrent.CopyOnWriteArrayList;
import java.util.stream.Collectors;
import static java.time.temporal.ChronoUnit.HOURS;
@@ -30,7 +32,7 @@ import static java.time.temporal.ChronoUnit.SECONDS;
public class ControllerMaintenance extends AbstractComponent {
private final Upgrader upgrader;
- private final List<ControllerMaintainer> maintainers = new ArrayList<>();
+ private final List<Maintainer> maintainers = new CopyOnWriteArrayList<>();
@Inject
@SuppressWarnings("unused") // instantiated by Dependency Injection
@@ -68,7 +70,8 @@ public class ControllerMaintenance extends AbstractComponent {
@Override
public void deconstruct() {
- maintainers.forEach(ControllerMaintainer::close);
+ maintainers.forEach(Maintainer::shutdown); // first pass: initiate shutdown of every maintainer before waiting on any of them
+ maintainers.forEach(Maintainer::close); // second pass: wait, one maintainer at a time, for each shutdown to complete
}
/** Create one OS upgrader per cloud found in the zone registry of controller */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java
index f08a23ab8ed..8823959fb9b 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java
@@ -56,9 +56,14 @@ public class JobRunner extends ControllerMaintainer {
}
@Override
+ public void shutdown() { // only initiates shutdown; waiting for termination is left to close()
+ super.shutdown();
+ executors.shutdown(); // moved here from close(), so it has been requested before close() starts awaiting termination
+ }
+
+ @Override
public void close() {
super.close();
- executors.shutdown();
try {
if ( ! executors.awaitTermination(10, TimeUnit.SECONDS)) {
executors.shutdownNow();
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java
index 11d3962a7d6..96e9f087a67 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java
@@ -65,7 +65,7 @@ public class ReindexingTriggerer extends ControllerMaintainer {
static Duration offset(ApplicationId id, ZoneId zone) {
double relativeOffset = ((id.serializedForm() + zone.value()).hashCode() & (-1 >>> 1)) / (double) (-1 >>> 1);
- return Duration.ofMillis((long) (reindexingPeriod.toMillis() * (relativeOffset)));
+ return Duration.ofMillis((long) (reindexingPeriod.toMillis() * relativeOffset)); // relativeOffset is in [0, 1]: the hash has its sign bit masked off and is divided by -1 >>> 1 (Integer.MAX_VALUE)
}
static boolean reindexingIsReady(ApplicationReindexing reindexing, Instant now) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java
index 5df45bbc1b1..2c3785114de 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java
@@ -95,9 +95,14 @@ public abstract class ApplicationMaintainer extends NodeRepositoryMaintainer {
}
@Override
+ public void shutdown() { // only initiates shutdown; close() is what waits for deployments in progress
+ super.shutdown();
+ this.deploymentExecutor.shutdownNow(); // moved here from close(), which still awaits termination afterwards
+ }
+
+ @Override
public void close() {
super.close();
- this.deploymentExecutor.shutdownNow();
try {
// Give deployments in progress some time to complete
this.deploymentExecutor.awaitTermination(1, TimeUnit.MINUTES);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index ad835901ebf..b6e8bc145c5 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.provision.maintenance;
import com.google.inject.Inject;
import com.yahoo.component.AbstractComponent;
+import com.yahoo.concurrent.maintenance.Maintainer;
import com.yahoo.config.provision.Deployer;
import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.HostLivenessTracker;
@@ -18,7 +19,8 @@ import com.yahoo.vespa.orchestrator.Orchestrator;
import com.yahoo.vespa.service.monitor.ServiceMonitor;
import java.time.Duration;
-import java.util.Optional;
+import java.util.List;
+import java.util.concurrent.CopyOnWriteArrayList;
/**
* A component which sets up all the node repo maintenance jobs.
@@ -27,28 +29,7 @@ import java.util.Optional;
*/
public class NodeRepositoryMaintenance extends AbstractComponent {
- private final NodeFailer nodeFailer;
- private final NodeHealthTracker nodeHealthTracker;
- private final PeriodicApplicationMaintainer periodicApplicationMaintainer;
- private final OperatorChangeApplicationMaintainer operatorChangeApplicationMaintainer;
- private final ReservationExpirer reservationExpirer;
- private final InactiveExpirer inactiveExpirer;
- private final RetiredExpirer retiredExpirer;
- private final FailedExpirer failedExpirer;
- private final DirtyExpirer dirtyExpirer;
- private final ProvisionedExpirer provisionedExpirer;
- private final NodeRebooter nodeRebooter;
- private final MetricsReporter metricsReporter;
- private final InfrastructureProvisioner infrastructureProvisioner;
- private final Optional<LoadBalancerExpirer> loadBalancerExpirer;
- private final Optional<DynamicProvisioningMaintainer> dynamicProvisioningMaintainer;
- private final SpareCapacityMaintainer spareCapacityMaintainer;
- private final OsUpgradeActivator osUpgradeActivator;
- private final Rebalancer rebalancer;
- private final NodeMetricsDbMaintainer nodeMetricsDbMaintainer;
- private final AutoscalingMaintainer autoscalingMaintainer;
- private final ScalingSuggestionsMaintainer scalingSuggestionsMaintainer;
- private final SwitchRebalancer switchRebalancer;
+ private final List<Maintainer> maintainers = new CopyOnWriteArrayList<>();
@SuppressWarnings("unused")
@Inject
@@ -59,60 +40,45 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
MetricsFetcher metricsFetcher, MetricsDb metricsDb) {
DefaultTimes defaults = new DefaultTimes(zone, deployer);
- nodeFailer = new NodeFailer(deployer, nodeRepository, defaults.failGrace, defaults.nodeFailerInterval, orchestrator, defaults.throttlePolicy, metric);
- nodeHealthTracker = new NodeHealthTracker(hostLivenessTracker, serviceMonitor, nodeRepository, defaults.nodeFailureStatusUpdateInterval, metric);
- periodicApplicationMaintainer = new PeriodicApplicationMaintainer(deployer, metric, nodeRepository,
- defaults.redeployMaintainerInterval, defaults.periodicRedeployInterval, flagSource);
- operatorChangeApplicationMaintainer = new OperatorChangeApplicationMaintainer(deployer, metric, nodeRepository, defaults.operatorChangeRedeployInterval);
- reservationExpirer = new ReservationExpirer(nodeRepository, defaults.reservationExpiry, metric);
- retiredExpirer = new RetiredExpirer(nodeRepository, orchestrator, deployer, metric, defaults.retiredInterval, defaults.retiredExpiry);
- inactiveExpirer = new InactiveExpirer(nodeRepository, defaults.inactiveExpiry, metric);
- failedExpirer = new FailedExpirer(nodeRepository, zone, defaults.failedExpirerInterval, metric);
- dirtyExpirer = new DirtyExpirer(nodeRepository, defaults.dirtyExpiry, metric);
- provisionedExpirer = new ProvisionedExpirer(nodeRepository, defaults.provisionedExpiry, metric);
- nodeRebooter = new NodeRebooter(nodeRepository, flagSource, metric);
- metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, serviceMonitor, periodicApplicationMaintainer::pendingDeployments, defaults.metricsInterval);
- infrastructureProvisioner = new InfrastructureProvisioner(nodeRepository, infraDeployer, defaults.infrastructureProvisionInterval, metric);
- loadBalancerExpirer = provisionServiceProvider.getLoadBalancerService(nodeRepository).map(lbService ->
- new LoadBalancerExpirer(nodeRepository, defaults.loadBalancerExpirerInterval, lbService, metric));
- dynamicProvisioningMaintainer = provisionServiceProvider.getHostProvisioner().map(hostProvisioner ->
- new DynamicProvisioningMaintainer(nodeRepository, defaults.dynamicProvisionerInterval, hostProvisioner, flagSource, metric));
- spareCapacityMaintainer = new SpareCapacityMaintainer(deployer, nodeRepository, metric, defaults.spareCapacityMaintenanceInterval);
- osUpgradeActivator = new OsUpgradeActivator(nodeRepository, defaults.osUpgradeActivatorInterval, metric);
- rebalancer = new Rebalancer(deployer, nodeRepository, metric, defaults.rebalancerInterval);
- nodeMetricsDbMaintainer = new NodeMetricsDbMaintainer(nodeRepository, metricsFetcher, metricsDb, defaults.nodeMetricsCollectionInterval, metric);
- autoscalingMaintainer = new AutoscalingMaintainer(nodeRepository, metricsDb, deployer, metric, defaults.autoscalingInterval);
- scalingSuggestionsMaintainer = new ScalingSuggestionsMaintainer(nodeRepository, metricsDb, defaults.scalingSuggestionsInterval, metric);
- switchRebalancer = new SwitchRebalancer(nodeRepository, defaults.switchRebalancerInterval, metric, deployer);
-
+ PeriodicApplicationMaintainer periodicApplicationMaintainer = new PeriodicApplicationMaintainer(deployer, metric, nodeRepository, defaults.redeployMaintainerInterval,
+ defaults.periodicRedeployInterval, flagSource);
+ InfrastructureProvisioner infrastructureProvisioner = new InfrastructureProvisioner(nodeRepository, infraDeployer, defaults.infrastructureProvisionInterval, metric);
+ maintainers.add(periodicApplicationMaintainer);
+ maintainers.add(infrastructureProvisioner);
+
+ maintainers.add(new NodeFailer(deployer, nodeRepository, defaults.failGrace, defaults.nodeFailerInterval, orchestrator, defaults.throttlePolicy, metric));
+ maintainers.add(new NodeHealthTracker(hostLivenessTracker, serviceMonitor, nodeRepository, defaults.nodeFailureStatusUpdateInterval, metric));
+ maintainers.add(new OperatorChangeApplicationMaintainer(deployer, metric, nodeRepository, defaults.operatorChangeRedeployInterval));
+ maintainers.add(new ReservationExpirer(nodeRepository, defaults.reservationExpiry, metric));
+ maintainers.add(new RetiredExpirer(nodeRepository, orchestrator, deployer, metric, defaults.retiredInterval, defaults.retiredExpiry));
+ maintainers.add(new InactiveExpirer(nodeRepository, defaults.inactiveExpiry, metric));
+ maintainers.add(new FailedExpirer(nodeRepository, zone, defaults.failedExpirerInterval, metric));
+ maintainers.add(new DirtyExpirer(nodeRepository, defaults.dirtyExpiry, metric));
+ maintainers.add(new ProvisionedExpirer(nodeRepository, defaults.provisionedExpiry, metric));
+ maintainers.add(new NodeRebooter(nodeRepository, flagSource, metric));
+ maintainers.add(new MetricsReporter(nodeRepository, metric, orchestrator, serviceMonitor, periodicApplicationMaintainer::pendingDeployments, defaults.metricsInterval));
+ maintainers.add(new SpareCapacityMaintainer(deployer, nodeRepository, metric, defaults.spareCapacityMaintenanceInterval));
+ maintainers.add(new OsUpgradeActivator(nodeRepository, defaults.osUpgradeActivatorInterval, metric));
+ maintainers.add(new Rebalancer(deployer, nodeRepository, metric, defaults.rebalancerInterval));
+ maintainers.add(new NodeMetricsDbMaintainer(nodeRepository, metricsFetcher, metricsDb, defaults.nodeMetricsCollectionInterval, metric));
+ maintainers.add(new AutoscalingMaintainer(nodeRepository, metricsDb, deployer, metric, defaults.autoscalingInterval));
+ maintainers.add(new ScalingSuggestionsMaintainer(nodeRepository, metricsDb, defaults.scalingSuggestionsInterval, metric));
+ maintainers.add(new SwitchRebalancer(nodeRepository, defaults.switchRebalancerInterval, metric, deployer));
+
+ provisionServiceProvider.getLoadBalancerService(nodeRepository)
+ .map(lbService -> new LoadBalancerExpirer(nodeRepository, defaults.loadBalancerExpirerInterval, lbService, metric))
+ .ifPresent(maintainers::add);
+ provisionServiceProvider.getHostProvisioner()
+ .map(hostProvisioner -> new DynamicProvisioningMaintainer(nodeRepository, defaults.dynamicProvisionerInterval, hostProvisioner, flagSource, metric))
+ .ifPresent(maintainers::add);
// The DuperModel is filled with infrastructure applications by the infrastructure provisioner, so explicitly run that now
infrastructureProvisioner.maintainButThrowOnException();
}
@Override
public void deconstruct() {
- nodeFailer.close();
- nodeHealthTracker.close();
- periodicApplicationMaintainer.close();
- operatorChangeApplicationMaintainer.close();
- reservationExpirer.close();
- inactiveExpirer.close();
- retiredExpirer.close();
- failedExpirer.close();
- dirtyExpirer.close();
- nodeRebooter.close();
- spareCapacityMaintainer.close();
- provisionedExpirer.close();
- metricsReporter.close();
- infrastructureProvisioner.close();
- loadBalancerExpirer.ifPresent(NodeRepositoryMaintainer::close);
- dynamicProvisioningMaintainer.ifPresent(NodeRepositoryMaintainer::close);
- osUpgradeActivator.close();
- rebalancer.close();
- nodeMetricsDbMaintainer.close();
- autoscalingMaintainer.close();
- scalingSuggestionsMaintainer.close();
- switchRebalancer.close();
+ maintainers.forEach(Maintainer::shutdown); // first pass: initiate shutdown of every maintainer before waiting on any of them
+ maintainers.forEach(Maintainer::close); // second pass: wait, one maintainer at a time, for each shutdown to complete
}
private static class DefaultTimes {
diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
index 2c123779a1e..2bf91775ecc 100644
--- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
+++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
@@ -11,6 +11,7 @@ import java.util.Objects;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -21,7 +22,7 @@ import java.util.logging.Logger;
* @author mpolden
* @author jonmv
*/
-public abstract class Maintainer implements Runnable, AutoCloseable {
+public abstract class Maintainer implements Runnable {
protected final Logger log = Logger.getLogger(this.getClass().getName());
@@ -30,6 +31,7 @@ public abstract class Maintainer implements Runnable, AutoCloseable {
private final JobMetrics jobMetrics;
private final Duration interval;
private final ScheduledExecutorService service;
+ private AtomicBoolean shutDown = new AtomicBoolean();
public Maintainer(String name, Duration interval, Instant startedAt, JobControl jobControl, JobMetrics jobMetrics, List<String> clusterHostnames) {
this(name, interval, staggeredDelay(interval, startedAt, HostName.getLocalhost(), clusterHostnames), jobControl, jobMetrics);
@@ -60,10 +62,16 @@ public abstract class Maintainer implements Runnable, AutoCloseable {
log.log(Level.FINE, () -> "Finished " + this.getClass().getSimpleName());
}
- @Override
+ /** Starts shutdown of this, typically by shutting down executors. {@link #close()} waits for shutdown to complete. */
+ public void shutdown() {
+ if ( ! shutDown.getAndSet(true)) // getAndSet returns the previous value, so only the first call proceeds to shut the service down
+ service.shutdown();
+ }
+
+ /** Waits for shutdown to complete, calling {@link #shutdown} if this hasn't been done already. */
public void close() {
+ shutdown();
var timeout = Duration.ofSeconds(30);
- service.shutdown();
try {
if (!service.awaitTermination(timeout.toMillis(), TimeUnit.MILLISECONDS)) {
log.log(Level.WARNING, "Maintainer " + name() + " failed to shutdown " +