summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Martin Polden <mpolden@mpolden.no> 2020-07-16 09:56:14 +0200
committer Martin Polden <mpolden@mpolden.no> 2020-07-16 11:12:10 +0200
commit f3efc9b88eba737b5036a60a381ced8960a26560 (patch)
tree 03885dbf38ad5bbe636dd87c3528727779ff8de1
parent c608c8384315cebdc8adacb012a8c49a09cc0340 (diff)
Emit QoS metric for all maintainers
-rw-r--r-- configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java | 8
-rw-r--r-- configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ApplicationPackageMaintainer.java | 7
-rw-r--r-- configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java | 17
-rw-r--r-- configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintenance.java | 4
-rw-r--r-- configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/FileDistributionMaintainer.java | 4
-rw-r--r-- configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java | 4
-rw-r--r-- configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/TenantsMaintainer.java | 3
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java | 23
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java | 5
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java | 5
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java | 14
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java | 3
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java | 6
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java | 28
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java | 4
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java | 3
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java | 3
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java | 3
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/NameServiceDispatcher.java | 6
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java | 5
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java | 3
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java | 4
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java | 4
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java | 6
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/RotationStatusUpdater.java | 3
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainer.java | 3
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java | 3
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java | 4
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java | 12
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java | 5
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java | 9
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirer.java | 5
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java | 9
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java | 8
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java | 8
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java | 5
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InfrastructureProvisioner.java | 11
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java | 26
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java | 5
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java | 8
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java | 11
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java | 8
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java | 15
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java | 24
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java | 8
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ProvisionedExpirer.java | 5
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java | 12
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ReservationExpirer.java | 5
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java | 7
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java | 13
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java | 8
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java | 3
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java | 2
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java | 12
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirerTest.java | 6
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java | 4
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivatorTest.java | 2
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ReservationExpirerTest.java | 2
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java | 4
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java | 3
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java | 3
-rw-r--r-- vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java | 41
-rw-r--r-- vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java | 19
-rw-r--r-- vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlStateMock.java | 35
-rw-r--r-- vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java | 85
-rw-r--r-- vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java | 38
-rw-r--r-- vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java | 49
67 files changed, 485 insertions, 235 deletions
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java
index bc9fa96f943..937cf4dfe7f 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/ApplicationRepository.java
@@ -206,6 +206,14 @@ public class ApplicationRepository implements com.yahoo.config.provision.Deploye
this.metric = metric;
}
+ public Clock clock() {
+ return clock;
+ }
+
+ public Metric metric() {
+ return metric;
+ }
+
// ---------------- Deploying ----------------------------------------------------------------
public PrepareResult prepare(Tenant tenant, long sessionId, PrepareParams prepareParams, Instant now) {
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ApplicationPackageMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ApplicationPackageMaintainer.java
index ccbad4e21c7..92044eab5fe 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ApplicationPackageMaintainer.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ApplicationPackageMaintainer.java
@@ -50,8 +50,9 @@ public class ApplicationPackageMaintainer extends ConfigServerMaintainer {
}
@Override
- protected void maintain() {
- if (! distributeApplicationPackage.value()) return;
+ protected boolean maintain() {
+ boolean success = true;
+ if (! distributeApplicationPackage.value()) return success;
try (var fileDownloader = new FileDownloader(createConnectionPool(configserverConfig), downloadDirectory)) {
for (var applicationId : applicationRepository.listApplications()) {
@@ -68,6 +69,7 @@ public class ApplicationPackageMaintainer extends ConfigServerMaintainer {
log.fine(() -> "Downloading missing application package for application " + applicationId + " - session " + sessionId);
if (fileDownloader.getFile(applicationPackage).isEmpty()) {
+ success = false;
log.warning("Failed to download application package for application " + applicationId + " - session " + sessionId);
continue;
}
@@ -76,6 +78,7 @@ public class ApplicationPackageMaintainer extends ConfigServerMaintainer {
}
}
}
+ return success;
}
private void createLocalSessionIfMissing(ApplicationId applicationId, long sessionId) {
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java
index 5369bbef366..007ca8dcf53 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java
@@ -3,7 +3,9 @@ package com.yahoo.vespa.config.server.maintenance;
import com.yahoo.concurrent.maintenance.JobControl;
import com.yahoo.concurrent.maintenance.JobControlState;
+import com.yahoo.concurrent.maintenance.JobMetrics;
import com.yahoo.concurrent.maintenance.Maintainer;
+import com.yahoo.jdisc.Metric;
import com.yahoo.path.Path;
import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.config.server.ApplicationRepository;
@@ -12,7 +14,9 @@ import com.yahoo.vespa.flags.FlagSource;
import com.yahoo.vespa.flags.Flags;
import com.yahoo.vespa.flags.ListFlag;
+import java.time.Clock;
import java.time.Duration;
+import java.util.Map;
import java.util.Set;
/**
@@ -26,16 +30,25 @@ public abstract class ConfigServerMaintainer extends Maintainer {
ConfigServerMaintainer(ApplicationRepository applicationRepository, Curator curator, FlagSource flagSource,
Duration initialDelay, Duration interval) {
- super(null, interval, initialDelay, new JobControl(new JobControlFlags(curator, flagSource)));
+ super(null, interval, initialDelay, new JobControl(new JobControlFlags(curator, flagSource)),
+ jobMetrics(applicationRepository.clock(), applicationRepository.metric()));
this.applicationRepository = applicationRepository;
}
+ private static JobMetrics jobMetrics(Clock clock, Metric metric) {
+ return new JobMetrics(clock, (job, instant) -> {
+ Duration sinceSuccess = Duration.between(instant, clock.instant());
+ metric.set("maintenance.secondsSinceSuccess", sinceSuccess.getSeconds(), metric.createContext(Map.of("job", job)));
+ });
+ }
+
private static class JobControlFlags implements JobControlState {
private static final Path root = Path.fromString("/configserver/v1/");
- private static final Path lockRoot = root.append("locks");
+ private static final Path lockRoot = root.append("locks");
private final Curator curator;
+
private final ListFlag<String> inactiveJobsFlag;
public JobControlFlags(Curator curator, FlagSource flagSource) {
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintenance.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintenance.java
index a6585be391c..adcaa3bb0e4 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintenance.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintenance.java
@@ -5,6 +5,7 @@ import com.google.inject.Inject;
import com.yahoo.cloud.config.ConfigserverConfig;
import com.yahoo.component.AbstractComponent;
import com.yahoo.config.provision.SystemName;
+import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.config.server.ApplicationRepository;
import com.yahoo.vespa.config.server.filedistribution.FileDistributionFactory;
import com.yahoo.vespa.curator.Curator;
@@ -31,7 +32,8 @@ public class ConfigServerMaintenance extends AbstractComponent {
ApplicationRepository applicationRepository,
Curator curator,
FileDistributionFactory fileDistributionFactory,
- FlagSource flagSource) {
+ FlagSource flagSource,
+ Metric metric) {
DefaultTimes defaults = new DefaultTimes(configserverConfig);
// TODO: Disabled until we have application metadata
//tenantsMaintainer = new TenantsMaintainer(applicationRepository, curator, defaults.tenantsMaintainerInterval);
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/FileDistributionMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/FileDistributionMaintainer.java
index ed57be799c7..835122c043c 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/FileDistributionMaintainer.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/FileDistributionMaintainer.java
@@ -35,8 +35,8 @@ public class FileDistributionMaintainer extends ConfigServerMaintainer {
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
applicationRepository.deleteUnusedFiledistributionReferences(fileReferencesDir, maxUnusedFileReferenceAge);
-
+ return true;
}
}
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java
index 4adf287448d..77da56588ba 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java
@@ -26,7 +26,7 @@ public class SessionsMaintainer extends ConfigServerMaintainer {
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
applicationRepository.deleteExpiredLocalSessions();
// Expired remote sessions are sessions that belong to an application that have external deployments that
@@ -41,5 +41,7 @@ public class SessionsMaintainer extends ConfigServerMaintainer {
int deleted = applicationRepository.deleteExpiredLocks(lockExpiryTime);
if (deleted > 0)
log.log(LogLevel.INFO, "Deleted " + deleted + " locks older than " + lockExpiryTime);
+
+ return true;
}
}
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/TenantsMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/TenantsMaintainer.java
index 9a81d9f7547..d29eea842f5 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/TenantsMaintainer.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/TenantsMaintainer.java
@@ -28,8 +28,9 @@ public class TenantsMaintainer extends ConfigServerMaintainer {
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
applicationRepository.deleteUnusedTenants(ttlForUnusedTenant, clock.instant());
+ return true;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java
index 786819d9442..1f20e48edf5 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java
@@ -17,6 +17,7 @@ import com.yahoo.yolean.Exceptions;
import java.time.Duration;
import java.util.HashMap;
+import java.util.concurrent.atomic.AtomicBoolean;
import java.util.logging.Level;
/**
@@ -38,14 +39,15 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer {
}
@Override
- protected void maintain() {
- confirmApplicationOwnerships();
- ensureConfirmationResponses();
- updateConfirmedApplicationOwners();
+ protected boolean maintain() {
+ return confirmApplicationOwnerships() &
+ ensureConfirmationResponses() &
+ updateConfirmedApplicationOwners();
}
/** File an ownership issue with the owners of all applications we know about. */
- private void confirmApplicationOwnerships() {
+ private boolean confirmApplicationOwnerships() {
+ AtomicBoolean success = new AtomicBoolean(true);
applications()
.withProjectId()
.withProductionDeployment()
@@ -63,10 +65,11 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer {
}).ifPresent(newIssueId -> store(newIssueId, application.id()));
}
catch (RuntimeException e) { // Catch errors due to wrong data in the controller, or issues client timeout.
+ success.set(false);
log.log(Level.INFO, "Exception caught when attempting to file an issue for '" + application.id() + "': " + Exceptions.toMessageString(e));
}
});
-
+ return success.get();
}
private ApplicationSummary summaryOf(TenantAndApplicationId application) {
@@ -85,7 +88,8 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer {
}
/** Escalate ownership issues which have not been closed before a defined amount of time has passed. */
- private void ensureConfirmationResponses() {
+ private boolean ensureConfirmationResponses() {
+ AtomicBoolean success = new AtomicBoolean(true);
for (Application application : applications())
application.ownershipIssueId().ifPresent(issueId -> {
try {
@@ -93,12 +97,14 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer {
ownershipIssues.ensureResponse(issueId, tenant.contact());
}
catch (RuntimeException e) {
+ success.set(false);
log.log(Level.INFO, "Exception caught when attempting to escalate issue with id '" + issueId + "': " + Exceptions.toMessageString(e));
}
});
+ return success.get();
}
- private void updateConfirmedApplicationOwners() {
+ private boolean updateConfirmedApplicationOwners() {
applications()
.withProjectId()
.withProductionDeployment()
@@ -112,6 +118,7 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer {
controller().applications().store(lockedApplication.withOwner(owner)));
});
});
+ return true;
}
private ApplicationList applications() {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java
index 4b96bd404ee..10e5431dac1 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java
@@ -24,6 +24,7 @@ import java.util.stream.Collectors;
* Automatically fetches and handles scheduled events from AWS:
* 1. Deprovisions the affected hosts if applicable
* 2. Submits an issue detailing the event if some hosts are not processed by 1.
+ *
* @author mgimle
*/
public class CloudEventReporter extends ControllerMaintainer {
@@ -44,8 +45,7 @@ public class CloudEventReporter extends ControllerMaintainer {
}
@Override
- protected void maintain() {
- log.log(Level.INFO, "Fetching events for cloud hosts.");
+ protected boolean maintain() {
for (var awsRegion : zonesByCloudNativeRegion.keySet()) {
List<CloudEvent> events = eventFetcher.getEvents(awsRegion);
for (var event : events) {
@@ -56,6 +56,7 @@ public class CloudEventReporter extends ControllerMaintainer {
submitIssue(event, deprovisionedHosts);
}
}
+ return true;
}
private List<String> deprovisionHosts(String awsRegion, CloudEvent event) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java
index 4aba8d881bf..e19f3b4f9a2 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java
@@ -35,8 +35,9 @@ public class ContactInformationMaintainer extends ControllerMaintainer {
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
TenantController tenants = controller().tenants();
+ boolean success = true;
for (Tenant tenant : tenants.asList()) {
log.log(INFO, "Updating contact information for " + tenant);
try {
@@ -55,11 +56,13 @@ public class ContactInformationMaintainer extends ControllerMaintainer {
throw new IllegalArgumentException("Unexpected tenant type '" + tenant.type() + "'.");
}
} catch (Exception e) {
+ success = false;
log.log(Level.WARNING, "Failed to update contact information for " + tenant + ": " +
Exceptions.toMessageString(e) + ". Retrying in " +
interval());
}
}
+ return success;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java
index 2b7c78f96d0..76003a873fe 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java
@@ -1,12 +1,16 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller.maintenance;
+import com.yahoo.concurrent.maintenance.JobMetrics;
import com.yahoo.concurrent.maintenance.Maintainer;
import com.yahoo.config.provision.SystemName;
+import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.hosted.controller.Controller;
+import java.time.Clock;
import java.time.Duration;
import java.util.EnumSet;
+import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.logging.Logger;
@@ -30,7 +34,8 @@ public abstract class ControllerMaintainer extends Maintainer {
}
public ControllerMaintainer(Controller controller, Duration interval, String name, Set<SystemName> activeSystems) {
- super(name, interval, controller.clock().instant(), controller.jobControl(), controller.curator().cluster());
+ super(name, interval, controller.clock().instant(), controller.jobControl(),
+ jobMetrics(controller.clock(), controller.metric()), controller.curator().cluster());
this.controller = controller;
this.activeSystems = Set.copyOf(Objects.requireNonNull(activeSystems));
}
@@ -43,4 +48,11 @@ public abstract class ControllerMaintainer extends Maintainer {
super.run();
}
+ private static JobMetrics jobMetrics(Clock clock, Metric metric) {
+ return new JobMetrics(clock, (job, instant) -> {
+ Duration sinceSuccess = Duration.between(instant, clock.instant());
+ metric.set("maintenance.secondsSinceSuccess", sinceSuccess.getSeconds(), metric.createContext(Map.of("job", job)));
+ });
+ }
+
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java
index d028a88fb92..28b64b5bfe0 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java
@@ -31,9 +31,10 @@ public class CostReportMaintainer extends ControllerMaintainer {
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
var csv = CostCalculator.resourceShareByPropertyToCsv(nodeRepository, controller(), clock, consumer.fixedAllocations());
consumer.consume(csv);
+ return true;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java
index bb2161bca1d..7bd2c737fcb 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java
@@ -1,7 +1,6 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller.maintenance;
-import com.yahoo.concurrent.maintenance.JobControl;
import com.yahoo.vespa.hosted.controller.Application;
import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.Instance;
@@ -24,7 +23,8 @@ public class DeploymentExpirer extends ControllerMaintainer {
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
+ boolean success = true;
for (Application application : controller().applications().readable())
for (Instance instance : application.instances().values())
for (Deployment deployment : instance.deployments().values()) {
@@ -34,11 +34,13 @@ public class DeploymentExpirer extends ControllerMaintainer {
log.log(Level.INFO, "Expiring deployment of " + instance.id() + " in " + deployment.zone());
controller().applications().deactivate(instance.id(), deployment.zone());
} catch (Exception e) {
+ success = false;
log.log(Level.WARNING, "Could not expire " + deployment + " of " + instance +
": " + Exceptions.toMessageString(e) + ". Retrying in " +
interval());
}
}
+ return success;
}
/** Returns whether given deployment has expired according to its TTL */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java
index 89f1e0fe840..a94e7407898 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java
@@ -2,7 +2,6 @@
package com.yahoo.vespa.hosted.controller.maintenance;
import com.yahoo.component.Version;
-import com.yahoo.concurrent.maintenance.JobControl;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.SystemName;
import com.yahoo.vespa.hosted.controller.Application;
@@ -20,6 +19,7 @@ import java.time.Duration;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
+import java.util.concurrent.atomic.AtomicBoolean;
import java.util.logging.Level;
import static com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence.broken;
@@ -44,10 +44,10 @@ public class DeploymentIssueReporter extends ControllerMaintainer {
}
@Override
- protected void maintain() {
- maintainDeploymentIssues(applications());
- maintainPlatformIssue(applications());
- escalateInactiveDeploymentIssues(applications());
+ protected boolean maintain() {
+ return maintainDeploymentIssues(applications()) &
+ maintainPlatformIssue(applications()) &
+ escalateInactiveDeploymentIssues(applications());
}
/** Returns the applications to maintain issue status for. */
@@ -62,7 +62,7 @@ public class DeploymentIssueReporter extends ControllerMaintainer {
* and store the issue id for the filed issues. Also, clear the issueIds of applications
* where deployment has not failed for this amount of time.
*/
- private void maintainDeploymentIssues(List<Application> applications) {
+ private boolean maintainDeploymentIssues(List<Application> applications) {
List<TenantAndApplicationId> failingApplications = controller().jobController().deploymentStatuses(ApplicationList.from(applications))
.failingApplicationChangeSince(controller().clock().instant().minus(maxFailureAge))
.mapToList(status -> status.application().id());
@@ -72,6 +72,7 @@ public class DeploymentIssueReporter extends ControllerMaintainer {
fileDeploymentIssueFor(application);
else
store(application.id(), null);
+ return true;
}
/**
@@ -79,24 +80,26 @@ public class DeploymentIssueReporter extends ControllerMaintainer {
* applications that have been failing the upgrade to the system version for
* longer than the set grace period, or update this list if the issue already exists.
*/
- private void maintainPlatformIssue(List<Application> applications) {
+ private boolean maintainPlatformIssue(List<Application> applications) {
+ boolean success = true;
if (controller().system() == SystemName.cd)
- return;
+ return success;
Version systemVersion = controller().systemVersion();
if ((controller().versionStatus().version(systemVersion).confidence() != broken))
- return;
+ return success;
DeploymentStatusList statuses = controller().jobController().deploymentStatuses(ApplicationList.from(applications));
if (statuses.failingUpgradeToVersionSince(systemVersion, controller().clock().instant().minus(upgradeGracePeriod)).isEmpty())
- return;
+ return success;
List<ApplicationId> failingApplications = statuses.failingUpgradeToVersionSince(systemVersion, controller().clock().instant())
.mapToList(status -> status.application().id().defaultInstance());
// TODO jonmv: Send only tenant and application, here and elsewhere in this.
deploymentIssues.fileUnlessOpen(failingApplications, systemVersion);
+ return success;
}
private Tenant ownerOf(TenantAndApplicationId applicationId) {
@@ -121,7 +124,8 @@ public class DeploymentIssueReporter extends ControllerMaintainer {
}
/** Escalate issues for which there has been no activity for a certain amount of time. */
- private void escalateInactiveDeploymentIssues(Collection<Application> applications) {
+ private boolean escalateInactiveDeploymentIssues(Collection<Application> applications) {
+ AtomicBoolean success = new AtomicBoolean(true);
applications.forEach(application -> application.deploymentIssueId().ifPresent(issueId -> {
try {
Tenant tenant = ownerOf(application.id());
@@ -130,9 +134,11 @@ public class DeploymentIssueReporter extends ControllerMaintainer {
tenant.type() == Tenant.Type.athenz ? tenant.contact() : Optional.empty());
}
catch (RuntimeException e) {
+ success.set(false);
log.log(Level.INFO, "Exception caught when attempting to escalate issue with id '" + issueId + "': " + Exceptions.toMessageString(e));
}
}));
+ return success.get();
}
private void store(TenantAndApplicationId id, IssueId issueId) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java
index c03be2ca1d1..c8416578932 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java
@@ -1,7 +1,6 @@
// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller.maintenance;
-import com.yahoo.concurrent.maintenance.JobControl;
import com.yahoo.config.provision.SystemName;
import com.yahoo.vespa.hosted.controller.ApplicationController;
import com.yahoo.vespa.hosted.controller.Controller;
@@ -39,7 +38,7 @@ public class DeploymentMetricsMaintainer extends ControllerMaintainer {
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
AtomicInteger failures = new AtomicInteger(0);
AtomicInteger attempts = new AtomicInteger(0);
AtomicReference<Exception> lastException = new AtomicReference<>(null);
@@ -91,6 +90,7 @@ public class DeploymentMetricsMaintainer extends ControllerMaintainer {
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
+ return lastException.get() == null;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java
index 7006458538d..7952355d5fb 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java
@@ -35,8 +35,9 @@ public abstract class InfrastructureUpgrader<VERSION> extends ControllerMaintain
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
targetVersion().ifPresent(target -> upgradeAll(target, SystemApplication.all()));
+ return true;
}
/** Deploy a list of system applications until they converge on the given version */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java
index cfe9257bdf8..e0f2f0718ef 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java
@@ -48,9 +48,10 @@ public class JobRunner extends ControllerMaintainer {
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
jobs.active().forEach(this::advance);
jobs.collectGarbage();
+ return true;
}
@Override
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java
index cc4a8c628eb..0c5ef123eef 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java
@@ -68,12 +68,13 @@ public class MetricsReporter extends ControllerMaintainer {
}
@Override
- public void maintain() {
+ public boolean maintain() {
reportDeploymentMetrics();
reportRemainingRotations();
reportQueuedNameServiceRequests();
reportInfrastructureUpgradeMetrics();
reportAuditLog();
+ return true;
}
private void reportAuditLog() {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/NameServiceDispatcher.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/NameServiceDispatcher.java
index 9febc73a5a7..e223809a211 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/NameServiceDispatcher.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/NameServiceDispatcher.java
@@ -38,12 +38,13 @@ public class NameServiceDispatcher extends ControllerMaintainer {
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
+ boolean success = true;
try (var lock = db.lockNameServiceQueue()) {
var queue = db.readNameServiceQueue();
var instant = clock.instant();
var remaining = queue.dispatchTo(nameService, requestCount);
- if (queue == remaining) return; // Queue unchanged
+ if (queue == remaining) return success; // Queue unchanged
var dispatched = queue.first(requestCount);
if (!dispatched.requests().isEmpty()) {
@@ -53,6 +54,7 @@ public class NameServiceDispatcher extends ControllerMaintainer {
}
db.writeNameServiceQueue(remaining);
}
+ return success;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java
index a62b1745145..20febfaea1d 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java
@@ -1,7 +1,6 @@
// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller.maintenance;
-import com.yahoo.concurrent.maintenance.JobControl;
import com.yahoo.vespa.hosted.controller.Controller;
import com.yahoo.vespa.hosted.controller.versions.OsVersionStatus;
import com.yahoo.yolean.Exceptions;
@@ -19,14 +18,16 @@ public class OsVersionStatusUpdater extends ControllerMaintainer {
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
try {
OsVersionStatus newStatus = OsVersionStatus.compute(controller());
controller().updateOsVersionStatus(newStatus);
+ return true;
} catch (Exception e) {
log.log(Level.WARNING, "Failed to compute version status: " + Exceptions.toMessageString(e) +
". Retrying in " + interval());
}
+ return false;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java
index 5dd62251759..a032f266de5 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java
@@ -19,12 +19,13 @@ public class OutstandingChangeDeployer extends ControllerMaintainer {
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
for (Application application : ApplicationList.from(controller().applications().readable())
.withProductionDeployment()
.withDeploymentSpec()
.asList())
controller().applications().deploymentTrigger().triggerNewRevision(application.id());
+ return true;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java
index 32b65f05cac..a626f21359a 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java
@@ -1,7 +1,6 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller.maintenance;
-import com.yahoo.concurrent.maintenance.JobControl;
import com.yahoo.vespa.hosted.controller.Controller;
import java.time.Duration;
@@ -18,8 +17,9 @@ public class ReadyJobsTrigger extends ControllerMaintainer {
}
@Override
- public void maintain() {
+ public boolean maintain() {
controller().applications().deploymentTrigger().triggerReadyJobs();
+ return true;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java
index 76a186a2f6b..f460561df08 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java
@@ -50,13 +50,15 @@ public class ResourceMeterMaintainer extends ControllerMaintainer {
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
try {
collectResourceSnapshots();
+ return true;
} catch (Exception e) {
log.log(Level.WARNING, "Failed to collect resource snapshots. Retrying in " + interval() + ". Error: " +
Exceptions.toMessageString(e));
}
+ return false;
}
private void collectResourceSnapshots() {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java
index 31434de472d..863302223ac 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java
@@ -1,7 +1,6 @@
// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.controller.maintenance;
-import com.yahoo.concurrent.maintenance.JobControl;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.CloudName;
import com.yahoo.config.provision.HostName;
@@ -27,7 +26,7 @@ public class ResourceTagMaintainer extends ControllerMaintainer {
}
@Override
- public void maintain() {
+ public boolean maintain() {
controller().zoneRegistry().zones()
.ofCloud(CloudName.from("aws"))
.reachable()
@@ -37,8 +36,7 @@ public class ResourceTagMaintainer extends ControllerMaintainer {
if (taggedResources > 0)
log.log(Level.INFO, "Tagged " + taggedResources + " resources in " + zone.getId());
});
-
-
+ return true;
}
private Map<HostName, ApplicationId> getTenantOfParentHosts(ZoneId zoneId) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/RotationStatusUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/RotationStatusUpdater.java
index 245747a882f..935bcbec597 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/RotationStatusUpdater.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/RotationStatusUpdater.java
@@ -41,7 +41,7 @@ public class RotationStatusUpdater extends ControllerMaintainer {
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
var failures = new AtomicInteger(0);
var attempts = new AtomicInteger(0);
var lastException = new AtomicReference<Exception>(null);
@@ -78,6 +78,7 @@ public class RotationStatusUpdater extends ControllerMaintainer {
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
+ return lastException.get() == null;
}
private RotationStatus getStatus(Instance instance) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainer.java
index 0fe6f7e0bfb..3b0a1fca4af 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainer.java
@@ -21,13 +21,14 @@ public class SystemRoutingPolicyMaintainer extends ControllerMaintainer {
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
for (var zone : controller().zoneRegistry().zones().all().ids()) {
for (var application : SystemApplication.values()) {
if (!application.hasEndpoint()) continue;
controller().routing().policies().refresh(application.id(), DeploymentSpec.empty, zone);
}
}
+ return true;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
index 5f0f2e4ba4e..9ab2b0e77e8 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
@@ -51,7 +51,7 @@ public class Upgrader extends ControllerMaintainer {
* Schedule application upgrades. Note that this implementation must be idempotent.
*/
@Override
- public void maintain() {
+ public boolean maintain() {
// Determine target versions for each upgrade policy
Version canaryTarget = controller().systemVersion();
Collection<Version> defaultTargets = targetVersions(Confidence.normal);
@@ -89,6 +89,7 @@ public class Upgrader extends ControllerMaintainer {
upgrade(instances.with(UpgradePolicy.canary), canaryTarget, instances.size());
defaultTargets.forEach(target -> upgrade(instances.with(UpgradePolicy.defaultPolicy), target, numberOfApplicationsToUpgrade()));
conservativeTargets.forEach(target -> upgrade(instances.with(UpgradePolicy.conservative), target, numberOfApplicationsToUpgrade()));
+ return true;
}
/** Returns the target versions for given confidence, one per major version in the system */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java
index d8b74a4ae99..a3e9672b715 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java
@@ -29,7 +29,7 @@ public class VersionStatusUpdater extends ControllerMaintainer {
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
try {
VersionStatus newStatus = VersionStatus.compute(controller());
controller().updateVersionStatus(newStatus);
@@ -37,10 +37,12 @@ public class VersionStatusUpdater extends ControllerMaintainer {
controller().serviceRegistry().systemMonitor().reportSystemVersion(version.versionNumber(),
convert(version.confidence()));
});
+ return true;
} catch (Exception e) {
log.log(Level.WARNING, "Failed to compute version status: " + Exceptions.toMessageString(e) +
". Retrying in " + interval());
}
+ return false;
}
static SystemMonitor.Confidence convert(VespaVersion.Confidence confidence) {
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java
index 1151fdd07f0..4218e66703f 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java
@@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.controller.maintenance;
import com.yahoo.config.provision.SystemName;
import com.yahoo.vespa.hosted.controller.ControllerTester;
+import com.yahoo.vespa.hosted.controller.integration.MetricsMock;
import org.junit.Before;
import org.junit.Test;
@@ -32,12 +33,21 @@ public class ControllerMaintainerTest {
assertEquals(1, executions.get());
}
+ @Test
+ public void records_metric() {
+ maintainerIn(SystemName.main, new AtomicInteger()).run();
+ MetricsMock metrics = (MetricsMock) tester.controller().metric();
+ assertEquals(0L, metrics.getMetric((context) -> "MockMaintainer".equals(context.get("job")),
+ "maintenance.secondsSinceSuccess").get());
+ }
+
private ControllerMaintainer maintainerIn(SystemName system, AtomicInteger executions) {
return new ControllerMaintainer(tester.controller(), Duration.ofDays(1),
"MockMaintainer", EnumSet.of(system)) {
@Override
- protected void maintain() {
+ protected boolean maintain() {
executions.incrementAndGet();
+ return true;
}
};
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java
index a762f718ab7..9980335bab0 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java
@@ -35,14 +35,15 @@ public abstract class ApplicationMaintainer extends NodeRepositoryMaintainer {
new DaemonThreadFactory("node repo application maintainer"));
protected ApplicationMaintainer(Deployer deployer, Metric metric, NodeRepository nodeRepository, Duration interval) {
- super(nodeRepository, interval);
+ super(nodeRepository, interval, metric);
this.deployer = deployer;
this.metric = metric;
}
@Override
- protected final void maintain() {
+ protected final boolean maintain() {
applicationsNeedingMaintenance().forEach(this::deploy);
+ return true;
}
/** Returns the number of deployments that are pending execution */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
index c32b7854d4e..e2b98d8d000 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
@@ -14,7 +14,6 @@ import com.yahoo.vespa.hosted.provision.applications.Applications;
import com.yahoo.vespa.hosted.provision.applications.Cluster;
import com.yahoo.vespa.hosted.provision.autoscale.Autoscaler;
import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb;
-import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator;
import java.time.Duration;
import java.util.List;
@@ -38,17 +37,19 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
Deployer deployer,
Metric metric,
Duration interval) {
- super(nodeRepository, interval);
+ super(nodeRepository, interval, metric);
this.autoscaler = new Autoscaler(metricsDb, nodeRepository);
this.metric = metric;
this.deployer = deployer;
}
@Override
- protected void maintain() {
- if ( ! nodeRepository().zone().environment().isProduction()) return;
+ protected boolean maintain() {
+ boolean success = true;
+ if ( ! nodeRepository().zone().environment().isProduction()) return success;
activeNodesByApplication().forEach((applicationId, nodes) -> autoscale(applicationId, nodes));
+ return success;
}
private void autoscale(ApplicationId application, List<Node> applicationNodes) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirer.java
index f428e276df8..eb5973f11a9 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DirtyExpirer.java
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;
+import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Agent;
@@ -25,8 +26,8 @@ public class DirtyExpirer extends Expirer {
private final NodeRepository nodeRepository;
- DirtyExpirer(NodeRepository nodeRepository, Clock clock, Duration dirtyTimeout) {
- super(Node.State.dirty, History.Event.Type.deallocated, nodeRepository, clock, dirtyTimeout);
+ DirtyExpirer(NodeRepository nodeRepository, Clock clock, Duration dirtyTimeout, Metric metric) {
+ super(Node.State.dirty, History.Event.Type.deallocated, nodeRepository, clock, dirtyTimeout, metric);
this.nodeRepository = nodeRepository;
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
index 0a32970e056..b9005a028ff 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
@@ -6,6 +6,7 @@ import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.OutOfCapacityException;
+import com.yahoo.jdisc.Metric;
import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.flags.FlagSource;
import com.yahoo.vespa.flags.Flags;
@@ -49,19 +50,21 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
DynamicProvisioningMaintainer(NodeRepository nodeRepository,
Duration interval,
HostProvisioner hostProvisioner,
- FlagSource flagSource) {
- super(nodeRepository, interval);
+ FlagSource flagSource,
+ Metric metric) {
+ super(nodeRepository, interval, metric);
this.hostProvisioner = hostProvisioner;
this.targetCapacityFlag = Flags.TARGET_CAPACITY.bindTo(flagSource);
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
try (Mutex lock = nodeRepository().lockUnallocated()) {
NodeList nodes = nodeRepository().list();
resumeProvisioning(nodes, lock);
convergeToCapacity(nodes);
}
+ return true;
}
/** Resume provisioning of already provisioned hosts and their children */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java
index dc5155312e7..43f5210b233 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;
+import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.History;
@@ -32,8 +33,8 @@ public abstract class Expirer extends NodeRepositoryMaintainer {
private final Duration expiryTime;
Expirer(Node.State fromState, History.Event.Type eventType, NodeRepository nodeRepository,
- Clock clock, Duration expiryTime) {
- super(nodeRepository, min(Duration.ofMinutes(10), expiryTime));
+ Clock clock, Duration expiryTime, Metric metric) {
+ super(nodeRepository, min(Duration.ofMinutes(10), expiryTime), metric);
this.fromState = fromState;
this.eventType = eventType;
this.clock = clock;
@@ -41,7 +42,7 @@ public abstract class Expirer extends NodeRepositoryMaintainer {
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
List<Node> expired = new ArrayList<>();
for (Node node : nodeRepository().getNodes(fromState)) {
if (isExpired(node))
@@ -50,6 +51,7 @@ public abstract class Expirer extends NodeRepositoryMaintainer {
if ( ! expired.isEmpty())
log.info(fromState + " expirer found " + expired.size() + " expired nodes: " + expired);
expire(expired);
+ return true;
}
protected boolean isExpired(Node node) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java
index d65b4ce4248..3f8cc58540d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java
@@ -6,6 +6,7 @@ import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.Flavor;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.Zone;
+import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Agent;
@@ -56,8 +57,8 @@ public class FailedExpirer extends NodeRepositoryMaintainer {
private final Duration defaultExpiry; // Grace period to allow recovery of data
private final Duration containerExpiry; // Stateless nodes, no data to recover
- FailedExpirer(NodeRepository nodeRepository, Zone zone, Clock clock, Duration interval) {
- super(nodeRepository, interval);
+ FailedExpirer(NodeRepository nodeRepository, Zone zone, Clock clock, Duration interval, Metric metric) {
+ super(nodeRepository, interval, metric);
this.nodeRepository = nodeRepository;
this.zone = zone;
this.clock = clock;
@@ -74,7 +75,7 @@ public class FailedExpirer extends NodeRepositoryMaintainer {
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
List<Node> remainingNodes = new ArrayList<>(nodeRepository.list()
.state(Node.State.failed)
.nodeType(NodeType.tenant, NodeType.host)
@@ -86,6 +87,7 @@ public class FailedExpirer extends NodeRepositoryMaintainer {
node.history().hasEventBefore(History.Event.Type.failed, clock.instant().minus(containerExpiry)));
recycleIf(remainingNodes, node ->
node.history().hasEventBefore(History.Event.Type.failed, clock.instant().minus(defaultExpiry)));
+ return true;
}
/** Recycle the nodes matching condition, and remove those nodes from the nodes list. */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java
index 3cb7cc218a7..389fc0ee907 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveExpirer.java
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;
+import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Agent;
@@ -36,8 +37,8 @@ public class InactiveExpirer extends Expirer {
private final NodeRepository nodeRepository;
- InactiveExpirer(NodeRepository nodeRepository, Clock clock, Duration inactiveTimeout) {
- super(Node.State.inactive, History.Event.Type.deactivated, nodeRepository, clock, inactiveTimeout);
+ InactiveExpirer(NodeRepository nodeRepository, Clock clock, Duration inactiveTimeout, Metric metric) {
+ super(Node.State.inactive, History.Event.Type.deactivated, nodeRepository, clock, inactiveTimeout, metric);
this.nodeRepository = nodeRepository;
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InfrastructureProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InfrastructureProvisioner.java
index b933e549357..e317333135c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InfrastructureProvisioner.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InfrastructureProvisioner.java
@@ -2,10 +2,11 @@
package com.yahoo.vespa.hosted.provision.maintenance;
import com.yahoo.config.provision.InfraDeployer;
-import java.util.logging.Level;
+import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import java.time.Duration;
+import java.util.logging.Level;
import java.util.logging.Logger;
/**
@@ -20,8 +21,8 @@ public class InfrastructureProvisioner extends NodeRepositoryMaintainer {
private final InfraDeployer infraDeployer;
- InfrastructureProvisioner(NodeRepository nodeRepository, InfraDeployer infraDeployer, Duration interval) {
- super(nodeRepository, interval);
+ InfrastructureProvisioner(NodeRepository nodeRepository, InfraDeployer infraDeployer, Duration interval, Metric metric) {
+ super(nodeRepository, interval, metric);
this.infraDeployer = infraDeployer;
}
@@ -38,7 +39,9 @@ public class InfrastructureProvisioner extends NodeRepositoryMaintainer {
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
infraDeployer.activateAllSupportedInfraApplications(false);
+ return true;
}
+
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java
index 6edd57de1c1..90cf3ba8f54 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java
@@ -1,6 +1,7 @@
// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;
+import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancer;
@@ -39,17 +40,16 @@ public class LoadBalancerExpirer extends NodeRepositoryMaintainer {
private final LoadBalancerService service;
private final CuratorDatabaseClient db;
- LoadBalancerExpirer(NodeRepository nodeRepository, Duration interval, LoadBalancerService service) {
- super(nodeRepository, interval);
+ LoadBalancerExpirer(NodeRepository nodeRepository, Duration interval, LoadBalancerService service, Metric metric) {
+ super(nodeRepository, interval, metric);
this.service = Objects.requireNonNull(service, "service must be non-null");
this.db = nodeRepository.database();
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
expireReserved();
- removeInactive();
- pruneReals();
+ return removeInactive() & pruneReals();
}
/** Move reserved load balancer that have expired to inactive */
@@ -63,7 +63,7 @@ public class LoadBalancerExpirer extends NodeRepositoryMaintainer {
}
/** Deprovision inactive load balancers that have expired */
- private void removeInactive() {
+ private boolean removeInactive() {
var failed = new ArrayList<LoadBalancerId>();
var lastException = new AtomicReference<Exception>();
var now = nodeRepository().clock().instant();
@@ -88,10 +88,11 @@ public class LoadBalancerExpirer extends NodeRepositoryMaintainer {
interval()),
lastException.get());
}
+ return lastException.get() == null;
}
/** Remove reals from inactive load balancers */
- private void pruneReals() {
+ private boolean pruneReals() {
var failed = new ArrayList<LoadBalancerId>();
var lastException = new AtomicReference<Exception>();
withLoadBalancersIn(State.inactive, lb -> {
@@ -109,13 +110,14 @@ public class LoadBalancerExpirer extends NodeRepositoryMaintainer {
});
if (!failed.isEmpty()) {
log.log(Level.WARNING, String.format("Failed to remove reals from %d load balancers: %s, retrying in %s",
- failed.size(),
- failed.stream()
- .map(LoadBalancerId::serializedForm)
- .collect(Collectors.joining(", ")),
- interval()),
+ failed.size(),
+ failed.stream()
+ .map(LoadBalancerId::serializedForm)
+ .collect(Collectors.joining(", ")),
+ interval()),
lastException.get());
}
+ return lastException.get() == null;
}
/** Apply operation to all load balancers that exist in given state, while holding lock */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
index c631de5f17b..e0d7dc5f19e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
@@ -50,7 +50,7 @@ public class MetricsReporter extends NodeRepositoryMaintainer {
Supplier<Integer> pendingRedeploymentsSupplier,
Duration interval,
Clock clock) {
- super(nodeRepository, interval);
+ super(nodeRepository, interval, metric);
this.metric = metric;
this.orchestrator = orchestrator;
this.serviceMonitor = serviceMonitor;
@@ -59,7 +59,7 @@ public class MetricsReporter extends NodeRepositoryMaintainer {
}
@Override
- public void maintain() {
+ public boolean maintain() {
NodeList nodes = nodeRepository().list();
ServiceModel serviceModel = serviceMonitor.getServiceModelSnapshot();
@@ -68,6 +68,7 @@ public class MetricsReporter extends NodeRepositoryMaintainer {
updateMaintenanceMetrics();
updateDockerMetrics(nodes);
updateTenantUsageMetrics(nodes);
+ return true;
}
private void updateMaintenanceMetrics() {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
index 9c1892a1920..a2a189769bf 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
@@ -78,7 +78,7 @@ public class NodeFailer extends NodeRepositoryMaintainer {
Duration downTimeLimit, Clock clock, Orchestrator orchestrator,
ThrottlePolicy throttlePolicy, Metric metric) {
// check ping status every five minutes, but at least twice as often as the down time limit
- super(nodeRepository, min(downTimeLimit.dividedBy(2), Duration.ofMinutes(5)));
+ super(nodeRepository, min(downTimeLimit.dividedBy(2), Duration.ofMinutes(5)), metric);
this.deployer = deployer;
this.hostLivenessTracker = hostLivenessTracker;
this.serviceMonitor = serviceMonitor;
@@ -91,7 +91,7 @@ public class NodeFailer extends NodeRepositoryMaintainer {
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
int throttledHostFailures = 0;
int throttledNodeFailures = 0;
@@ -131,9 +131,11 @@ public class NodeFailer extends NodeRepositoryMaintainer {
failActive(node, reason);
}
- metric.set(throttlingActiveMetric, Math.min( 1, throttledHostFailures + throttledNodeFailures), null);
+ int throttlingActive = Math.min(1, throttledHostFailures + throttledNodeFailures);
+ metric.set(throttlingActiveMetric, throttlingActive, null);
metric.set(throttledHostFailuresMetric, throttledHostFailures, null);
metric.set(throttledNodeFailuresMetric, throttledNodeFailures, null);
+ return throttlingActive == 0;
}
private void updateNodeLivenessEventsForReadyNodes(Mutex lock) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java
index eb2b46dd53e..222ee631968 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java
@@ -2,8 +2,9 @@
package com.yahoo.vespa.hosted.provision.maintenance;
import com.yahoo.config.provision.ApplicationId;
-import com.yahoo.vespa.hosted.provision.autoscale.NodeMetrics;
+import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.autoscale.NodeMetrics;
import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb;
import com.yahoo.yolean.Exceptions;
@@ -26,14 +27,15 @@ public class NodeMetricsDbMaintainer extends NodeRepositoryMaintainer {
/**
 * Creates the maintainer. The constructor gains a trailing {@code metric} argument, which is passed on
 * to the superclass along with the repository and interval; {@code nodeMetrics} and
 * {@code nodeMetricsDb} are stored as fields.
 */
public NodeMetricsDbMaintainer(NodeRepository nodeRepository,
NodeMetrics nodeMetrics,
NodeMetricsDb nodeMetricsDb,
- Duration interval) {
- super(nodeRepository, interval);
+ Duration interval,
+ Metric metric) {
+ super(nodeRepository, interval, metric);
this.nodeMetrics = nodeMetrics;
this.nodeMetricsDb = nodeMetricsDb;
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
int warnings = 0;
for (ApplicationId application : activeNodesByApplication().keySet()) {
try {
@@ -46,6 +48,7 @@ public class NodeMetricsDbMaintainer extends NodeRepositoryMaintainer {
}
}
nodeMetricsDb.gc(nodeRepository().clock());
+ return warnings == 0;
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java
index c78ed72ff42..f64f27b1219 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java
@@ -2,6 +2,7 @@
package com.yahoo.vespa.hosted.provision.maintenance;
import com.yahoo.config.provision.Flavor;
+import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.flags.FlagSource;
import com.yahoo.vespa.flags.Flags;
import com.yahoo.vespa.flags.IntFlag;
@@ -32,15 +33,15 @@ public class NodeRebooter extends NodeRepositoryMaintainer {
private final Clock clock;
private final Random random;
/**
 * Creates the rebooter with a fixed maintenance interval of 25 minutes. The added {@code metric}
 * argument is forwarded to the superclass constructor; the reboot interval is bound from the
 * {@code REBOOT_INTERVAL_IN_DAYS} flag using {@code flagSource}.
 */
- NodeRebooter(NodeRepository nodeRepository, Clock clock, FlagSource flagSource) {
- super(nodeRepository, Duration.ofMinutes(25));
+ NodeRebooter(NodeRepository nodeRepository, Clock clock, FlagSource flagSource, Metric metric) {
+ super(nodeRepository, Duration.ofMinutes(25), metric);
this.rebootIntervalInDays = Flags.REBOOT_INTERVAL_IN_DAYS.bindTo(flagSource);
this.clock = clock;
this.random = new Random(clock.millis()); // seed with clock for test determinism
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
// Reboot candidates: Nodes in long-term states, where we know we can safely orchestrate a reboot
List<Node> nodesToReboot = nodeRepository().getNodes(Node.State.active, Node.State.ready).stream()
.filter(node -> node.flavor().getType() != Flavor.Type.DOCKER_CONTAINER)
@@ -49,6 +50,7 @@ public class NodeRebooter extends NodeRepositoryMaintainer {
if (!nodesToReboot.isEmpty())
nodeRepository().reboot(NodeListFilter.from(nodesToReboot));
+ return true;
}
private boolean shouldReboot(Node node) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java
index 8368569cda0..85477dad729 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java
@@ -1,12 +1,15 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;
+import com.yahoo.concurrent.maintenance.JobMetrics;
import com.yahoo.concurrent.maintenance.Maintainer;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.NodeType;
+import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
+import java.time.Clock;
import java.time.Duration;
import java.util.List;
import java.util.Map;
@@ -21,8 +24,9 @@ public abstract class NodeRepositoryMaintainer extends Maintainer {
private final NodeRepository nodeRepository;
/**
 * Base constructor for node repository maintainers. It now requires a {@code Metric}, which is wrapped
 * by {@code jobMetrics(...)} and handed to the {@code Maintainer} superclass as an additional argument.
 * The instant, job control and cluster arguments are all read from {@code nodeRepository}.
 */
// NOTE(review): the first super argument is passed as null; its meaning is not visible in this hunk
- public NodeRepositoryMaintainer(NodeRepository nodeRepository, Duration interval) {
- super(null, interval, nodeRepository.clock().instant(), nodeRepository.jobControl(), nodeRepository.database().cluster());
+ public NodeRepositoryMaintainer(NodeRepository nodeRepository, Duration interval, Metric metric) {
+ super(null, interval, nodeRepository.clock().instant(), nodeRepository.jobControl(),
+ jobMetrics(nodeRepository.clock(), metric), nodeRepository.database().cluster());
this.nodeRepository = nodeRepository;
}
@@ -41,4 +45,11 @@ public abstract class NodeRepositoryMaintainer extends Maintainer {
.collect(Collectors.groupingBy(node -> node.allocation().get().owner()));
}
/**
 * Builds the JobMetrics instance passed to the superclass. Its callback receives a job identifier and
 * an instant, and sets "maintenance.secondsSinceSuccess" to the whole seconds elapsed from that instant
 * to the current time of {@code clock}, using a metric context keyed by "job".
 * NOTE(review): the instant is presumably the time of the job's last successful run; confirm against JobMetrics.
 */
+ private static JobMetrics jobMetrics(Clock clock, Metric metric) {
+ return new JobMetrics(clock, (job, instant) -> {
+ Duration sinceSuccess = Duration.between(instant, clock.instant());
+ metric.set("maintenance.secondsSinceSuccess", sinceSuccess.getSeconds(), metric.createContext(Map.of("job", job)));
+ });
+ }
+
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index 4323622df8b..a5482281ef1 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -75,25 +75,25 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
nodeFailer = new NodeFailer(deployer, hostLivenessTracker, serviceMonitor, nodeRepository, defaults.failGrace, clock, orchestrator, throttlePolicyFromEnv().orElse(defaults.throttlePolicy), metric);
periodicApplicationMaintainer = new PeriodicApplicationMaintainer(deployer, metric, nodeRepository, defaults.redeployMaintainerInterval, defaults.periodicRedeployInterval);
operatorChangeApplicationMaintainer = new OperatorChangeApplicationMaintainer(deployer, metric, nodeRepository, defaults.operatorChangeRedeployInterval);
- reservationExpirer = new ReservationExpirer(nodeRepository, clock, defaults.reservationExpiry);
+ reservationExpirer = new ReservationExpirer(nodeRepository, clock, defaults.reservationExpiry, metric);
retiredExpirer = new RetiredExpirer(nodeRepository, orchestrator, deployer, metric, clock, defaults.retiredInterval, defaults.retiredExpiry);
- inactiveExpirer = new InactiveExpirer(nodeRepository, clock, defaults.inactiveExpiry);
- failedExpirer = new FailedExpirer(nodeRepository, zone, clock, defaults.failedExpirerInterval);
- dirtyExpirer = new DirtyExpirer(nodeRepository, clock, defaults.dirtyExpiry);
- provisionedExpirer = new ProvisionedExpirer(nodeRepository, clock, defaults.provisionedExpiry);
- nodeRebooter = new NodeRebooter(nodeRepository, clock, flagSource);
+ inactiveExpirer = new InactiveExpirer(nodeRepository, clock, defaults.inactiveExpiry, metric);
+ failedExpirer = new FailedExpirer(nodeRepository, zone, clock, defaults.failedExpirerInterval, metric);
+ dirtyExpirer = new DirtyExpirer(nodeRepository, clock, defaults.dirtyExpiry, metric);
+ provisionedExpirer = new ProvisionedExpirer(nodeRepository, clock, defaults.provisionedExpiry, metric);
+ nodeRebooter = new NodeRebooter(nodeRepository, clock, flagSource, metric);
metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, serviceMonitor, periodicApplicationMaintainer::pendingDeployments, defaults.metricsInterval, clock);
- infrastructureProvisioner = new InfrastructureProvisioner(nodeRepository, infraDeployer, defaults.infrastructureProvisionInterval);
+ infrastructureProvisioner = new InfrastructureProvisioner(nodeRepository, infraDeployer, defaults.infrastructureProvisionInterval, metric);
loadBalancerExpirer = provisionServiceProvider.getLoadBalancerService(nodeRepository).map(lbService ->
- new LoadBalancerExpirer(nodeRepository, defaults.loadBalancerExpirerInterval, lbService));
+ new LoadBalancerExpirer(nodeRepository, defaults.loadBalancerExpirerInterval, lbService, metric));
dynamicProvisioningMaintainer = provisionServiceProvider.getHostProvisioner().map(hostProvisioner ->
- new DynamicProvisioningMaintainer(nodeRepository, defaults.dynamicProvisionerInterval, hostProvisioner, flagSource));
+ new DynamicProvisioningMaintainer(nodeRepository, defaults.dynamicProvisionerInterval, hostProvisioner, flagSource, metric));
spareCapacityMaintainer = new SpareCapacityMaintainer(deployer, nodeRepository, metric, defaults.spareCapacityMaintenanceInterval);
- osUpgradeActivator = new OsUpgradeActivator(nodeRepository, defaults.osUpgradeActivatorInterval);
+ osUpgradeActivator = new OsUpgradeActivator(nodeRepository, defaults.osUpgradeActivatorInterval, metric);
rebalancer = new Rebalancer(deployer, nodeRepository, metric, clock, defaults.rebalancerInterval);
- nodeMetricsDbMaintainer = new NodeMetricsDbMaintainer(nodeRepository, nodeMetrics, nodeMetricsDb, defaults.nodeMetricsCollectionInterval);
+ nodeMetricsDbMaintainer = new NodeMetricsDbMaintainer(nodeRepository, nodeMetrics, nodeMetricsDb, defaults.nodeMetricsCollectionInterval, metric);
autoscalingMaintainer = new AutoscalingMaintainer(nodeRepository, nodeMetricsDb, deployer, metric, defaults.autoscalingInterval);
- scalingSuggestionsMaintainer = new ScalingSuggestionsMaintainer(nodeRepository, nodeMetricsDb, defaults.scalingSuggestionsInterval);
+ scalingSuggestionsMaintainer = new ScalingSuggestionsMaintainer(nodeRepository, nodeMetricsDb, defaults.scalingSuggestionsInterval, metric);
// The DuperModel is filled with infrastructure applications by the infrastructure provisioner, so explicitly run that now
infrastructureProvisioner.maintainButThrowOnException();
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java
index 11afbd785e8..be1190ccff4 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java
@@ -2,6 +2,7 @@
package com.yahoo.vespa.hosted.provision.maintenance;
import com.yahoo.config.provision.NodeType;
+import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
@@ -17,17 +18,18 @@ import java.time.Duration;
*/
public class OsUpgradeActivator extends NodeRepositoryMaintainer {
/** Creates the activator; the added {@code metric} argument is forwarded unchanged to the superclass. */
- public OsUpgradeActivator(NodeRepository nodeRepository, Duration interval) {
- super(nodeRepository, interval);
+ public OsUpgradeActivator(NodeRepository nodeRepository, Duration interval, Metric metric) {
+ super(nodeRepository, interval, metric);
}
/**
 * For each host node type, evaluates canUpgradeOsOf(nodeType) and passes the result to
 * osVersions().resumeUpgradeOf(...). Node types that are not hosts are skipped.
 * The return type changes from void to boolean; the method returns true whenever it completes normally.
 */
@Override
- protected void maintain() {
+ protected boolean maintain() {
for (var nodeType : NodeType.values()) {
if (!nodeType.isHost()) continue;
var active = canUpgradeOsOf(nodeType);
nodeRepository().osVersions().resumeUpgradeOf(nodeType, active);
}
+ return true;
}
/** Returns whether to allow OS upgrade of nodes of given type */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ProvisionedExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ProvisionedExpirer.java
index e1407f2a41d..d38bff091b0 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ProvisionedExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ProvisionedExpirer.java
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;
+import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Agent;
@@ -19,8 +20,8 @@ public class ProvisionedExpirer extends Expirer {
private final NodeRepository nodeRepository;
/**
 * Creates an expirer for {@code Node.State.provisioned} using the {@code provisioned} history event type.
 * The added {@code metric} argument is appended to the arguments passed to the {@code Expirer} constructor.
 * NOTE(review): the timeout parameter is named dirtyTimeout although this expirer targets provisioned
 * nodes; the name may be a leftover, confirm.
 */
- ProvisionedExpirer(NodeRepository nodeRepository, Clock clock, Duration dirtyTimeout) {
- super(Node.State.provisioned, History.Event.Type.provisioned, nodeRepository, clock, dirtyTimeout);
+ ProvisionedExpirer(NodeRepository nodeRepository, Clock clock, Duration dirtyTimeout, Metric metric) {
+ super(Node.State.provisioned, History.Event.Type.provisioned, nodeRepository, clock, dirtyTimeout, metric);
this.nodeRepository = nodeRepository;
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
index 3df20fa9d08..9b9c7df5d0d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
@@ -31,22 +31,24 @@ public class Rebalancer extends NodeRepositoryMaintainer {
Metric metric,
Clock clock,
Duration interval) {
- super(nodeRepository, interval);
+ super(nodeRepository, interval, metric);
this.deployer = deployer;
this.metric = metric;
this.clock = clock;
}
/**
 * Skips rebalancing when the cloud does not allow host sharing, when the environment is a test
 * environment, or when zoneIsStable(...) is false; otherwise executes the move returned by findBestMove(...).
 * The skew metric is updated before the stability check.
 * NOTE(review): success is never reassigned in this method, so every path returns true, and the result
 * of execute(...), if any, is not used.
 */
@Override
- protected void maintain() {
- if ( ! nodeRepository().zone().getCloud().allowHostSharing()) return; // Rebalancing not necessary
- if (nodeRepository().zone().environment().isTest()) return; // Short lived deployments; no need to rebalance
+ protected boolean maintain() {
+ boolean success = true;
+ if ( ! nodeRepository().zone().getCloud().allowHostSharing()) return success; // Rebalancing not necessary
+ if (nodeRepository().zone().environment().isTest()) return success; // Short lived deployments; no need to rebalance
// Work with an unlocked snapshot as this can take a long time and full consistency is not needed
NodeList allNodes = nodeRepository().list();
updateSkewMetric(allNodes);
- if ( ! zoneIsStable(allNodes)) return;
+ if ( ! zoneIsStable(allNodes)) return success;
findBestMove(allNodes).execute(true, Agent.Rebalancer, deployer, metric, nodeRepository());
+ return success;
}
/** We do this here rather than in MetricsReporter because it is expensive and frequent updates are unnecessary */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ReservationExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ReservationExpirer.java
index 03d466dbf09..27f77dd08a3 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ReservationExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ReservationExpirer.java
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;
+import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Agent;
@@ -22,8 +23,8 @@ public class ReservationExpirer extends Expirer {
private final NodeRepository nodeRepository;
/**
 * Creates an expirer for {@code Node.State.reserved} using the {@code reserved} history event type.
 * The added {@code metric} argument is appended to the arguments passed to the {@code Expirer} constructor.
 */
- public ReservationExpirer(NodeRepository nodeRepository, Clock clock, Duration reservationPeriod) {
- super(Node.State.reserved, History.Event.Type.reserved, nodeRepository, clock, reservationPeriod);
+ public ReservationExpirer(NodeRepository nodeRepository, Clock clock, Duration reservationPeriod, Metric metric) {
+ super(Node.State.reserved, History.Event.Type.reserved, nodeRepository, clock, reservationPeriod, metric);
this.nodeRepository = nodeRepository;
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
index a8566e24743..5b7f90102ba 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
@@ -39,7 +39,7 @@ public class RetiredExpirer extends NodeRepositoryMaintainer {
Clock clock,
Duration maintenanceInterval,
Duration retiredExpiry) {
- super(nodeRepository, maintenanceInterval);
+ super(nodeRepository, maintenanceInterval, metric);
this.deployer = deployer;
this.metric = metric;
this.orchestrator = orchestrator;
@@ -48,7 +48,7 @@ public class RetiredExpirer extends NodeRepositoryMaintainer {
}
@Override
- protected void maintain() {
+ protected boolean maintain() {
List<Node> activeNodes = nodeRepository().getNodes(Node.State.active);
Map<ApplicationId, List<Node>> retiredNodesByApplication = activeNodes.stream()
@@ -69,11 +69,12 @@ public class RetiredExpirer extends NodeRepositoryMaintainer {
nodeRepository().setRemovable(application, nodesToRemove);
boolean success = deployment.activate();
- if ( ! success) return;
+ if ( ! success) return success;
String nodeList = nodesToRemove.stream().map(Node::hostname).collect(Collectors.joining(", "));
log.info("Redeployed " + application + " to deactivate retired nodes: " + nodeList);
}
}
+ return true;
}
/**
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java
index b68e8eacbaa..b0c52d10f7d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java
@@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.provision.maintenance;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.jdisc.Metric;
import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
@@ -12,7 +13,6 @@ import com.yahoo.vespa.hosted.provision.applications.Applications;
import com.yahoo.vespa.hosted.provision.applications.Cluster;
import com.yahoo.vespa.hosted.provision.autoscale.Autoscaler;
import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricsDb;
-import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator;
import java.time.Duration;
import java.util.List;
@@ -31,16 +31,19 @@ public class ScalingSuggestionsMaintainer extends NodeRepositoryMaintainer {
/**
 * Creates the maintainer. The added {@code metric} argument is forwarded to the superclass, while
 * {@code metricsDb} and the repository are used to construct the Autoscaler.
 */
public ScalingSuggestionsMaintainer(NodeRepository nodeRepository,
NodeMetricsDb metricsDb,
- Duration interval) {
- super(nodeRepository, interval);
+ Duration interval,
+ Metric metric) {
+ super(nodeRepository, interval, metric);
this.autoscaler = new Autoscaler(metricsDb, nodeRepository);
}
/**
 * Returns immediately when the zone environment is not production; otherwise calls suggest(...) for
 * each entry of activeNodesByApplication().
 * NOTE(review): success is never set to false in this method, so both paths return true.
 */
@Override
- protected void maintain() {
- if ( ! nodeRepository().zone().environment().isProduction()) return;
+ protected boolean maintain() {
+ boolean success = true;
+ if ( ! nodeRepository().zone().environment().isProduction()) return success;
activeNodesByApplication().forEach((applicationId, nodes) -> suggest(applicationId, nodes));
+ return success;
}
private void suggest(ApplicationId application, List<Node> applicationNodes) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
index 90c3a277080..20258e7947b 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
@@ -56,15 +56,16 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
Metric metric,
Duration interval,
int maxIterations) {
- super(nodeRepository, interval);
+ super(nodeRepository, interval, metric);
this.deployer = deployer;
this.metric = metric;
this.maxIterations = maxIterations;
}
@Override
- protected void maintain() {
- if ( ! nodeRepository().zone().getCloud().allowHostSharing()) return;
+ protected boolean maintain() {
+ boolean success = true;
+ if ( ! nodeRepository().zone().getCloud().allowHostSharing()) return success;
CapacityChecker capacityChecker = new CapacityChecker(nodeRepository());
@@ -89,6 +90,7 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
}
metric.set("spareHostCapacity", spareHostCapacity, null);
}
+ return success;
}
private boolean execute(List<Move> mitigation) {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
index ba859655ab7..cecedb0e909 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
@@ -233,7 +233,8 @@ public class DynamicProvisioningMaintainerTest {
this.maintainer = new DynamicProvisioningMaintainer(nodeRepository,
Duration.ofDays(1),
hostProvisioner,
- flagSource);
+ flagSource,
+ new TestMetric());
}
private DynamicProvisioningTester addInitialNodes() {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java
index ed6f31984a5..f8e21ebbfce 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirerTest.java
@@ -263,7 +263,7 @@ public class FailedExpirerTest {
false,
0);
this.provisioner = new NodeRepositoryProvisioner(nodeRepository, zone, new MockProvisionServiceProvider(), new InMemoryFlagSource());
- this.expirer = new FailedExpirer(nodeRepository, zone, clock, Duration.ofMinutes(30));
+ this.expirer = new FailedExpirer(nodeRepository, zone, clock, Duration.ofMinutes(30), new TestMetric());
}
public ManualClock clock() {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java
index 89e43f80479..3d17cbf0217 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java
@@ -64,7 +64,7 @@ public class InactiveAndFailedExpirerTest {
// Inactive times out
tester.advanceTime(Duration.ofMinutes(14));
- new InactiveExpirer(tester.nodeRepository(), tester.clock(), Duration.ofMinutes(10)).run();
+ new InactiveExpirer(tester.nodeRepository(), tester.clock(), Duration.ofMinutes(10), new TestMetric()).run();
assertEquals(0, tester.nodeRepository().getNodes(Node.State.inactive).size());
List<Node> dirty = tester.nodeRepository().getNodes(Node.State.dirty);
assertEquals(2, dirty.size());
@@ -79,7 +79,7 @@ public class InactiveAndFailedExpirerTest {
// Dirty times out for the other one
tester.advanceTime(Duration.ofMinutes(14));
- new DirtyExpirer(tester.nodeRepository(), tester.clock(), Duration.ofMinutes(10)).run();
+ new DirtyExpirer(tester.nodeRepository(), tester.clock(), Duration.ofMinutes(10), new TestMetric()).run();
assertEquals(0, tester.nodeRepository().getNodes(NodeType.tenant, Node.State.dirty).size());
List<Node> failed = tester.nodeRepository().getNodes(NodeType.tenant, Node.State.failed);
assertEquals(1, failed.size());
@@ -107,7 +107,7 @@ public class InactiveAndFailedExpirerTest {
// Inactive times out and node is moved to dirty
tester.advanceTime(Duration.ofMinutes(14));
- new InactiveExpirer(tester.nodeRepository(), tester.clock(), Duration.ofMinutes(10)).run();
+ new InactiveExpirer(tester.nodeRepository(), tester.clock(), Duration.ofMinutes(10), new TestMetric()).run();
List<Node> dirty = tester.nodeRepository().getNodes(Node.State.dirty);
assertEquals(2, dirty.size());
@@ -158,7 +158,7 @@ public class InactiveAndFailedExpirerTest {
// Inactive times out and one node is moved to parked
tester.advanceTime(Duration.ofMinutes(11)); // Trigger InactiveExpirer
- new InactiveExpirer(tester.nodeRepository(), tester.clock(), Duration.ofMinutes(10)).run();
+ new InactiveExpirer(tester.nodeRepository(), tester.clock(), Duration.ofMinutes(10), new TestMetric()).run();
assertEquals(1, tester.nodeRepository().getNodes(Node.State.parked).size());
}
@@ -180,7 +180,7 @@ public class InactiveAndFailedExpirerTest {
assertEquals(1, inactiveNodes.size());
// See that nodes are moved to dirty immediately.
- new InactiveExpirer(tester.nodeRepository(), tester.clock(), Duration.ofMinutes(10)).run();
+ new InactiveExpirer(tester.nodeRepository(), tester.clock(), Duration.ofMinutes(10), new TestMetric()).run();
assertEquals(0, tester.nodeRepository().getNodes(Node.State.inactive).size());
List<Node> dirty = tester.nodeRepository().getNodes(Node.State.dirty);
assertEquals(1, dirty.size());
@@ -207,7 +207,7 @@ public class InactiveAndFailedExpirerTest {
.map(node -> node.withWantToRetire(true, true, Agent.system, tester.clock().instant()))
.collect(Collectors.toList()), () -> {});
tester.advanceTime(Duration.ofMinutes(11));
- new InactiveExpirer(tester.nodeRepository(), tester.clock(), Duration.ofMinutes(10)).run();
+ new InactiveExpirer(tester.nodeRepository(), tester.clock(), Duration.ofMinutes(10), new TestMetric()).run();
assertEquals(2, tester.nodeRepository().getNodes(Node.State.parked).size());
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirerTest.java
index a5e96369591..6c22f798fe0 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirerTest.java
@@ -38,7 +38,8 @@ public class LoadBalancerExpirerTest {
public void expire_inactive() {
LoadBalancerExpirer expirer = new LoadBalancerExpirer(tester.nodeRepository(),
Duration.ofDays(1),
- tester.loadBalancerService());
+ tester.loadBalancerService(),
+ new TestMetric());
Supplier<Map<LoadBalancerId, LoadBalancer>> loadBalancers = () -> tester.nodeRepository().database().readLoadBalancers((ignored) -> true);
// Deploy two applications with a total of three load balancers
@@ -103,7 +104,8 @@ public class LoadBalancerExpirerTest {
public void expire_reserved() {
LoadBalancerExpirer expirer = new LoadBalancerExpirer(tester.nodeRepository(),
Duration.ofDays(1),
- tester.loadBalancerService());
+ tester.loadBalancerService(),
+ new TestMetric());
Supplier<Map<LoadBalancerId, LoadBalancer>> loadBalancers = () -> tester.nodeRepository().database().readLoadBalancers((ignored) -> true);
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java
index bae6de5a095..3ff81070516 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java
@@ -26,7 +26,7 @@ public class NodeRebooterTest {
var flagSource = new InMemoryFlagSource().withIntFlag(Flags.REBOOT_INTERVAL_IN_DAYS.id(), (int) rebootInterval.toDays());
var tester = new MaintenanceTester();
tester.createReadyHostNodes(15);
- NodeRebooter rebooter = new NodeRebooter(tester.nodeRepository, tester.clock, flagSource);
+ NodeRebooter rebooter = new NodeRebooter(tester.nodeRepository, tester.clock, flagSource, new TestMetric());
assertReadyHosts(15, tester, 0L);
@@ -69,7 +69,7 @@ public class NodeRebooterTest {
var flagSource = new InMemoryFlagSource().withIntFlag(Flags.REBOOT_INTERVAL_IN_DAYS.id(), (int) rebootInterval.toDays());
var tester = new MaintenanceTester();
tester.createReadyHostNodes(2);
- NodeRebooter rebooter = new NodeRebooter(tester.nodeRepository, tester.clock, flagSource);
+ NodeRebooter rebooter = new NodeRebooter(tester.nodeRepository, tester.clock, flagSource, new TestMetric());
assertReadyHosts(2, tester, 0L);
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivatorTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivatorTest.java
index 65c7bf13b42..218812f9a3d 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivatorTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivatorTest.java
@@ -34,7 +34,7 @@ public class OsUpgradeActivatorTest {
@Test
public void activates_upgrade() {
var osVersions = tester.nodeRepository().osVersions();
- var osUpgradeActivator = new OsUpgradeActivator(tester.nodeRepository(), Duration.ofDays(1));
+ var osUpgradeActivator = new OsUpgradeActivator(tester.nodeRepository(), Duration.ofDays(1), new TestMetric());
var version0 = Version.fromString("7.0");
// Create infrastructure nodes
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ReservationExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ReservationExpirerTest.java
index 6ca154f5f17..bd92c2a9aa2 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ReservationExpirerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ReservationExpirerTest.java
@@ -75,7 +75,7 @@ public class ReservationExpirerTest {
// Reservation times out
clock.advance(Duration.ofMinutes(14)); // Reserved but not used time out
- new ReservationExpirer(nodeRepository, clock, Duration.ofMinutes(10)).run();
+ new ReservationExpirer(nodeRepository, clock, Duration.ofMinutes(10), new TestMetric()).run();
// Assert nothing is reserved
assertEquals(0, nodeRepository.getNodes(NodeType.tenant, Node.State.reserved).size());
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java
index b7f21eb3114..be5c7f423c7 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainerTest.java
@@ -25,7 +25,6 @@ import java.time.Duration;
import java.util.List;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
/**
* Tests the scaling suggestions maintainer integration.
@@ -66,7 +65,8 @@ public class ScalingSuggestionsMaintainerTest {
ScalingSuggestionsMaintainer maintainer = new ScalingSuggestionsMaintainer(tester.nodeRepository(),
nodeMetricsDb,
- Duration.ofMinutes(1));
+ Duration.ofMinutes(1),
+ new TestMetric());
maintainer.maintain();
assertEquals("14 nodes with [vcpu: 6.9, memory: 5.1 Gb, disk 15.0 Gb, bandwidth: 0.1 Gbps, storage type: remote]",
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
index 3eb379b0914..845eeba972c 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java
@@ -22,6 +22,7 @@ import com.yahoo.transaction.NestedTransaction;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.maintenance.ReservationExpirer;
+import com.yahoo.vespa.hosted.provision.maintenance.TestMetric;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.node.History;
import org.junit.Test;
@@ -791,7 +792,7 @@ public class ProvisioningTest {
// Over 10 minutes pass since first reservation. First set of reserved nodes are not expired
tester.clock().advance(Duration.ofMinutes(8).plus(Duration.ofSeconds(1)));
ReservationExpirer expirer = new ReservationExpirer(tester.nodeRepository(), tester.clock(),
- Duration.ofMinutes(10));
+ Duration.ofMinutes(10), new TestMetric());
expirer.run();
assertEquals("Nodes remain reserved", 4,
tester.getNodes(application, Node.State.reserved).size());
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java
index b1ecd03aa13..81bf999a184 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiTest.java
@@ -9,6 +9,7 @@ import com.yahoo.text.Utf8;
import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.maintenance.OsUpgradeActivator;
+import com.yahoo.vespa.hosted.provision.maintenance.TestMetric;
import com.yahoo.vespa.hosted.provision.testutils.MockNodeRepository;
import com.yahoo.vespa.hosted.provision.testutils.OrchestratorMock;
import org.junit.After;
@@ -763,7 +764,7 @@ public class NodesV2ApiTest {
// Activate target
var nodeRepository = (NodeRepository)tester.container().components().getComponent(MockNodeRepository.class.getName());
- var osUpgradeActivator = new OsUpgradeActivator(nodeRepository, Duration.ofDays(1));
+ var osUpgradeActivator = new OsUpgradeActivator(nodeRepository, Duration.ofDays(1), new TestMetric());
osUpgradeActivator.run();
// Other node type does not return wanted OS version
diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java
new file mode 100644
index 00000000000..4c05d46d782
--- /dev/null
+++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java
@@ -0,0 +1,51 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.concurrent.maintenance;
+
+import java.time.Clock;
+import java.time.Instant;
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.function.BiConsumer;
+
+/**
+ * Tracks and forwards maintenance job metrics.
+ *
+ * Remembers, per job name, the instant of the most recent successful run and hands that instant to a
+ * caller-supplied consumer on request.
+ *
+ * @author mpolden
+ */
+public class JobMetrics {
+
+    private final Clock clock;
+    private final BiConsumer<String, Instant> metricConsumer;
+
+    /** Time of the latest recorded success, keyed by job name. Jobs that have never succeeded have no entry */
+    private final Map<String, Instant> successfulRuns = new ConcurrentHashMap<>();
+
+    /**
+     * @param clock source of the timestamp stored by {@link #recordSuccessOf(String)}
+     * @param metricConsumer receives the job name and the time of its last successful run on each forward
+     * @throws NullPointerException if any argument is null
+     */
+    public JobMetrics(Clock clock, BiConsumer<String, Instant> metricConsumer) {
+        this.clock = Objects.requireNonNull(clock);
+        // Fail at construction rather than on the first forward() after a success
+        this.metricConsumer = Objects.requireNonNull(metricConsumer);
+    }
+
+    /** Record successful run of given job, replacing any previously recorded time for that job */
+    public void recordSuccessOf(String job) {
+        successfulRuns.put(job, clock.instant());
+    }
+
+    /** Forward metrics for given job to metric consumer. Does nothing if the job has no recorded success */
+    public void forward(String job) {
+        Instant lastSuccess = successfulRuns.get(job);
+        if (lastSuccess != null) {
+            metricConsumer.accept(job, lastSuccess);
+        }
+    }
+
+}
diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
index 9c40e5ec54f..0385c27536d 100644
--- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
+++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
@@ -26,17 +26,19 @@ public abstract class Maintainer implements Runnable, AutoCloseable {
private final String name;
private final JobControl jobControl;
+ private final JobMetrics jobMetrics;
private final Duration interval;
private final ScheduledExecutorService service;
- public Maintainer(String name, Duration interval, Instant startedAt, JobControl jobControl, List<String> clusterHostnames) {
- this(name, interval, staggeredDelay(interval, startedAt, HostName.getLocalhost(), clusterHostnames), jobControl);
+ public Maintainer(String name, Duration interval, Instant startedAt, JobControl jobControl, JobMetrics jobMetrics, List<String> clusterHostnames) {
+ this(name, interval, staggeredDelay(interval, startedAt, HostName.getLocalhost(), clusterHostnames), jobControl, jobMetrics);
}
- public Maintainer(String name, Duration interval, Duration initialDelay, JobControl jobControl) {
+ public Maintainer(String name, Duration interval, Duration initialDelay, JobControl jobControl, JobMetrics jobMetrics) {
this.name = name;
this.interval = requireInterval(interval);
this.jobControl = Objects.requireNonNull(jobControl);
+ this.jobMetrics = Objects.requireNonNull(jobMetrics);
service = new ScheduledThreadPoolExecutor(1, r -> new Thread(r, name() + "-worker"));
service.scheduleAtFixedRate(this, initialDelay.toMillis(), interval.toMillis(), TimeUnit.MILLISECONDS);
jobControl.started(name(), this);
@@ -72,8 +74,8 @@ public abstract class Maintainer implements Runnable, AutoCloseable {
@Override
public final String toString() { return name(); }
- /** Called once each time this maintenance job should run */
- protected abstract void maintain();
+ /** Called once each time this maintenance job should run. Returns whether the maintenance run was successful */
+ protected abstract boolean maintain();
/** Returns the interval at which this job is set to run */
protected Duration interval() { return interval; }
@@ -82,7 +84,12 @@ public abstract class Maintainer implements Runnable, AutoCloseable {
@SuppressWarnings("unused")
public final void lockAndMaintain() {
try (var lock = jobControl.lockJob(name())) {
- maintain();
+ try {
+ if (maintain()) jobMetrics.recordSuccessOf(name());
+ } finally {
+ // Always forward metrics
+ jobMetrics.forward(name());
+ }
}
}
diff --git a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlStateMock.java b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlStateMock.java
new file mode 100644
index 00000000000..28c701a67db
--- /dev/null
+++ b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlStateMock.java
@@ -0,0 +1,40 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.concurrent.maintenance;
+
+import com.yahoo.transaction.Mutex;
+
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * In-memory {@link JobControlState} for tests: keeps inactive job names in a plain set and hands out a no-op mutex.
+ *
+ * @author mpolden
+ */
+class JobControlStateMock implements JobControlState {
+
+    private final Set<String> deactivated = new HashSet<>();
+
+    /** Returns a read-only view of the jobs currently marked inactive */
+    @Override
+    public Set<String> readInactiveJobs() {
+        return Collections.unmodifiableSet(deactivated);
+    }
+
+    /** Returns a mutex whose single method is a no-op; the job name is ignored */
+    @Override
+    public Mutex lockMaintenanceJob(String job) {
+        return () -> {};
+    }
+
+    /** Marks the given job as active or inactive */
+    public void setActive(String job, boolean active) {
+        if (!active) {
+            deactivated.add(job);
+            return;
+        }
+        deactivated.remove(job);
+    }
+
+}
diff --git a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java
index 0640ab2835a..a0ca9b529c5 100644
--- a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java
+++ b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java
@@ -1,15 +1,8 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.concurrent.maintenance;
-import com.yahoo.transaction.Mutex;
import org.junit.Test;
-import java.time.Duration;
-import java.time.Instant;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
@@ -21,18 +14,13 @@ public class JobControlTest {
@Test
public void testJobControl() {
- MockJobControlState state = new MockJobControlState();
+ JobControlStateMock state = new JobControlStateMock();
JobControl jobControl = new JobControl(state);
- MockMaintainer maintainer1 = new MockMaintainer();
- MockMaintainer maintainer2 = new MockMaintainer();
- assertTrue(jobControl.jobs().isEmpty());
-
String job1 = "Job1";
String job2 = "Job2";
-
- jobControl.started(job1, maintainer1);
- jobControl.started(job2, maintainer2);
+ TestMaintainer maintainer1 = new TestMaintainer(job1, jobControl);
+ TestMaintainer maintainer2 = new TestMaintainer(job2, jobControl);
assertEquals(2, jobControl.jobs().size());
assertTrue(jobControl.jobs().contains(job1));
assertTrue(jobControl.jobs().contains(job2));
@@ -59,79 +47,36 @@ public class JobControlTest {
// Run jobs on-demand
jobControl.run(job1);
jobControl.run(job1);
- assertEquals(2, maintainer1.maintenanceInvocations);
+ assertEquals(2, maintainer1.totalRuns());
jobControl.run(job2);
- assertEquals(1, maintainer2.maintenanceInvocations);
+ assertEquals(1, maintainer2.totalRuns());
// Running jobs on-demand ignores inactive flag
state.setActive(job1, false);
jobControl.run(job1);
- assertEquals(3, maintainer1.maintenanceInvocations);
+ assertEquals(3, maintainer1.totalRuns());
}
@Test
public void testJobControlMayDeactivateJobs() {
- MockJobControlState state = new MockJobControlState();
+ JobControlStateMock state = new JobControlStateMock();
JobControl jobControl = new JobControl(state);
- MockMaintainer mockMaintainer = new MockMaintainer(jobControl);
+ TestMaintainer mockMaintainer = new TestMaintainer(null, jobControl);
- assertTrue(jobControl.jobs().contains("MockMaintainer"));
+ assertTrue(jobControl.jobs().contains("TestMaintainer"));
- assertEquals(0, mockMaintainer.maintenanceInvocations);
+ assertEquals(0, mockMaintainer.totalRuns());
mockMaintainer.run();
- assertEquals(1, mockMaintainer.maintenanceInvocations);
+ assertEquals(1, mockMaintainer.totalRuns());
- state.setActive("MockMaintainer", false);
+ state.setActive("TestMaintainer", false);
mockMaintainer.run();
- assertEquals(1, mockMaintainer.maintenanceInvocations);
+ assertEquals(1, mockMaintainer.totalRuns());
- state.setActive("MockMaintainer", true);
+ state.setActive("TestMaintainer", true);
mockMaintainer.run();
- assertEquals(2, mockMaintainer.maintenanceInvocations);
- }
-
- private static class MockJobControlState implements JobControlState {
-
- private final Set<String> inactiveJobs = new HashSet<>();
-
- @Override
- public Set<String> readInactiveJobs() {
- return new HashSet<>(inactiveJobs);
- }
-
- @Override
- public Mutex lockMaintenanceJob(String job) {
- return () -> {};
- }
-
- public void setActive(String job, boolean active) {
- if (active) {
- inactiveJobs.remove(job);
- } else {
- inactiveJobs.add(job);
- }
- }
-
- }
-
- private static class MockMaintainer extends Maintainer {
-
- int maintenanceInvocations = 0;
-
- private MockMaintainer(JobControl jobControl) {
- super(null, Duration.ofHours(1), Instant.now(), jobControl, List.of());
- }
-
- private MockMaintainer() {
- this(new JobControl(new MockJobControlState()));
- }
-
- @Override
- protected void maintain() {
- maintenanceInvocations++;
- }
-
+ assertEquals(2, mockMaintainer.totalRuns());
}
}
diff --git a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java
index 820d1fc3d1d..47ed010e95e 100644
--- a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java
+++ b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java
@@ -1,13 +1,16 @@
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.concurrent.maintenance;
+import com.yahoo.test.ManualClock;
import org.junit.Test;
import java.time.Duration;
import java.time.Instant;
import java.util.List;
+import java.util.concurrent.atomic.AtomicReference;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
/**
* @author freva
@@ -36,4 +39,39 @@ public class MaintainerTest {
assertEquals(300, Maintainer.staggeredDelay(interval, now, "cfg0", cluster).toMillis());
}
+ @Test
+ public void success_metric() {
+ ManualClock clock = new ManualClock();
+ AtomicReference<Instant> lastSuccess = new AtomicReference<>();
+ JobMetrics jobMetrics = new JobMetrics(clock, (job, instant) -> lastSuccess.set(instant));
+ TestMaintainer maintainer = new TestMaintainer(jobMetrics);
+
+ // First run reports failure, so the metric consumer has not received any success time yet
+ maintainer.successOnNextRun(false).run();
+ assertNull(lastSuccess.get());
+
+ // A run reporting success makes the consumer receive the clock's current instant
+ clock.advance(Duration.ofHours(1));
+ Instant lastSuccess0 = clock.instant();
+ maintainer.successOnNextRun(true).run();
+ assertEquals(lastSuccess0, lastSuccess.get());
+
+ // Success flag is still set from above, so a later run moves the reported instant forward
+ clock.advance(Duration.ofHours(2));
+ Instant lastSuccess1 = clock.instant();
+ maintainer.run();
+ assertEquals(lastSuccess1, lastSuccess.get());
+
+ // maintain() throws on this run: the previously reported success time must stay as it was
+ clock.advance(Duration.ofHours(5));
+ maintainer.throwOnNextRun(true).run();
+ assertEquals("Time of successful run is unchanged", lastSuccess1, lastSuccess.get());
+
+ // Once maintain() stops throwing, the next run reports a fresh success time
+ clock.advance(Duration.ofHours(3));
+ Instant lastSuccess2 = clock.instant();
+ maintainer.throwOnNextRun(false).run();
+ assertEquals(lastSuccess2, lastSuccess.get());
+ }
+
}
diff --git a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java
new file mode 100644
index 00000000000..0ea24fb6c2b
--- /dev/null
+++ b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java
@@ -0,0 +1,59 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.concurrent.maintenance;
+
+import java.time.Clock;
+import java.time.Duration;
+
+/**
+ * Maintainer for tests: the test decides what each run returns or whether it throws, and completed runs are counted.
+ *
+ * @author mpolden
+ */
+class TestMaintainer extends Maintainer {
+
+    private int completedRuns = 0;
+    private boolean reportSuccess = true;
+    private boolean failWithException = false;
+
+    /** Creates a maintainer with a one-day interval and a one-day initial delay */
+    public TestMaintainer(String name, JobControl jobControl, JobMetrics jobMetrics) {
+        super(name, Duration.ofDays(1), Duration.ofDays(1), jobControl, jobMetrics);
+    }
+
+    /** Creates a maintainer with a null name and a job control backed by a fresh {@link JobControlStateMock} */
+    public TestMaintainer(JobMetrics jobMetrics) {
+        this(null, new JobControl(new JobControlStateMock()), jobMetrics);
+    }
+
+    /** Creates a maintainer whose job metrics read the system UTC clock and discard everything forwarded */
+    public TestMaintainer(String name, JobControl jobControl) {
+        this(name, jobControl, new JobMetrics(Clock.systemUTC(), (job, instant) -> {}));
+    }
+
+    /** Returns the number of calls to {@link #maintain()} that did not throw */
+    public int totalRuns() {
+        return completedRuns;
+    }
+
+    /** Sets the value returned by {@link #maintain()}; it stays in effect until changed. Returns this */
+    public TestMaintainer successOnNextRun(boolean success) {
+        reportSuccess = success;
+        return this;
+    }
+
+    /** Sets whether {@link #maintain()} throws; it stays in effect until changed. Returns this */
+    public TestMaintainer throwOnNextRun(boolean throwing) {
+        failWithException = throwing;
+        return this;
+    }
+
+    @Override
+    protected boolean maintain() {
+        if (!failWithException) {
+            completedRuns++;
+            return reportSuccess;
+        }
+        throw new RuntimeException("Maintenance run failed");
+    }
+
+}