aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArnstein Ressem <aressem@gmail.com>2021-06-04 21:57:32 +0200
committerGitHub <noreply@github.com>2021-06-04 21:57:32 +0200
commitcd1b747b4f65fa3a6ed6aace23235db7591638c5 (patch)
tree810ae8989030055b3cea65c684f69c0cfbd404b5
parentb14315a6f51bc8e5bce22e0d9d11d0e730aaf96d (diff)
Revert "Emit a success factor from maintainers"
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ApplicationPackageMaintainer.java10
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java18
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/FileDistributionMaintainer.java4
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java11
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java4
-rw-r--r--configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/TenantsMaintainer.java6
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationMetaDataGarbageCollector.java6
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java37
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveAccessMaintainer.java6
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java10
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContainerImageExpirer.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java20
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java10
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java35
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java6
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdater.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java14
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/NameServiceDispatcher.java7
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java6
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java6
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java6
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainer.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java17
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainer.java4
-rw-r--r--controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java6
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java13
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainerTest.java10
-rw-r--r--controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java10
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java9
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostEncrypter.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InfrastructureProvisioner.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java17
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java16
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java17
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java21
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java18
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java4
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java13
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java11
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java11
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java10
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java10
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java2
-rw-r--r--vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java24
-rw-r--r--vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java24
-rw-r--r--vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java16
-rw-r--r--vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java28
-rw-r--r--vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java4
67 files changed, 255 insertions, 378 deletions
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ApplicationPackageMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ApplicationPackageMaintainer.java
index 003b4fbb345..5519ffc1bdc 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ApplicationPackageMaintainer.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ApplicationPackageMaintainer.java
@@ -48,9 +48,8 @@ public class ApplicationPackageMaintainer extends ConfigServerMaintainer {
}
@Override
- protected double maintain() {
- int attempts = 0;
- int failures = 0;
+ protected boolean maintain() {
+ boolean success = true;
try (var fileDownloader = new FileDownloader(connectionPool, downloadDirectory)) {
for (var applicationId : applicationRepository.listApplications()) {
@@ -63,12 +62,11 @@ public class ApplicationPackageMaintainer extends ConfigServerMaintainer {
log.fine(() -> "Verifying application package file reference " + applicationPackage + " for session " + sessionId);
if (applicationPackage != null) {
- attempts++;
if (! fileReferenceExistsOnDisk(downloadDirectory, applicationPackage)) {
log.fine(() -> "Downloading missing application package for application " + applicationId + " - session " + sessionId);
if (fileDownloader.getFile(applicationPackage).isEmpty()) {
- failures++;
+ success = false;
log.warning("Failed to download application package for application " + applicationId + " - session " + sessionId);
continue;
}
@@ -77,7 +75,7 @@ public class ApplicationPackageMaintainer extends ConfigServerMaintainer {
}
}
}
- return asSuccessFactor(attempts, failures);
+ return success;
}
@Override
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java
index e0f0a4b4099..4938f34131e 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java
@@ -35,24 +35,14 @@ public abstract class ConfigServerMaintainer extends Maintainer {
ConfigServerMaintainer(ApplicationRepository applicationRepository, Curator curator, FlagSource flagSource,
Instant now, Duration interval) {
super(null, interval, now, new JobControl(new JobControlFlags(curator, flagSource)),
- new ConfigServerJobMetrics(applicationRepository.metric()), cluster(curator), false);
+ jobMetrics(applicationRepository.metric()), cluster(curator), false);
this.applicationRepository = applicationRepository;
}
- private static class ConfigServerJobMetrics extends JobMetrics {
-
- private final Metric metric;
-
- public ConfigServerJobMetrics(Metric metric) {
- this.metric = metric;
- }
-
- @Override
- protected void recordCompletion(String job, Long consecutiveFailures, double successFactor) {
+ private static JobMetrics jobMetrics(Metric metric) {
+ return new JobMetrics((job, consecutiveFailures) -> {
metric.set("maintenance.consecutiveFailures", consecutiveFailures, metric.createContext(Map.of("job", job)));
- metric.set("maintenance.successFactor", successFactor, metric.createContext(Map.of("job", job)));
- }
-
+ });
}
private static class JobControlFlags implements JobControlState {
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/FileDistributionMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/FileDistributionMaintainer.java
index ca8db30c21f..b0876fb57e8 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/FileDistributionMaintainer.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/FileDistributionMaintainer.java
@@ -33,9 +33,9 @@ public class FileDistributionMaintainer extends ConfigServerMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
applicationRepository.deleteUnusedFiledistributionReferences(fileReferencesDir, maxUnusedFileReferenceAge);
- return 1.0;
+ return true;
}
}
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java
index af9ea917aaf..971c2c20ae9 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java
@@ -22,7 +22,6 @@ import java.util.Comparator;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Supplier;
import java.util.logging.Level;
@@ -52,9 +51,8 @@ public class ReindexingMaintainer extends ConfigServerMaintainer {
}
@Override
- protected double maintain() {
- AtomicInteger attempts = new AtomicInteger(0);
- AtomicInteger failures = new AtomicInteger(0);
+ protected boolean maintain() {
+ AtomicBoolean success = new AtomicBoolean(true);
for (Tenant tenant : applicationRepository.tenantRepository().getAllTenants()) {
ApplicationCuratorDatabase database = tenant.getApplicationRepo().database();
for (ApplicationId id : database.activeApplications())
@@ -62,7 +60,6 @@ public class ReindexingMaintainer extends ConfigServerMaintainer {
.map(application -> application.getForVersionOrLatest(Optional.empty(), clock.instant()))
.ifPresent(application -> {
try {
- attempts.incrementAndGet();
applicationRepository.modifyReindexing(id, reindexing -> {
reindexing = withNewReady(reindexing, lazyGeneration(application), clock.instant());
reindexing = withOnlyCurrentData(reindexing, application);
@@ -71,11 +68,11 @@ public class ReindexingMaintainer extends ConfigServerMaintainer {
}
catch (RuntimeException e) {
log.log(Level.INFO, "Failed to update reindexing status for " + id + ": " + Exceptions.toMessageString(e));
- failures.incrementAndGet();
+ success.set(false);
}
});
}
- return asSuccessFactor(attempts.get(), failures.get());
+ return success.get();
}
private Supplier<Long> lazyGeneration(Application application) {
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java
index 1f85dd4579d..7482980e221 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java
@@ -25,7 +25,7 @@ public class SessionsMaintainer extends ConfigServerMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
if (iteration % 10 == 0)
log.log(Level.INFO, () -> "Running " + SessionsMaintainer.class.getSimpleName() + ", iteration " + iteration);
@@ -38,7 +38,7 @@ public class SessionsMaintainer extends ConfigServerMaintainer {
}
iteration++;
- return 1.0;
+ return true;
}
}
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/TenantsMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/TenantsMaintainer.java
index 0a7df2c9d21..7c01045ee72 100644
--- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/TenantsMaintainer.java
+++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/TenantsMaintainer.java
@@ -31,12 +31,12 @@ public class TenantsMaintainer extends ConfigServerMaintainer {
}
@Override
- protected double maintain() {
- if ( ! applicationRepository.configserverConfig().hostedVespa()) return 1.0;
+ protected boolean maintain() {
+ if ( ! applicationRepository.configserverConfig().hostedVespa()) return true;
Set<TenantName> tenants = applicationRepository.deleteUnusedTenants(ttlForUnusedTenant, clock.instant());
if (tenants.size() > 0) log.log(Level.INFO, "Deleted tenants " + tenants);
- return 1.0;
+ return true;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationMetaDataGarbageCollector.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationMetaDataGarbageCollector.java
index 9ec8e4d1a2d..7d94a4c728f 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationMetaDataGarbageCollector.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationMetaDataGarbageCollector.java
@@ -19,14 +19,14 @@ public class ApplicationMetaDataGarbageCollector extends ControllerMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
try {
controller().applications().applicationStore().pruneMeta(controller().clock().instant().minus(Duration.ofDays(365)));
- return 1.0;
+ return true;
}
catch (Exception e) {
log.log(Level.WARNING, "Exception pruning old application meta data", e);
- return 0.0;
+ return false;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java
index 69e0eb26f16..1f20e48edf5 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java
@@ -18,7 +18,6 @@ import com.yahoo.yolean.Exceptions;
import java.time.Duration;
import java.util.HashMap;
import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
/**
@@ -40,17 +39,15 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer {
}
@Override
- protected double maintain() {
- return ( confirmApplicationOwnerships() +
- ensureConfirmationResponses() +
- updateConfirmedApplicationOwners() )
- / 3;
+ protected boolean maintain() {
+ return confirmApplicationOwnerships() &
+ ensureConfirmationResponses() &
+ updateConfirmedApplicationOwners();
}
/** File an ownership issue with the owners of all applications we know about. */
- private double confirmApplicationOwnerships() {
- AtomicInteger attempts = new AtomicInteger(0);
- AtomicInteger failures = new AtomicInteger(0);
+ private boolean confirmApplicationOwnerships() {
+ AtomicBoolean success = new AtomicBoolean(true);
applications()
.withProjectId()
.withProductionDeployment()
@@ -59,7 +56,6 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer {
.filter(application -> application.createdAt().isBefore(controller().clock().instant().minus(Duration.ofDays(90))))
.forEach(application -> {
try {
- attempts.incrementAndGet();
// TODO jvenstad: Makes sense to require, and run this only in main?
tenantOf(application.id()).contact().flatMap(contact -> {
return ownershipIssues.confirmOwnership(application.ownershipIssueId(),
@@ -69,17 +65,17 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer {
}).ifPresent(newIssueId -> store(newIssueId, application.id()));
}
catch (RuntimeException e) { // Catch errors due to wrong data in the controller, or issues client timeout.
- failures.incrementAndGet();
+ success.set(false);
log.log(Level.INFO, "Exception caught when attempting to file an issue for '" + application.id() + "': " + Exceptions.toMessageString(e));
}
});
- return asSuccessFactor(attempts.get(), failures.get());
+ return success.get();
}
private ApplicationSummary summaryOf(TenantAndApplicationId application) {
var app = applications.requireApplication(application);
var metrics = new HashMap<ZoneId, ApplicationSummary.Metric>();
- for (Instance instance : app.instances().values()) {
+ for (Instance instance : app.instances().values())
for (var kv : instance.deployments().entrySet()) {
var zone = kv.getKey();
var deploymentMetrics = kv.getValue().metrics();
@@ -87,31 +83,28 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer {
deploymentMetrics.queriesPerSecond(),
deploymentMetrics.writesPerSecond()));
}
- }
return new ApplicationSummary(app.id().defaultInstance(), app.activity().lastQueried(), app.activity().lastWritten(),
app.latestVersion().flatMap(version -> version.buildTime()), metrics);
}
/** Escalate ownership issues which have not been closed before a defined amount of time has passed. */
- private double ensureConfirmationResponses() {
- AtomicInteger attempts = new AtomicInteger(0);
- AtomicInteger failures = new AtomicInteger(0);
+ private boolean ensureConfirmationResponses() {
+ AtomicBoolean success = new AtomicBoolean(true);
for (Application application : applications())
application.ownershipIssueId().ifPresent(issueId -> {
try {
- attempts.incrementAndGet();
Tenant tenant = tenantOf(application.id());
ownershipIssues.ensureResponse(issueId, tenant.contact());
}
catch (RuntimeException e) {
- failures.incrementAndGet();
+ success.set(false);
log.log(Level.INFO, "Exception caught when attempting to escalate issue with id '" + issueId + "': " + Exceptions.toMessageString(e));
}
});
- return asSuccessFactor(attempts.get(), failures.get());
+ return success.get();
}
- private double updateConfirmedApplicationOwners() {
+ private boolean updateConfirmedApplicationOwners() {
applications()
.withProjectId()
.withProductionDeployment()
@@ -125,7 +118,7 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer {
controller().applications().store(lockedApplication.withOwner(owner)));
});
});
- return 1.0;
+ return true;
}
private ApplicationList applications() {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveAccessMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveAccessMaintainer.java
index b096a853541..1a9889284e1 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveAccessMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveAccessMaintainer.java
@@ -37,7 +37,8 @@ public class ArchiveAccessMaintainer extends ControllerMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
+
// Count buckets - so we can alert if we get close to the account limit of 1000
zoneRegistry.zones().all().ids().forEach(zoneId ->
metric.set(bucketCountMetricName, archiveBucketDb.buckets(zoneId).size(),
@@ -58,7 +59,6 @@ public class ArchiveAccessMaintainer extends ControllerMaintainer {
)
);
- return 1.0;
+ return true;
}
-
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java
index ab8e5efa0bd..d2141b097b3 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java
@@ -38,7 +38,7 @@ public class ArchiveUriUpdater extends ControllerMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
Map<ZoneId, Set<TenantName>> tenantsByZone = new HashMap<>();
for (var application : applications.asList()) {
for (var instance : application.instances().values()) {
@@ -63,7 +63,7 @@ public class ArchiveUriUpdater extends ControllerMaintainer {
.forEach(tenant -> nodeRepository.removeArchiveUri(zone, tenant));
});
- return 1.0;
+ return true;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java
index 14e3e685a8a..1f360c477b9 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java
@@ -43,14 +43,14 @@ public class ChangeRequestMaintainer extends ControllerMaintainer {
@Override
- protected double maintain() {
+ protected boolean maintain() {
var currentChangeRequests = pruneOldChangeRequests();
var changeRequests = changeRequestClient.getChangeRequests(currentChangeRequests);
logger.fine(() -> "Found requests: " + changeRequests);
storeChangeRequests(changeRequests);
- return 1.0;
+ return true;
}
private void storeChangeRequests(List<ChangeRequest> changeRequests) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java
index 5acd0c63670..d923db936cb 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java
@@ -38,7 +38,7 @@ public class CloudEventReporter extends ControllerMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
for (var region : zonesByCloudNativeRegion.keySet()) {
List<CloudEvent> events = eventFetcher.getEvents(region);
for (var event : events) {
@@ -48,7 +48,7 @@ public class CloudEventReporter extends ControllerMaintainer {
deprovisionAffectedHosts(region, event);
}
}
- return 1.0;
+ return true;
}
/** Deprovision any host affected by given event */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java
index 5ee39f7c8f2..7b846fa288c 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java
@@ -35,14 +35,12 @@ public class ContactInformationMaintainer extends ControllerMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
TenantController tenants = controller().tenants();
- int attempts = 0;
- int failures = 0;
+ boolean success = true;
for (Tenant tenant : tenants.asList()) {
log.log(FINE, () -> "Updating contact information for " + tenant);
try {
- attempts++;
switch (tenant.type()) {
case athenz:
tenants.lockIfPresent(tenant.name(), LockedTenant.Athenz.class, lockedTenant -> {
@@ -58,13 +56,13 @@ public class ContactInformationMaintainer extends ControllerMaintainer {
throw new IllegalArgumentException("Unexpected tenant type '" + tenant.type() + "'.");
}
} catch (Exception e) {
- failures++;
+ success = false;
log.log(Level.WARNING, "Failed to update contact information for " + tenant + ": " +
Exceptions.toMessageString(e) + ". Retrying in " +
interval());
}
}
- return asSuccessFactor(attempts, failures);
+ return success;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContainerImageExpirer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContainerImageExpirer.java
index f1574381a3d..ff5fc4d2051 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContainerImageExpirer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContainerImageExpirer.java
@@ -34,7 +34,7 @@ public class ContainerImageExpirer extends ControllerMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
Instant now = controller().clock().instant();
VersionStatus versionStatus = controller().readVersionStatus();
List<ContainerImage> imagesToExpire = controller().serviceRegistry().containerRegistry().list().stream()
@@ -44,7 +44,7 @@ public class ContainerImageExpirer extends ControllerMaintainer {
log.log(Level.INFO, "Expiring " + imagesToExpire.size() + " container images: " + imagesToExpire);
controller().serviceRegistry().containerRegistry().deleteAll(imagesToExpire);
}
- return 1.0;
+ return true;
}
/** Returns whether given image is expired */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java
index 810c412fcc0..03a6268397e 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java
@@ -34,7 +34,7 @@ public abstract class ControllerMaintainer extends Maintainer {
public ControllerMaintainer(Controller controller, Duration interval, String name, Set<SystemName> activeSystems) {
super(name, interval, controller.clock().instant(), controller.jobControl(),
- new ControllerJobMetrics(controller.metric()), controller.curator().cluster(), true);
+ jobMetrics(controller.metric()), controller.curator().cluster(), true);
this.controller = controller;
this.activeSystems = Set.copyOf(Objects.requireNonNull(activeSystems));
}
@@ -47,20 +47,10 @@ public abstract class ControllerMaintainer extends Maintainer {
super.run();
}
- private static class ControllerJobMetrics extends JobMetrics {
-
- private final Metric metric;
-
- public ControllerJobMetrics(Metric metric) {
- this.metric = metric;
- }
-
- @Override
- protected void recordCompletion(String job, Long incompleteRuns, double successFactor) {
- metric.set("maintenance.consecutiveFailures", incompleteRuns, metric.createContext(Map.of("job", job)));
- metric.set("maintenance.successFactor", successFactor, metric.createContext(Map.of("job", job)));
- }
-
+ private static JobMetrics jobMetrics(Metric metric) {
+ return new JobMetrics((job, consecutiveFailures) -> {
+ metric.set("maintenance.consecutiveFailures", consecutiveFailures, metric.createContext(Map.of("job", job)));
+ });
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java
index 21cda09d92a..28b64b5bfe0 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java
@@ -31,10 +31,10 @@ public class CostReportMaintainer extends ControllerMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
var csv = CostCalculator.resourceShareByPropertyToCsv(nodeRepository, controller(), clock, consumer.fixedAllocations());
consumer.consume(csv);
- return 1.0;
+ return true;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java
index 9e3da506ca8..e5316788802 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java
@@ -28,9 +28,8 @@ public class DeploymentExpirer extends ControllerMaintainer {
}
@Override
- protected double maintain() {
- int attempts = 0;
- int failures = 0;
+ protected boolean maintain() {
+ boolean success = true;
for (Application application : controller().applications().readable()) {
for (Instance instance : application.instances().values())
for (Deployment deployment : instance.deployments().values()) {
@@ -38,17 +37,16 @@ public class DeploymentExpirer extends ControllerMaintainer {
try {
log.log(Level.INFO, "Expiring deployment of " + instance.id() + " in " + deployment.zone());
- attempts++;
controller().applications().deactivate(instance.id(), deployment.zone());
} catch (Exception e) {
- failures++;
+ success = false;
log.log(Level.WARNING, "Could not expire " + deployment + " of " + instance +
": " + Exceptions.toMessageString(e) + ". Retrying in " +
interval());
}
}
}
- return asSuccessFactor(attempts, failures);
+ return success;
}
/** Returns whether given deployment has expired according to its TTL */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java
index 4e53e07f5af..a3070ef55a0 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java
@@ -21,7 +21,6 @@ import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import static com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence.broken;
@@ -46,11 +45,10 @@ public class DeploymentIssueReporter extends ControllerMaintainer {
}
@Override
- protected double maintain() {
- return ( maintainDeploymentIssues(applications()) +
- maintainPlatformIssue(applications()) +
- escalateInactiveDeploymentIssues(applications()))
- / 3;
+ protected boolean maintain() {
+ return maintainDeploymentIssues(applications()) &
+ maintainPlatformIssue(applications()) &
+ escalateInactiveDeploymentIssues(applications());
}
/** Returns the applications to maintain issue status for. */
@@ -65,7 +63,7 @@ public class DeploymentIssueReporter extends ControllerMaintainer {
* and store the issue id for the filed issues. Also, clear the issueIds of applications
* where deployment has not failed for this amount of time.
*/
- private double maintainDeploymentIssues(List<Application> applications) {
+ private boolean maintainDeploymentIssues(List<Application> applications) {
List<TenantAndApplicationId> failingApplications = controller().jobController().deploymentStatuses(ApplicationList.from(applications))
.failingApplicationChangeSince(controller().clock().instant().minus(maxFailureAge))
.mapToList(status -> status.application().id());
@@ -75,7 +73,7 @@ public class DeploymentIssueReporter extends ControllerMaintainer {
fileDeploymentIssueFor(application);
else
store(application.id(), null);
- return 1.0;
+ return true;
}
/**
@@ -83,26 +81,27 @@ public class DeploymentIssueReporter extends ControllerMaintainer {
* applications that have been failing the upgrade to the system version for
* longer than the set grace period, or update this list if the issue already exists.
*/
- private double maintainPlatformIssue(List<Application> applications) {
+ private boolean maintainPlatformIssue(List<Application> applications) {
+ boolean success = true;
if (controller().system() == SystemName.cd)
- return 1.0;
+ return success;
VersionStatus versionStatus = controller().readVersionStatus();
Version systemVersion = controller().systemVersion(versionStatus);
if (versionStatus.version(systemVersion).confidence() != broken)
- return 1.0;
+ return success;
DeploymentStatusList statuses = controller().jobController().deploymentStatuses(ApplicationList.from(applications));
if (statuses.failingUpgradeToVersionSince(systemVersion, controller().clock().instant().minus(upgradeGracePeriod)).isEmpty())
- return 1.0;
+ return success;
List<ApplicationId> failingApplications = statuses.failingUpgradeToVersionSince(systemVersion, controller().clock().instant())
.mapToList(status -> status.application().id().defaultInstance());
// TODO jonmv: Send only tenant and application, here and elsewhere in this.
deploymentIssues.fileUnlessOpen(failingApplications, systemVersion);
- return 1.0;
+ return success;
}
private Tenant ownerOf(TenantAndApplicationId applicationId) {
@@ -127,23 +126,21 @@ public class DeploymentIssueReporter extends ControllerMaintainer {
}
/** Escalate issues for which there has been no activity for a certain amount of time. */
- private double escalateInactiveDeploymentIssues(Collection<Application> applications) {
- AtomicInteger attempts = new AtomicInteger(0);
- AtomicInteger failures = new AtomicInteger(0);
+ private boolean escalateInactiveDeploymentIssues(Collection<Application> applications) {
+ AtomicBoolean success = new AtomicBoolean(true);
applications.forEach(application -> application.deploymentIssueId().ifPresent(issueId -> {
try {
- attempts.incrementAndGet();
Tenant tenant = ownerOf(application.id());
deploymentIssues.escalateIfInactive(issueId,
maxInactivity,
tenant.type() == Tenant.Type.athenz ? tenant.contact() : Optional.empty());
}
catch (RuntimeException e) {
- failures.incrementAndGet();
+ success.set(false);
log.log(Level.INFO, "Exception caught when attempting to escalate issue with id '" + issueId + "': " + Exceptions.toMessageString(e));
}
}));
- return asSuccessFactor(attempts.get(), failures.get());
+ return success.get();
}
private void store(TenantAndApplicationId id, IssueId issueId) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java
index 20154c4f122..a8214ac8a09 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java
@@ -44,7 +44,7 @@ public class DeploymentMetricsMaintainer extends ControllerMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
AtomicInteger failures = new AtomicInteger(0);
AtomicInteger attempts = new AtomicInteger(0);
AtomicReference<Exception> lastException = new AtomicReference<>(null);
@@ -92,7 +92,7 @@ public class DeploymentMetricsMaintainer extends ControllerMaintainer {
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
- return asSuccessFactor(attempts.get(), failures.get());
+ return lastException.get() == null;
}
static DeploymentMetrics updateDeploymentMetrics(DeploymentMetrics current, List<ClusterMetrics> metrics) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java
index 85a69b0f338..55a957f0247 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java
@@ -54,7 +54,7 @@ public class EndpointCertificateMaintainer extends ControllerMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
try {
// In order of importance
deployRefreshedCertificates();
@@ -62,10 +62,10 @@ public class EndpointCertificateMaintainer extends ControllerMaintainer {
deleteUnusedCertificates();
} catch (Exception e) {
log.log(LogLevel.ERROR, "Exception caught while maintaining endpoint certificates", e);
- return 0.0;
+ return false;
}
- return 1.0;
+ return true;
}
private void updateRefreshedCertificates() {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdater.java
index 10e6f9eb039..83ccda422e6 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdater.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdater.java
@@ -38,7 +38,7 @@ public class HostInfoUpdater extends ControllerMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
Map<String, NodeEntity> nodeEntities = controller().serviceRegistry().entityService().listNodes().stream()
.collect(Collectors.toMap(NodeEntity::hostname,
Function.identity()));
@@ -62,7 +62,7 @@ public class HostInfoUpdater extends ControllerMaintainer {
LOG.info("Updated information for " + hostsUpdated + " hosts(s)");
}
}
- return 1.0;
+ return true;
}
private static Optional<String> buildModelName(NodeEntity nodeEntity) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java
index 5101de73a33..9859d12510a 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java
@@ -39,28 +39,23 @@ public abstract class InfrastructureUpgrader<VERSION> extends ControllerMaintain
}
@Override
- protected double maintain() {
- if (targetVersion().isEmpty()) return 1.0;
- return upgradeAll(targetVersion().get(), managedApplications);
+ protected boolean maintain() {
+ targetVersion().ifPresent(target -> upgradeAll(target, managedApplications));
+ return true;
}
/** Deploy a list of system applications until they converge on the given version */
- private double upgradeAll(VERSION target, List<SystemApplication> applications) {
- int attempts = 0;
- int failures = 0;
+ private void upgradeAll(VERSION target, List<SystemApplication> applications) {
for (List<ZoneApi> zones : upgradePolicy.asList()) {
boolean converged = true;
for (ZoneApi zone : zones) {
try {
- attempts++;
converged &= upgradeAll(target, applications, zone);
} catch (UnreachableNodeRepositoryException e) {
- failures++;
converged = false;
log.warning(String.format("%s: Failed to communicate with node repository in %s, continuing with next parallel zone: %s",
this, zone, Exceptions.toMessageString(e)));
} catch (Exception e) {
- failures++;
converged = false;
log.warning(String.format("%s: Failed to upgrade zone: %s, continuing with next parallel zone: %s",
this, zone, Exceptions.toMessageString(e)));
@@ -70,7 +65,6 @@ public abstract class InfrastructureUpgrader<VERSION> extends ControllerMaintain
break;
}
}
- return asSuccessFactor(attempts, failures);
}
/** Returns whether all applications have converged to the target version in zone */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java
index 25207b733f0..b84cfe5af9b 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java
@@ -49,10 +49,10 @@ public class JobRunner extends ControllerMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
jobs.active().forEach(this::advance);
jobs.collectGarbage();
- return 1.0;
+ return true;
}
@Override
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java
index 3f65c2e49cd..b26b94f0b28 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java
@@ -73,7 +73,7 @@ public class MetricsReporter extends ControllerMaintainer {
}
@Override
- public double maintain() {
+ public boolean maintain() {
reportDeploymentMetrics();
reportRemainingRotations();
reportQueuedNameServiceRequests();
@@ -82,7 +82,7 @@ public class MetricsReporter extends ControllerMaintainer {
reportAuditLog();
reportBrokenSystemVersion(versionStatus);
reportTenantMetrics();
- return 1.0;
+ return true;
}
private void reportBrokenSystemVersion(VersionStatus versionStatus) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/NameServiceDispatcher.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/NameServiceDispatcher.java
index fe20db00e05..e57affdc15d 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/NameServiceDispatcher.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/NameServiceDispatcher.java
@@ -37,12 +37,13 @@ public class NameServiceDispatcher extends ControllerMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
+ boolean success = true;
try (var lock = db.lockNameServiceQueue()) {
var queue = db.readNameServiceQueue();
var instant = clock.instant();
var remaining = queue.dispatchTo(nameService, requestCount);
- if (queue == remaining) return 1.0; // Queue unchanged
+ if (queue == remaining) return success; // Queue unchanged
var dispatched = queue.first(requestCount);
if (!dispatched.requests().isEmpty()) {
@@ -53,7 +54,7 @@ public class NameServiceDispatcher extends ControllerMaintainer {
}
db.writeNameServiceQueue(remaining);
}
- return 1.0;
+ return success;
}
private static int requestCount(SystemName system) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java
index 666d1c3b23a..e1618f05a7d 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java
@@ -42,13 +42,13 @@ public class OsUpgradeScheduler extends ControllerMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
for (var cloud : supportedClouds()) {
Optional<Version> newTarget = newTargetIn(cloud);
if (newTarget.isEmpty()) continue;
controller().upgradeOsIn(cloud, newTarget.get(), upgradeBudget(), false);
}
- return 1.0;
+ return true;
}
/** Returns the new target version for given cloud, if any */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java
index 271dd277e1c..cbd9207fda4 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java
@@ -18,16 +18,16 @@ public class OsVersionStatusUpdater extends ControllerMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
try {
OsVersionStatus newStatus = OsVersionStatus.compute(controller());
controller().updateOsVersionStatus(newStatus);
- return 1.0;
+ return true;
} catch (Exception e) {
log.log(Level.WARNING, "Failed to compute OS version status: " + Exceptions.toMessageString(e) +
". Retrying in " + interval());
}
- return 0.0;
+ return false;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java
index 9d93ac719b7..a032f266de5 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java
@@ -19,13 +19,13 @@ public class OutstandingChangeDeployer extends ControllerMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
for (Application application : ApplicationList.from(controller().applications().readable())
.withProductionDeployment()
.withDeploymentSpec()
.asList())
controller().applications().deploymentTrigger().triggerNewRevision(application.id());
- return 1.0;
+ return true;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java
index ffe958cb63a..a626f21359a 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java
@@ -17,9 +17,9 @@ public class ReadyJobsTrigger extends ControllerMaintainer {
}
@Override
- public double maintain() {
+ public boolean maintain() {
controller().applications().deploymentTrigger().triggerReadyJobs();
- return 1.0;
+ return true;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java
index 0bd74c844ae..263a33cf266 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java
@@ -40,7 +40,7 @@ public class ReindexingTriggerer extends ControllerMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
try {
Instant now = controller().clock().instant();
for (Application application : controller().applications().asList())
@@ -51,11 +51,11 @@ public class ReindexingTriggerer extends ControllerMaintainer {
&& reindexingIsReady(controller().applications().applicationReindexing(id, deployment.zone()), now))
controller().applications().reindex(id, deployment.zone(), List.of(), List.of(), true);
});
- return 1.0;
+ return true;
}
catch (RuntimeException e) {
log.log(Level.WARNING, "Failed to trigger reindexing: " + Exceptions.toMessageString(e));
- return 0.0;
+ return false;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java
index 39ad233ce46..aed2e637e4b 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java
@@ -79,19 +79,19 @@ public class ResourceMeterMaintainer extends ControllerMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
Collection<ResourceSnapshot> resourceSnapshots;
try {
resourceSnapshots = getAllResourceSnapshots();
} catch (Exception e) {
log.log(Level.WARNING, "Failed to collect resource snapshots. Retrying in " + interval() + ". Error: " +
Exceptions.toMessageString(e));
- return 0.0;
+ return false;
}
if (systemName.isPublic()) reportResourceSnapshots(resourceSnapshots);
updateDeploymentCost(resourceSnapshots);
- return 1.0;
+ return true;
}
void updateDeploymentCost(Collection<ResourceSnapshot> resourceSnapshots) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java
index ab988bcf0ac..c7bf7e765ed 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java
@@ -28,7 +28,7 @@ public class ResourceTagMaintainer extends ControllerMaintainer {
}
@Override
- public double maintain() {
+ public boolean maintain() {
controller().zoneRegistry().zones()
.ofCloud(CloudName.from("aws"))
.reachable()
@@ -38,7 +38,7 @@ public class ResourceTagMaintainer extends ControllerMaintainer {
if (taggedResources > 0)
log.log(Level.INFO, "Tagged " + taggedResources + " resources in " + zone.getId());
});
- return 1.0;
+ return true;
}
private Map<HostName, Optional<ApplicationId>> getTenantOfParentHosts(ZoneId zoneId) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainer.java
index e40d772a673..3b0a1fca4af 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainer.java
@@ -21,14 +21,14 @@ public class SystemRoutingPolicyMaintainer extends ControllerMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
for (var zone : controller().zoneRegistry().zones().all().ids()) {
for (var application : SystemApplication.values()) {
if (!application.hasEndpoint()) continue;
controller().routing().policies().refresh(application.id(), DeploymentSpec.empty, zone);
}
}
- return 1.0;
+ return true;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java
index 637ae10bcc6..1265d687850 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java
@@ -23,7 +23,7 @@ public class TenantRoleMaintainer extends ControllerMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
var roleService = controller().serviceRegistry().roleService();
var tenants = controller().tenants().asList();
var tenantsWithRoles = tenants.stream()
@@ -31,7 +31,7 @@ public class TenantRoleMaintainer extends ControllerMaintainer {
.filter(this::hasProductionDeployment)
.collect(Collectors.toList());
roleService.maintainRoles(tenantsWithRoles);
- return 1.0;
+ return true;
}
private boolean hasProductionDeployment(TenantName tenant) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java
index 0af0d01478b..fbe9faa9754 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java
@@ -36,34 +36,30 @@ public class TrafficShareUpdater extends ControllerMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
+ boolean success = false;
Exception lastException = null;
- int attempts = 0;
- int failures = 0;
for (var application : applications.asList()) {
for (var instance : application.instances().values()) {
for (var deployment : instance.deployments().values()) {
if ( ! deployment.zone().environment().isProduction()) continue;
try {
- attempts++;
- updateTrafficFraction(instance, deployment);
+ success |= updateTrafficFraction(instance, deployment);
}
catch (Exception e) {
// Some failures due to locked applications are expected and benign
- failures++;
lastException = e;
}
}
}
}
- double successFactor = asSuccessFactor(attempts, failures);
- if ( successFactor == 0 )
+ if ( ! success && lastException != null) // log on complete failure
log.log(Level.WARNING, "Could not update traffic share on any applications", lastException);
- return successFactor;
+ return success;
}
- private void updateTrafficFraction(Instance instance, Deployment deployment) {
+ private boolean updateTrafficFraction(Instance instance, Deployment deployment) {
double qpsInZone = deployment.metrics().queriesPerSecond();
double totalQps = instance.deployments().values().stream()
.filter(i -> i.zone().environment().isProduction())
@@ -77,6 +73,7 @@ public class TrafficShareUpdater extends ControllerMaintainer {
maxReadShare = currentReadShare; // distribution can be incorrect
nodeRepository.patchApplication(deployment.zone(), instance.id(), currentReadShare, maxReadShare);
+ return true;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
index 2326f7b66ee..8d5019904fa 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
@@ -51,7 +51,7 @@ public class Upgrader extends ControllerMaintainer {
* Schedule application upgrades. Note that this implementation must be idempotent.
*/
@Override
- public double maintain() {
+ public boolean maintain() {
// Determine target versions for each upgrade policy
VersionStatus versionStatus = controller().readVersionStatus();
Version canaryTarget = controller().systemVersion(versionStatus);
@@ -91,7 +91,7 @@ public class Upgrader extends ControllerMaintainer {
upgrade(instances.with(UpgradePolicy.canary), canaryTarget, targetMajorVersion, instances.size());
defaultTargets.forEach(target -> upgrade(instances.with(UpgradePolicy.defaultPolicy), target, targetMajorVersion, numberOfApplicationsToUpgrade()));
conservativeTargets.forEach(target -> upgrade(instances.with(UpgradePolicy.conservative), target, targetMajorVersion, numberOfApplicationsToUpgrade()));
- return 1.0;
+ return true;
}
/** Returns the target versions for given confidence, one per major version in the system */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainer.java
index 4cd24289676..fedf3d90760 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainer.java
@@ -57,7 +57,7 @@ public class VCMRMaintainer extends ControllerMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
var changeRequests = curator.readChangeRequests()
.stream()
.filter(shouldUpdate())
@@ -81,7 +81,7 @@ public class VCMRMaintainer extends ControllerMaintainer {
});
}
});
- return 1.0;
+ return true;
}
/**
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java
index e4866c43f13..a3e9672b715 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java
@@ -29,7 +29,7 @@ public class VersionStatusUpdater extends ControllerMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
try {
VersionStatus newStatus = VersionStatus.compute(controller());
controller().updateVersionStatus(newStatus);
@@ -37,12 +37,12 @@ public class VersionStatusUpdater extends ControllerMaintainer {
controller().serviceRegistry().systemMonitor().reportSystemVersion(version.versionNumber(),
convert(version.confidence()));
});
- return 1.0;
+ return true;
} catch (Exception e) {
log.log(Level.WARNING, "Failed to compute version status: " + Exceptions.toMessageString(e) +
". Retrying in " + interval());
}
- return 0.0;
+ return false;
}
static SystemMonitor.Confidence convert(VespaVersion.Confidence confidence) {
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java
index 7dc5cb34818..27b4f3744e7 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java
@@ -39,16 +39,13 @@ public class ControllerMaintainerTest {
TestControllerMaintainer maintainer = new TestControllerMaintainer(tester.controller(), SystemName.main, new AtomicInteger());
maintainer.run();
assertEquals(0L, consecutiveFailuresMetric());
- assertEquals(1.0, successFactorMetric(), 0.0000001);
maintainer.success = false;
maintainer.run();
maintainer.run();
assertEquals(2L, consecutiveFailuresMetric());
- assertEquals(0.0, successFactorMetric(), 0.0000001);
maintainer.success = true;
maintainer.run();
assertEquals(0, consecutiveFailuresMetric());
- assertEquals(1.0, successFactorMetric(), 0.0000001);
}
private long consecutiveFailuresMetric() {
@@ -57,12 +54,6 @@ public class ControllerMaintainerTest {
"maintenance.consecutiveFailures").get().longValue();
}
- private long successFactorMetric() {
- MetricsMock metrics = (MetricsMock) tester.controller().metric();
- return metrics.getMetric((context) -> "TestControllerMaintainer".equals(context.get("job")),
- "maintenance.successFactor").get().longValue();
- }
-
private static class TestControllerMaintainer extends ControllerMaintainer {
private final AtomicInteger executions;
@@ -74,9 +65,9 @@ public class ControllerMaintainerTest {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
executions.incrementAndGet();
- return success ? 1.0 : 0.0;
+ return success;
}
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainerTest.java
index ce219b8beed..66bda66bbf9 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainerTest.java
@@ -33,7 +33,7 @@ public class EndpointCertificateMaintainerTest {
@Test
public void old_and_unused_cert_is_deleted() {
tester.curator().writeEndpointCertificateMetadata(ApplicationId.defaultId(), exampleMetadata);
- assertEquals(1.0, maintainer.maintain(), 0.0000001);
+ assertTrue(maintainer.maintain());
assertTrue(tester.curator().readEndpointCertificateMetadata(ApplicationId.defaultId()).isEmpty());
}
@@ -41,7 +41,7 @@ public class EndpointCertificateMaintainerTest {
public void unused_but_recently_used_cert_is_not_deleted() {
EndpointCertificateMetadata recentlyRequestedCert = exampleMetadata.withLastRequested(tester.clock().instant().minusSeconds(3600).getEpochSecond());
tester.curator().writeEndpointCertificateMetadata(ApplicationId.defaultId(), recentlyRequestedCert);
- assertEquals(1.0, maintainer.maintain(), 0.0000001);
+ assertTrue(maintainer.maintain());
assertEquals(Optional.of(recentlyRequestedCert), tester.curator().readEndpointCertificateMetadata(ApplicationId.defaultId()));
}
@@ -53,7 +53,7 @@ public class EndpointCertificateMaintainerTest {
secretStore.setSecret(exampleMetadata.keyName(), "foo", 1);
secretStore.setSecret(exampleMetadata.certName(), "bar", 1);
- assertEquals(1.0, maintainer.maintain(), 0.0000001);
+ assertTrue(maintainer.maintain());
var updatedCert = Optional.of(recentlyRequestedCert.withLastRefreshed(tester.clock().instant().getEpochSecond()).withVersion(1));
@@ -77,7 +77,7 @@ public class EndpointCertificateMaintainerTest {
tester.curator().writeEndpointCertificateMetadata(appId, exampleMetadata);
- assertEquals(1.0, maintainer.maintain(), 0.0000001);
+ assertTrue(maintainer.maintain());
assertTrue(tester.curator().readEndpointCertificateMetadata(appId).isPresent()); // cert should not be deleted, the app is deployed!
}
@@ -97,7 +97,7 @@ public class EndpointCertificateMaintainerTest {
tester.curator().writeEndpointCertificateMetadata(appId, exampleMetadata);
- assertEquals(1.0, maintainer.maintain(), 0.0000001);
+ assertTrue(maintainer.maintain());
assertTrue(tester.curator().readEndpointCertificateMetadata(appId).isPresent()); // cert should not be deleted, the app is deployed!
tester.clock().advance(Duration.ofDays(3));
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java
index 7b4882de3ff..2afa3a0faea 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java
@@ -39,7 +39,7 @@ public class TrafficShareUpdaterTest {
// Single zone
setQpsMetric(50.0, application.application().id().defaultInstance(), prod1, tester);
deploymentMetricsMaintainer.maintain();
- assertEquals(1.0, updater.maintain(), 0.0000001);
+ assertTrue(updater.maintain());
assertTrafficFraction(1.0, 1.0, application.instanceId(), prod1, tester);
// Two zones
@@ -48,14 +48,14 @@ public class TrafficShareUpdaterTest {
setQpsMetric(50.0, application.application().id().defaultInstance(), prod1, tester);
setQpsMetric(0.0, application.application().id().defaultInstance(), prod2, tester);
deploymentMetricsMaintainer.maintain();
- assertEquals(1.0, updater.maintain(), 0.0000001);
+ assertTrue(updater.maintain());
assertTrafficFraction(1.0, 1.0, application.instanceId(), prod1, tester);
assertTrafficFraction(0.0, 1.0, application.instanceId(), prod2, tester);
// - both hot
setQpsMetric(53.0, application.application().id().defaultInstance(), prod1, tester);
setQpsMetric(47.0, application.application().id().defaultInstance(), prod2, tester);
deploymentMetricsMaintainer.maintain();
- assertEquals(1.0, updater.maintain(), 0.0000001);
+ assertTrue(updater.maintain());
assertTrafficFraction(0.53, 1.0, application.instanceId(), prod1, tester);
assertTrafficFraction(0.47, 1.0, application.instanceId(), prod2, tester);
@@ -66,7 +66,7 @@ public class TrafficShareUpdaterTest {
setQpsMetric(47.0, application.application().id().defaultInstance(), prod2, tester);
setQpsMetric(0.0, application.application().id().defaultInstance(), prod3, tester);
deploymentMetricsMaintainer.maintain();
- assertEquals(1.0, updater.maintain(), 0.0000001);
+ assertTrue(updater.maintain());
assertTrafficFraction(0.53, 0.53, application.instanceId(), prod1, tester);
assertTrafficFraction(0.47, 0.50, application.instanceId(), prod2, tester);
assertTrafficFraction(0.00, 0.50, application.instanceId(), prod3, tester);
@@ -75,7 +75,7 @@ public class TrafficShareUpdaterTest {
setQpsMetric(25.0, application.application().id().defaultInstance(), prod2, tester);
setQpsMetric(25.0, application.application().id().defaultInstance(), prod3, tester);
deploymentMetricsMaintainer.maintain();
- assertEquals(1.0, updater.maintain(), 0.0000001);
+ assertTrue(updater.maintain());
assertTrafficFraction(0.50, 0.5, application.instanceId(), prod1, tester);
assertTrafficFraction(0.25, 0.5, application.instanceId(), prod2, tester);
assertTrafficFraction(0.25, 0.5, application.instanceId(), prod3, tester);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java
index 24160c19dfa..9ac1ca2b4c1 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java
@@ -41,9 +41,9 @@ public abstract class ApplicationMaintainer extends NodeRepositoryMaintainer {
}
@Override
- protected final double maintain() {
+ protected final boolean maintain() {
applicationsNeedingMaintenance().forEach(this::deploy);
- return 1.0;
+ return true;
}
/** Returns the number of deployments that are pending execution */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
index 3914a0c9f07..05eb878e1b1 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java
@@ -44,13 +44,14 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
- if ( ! nodeRepository().nodes().isWorking()) return 0.0;
+ protected boolean maintain() {
+ if ( ! nodeRepository().nodes().isWorking()) return false;
- if ( ! nodeRepository().zone().environment().isAnyOf(Environment.dev, Environment.prod)) return 1.0;
+ boolean success = true;
+ if ( ! nodeRepository().zone().environment().isAnyOf(Environment.dev, Environment.prod)) return success;
activeNodesByApplication().forEach(this::autoscale);
- return 1.0;
+ return success;
}
private void autoscale(ApplicationId application, NodeList applicationNodes) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
index 0eb2038e233..af32c285d99 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
@@ -74,13 +74,13 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
try (Mutex lock = nodeRepository().nodes().lockUnallocated()) {
NodeList nodes = nodeRepository().nodes().list();
resumeProvisioning(nodes, lock);
convergeToCapacity(nodes);
}
- return 1.0;
+ return true;
}
/** Resume provisioning of already provisioned hosts and their children */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java
index 25108425e6e..2443a12d198 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java
@@ -40,7 +40,7 @@ public abstract class Expirer extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
NodeList expired = nodeRepository().nodes().list(fromState).matching(this::isExpired);
if ( ! expired.isEmpty()) {
@@ -49,7 +49,7 @@ public abstract class Expirer extends NodeRepositoryMaintainer {
}
metric.add("expired." + fromState, expired.size(), null);
- return 1.0;
+ return true;
}
protected boolean isExpired(Node node) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java
index 7505ce42668..e98da35aa6a 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java
@@ -66,7 +66,7 @@ public class FailedExpirer extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
List<Node> remainingNodes = new ArrayList<>(nodeRepository.nodes().list(Node.State.failed)
.nodeType(NodeType.tenant, NodeType.host)
.asList());
@@ -78,7 +78,7 @@ public class FailedExpirer extends NodeRepositoryMaintainer {
recycleIf(remainingNodes, node ->
node.allocation().get().membership().cluster().isStateful() &&
node.history().hasEventBefore(History.Event.Type.failed, clock().instant().minus(statefulExpiry)));
- return 1.0;
+ return true;
}
/** Recycle the nodes matching condition, and remove those nodes from the nodes list. */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostEncrypter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostEncrypter.java
index 80f74a011c0..caf20463f60 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostEncrypter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostEncrypter.java
@@ -43,7 +43,7 @@ public class HostEncrypter extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
Instant now = nodeRepository().clock().instant();
NodeList allNodes = nodeRepository().nodes().list();
for (var nodeType : NodeType.values()) {
@@ -51,7 +51,7 @@ public class HostEncrypter extends NodeRepositoryMaintainer {
if (upgradingVespa(allNodes, nodeType)) continue;
unencryptedHosts(allNodes, nodeType).forEach(host -> encrypt(host, now));
}
- return 1.0;
+ return true;
}
/** Returns whether any node of given type is currently upgrading its Vespa version */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InfrastructureProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InfrastructureProvisioner.java
index d9f5ea6a7a9..e317333135c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InfrastructureProvisioner.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InfrastructureProvisioner.java
@@ -39,9 +39,9 @@ public class InfrastructureProvisioner extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
infraDeployer.activateAllSupportedInfraApplications(false);
- return 1.0;
+ return true;
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java
index ac9d8d6671a..10069fd1a18 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java
@@ -3,7 +3,6 @@ package com.yahoo.vespa.hosted.provision.maintenance;
import com.google.common.collect.Sets;
import com.yahoo.jdisc.Metric;
-import com.yahoo.lang.MutableInteger;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancer;
@@ -55,9 +54,9 @@ public class LoadBalancerExpirer extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
expireReserved();
- return ( removeInactive() + pruneReals() ) / 2;
+ return removeInactive() & pruneReals();
}
/** Move reserved load balancer that have expired to inactive */
@@ -69,8 +68,7 @@ public class LoadBalancerExpirer extends NodeRepositoryMaintainer {
}
/** Deprovision inactive load balancers that have expired */
- private double removeInactive() {
- MutableInteger attempts = new MutableInteger(0);
+ private boolean removeInactive() {
var failed = new ArrayList<LoadBalancerId>();
var lastException = new AtomicReference<Exception>();
var expiry = nodeRepository().clock().instant().minus(inactiveExpiry);
@@ -78,7 +76,6 @@ public class LoadBalancerExpirer extends NodeRepositoryMaintainer {
lb.changedAt().isBefore(expiry) &&
allocatedNodes(lb.id()).isEmpty(), lb -> {
try {
- attempts.add(1);
log.log(Level.INFO, () -> "Removing expired inactive load balancer " + lb.id());
service.remove(lb.id().application(), lb.id().cluster());
db.removeLoadBalancer(lb.id());
@@ -95,12 +92,11 @@ public class LoadBalancerExpirer extends NodeRepositoryMaintainer {
.collect(Collectors.joining(", ")),
interval()));
}
- return asSuccessFactor(attempts.get(), failed.size());
+ return lastException.get() == null;
}
/** Remove reals from inactive load balancers */
- private double pruneReals() {
- var attempts = new MutableInteger(0);
+ private boolean pruneReals() {
var failed = new ArrayList<LoadBalancerId>();
var lastException = new AtomicReference<Exception>();
patchLoadBalancers(lb -> lb.state() == State.inactive, lb -> {
@@ -111,7 +107,6 @@ public class LoadBalancerExpirer extends NodeRepositoryMaintainer {
reals.removeIf(real -> !allocatedNodes.contains(real.hostname().value()));
if (reals.equals(lb.instance().get().reals())) return; // Nothing to remove
try {
- attempts.add(1);
LOG.log(Level.INFO, () -> "Removing reals from inactive load balancer " + lb.id() + ": " + Sets.difference(lb.instance().get().reals(), reals));
service.create(new LoadBalancerSpec(lb.id().application(), lb.id().cluster(), reals), true);
db.writeLoadBalancer(lb.with(lb.instance().map(instance -> instance.withReals(reals))));
@@ -129,7 +124,7 @@ public class LoadBalancerExpirer extends NodeRepositoryMaintainer {
interval()),
lastException.get());
}
- return asSuccessFactor(attempts.get(), failed.size());
+ return lastException.get() == null;
}
/** Patch load balancers matching given filter, while holding lock */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
index 3990c5099eb..85437b3e78a 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
@@ -65,7 +65,7 @@ public class MetricsReporter extends NodeRepositoryMaintainer {
}
@Override
- public double maintain() {
+ public boolean maintain() {
NodeList nodes = nodeRepository().nodes().list();
ServiceModel serviceModel = serviceMonitor.getServiceModelSnapshot();
@@ -80,7 +80,7 @@ public class MetricsReporter extends NodeRepositoryMaintainer {
updateRepairTicketMetrics(nodes);
updateAllocationMetrics(nodes);
updateExclusiveSwitchMetrics(nodes);
- return 1.0;
+ return true;
}
private void updateAllocationMetrics(NodeList nodes) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
index f16459ee8b9..effa41dc69f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
@@ -72,21 +72,17 @@ public class NodeFailer extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
- if ( ! nodeRepository().nodes().isWorking()) return 0.0;
+ protected boolean maintain() {
+ if ( ! nodeRepository().nodes().isWorking()) return false;
- int attempts = 0;
- int failures = 0;
int throttledHostFailures = 0;
int throttledNodeFailures = 0;
// Ready nodes
try (Mutex lock = nodeRepository().nodes().lockUnallocated()) {
for (Map.Entry<Node, String> entry : getReadyNodesByFailureReason().entrySet()) {
- attempts++;
Node node = entry.getKey();
if (throttle(node)) {
- failures++;
if (node.type().isHost())
throttledHostFailures++;
else
@@ -100,12 +96,10 @@ public class NodeFailer extends NodeRepositoryMaintainer {
// Active nodes
for (Map.Entry<Node, String> entry : getActiveNodesByFailureReason().entrySet()) {
- attempts++;
Node node = entry.getKey();
if (!failAllowedFor(node.type())) continue;
if (throttle(node)) {
- failures++;
if (node.type().isHost())
throttledHostFailures++;
else
@@ -122,15 +116,11 @@ public class NodeFailer extends NodeRepositoryMaintainer {
if ( ! activeNodes.childrenOf(host).isEmpty()) continue;
Optional<NodeMutex> locked = Optional.empty();
try {
- attempts++;
locked = nodeRepository().nodes().lockAndGet(host);
if (locked.isEmpty()) continue;
nodeRepository().nodes().fail(List.of(locked.get().node()), Agent.NodeFailer,
"Host should be failed and have no tenant nodes");
}
- catch (Exception e) {
- failures++;
- }
finally {
locked.ifPresent(NodeMutex::close);
}
@@ -140,7 +130,7 @@ public class NodeFailer extends NodeRepositoryMaintainer {
metric.set(throttlingActiveMetric, throttlingActive, null);
metric.set(throttledHostFailuresMetric, throttledHostFailures, null);
metric.set(throttledNodeFailuresMetric, throttledNodeFailures, null);
- return asSuccessFactor(attempts, failures);
+ return throttlingActive == 0;
}
private Map<Node, String> getReadyNodesByFailureReason() {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java
index 37969a30b81..fe2fb5229f9 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java
@@ -5,7 +5,6 @@ import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ApplicationLockException;
import com.yahoo.config.provision.HostLivenessTracker;
import com.yahoo.jdisc.Metric;
-import com.yahoo.lang.MutableInteger;
import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.applicationmodel.ServiceInstance;
import com.yahoo.vespa.applicationmodel.ServiceStatus;
@@ -49,11 +48,13 @@ public class NodeHealthTracker extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
- return ( updateReadyNodeLivenessEvents() + updateActiveNodeDownState() ) / 2;
+ protected boolean maintain() {
+ updateReadyNodeLivenessEvents();
+ updateActiveNodeDownState();
+ return true;
}
- private double updateReadyNodeLivenessEvents() {
+ private void updateReadyNodeLivenessEvents() {
// Update node last request events through ZooKeeper to collect request to all config servers.
// We do this here ("lazily") to avoid writing to zk for each config request.
try (Mutex lock = nodeRepository().nodes().lockUnallocated()) {
@@ -68,16 +69,13 @@ public class NodeHealthTracker extends NodeRepositoryMaintainer {
}
}
}
- return 1.0;
}
/**
* If the node is down (see {@link #allDown}), and there is no "down" history record, we add it.
* Otherwise we remove any "down" history record.
*/
- private double updateActiveNodeDownState() {
- var attempts = new MutableInteger(0);
- var failures = new MutableInteger(0);
+ private void updateActiveNodeDownState() {
NodeList activeNodes = nodeRepository().nodes().list(Node.State.active);
serviceMonitor.getServiceModelSnapshot().getServiceInstancesByHostName().forEach((hostname, serviceInstances) -> {
Optional<Node> node = activeNodes.node(hostname.toString());
@@ -92,7 +90,6 @@ public class NodeHealthTracker extends NodeRepositoryMaintainer {
try (var lock = nodeRepository().nodes().lock(owner)) {
node = getNode(hostname.toString(), owner, lock); // Re-get inside lock
if (node.isEmpty()) return; // Node disappeared or changed allocation
- attempts.add(1);
if (isDown) {
recordAsDown(node.get(), lock);
} else {
@@ -101,10 +98,8 @@ public class NodeHealthTracker extends NodeRepositoryMaintainer {
} catch (ApplicationLockException e) {
// Fine, carry on with other nodes. We'll try updating this one in the next run
log.log(Level.WARNING, "Could not lock " + owner + ": " + Exceptions.toMessageString(e));
- failures.add(1);
}
});
- return asSuccessFactor(attempts.get(), failures.get());
}
/**
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java
index d671900d08c..1ea4577f7fe 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java
@@ -33,21 +33,19 @@ public class NodeMetricsDbMaintainer extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
- int attempts = 0;
- var failures = new MutableInteger(0);
+ protected boolean maintain() {
try {
+ var warnings = new MutableInteger(0);
Set<ApplicationId> applications = activeNodesByApplication().keySet();
- if (applications.isEmpty()) return 1.0;
+ if (applications.isEmpty()) return true;
long pauseMs = interval().toMillis() / applications.size() - 1; // spread requests over interval
int done = 0;
for (ApplicationId application : applications) {
- attempts++;
metricsFetcher.fetchMetrics(application)
.whenComplete((metricsResponse, exception) -> handleResponse(metricsResponse,
exception,
- failures,
+ warnings,
application));
if (++done < applications.size())
Thread.sleep(pauseMs);
@@ -58,22 +56,23 @@ public class NodeMetricsDbMaintainer extends NodeRepositoryMaintainer {
nodeRepository().metricsDb().gc();
- return asSuccessFactor(attempts, failures.get());
+ // Suppress failures for manual zones for now to avoid noise
+ return nodeRepository().zone().environment().isManuallyDeployed() || warnings.get() == 0;
}
catch (InterruptedException e) {
- return asSuccessFactor(attempts, failures.get());
+ return false;
}
}
private void handleResponse(MetricsResponse response,
Throwable exception,
- MutableInteger failures,
+ MutableInteger warnings,
ApplicationId application) {
if (exception != null) {
- if (failures.get() < maxWarningsPerInvocation)
+ if (warnings.get() < maxWarningsPerInvocation)
log.log(Level.WARNING, "Could not update metrics for " + application + ": " +
Exceptions.toMessageString(exception));
- failures.add(1);
+ warnings.add(1);
}
else if (response != null) {
nodeRepository().metricsDb().addNodeMetrics(response.nodeMetrics());
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java
index c282fcdb7fc..6ee657beadd 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java
@@ -37,7 +37,7 @@ public class NodeRebooter extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
// Reboot candidates: Nodes in long-term states, where we know we can safely orchestrate a reboot
List<Node> nodesToReboot = nodeRepository().nodes().list(Node.State.active, Node.State.ready).stream()
.filter(node -> node.type().isHost())
@@ -46,7 +46,7 @@ public class NodeRebooter extends NodeRepositoryMaintainer {
if (!nodesToReboot.isEmpty())
nodeRepository().nodes().reboot(NodeListFilter.from(nodesToReboot));
- return 1.0;
+ return true;
}
private boolean shouldReboot(Node node) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java
index fe5cd419b31..0a1f6961f9f 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java
@@ -25,7 +25,7 @@ public abstract class NodeRepositoryMaintainer extends Maintainer {
public NodeRepositoryMaintainer(NodeRepository nodeRepository, Duration interval, Metric metric) {
super(null, interval, nodeRepository.clock().instant(), nodeRepository.jobControl(),
- new NodeRepositoryJobMetrics(metric), nodeRepository.database().cluster(), true);
+ jobMetrics(metric), nodeRepository.database().cluster(), true);
this.nodeRepository = nodeRepository;
}
@@ -48,20 +48,10 @@ public abstract class NodeRepositoryMaintainer extends Maintainer {
.groupingBy(node -> node.allocation().get().owner());
}
- private static class NodeRepositoryJobMetrics extends JobMetrics {
-
- private final Metric metric;
-
- public NodeRepositoryJobMetrics(Metric metric) {
- this.metric = metric;
- }
-
- @Override
- protected void recordCompletion(String job, Long consecutiveFailures, double successFactor) {
+ private static JobMetrics jobMetrics(Metric metric) {
+ return new JobMetrics((job, consecutiveFailures) -> {
metric.set("maintenance.consecutiveFailures", consecutiveFailures, metric.createContext(Map.of("job", job)));
- metric.set("maintenance.successFactor", successFactor, metric.createContext(Map.of("job", job)));
- }
-
+ });
}
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java
index 749603a373d..4eba15307cb 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java
@@ -23,13 +23,13 @@ public class OsUpgradeActivator extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
for (var nodeType : NodeType.values()) {
if (!nodeType.isHost()) continue;
boolean resume = canUpgradeOsOf(nodeType);
nodeRepository().osVersions().resumeUpgradeOf(nodeType, resume);
}
- return 1.0;
+ return true;
}
/** Returns whether to allow OS upgrade of nodes of given type */
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
index 7bb748c92c9..1543506a78e 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
@@ -33,18 +33,19 @@ public class Rebalancer extends NodeMover<Rebalancer.Move> {
}
@Override
- protected double maintain() {
- if ( ! nodeRepository().nodes().isWorking()) return 0.0;
+ protected boolean maintain() {
+ if ( ! nodeRepository().nodes().isWorking()) return false;
- if (nodeRepository().zone().getCloud().dynamicProvisioning()) return 1.0; // Rebalancing not necessary
- if (nodeRepository().zone().environment().isTest()) return 1.0; // Short lived deployments; no need to rebalance
+ boolean success = true;
+ if (nodeRepository().zone().getCloud().dynamicProvisioning()) return success; // Rebalancing not necessary
+ if (nodeRepository().zone().environment().isTest()) return success; // Short lived deployments; no need to rebalance
// Work with an unlocked snapshot as this can take a long time and full consistency is not needed
NodeList allNodes = nodeRepository().nodes().list();
updateSkewMetric(allNodes);
- if ( ! zoneIsStable(allNodes)) return 1.0;
+ if ( ! zoneIsStable(allNodes)) return success;
findBestMove(allNodes).execute(true, Agent.Rebalancer, deployer, metric, nodeRepository());
- return 1.0;
+ return success;
}
@Override
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
index 3f5893b368a..f72daf1bc2b 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
@@ -48,10 +48,7 @@ public class RetiredExpirer extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
- int attempts = 0;
- int successes = 0;
-
+ protected boolean maintain() {
NodeList activeNodes = nodeRepository().nodes().list(Node.State.active);
Map<ApplicationId, NodeList> retiredNodesByApplication = activeNodes.retired().groupingBy(node -> node.allocation().get().owner());
for (Map.Entry<ApplicationId, NodeList> entry : retiredNodesByApplication.entrySet()) {
@@ -60,19 +57,17 @@ public class RetiredExpirer extends NodeRepositoryMaintainer {
List<Node> nodesToRemove = retiredNodes.stream().filter(n -> canRemove(n, activeNodes)).collect(Collectors.toList());
if (nodesToRemove.isEmpty()) continue;
- attempts++;
try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository())) {
if ( ! deployment.isValid()) continue;
nodeRepository().nodes().setRemovable(application, nodesToRemove);
boolean success = deployment.activate().isPresent();
- if ( ! success) continue;
+ if ( ! success) return success;
String nodeList = nodesToRemove.stream().map(Node::hostname).collect(Collectors.joining(", "));
log.info("Redeployed " + application + " to deactivate retired nodes: " + nodeList);
- successes++;
}
}
- return attempts == 0 ? 1.0 : ((double)successes / attempts);
+ return true;
}
/**
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java
index 888f06a5004..c217580872b 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java
@@ -36,16 +36,13 @@ public class ScalingSuggestionsMaintainer extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
- if ( ! nodeRepository().zone().environment().isProduction()) return 1.0;
+ protected boolean maintain() {
+ if ( ! nodeRepository().zone().environment().isProduction()) return true;
- int attempts = 0;
int successes = 0;
- for (var application : activeNodesByApplication().entrySet()) {
- attempts++;
+ for (var application : activeNodesByApplication().entrySet())
successes += suggest(application.getKey(), application.getValue());
- }
- return attempts == 0 ? 1.0 : ((double)successes / attempts);
+ return successes > 0;
}
private int suggest(ApplicationId application, NodeList applicationNodes) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
index 0589571e9d8..0307ae13b24 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
@@ -66,11 +66,12 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
}
@Override
- protected double maintain() {
- if ( ! nodeRepository().nodes().isWorking()) return 0.0;
+ protected boolean maintain() {
+ if ( ! nodeRepository().nodes().isWorking()) return false;
+ boolean success = true;
// Don't need to maintain spare capacity in dynamically provisioned zones; can provision more on demand.
- if (nodeRepository().zone().getCloud().dynamicProvisioning()) return 1.0;
+ if (nodeRepository().zone().getCloud().dynamicProvisioning()) return success;
NodeList allNodes = nodeRepository().nodes().list();
CapacityChecker capacityChecker = new CapacityChecker(allNodes);
@@ -79,7 +80,6 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
metric.set("overcommittedHosts", overcommittedHosts.size(), null);
retireOvercommitedHosts(allNodes, overcommittedHosts);
- boolean success = true;
Optional<CapacityChecker.HostFailurePath> failurePath = capacityChecker.worstCaseHostLossLeadingToFailure();
if (failurePath.isPresent()) {
int spareHostCapacity = failurePath.get().hostsCausingFailure.size() - 1;
@@ -96,7 +96,7 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
}
metric.set("spareHostCapacity", spareHostCapacity, null);
}
- return success ? 1.0 : 0.0;
+ return success;
}
private boolean execute(List<Move> mitigation, CapacityChecker.HostFailurePath failurePath) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java
index 44890f2f5af..cfab980570d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java
@@ -33,14 +33,14 @@ public class SwitchRebalancer extends NodeMover<Move> {
}
@Override
- protected double maintain() {
- if (!nodeRepository().nodes().isWorking()) return 0.0;
- if (!nodeRepository().zone().environment().isProduction()) return 1.0;
+ protected boolean maintain() {
+ if (!nodeRepository().nodes().isWorking()) return false;
+ if (!nodeRepository().zone().environment().isProduction()) return true;
NodeList allNodes = nodeRepository().nodes().list(); // Lockless as strong consistency is not needed
- if (!zoneIsStable(allNodes)) return 1.0;
+ if (!zoneIsStable(allNodes)) return true;
findBestMove(allNodes).execute(false, Agent.SwitchRebalancer, deployer, metric, nodeRepository());
- return 1.0;
+ return true;
}
@Override
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java
index cfe6e4d348d..5b2f7ce91e8 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java
@@ -43,7 +43,7 @@ public class NodeMetricsDbMaintainerTest {
fetcher,
Duration.ofHours(1),
new TestMetric());
- assertEquals(maintainer.maintain(), 1.0, 0.0000001);
+ assertTrue(maintainer.maintain());
List<NodeTimeseries> timeseriesList = tester.nodeRepository().metricsDb().getNodeTimeseries(Duration.ofDays(1),
Set.of("host-1.yahoo.com", "host-2.yahoo.com"));
assertEquals(2, timeseriesList.size());
diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java
index 73a5dc77743..fcc5b8e57a2 100644
--- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java
+++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java
@@ -2,18 +2,25 @@
package com.yahoo.concurrent.maintenance;
import java.util.concurrent.ConcurrentHashMap;
+import java.util.function.BiConsumer;
/**
* Tracks and forwards maintenance job metrics.
*
* @author mpolden
*/
-public abstract class JobMetrics {
+public class JobMetrics {
+
+ private final BiConsumer<String, Long> metricConsumer;
private final ConcurrentHashMap<String, Long> incompleteRuns = new ConcurrentHashMap<>();
- /** Record starting of a run of a job */
- public void starting(String job) {
+ public JobMetrics(BiConsumer<String, Long> metricConsumer) {
+ this.metricConsumer = metricConsumer;
+ }
+
+ /** Record a run for given job */
+ public void recordRunOf(String job) {
incompleteRuns.merge(job, 1L, Long::sum);
}
@@ -22,17 +29,12 @@ public abstract class JobMetrics {
incompleteRuns.put(job, 0L);
}
- /**
- * Records completion of a run of a job.
- * This is guaranteed to always be called once whenever starting has been called.
- */
- public void completed(String job, double successFactor) {
+ /** Forward metrics for given job to metric consumer */
+ public void forward(String job) {
Long incompleteRuns = this.incompleteRuns.get(job);
if (incompleteRuns != null) {
- recordCompletion(job, incompleteRuns, successFactor);
+ metricConsumer.accept(job, incompleteRuns);
}
}
- protected abstract void recordCompletion(String job, Long incompleteRuns, double successFactor);
-
}
diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
index 2a9e6dda6b6..734c46a2819 100644
--- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
+++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java
@@ -83,19 +83,8 @@ public abstract class Maintainer implements Runnable {
@Override
public final String toString() { return name(); }
- /**
- * Called once each time this maintenance job should run.
- *
- * @return the degree to which the run was successful - a number between 0 (no success), to 1 (complete success).
- * Note that this indicates whether something is wrong, so e.g if the call did nothing because it should do
- * nothing, 1.0 should be returned.
- */
- protected abstract double maintain();
-
- /** Convenience methods to convert attempts and failures into a success factor */
- protected final double asSuccessFactor(int attempts, int failures) {
- return attempts == 0 ? 1.0 : 1 - (double)failures / attempts;
- }
+ /** Called once each time this maintenance job should run. Returns whether the maintenance run was successful */
+ protected abstract boolean maintain();
/** Returns the interval at which this job is set to run */
protected Duration interval() { return interval; }
@@ -104,12 +93,9 @@ public abstract class Maintainer implements Runnable {
public final void lockAndMaintain(boolean force) {
if (!force && !jobControl.isActive(name())) return;
log.log(Level.FINE, () -> "Running " + this.getClass().getSimpleName());
- jobMetrics.starting(name());
- double successFactor = 0;
+ jobMetrics.recordRunOf(name());
try (var lock = jobControl.lockJob(name())) {
- successFactor = maintain();
- if (successFactor > 0.0)
- jobMetrics.recordCompletionOf(name());
+ if (maintain()) jobMetrics.recordCompletionOf(name());
} catch (UncheckedTimeoutException e) {
if (ignoreCollision) {
jobMetrics.recordCompletionOf(name());
@@ -119,7 +105,7 @@ public abstract class Maintainer implements Runnable {
} catch (Throwable e) {
log.log(Level.WARNING, this + " failed. Will retry in " + interval, e);
} finally {
- jobMetrics.completed(name(), successFactor);
+ jobMetrics.forward(name());
}
log.log(Level.FINE, () -> "Finished " + this.getClass().getSimpleName());
}
diff --git a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java
index 01560c050ff..139a2901cd3 100644
--- a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java
+++ b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java
@@ -3,8 +3,6 @@ package com.yahoo.concurrent.maintenance;
import org.junit.Test;
-import java.util.concurrent.atomic.AtomicLong;
-
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
@@ -21,8 +19,9 @@ public class JobControlTest {
String job1 = "Job1";
String job2 = "Job2";
- TestMaintainer maintainer1 = new TestMaintainer(job1, jobControl, new NoopJobMetrics());
- TestMaintainer maintainer2 = new TestMaintainer(job2, jobControl, new NoopJobMetrics());
+ JobMetrics metrics = new JobMetrics((job, instant) -> {});
+ TestMaintainer maintainer1 = new TestMaintainer(job1, jobControl, metrics);
+ TestMaintainer maintainer2 = new TestMaintainer(job2, jobControl, metrics);
assertEquals(2, jobControl.jobs().size());
assertTrue(jobControl.jobs().contains(job1));
assertTrue(jobControl.jobs().contains(job2));
@@ -63,7 +62,7 @@ public class JobControlTest {
public void testJobControlMayDeactivateJobs() {
JobControlStateMock state = new JobControlStateMock();
JobControl jobControl = new JobControl(state);
- TestMaintainer mockMaintainer = new TestMaintainer(null, jobControl, new NoopJobMetrics());
+ TestMaintainer mockMaintainer = new TestMaintainer(null, jobControl, new JobMetrics((job, instant) -> {}));
assertTrue(jobControl.jobs().contains("TestMaintainer"));
@@ -81,11 +80,4 @@ public class JobControlTest {
assertEquals(2, mockMaintainer.totalRuns());
}
- private static class NoopJobMetrics extends JobMetrics {
-
- @Override
- protected void recordCompletion(String job, Long incompleteRuns, double successFactor) { }
-
- }
-
}
diff --git a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java
index d2db380f4a1..e881d4b3ff6 100644
--- a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java
+++ b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java
@@ -42,45 +42,35 @@ public class MaintainerTest {
@Test
public void success_metric() {
- TestJobMetrics jobMetrics = new TestJobMetrics();
+ AtomicLong consecutiveFailures = new AtomicLong();
+ JobMetrics jobMetrics = new JobMetrics((job, count) -> consecutiveFailures.set(count));
TestMaintainer maintainer = new TestMaintainer(null, jobControl, jobMetrics);
// Maintainer fails twice in a row
maintainer.successOnNextRun(false).run();
- assertEquals(1, jobMetrics.consecutiveFailures.get());
+ assertEquals(1, consecutiveFailures.get());
maintainer.successOnNextRun(false).run();
- assertEquals(2, jobMetrics.consecutiveFailures.get());
+ assertEquals(2, consecutiveFailures.get());
// Maintainer runs successfully
maintainer.successOnNextRun(true).run();
- assertEquals(0, jobMetrics.consecutiveFailures.get());
+ assertEquals(0, consecutiveFailures.get());
// Maintainer runs successfully again
maintainer.run();
- assertEquals(0, jobMetrics.consecutiveFailures.get());
+ assertEquals(0, consecutiveFailures.get());
// Maintainer throws
maintainer.throwOnNextRun(new RuntimeException()).run();
- assertEquals(1, jobMetrics.consecutiveFailures.get());
+ assertEquals(1, consecutiveFailures.get());
// Maintainer recovers
maintainer.throwOnNextRun(null).run();
- assertEquals(0, jobMetrics.consecutiveFailures.get());
+ assertEquals(0, consecutiveFailures.get());
// Lock exception is treated as a failure
maintainer.throwOnNextRun(new UncheckedTimeoutException()).run();
- assertEquals(1, jobMetrics.consecutiveFailures.get());
- }
-
- private static class TestJobMetrics extends JobMetrics {
-
- AtomicLong consecutiveFailures = new AtomicLong();
-
- @Override
- protected void recordCompletion(String job, Long incompleteRuns, double successFactor) {
- consecutiveFailures.set(incompleteRuns);
- }
-
+ assertEquals(1, consecutiveFailures.get());
}
}
diff --git a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java
index 7424b17cab2..44a00a37a83 100644
--- a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java
+++ b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java
@@ -33,10 +33,10 @@ class TestMaintainer extends Maintainer {
}
@Override
- protected double maintain() {
+ protected boolean maintain() {
if (exceptionToThrow != null) throw exceptionToThrow;
totalRuns++;
- return success ? 1.0 : 0.0;
+ return success;
}
}