about | summary | refs | log | tree | commit | diff | stats
path: root/controller-server
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@oath.com> 2021-06-04 19:40:59 +0200
committer GitHub <noreply@github.com> 2021-06-04 19:40:59 +0200
commit b14315a6f51bc8e5bce22e0d9d11d0e730aaf96d (patch)
tree 5f604f87bf9d37bb2d37c161b15c03b73d770dd2 /controller-server
parent e304fe27f449fae9cb71d5c969e662e601e61d2b (diff)
parent ba36521578a55088c6e38d50b616af85eb33cf19 (diff)
Merge pull request #18113 from vespa-engine/bratseth/maintainer-success-degree
Emit a success factor from maintainers
Diffstat (limited to 'controller-server')
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationMetaDataGarbageCollector.java | 6
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java | 37
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveAccessMaintainer.java | 6
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java | 4
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java | 4
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java | 4
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java | 10
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContainerImageExpirer.java | 4
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java | 20
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java | 4
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java | 10
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java | 35
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java | 4
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java | 6
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdater.java | 4
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java | 14
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java | 4
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java | 4
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/NameServiceDispatcher.java | 7
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java | 4
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java | 6
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java | 4
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java | 4
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java | 6
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java | 6
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java | 4
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainer.java | 4
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java | 4
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java | 17
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java | 4
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainer.java | 4
-rw-r--r-- controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java | 6
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java | 13
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainerTest.java | 10
-rw-r--r-- controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java | 10
35 files changed, 167 insertions, 126 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationMetaDataGarbageCollector.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationMetaDataGarbageCollector.java
index 7d94a4c728f..9ec8e4d1a2d 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationMetaDataGarbageCollector.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationMetaDataGarbageCollector.java
@@ -19,14 +19,14 @@ public class ApplicationMetaDataGarbageCollector extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
+ protected double maintain() {
try {
controller().applications().applicationStore().pruneMeta(controller().clock().instant().minus(Duration.ofDays(365)));
- return true;
+ return 1.0;
}
catch (Exception e) {
log.log(Level.WARNING, "Exception pruning old application meta data", e);
- return false;
+ return 0.0;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java
index 1f20e48edf5..69e0eb26f16 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java
@@ -18,6 +18,7 @@ import com.yahoo.yolean.Exceptions;
import java.time.Duration;
import java.util.HashMap;
import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
/**
@@ -39,15 +40,17 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
- return confirmApplicationOwnerships() &
- ensureConfirmationResponses() &
- updateConfirmedApplicationOwners();
+ protected double maintain() {
+ return ( confirmApplicationOwnerships() +
+ ensureConfirmationResponses() +
+ updateConfirmedApplicationOwners() )
+ / 3;
}
/** File an ownership issue with the owners of all applications we know about. */
- private boolean confirmApplicationOwnerships() {
- AtomicBoolean success = new AtomicBoolean(true);
+ private double confirmApplicationOwnerships() {
+ AtomicInteger attempts = new AtomicInteger(0);
+ AtomicInteger failures = new AtomicInteger(0);
applications()
.withProjectId()
.withProductionDeployment()
@@ -56,6 +59,7 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer {
.filter(application -> application.createdAt().isBefore(controller().clock().instant().minus(Duration.ofDays(90))))
.forEach(application -> {
try {
+ attempts.incrementAndGet();
// TODO jvenstad: Makes sense to require, and run this only in main?
tenantOf(application.id()).contact().flatMap(contact -> {
return ownershipIssues.confirmOwnership(application.ownershipIssueId(),
@@ -65,17 +69,17 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer {
}).ifPresent(newIssueId -> store(newIssueId, application.id()));
}
catch (RuntimeException e) { // Catch errors due to wrong data in the controller, or issues client timeout.
- success.set(false);
+ failures.incrementAndGet();
log.log(Level.INFO, "Exception caught when attempting to file an issue for '" + application.id() + "': " + Exceptions.toMessageString(e));
}
});
- return success.get();
+ return asSuccessFactor(attempts.get(), failures.get());
}
private ApplicationSummary summaryOf(TenantAndApplicationId application) {
var app = applications.requireApplication(application);
var metrics = new HashMap<ZoneId, ApplicationSummary.Metric>();
- for (Instance instance : app.instances().values())
+ for (Instance instance : app.instances().values()) {
for (var kv : instance.deployments().entrySet()) {
var zone = kv.getKey();
var deploymentMetrics = kv.getValue().metrics();
@@ -83,28 +87,31 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer {
deploymentMetrics.queriesPerSecond(),
deploymentMetrics.writesPerSecond()));
}
+ }
return new ApplicationSummary(app.id().defaultInstance(), app.activity().lastQueried(), app.activity().lastWritten(),
app.latestVersion().flatMap(version -> version.buildTime()), metrics);
}
/** Escalate ownership issues which have not been closed before a defined amount of time has passed. */
- private boolean ensureConfirmationResponses() {
- AtomicBoolean success = new AtomicBoolean(true);
+ private double ensureConfirmationResponses() {
+ AtomicInteger attempts = new AtomicInteger(0);
+ AtomicInteger failures = new AtomicInteger(0);
for (Application application : applications())
application.ownershipIssueId().ifPresent(issueId -> {
try {
+ attempts.incrementAndGet();
Tenant tenant = tenantOf(application.id());
ownershipIssues.ensureResponse(issueId, tenant.contact());
}
catch (RuntimeException e) {
- success.set(false);
+ failures.incrementAndGet();
log.log(Level.INFO, "Exception caught when attempting to escalate issue with id '" + issueId + "': " + Exceptions.toMessageString(e));
}
});
- return success.get();
+ return asSuccessFactor(attempts.get(), failures.get());
}
- private boolean updateConfirmedApplicationOwners() {
+ private double updateConfirmedApplicationOwners() {
applications()
.withProjectId()
.withProductionDeployment()
@@ -118,7 +125,7 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer {
controller().applications().store(lockedApplication.withOwner(owner)));
});
});
- return true;
+ return 1.0;
}
private ApplicationList applications() {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveAccessMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveAccessMaintainer.java
index 1a9889284e1..b096a853541 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveAccessMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveAccessMaintainer.java
@@ -37,8 +37,7 @@ public class ArchiveAccessMaintainer extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
-
+ protected double maintain() {
// Count buckets - so we can alert if we get close to the account limit of 1000
zoneRegistry.zones().all().ids().forEach(zoneId ->
metric.set(bucketCountMetricName, archiveBucketDb.buckets(zoneId).size(),
@@ -59,6 +58,7 @@ public class ArchiveAccessMaintainer extends ControllerMaintainer {
)
);
- return true;
+ return 1.0;
}
+
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java
index d2141b097b3..ab8e5efa0bd 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java
@@ -38,7 +38,7 @@ public class ArchiveUriUpdater extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
+ protected double maintain() {
Map<ZoneId, Set<TenantName>> tenantsByZone = new HashMap<>();
for (var application : applications.asList()) {
for (var instance : application.instances().values()) {
@@ -63,7 +63,7 @@ public class ArchiveUriUpdater extends ControllerMaintainer {
.forEach(tenant -> nodeRepository.removeArchiveUri(zone, tenant));
});
- return true;
+ return 1.0;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java
index 1f360c477b9..14e3e685a8a 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java
@@ -43,14 +43,14 @@ public class ChangeRequestMaintainer extends ControllerMaintainer {
@Override
- protected boolean maintain() {
+ protected double maintain() {
var currentChangeRequests = pruneOldChangeRequests();
var changeRequests = changeRequestClient.getChangeRequests(currentChangeRequests);
logger.fine(() -> "Found requests: " + changeRequests);
storeChangeRequests(changeRequests);
- return true;
+ return 1.0;
}
private void storeChangeRequests(List<ChangeRequest> changeRequests) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java
index d923db936cb..5acd0c63670 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java
@@ -38,7 +38,7 @@ public class CloudEventReporter extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
+ protected double maintain() {
for (var region : zonesByCloudNativeRegion.keySet()) {
List<CloudEvent> events = eventFetcher.getEvents(region);
for (var event : events) {
@@ -48,7 +48,7 @@ public class CloudEventReporter extends ControllerMaintainer {
deprovisionAffectedHosts(region, event);
}
}
- return true;
+ return 1.0;
}
/** Deprovision any host affected by given event */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java
index 7b846fa288c..5ee39f7c8f2 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java
@@ -35,12 +35,14 @@ public class ContactInformationMaintainer extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
+ protected double maintain() {
TenantController tenants = controller().tenants();
- boolean success = true;
+ int attempts = 0;
+ int failures = 0;
for (Tenant tenant : tenants.asList()) {
log.log(FINE, () -> "Updating contact information for " + tenant);
try {
+ attempts++;
switch (tenant.type()) {
case athenz:
tenants.lockIfPresent(tenant.name(), LockedTenant.Athenz.class, lockedTenant -> {
@@ -56,13 +58,13 @@ public class ContactInformationMaintainer extends ControllerMaintainer {
throw new IllegalArgumentException("Unexpected tenant type '" + tenant.type() + "'.");
}
} catch (Exception e) {
- success = false;
+ failures++;
log.log(Level.WARNING, "Failed to update contact information for " + tenant + ": " +
Exceptions.toMessageString(e) + ". Retrying in " +
interval());
}
}
- return success;
+ return asSuccessFactor(attempts, failures);
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContainerImageExpirer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContainerImageExpirer.java
index ff5fc4d2051..f1574381a3d 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContainerImageExpirer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContainerImageExpirer.java
@@ -34,7 +34,7 @@ public class ContainerImageExpirer extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
+ protected double maintain() {
Instant now = controller().clock().instant();
VersionStatus versionStatus = controller().readVersionStatus();
List<ContainerImage> imagesToExpire = controller().serviceRegistry().containerRegistry().list().stream()
@@ -44,7 +44,7 @@ public class ContainerImageExpirer extends ControllerMaintainer {
log.log(Level.INFO, "Expiring " + imagesToExpire.size() + " container images: " + imagesToExpire);
controller().serviceRegistry().containerRegistry().deleteAll(imagesToExpire);
}
- return true;
+ return 1.0;
}
/** Returns whether given image is expired */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java
index 03a6268397e..810c412fcc0 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java
@@ -34,7 +34,7 @@ public abstract class ControllerMaintainer extends Maintainer {
public ControllerMaintainer(Controller controller, Duration interval, String name, Set<SystemName> activeSystems) {
super(name, interval, controller.clock().instant(), controller.jobControl(),
- jobMetrics(controller.metric()), controller.curator().cluster(), true);
+ new ControllerJobMetrics(controller.metric()), controller.curator().cluster(), true);
this.controller = controller;
this.activeSystems = Set.copyOf(Objects.requireNonNull(activeSystems));
}
@@ -47,10 +47,20 @@ public abstract class ControllerMaintainer extends Maintainer {
super.run();
}
- private static JobMetrics jobMetrics(Metric metric) {
- return new JobMetrics((job, consecutiveFailures) -> {
- metric.set("maintenance.consecutiveFailures", consecutiveFailures, metric.createContext(Map.of("job", job)));
- });
+ private static class ControllerJobMetrics extends JobMetrics {
+
+ private final Metric metric;
+
+ public ControllerJobMetrics(Metric metric) {
+ this.metric = metric;
+ }
+
+ @Override
+ protected void recordCompletion(String job, Long incompleteRuns, double successFactor) {
+ metric.set("maintenance.consecutiveFailures", incompleteRuns, metric.createContext(Map.of("job", job)));
+ metric.set("maintenance.successFactor", successFactor, metric.createContext(Map.of("job", job)));
+ }
+
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java
index 28b64b5bfe0..21cda09d92a 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java
@@ -31,10 +31,10 @@ public class CostReportMaintainer extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
+ protected double maintain() {
var csv = CostCalculator.resourceShareByPropertyToCsv(nodeRepository, controller(), clock, consumer.fixedAllocations());
consumer.consume(csv);
- return true;
+ return 1.0;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java
index e5316788802..9e3da506ca8 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java
@@ -28,8 +28,9 @@ public class DeploymentExpirer extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
- boolean success = true;
+ protected double maintain() {
+ int attempts = 0;
+ int failures = 0;
for (Application application : controller().applications().readable()) {
for (Instance instance : application.instances().values())
for (Deployment deployment : instance.deployments().values()) {
@@ -37,16 +38,17 @@ public class DeploymentExpirer extends ControllerMaintainer {
try {
log.log(Level.INFO, "Expiring deployment of " + instance.id() + " in " + deployment.zone());
+ attempts++;
controller().applications().deactivate(instance.id(), deployment.zone());
} catch (Exception e) {
- success = false;
+ failures++;
log.log(Level.WARNING, "Could not expire " + deployment + " of " + instance +
": " + Exceptions.toMessageString(e) + ". Retrying in " +
interval());
}
}
}
- return success;
+ return asSuccessFactor(attempts, failures);
}
/** Returns whether given deployment has expired according to its TTL */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java
index a3070ef55a0..4e53e07f5af 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java
@@ -21,6 +21,7 @@ import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import static com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence.broken;
@@ -45,10 +46,11 @@ public class DeploymentIssueReporter extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
- return maintainDeploymentIssues(applications()) &
- maintainPlatformIssue(applications()) &
- escalateInactiveDeploymentIssues(applications());
+ protected double maintain() {
+ return ( maintainDeploymentIssues(applications()) +
+ maintainPlatformIssue(applications()) +
+ escalateInactiveDeploymentIssues(applications()))
+ / 3;
}
/** Returns the applications to maintain issue status for. */
@@ -63,7 +65,7 @@ public class DeploymentIssueReporter extends ControllerMaintainer {
* and store the issue id for the filed issues. Also, clear the issueIds of applications
* where deployment has not failed for this amount of time.
*/
- private boolean maintainDeploymentIssues(List<Application> applications) {
+ private double maintainDeploymentIssues(List<Application> applications) {
List<TenantAndApplicationId> failingApplications = controller().jobController().deploymentStatuses(ApplicationList.from(applications))
.failingApplicationChangeSince(controller().clock().instant().minus(maxFailureAge))
.mapToList(status -> status.application().id());
@@ -73,7 +75,7 @@ public class DeploymentIssueReporter extends ControllerMaintainer {
fileDeploymentIssueFor(application);
else
store(application.id(), null);
- return true;
+ return 1.0;
}
/**
@@ -81,27 +83,26 @@ public class DeploymentIssueReporter extends ControllerMaintainer {
* applications that have been failing the upgrade to the system version for
* longer than the set grace period, or update this list if the issue already exists.
*/
- private boolean maintainPlatformIssue(List<Application> applications) {
- boolean success = true;
+ private double maintainPlatformIssue(List<Application> applications) {
if (controller().system() == SystemName.cd)
- return success;
+ return 1.0;
VersionStatus versionStatus = controller().readVersionStatus();
Version systemVersion = controller().systemVersion(versionStatus);
if (versionStatus.version(systemVersion).confidence() != broken)
- return success;
+ return 1.0;
DeploymentStatusList statuses = controller().jobController().deploymentStatuses(ApplicationList.from(applications));
if (statuses.failingUpgradeToVersionSince(systemVersion, controller().clock().instant().minus(upgradeGracePeriod)).isEmpty())
- return success;
+ return 1.0;
List<ApplicationId> failingApplications = statuses.failingUpgradeToVersionSince(systemVersion, controller().clock().instant())
.mapToList(status -> status.application().id().defaultInstance());
// TODO jonmv: Send only tenant and application, here and elsewhere in this.
deploymentIssues.fileUnlessOpen(failingApplications, systemVersion);
- return success;
+ return 1.0;
}
private Tenant ownerOf(TenantAndApplicationId applicationId) {
@@ -126,21 +127,23 @@ public class DeploymentIssueReporter extends ControllerMaintainer {
}
/** Escalate issues for which there has been no activity for a certain amount of time. */
- private boolean escalateInactiveDeploymentIssues(Collection<Application> applications) {
- AtomicBoolean success = new AtomicBoolean(true);
+ private double escalateInactiveDeploymentIssues(Collection<Application> applications) {
+ AtomicInteger attempts = new AtomicInteger(0);
+ AtomicInteger failures = new AtomicInteger(0);
applications.forEach(application -> application.deploymentIssueId().ifPresent(issueId -> {
try {
+ attempts.incrementAndGet();
Tenant tenant = ownerOf(application.id());
deploymentIssues.escalateIfInactive(issueId,
maxInactivity,
tenant.type() == Tenant.Type.athenz ? tenant.contact() : Optional.empty());
}
catch (RuntimeException e) {
- success.set(false);
+ failures.incrementAndGet();
log.log(Level.INFO, "Exception caught when attempting to escalate issue with id '" + issueId + "': " + Exceptions.toMessageString(e));
}
}));
- return success.get();
+ return asSuccessFactor(attempts.get(), failures.get());
}
private void store(TenantAndApplicationId id, IssueId issueId) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java
index a8214ac8a09..20154c4f122 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java
@@ -44,7 +44,7 @@ public class DeploymentMetricsMaintainer extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
+ protected double maintain() {
AtomicInteger failures = new AtomicInteger(0);
AtomicInteger attempts = new AtomicInteger(0);
AtomicReference<Exception> lastException = new AtomicReference<>(null);
@@ -92,7 +92,7 @@ public class DeploymentMetricsMaintainer extends ControllerMaintainer {
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
- return lastException.get() == null;
+ return asSuccessFactor(attempts.get(), failures.get());
}
static DeploymentMetrics updateDeploymentMetrics(DeploymentMetrics current, List<ClusterMetrics> metrics) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java
index 55a957f0247..85a69b0f338 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java
@@ -54,7 +54,7 @@ public class EndpointCertificateMaintainer extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
+ protected double maintain() {
try {
// In order of importance
deployRefreshedCertificates();
@@ -62,10 +62,10 @@ public class EndpointCertificateMaintainer extends ControllerMaintainer {
deleteUnusedCertificates();
} catch (Exception e) {
log.log(LogLevel.ERROR, "Exception caught while maintaining endpoint certificates", e);
- return false;
+ return 0.0;
}
- return true;
+ return 1.0;
}
private void updateRefreshedCertificates() {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdater.java
index 83ccda422e6..10e6f9eb039 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdater.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdater.java
@@ -38,7 +38,7 @@ public class HostInfoUpdater extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
+ protected double maintain() {
Map<String, NodeEntity> nodeEntities = controller().serviceRegistry().entityService().listNodes().stream()
.collect(Collectors.toMap(NodeEntity::hostname,
Function.identity()));
@@ -62,7 +62,7 @@ public class HostInfoUpdater extends ControllerMaintainer {
LOG.info("Updated information for " + hostsUpdated + " hosts(s)");
}
}
- return true;
+ return 1.0;
}
private static Optional<String> buildModelName(NodeEntity nodeEntity) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java
index 9859d12510a..5101de73a33 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java
@@ -39,23 +39,28 @@ public abstract class InfrastructureUpgrader<VERSION> extends ControllerMaintain
}
@Override
- protected boolean maintain() {
- targetVersion().ifPresent(target -> upgradeAll(target, managedApplications));
- return true;
+ protected double maintain() {
+ if (targetVersion().isEmpty()) return 1.0;
+ return upgradeAll(targetVersion().get(), managedApplications);
}
/** Deploy a list of system applications until they converge on the given version */
- private void upgradeAll(VERSION target, List<SystemApplication> applications) {
+ private double upgradeAll(VERSION target, List<SystemApplication> applications) {
+ int attempts = 0;
+ int failures = 0;
for (List<ZoneApi> zones : upgradePolicy.asList()) {
boolean converged = true;
for (ZoneApi zone : zones) {
try {
+ attempts++;
converged &= upgradeAll(target, applications, zone);
} catch (UnreachableNodeRepositoryException e) {
+ failures++;
converged = false;
log.warning(String.format("%s: Failed to communicate with node repository in %s, continuing with next parallel zone: %s",
this, zone, Exceptions.toMessageString(e)));
} catch (Exception e) {
+ failures++;
converged = false;
log.warning(String.format("%s: Failed to upgrade zone: %s, continuing with next parallel zone: %s",
this, zone, Exceptions.toMessageString(e)));
@@ -65,6 +70,7 @@ public abstract class InfrastructureUpgrader<VERSION> extends ControllerMaintain
break;
}
}
+ return asSuccessFactor(attempts, failures);
}
/** Returns whether all applications have converged to the target version in zone */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java
index b84cfe5af9b..25207b733f0 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java
@@ -49,10 +49,10 @@ public class JobRunner extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
+ protected double maintain() {
jobs.active().forEach(this::advance);
jobs.collectGarbage();
- return true;
+ return 1.0;
}
@Override
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java
index b26b94f0b28..3f65c2e49cd 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java
@@ -73,7 +73,7 @@ public class MetricsReporter extends ControllerMaintainer {
}
@Override
- public boolean maintain() {
+ public double maintain() {
reportDeploymentMetrics();
reportRemainingRotations();
reportQueuedNameServiceRequests();
@@ -82,7 +82,7 @@ public class MetricsReporter extends ControllerMaintainer {
reportAuditLog();
reportBrokenSystemVersion(versionStatus);
reportTenantMetrics();
- return true;
+ return 1.0;
}
private void reportBrokenSystemVersion(VersionStatus versionStatus) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/NameServiceDispatcher.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/NameServiceDispatcher.java
index e57affdc15d..fe20db00e05 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/NameServiceDispatcher.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/NameServiceDispatcher.java
@@ -37,13 +37,12 @@ public class NameServiceDispatcher extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
- boolean success = true;
+ protected double maintain() {
try (var lock = db.lockNameServiceQueue()) {
var queue = db.readNameServiceQueue();
var instant = clock.instant();
var remaining = queue.dispatchTo(nameService, requestCount);
- if (queue == remaining) return success; // Queue unchanged
+ if (queue == remaining) return 1.0; // Queue unchanged
var dispatched = queue.first(requestCount);
if (!dispatched.requests().isEmpty()) {
@@ -54,7 +53,7 @@ public class NameServiceDispatcher extends ControllerMaintainer {
}
db.writeNameServiceQueue(remaining);
}
- return success;
+ return 1.0;
}
private static int requestCount(SystemName system) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java
index e1618f05a7d..666d1c3b23a 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java
@@ -42,13 +42,13 @@ public class OsUpgradeScheduler extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
+ protected double maintain() {
for (var cloud : supportedClouds()) {
Optional<Version> newTarget = newTargetIn(cloud);
if (newTarget.isEmpty()) continue;
controller().upgradeOsIn(cloud, newTarget.get(), upgradeBudget(), false);
}
- return true;
+ return 1.0;
}
/** Returns the new target version for given cloud, if any */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java
index cbd9207fda4..271dd277e1c 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java
@@ -18,16 +18,16 @@ public class OsVersionStatusUpdater extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
+ protected double maintain() {
try {
OsVersionStatus newStatus = OsVersionStatus.compute(controller());
controller().updateOsVersionStatus(newStatus);
- return true;
+ return 1.0;
} catch (Exception e) {
log.log(Level.WARNING, "Failed to compute OS version status: " + Exceptions.toMessageString(e) +
". Retrying in " + interval());
}
- return false;
+ return 0.0;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java
index a032f266de5..9d93ac719b7 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java
@@ -19,13 +19,13 @@ public class OutstandingChangeDeployer extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
+ protected double maintain() {
for (Application application : ApplicationList.from(controller().applications().readable())
.withProductionDeployment()
.withDeploymentSpec()
.asList())
controller().applications().deploymentTrigger().triggerNewRevision(application.id());
- return true;
+ return 1.0;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java
index a626f21359a..ffe958cb63a 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java
@@ -17,9 +17,9 @@ public class ReadyJobsTrigger extends ControllerMaintainer {
}
@Override
- public boolean maintain() {
+ public double maintain() {
controller().applications().deploymentTrigger().triggerReadyJobs();
- return true;
+ return 1.0;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java
index 263a33cf266..0bd74c844ae 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java
@@ -40,7 +40,7 @@ public class ReindexingTriggerer extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
+ protected double maintain() {
try {
Instant now = controller().clock().instant();
for (Application application : controller().applications().asList())
@@ -51,11 +51,11 @@ public class ReindexingTriggerer extends ControllerMaintainer {
&& reindexingIsReady(controller().applications().applicationReindexing(id, deployment.zone()), now))
controller().applications().reindex(id, deployment.zone(), List.of(), List.of(), true);
});
- return true;
+ return 1.0;
}
catch (RuntimeException e) {
log.log(Level.WARNING, "Failed to trigger reindexing: " + Exceptions.toMessageString(e));
- return false;
+ return 0.0;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java
index aed2e637e4b..39ad233ce46 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java
@@ -79,19 +79,19 @@ public class ResourceMeterMaintainer extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
+ protected double maintain() {
Collection<ResourceSnapshot> resourceSnapshots;
try {
resourceSnapshots = getAllResourceSnapshots();
} catch (Exception e) {
log.log(Level.WARNING, "Failed to collect resource snapshots. Retrying in " + interval() + ". Error: " +
Exceptions.toMessageString(e));
- return false;
+ return 0.0;
}
if (systemName.isPublic()) reportResourceSnapshots(resourceSnapshots);
updateDeploymentCost(resourceSnapshots);
- return true;
+ return 1.0;
}
void updateDeploymentCost(Collection<ResourceSnapshot> resourceSnapshots) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java
index c7bf7e765ed..ab988bcf0ac 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java
@@ -28,7 +28,7 @@ public class ResourceTagMaintainer extends ControllerMaintainer {
}
@Override
- public boolean maintain() {
+ public double maintain() {
controller().zoneRegistry().zones()
.ofCloud(CloudName.from("aws"))
.reachable()
@@ -38,7 +38,7 @@ public class ResourceTagMaintainer extends ControllerMaintainer {
if (taggedResources > 0)
log.log(Level.INFO, "Tagged " + taggedResources + " resources in " + zone.getId());
});
- return true;
+ return 1.0;
}
private Map<HostName, Optional<ApplicationId>> getTenantOfParentHosts(ZoneId zoneId) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainer.java
index 3b0a1fca4af..e40d772a673 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainer.java
@@ -21,14 +21,14 @@ public class SystemRoutingPolicyMaintainer extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
+ protected double maintain() {
for (var zone : controller().zoneRegistry().zones().all().ids()) {
for (var application : SystemApplication.values()) {
if (!application.hasEndpoint()) continue;
controller().routing().policies().refresh(application.id(), DeploymentSpec.empty, zone);
}
}
- return true;
+ return 1.0;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java
index 1265d687850..637ae10bcc6 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java
@@ -23,7 +23,7 @@ public class TenantRoleMaintainer extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
+ protected double maintain() {
var roleService = controller().serviceRegistry().roleService();
var tenants = controller().tenants().asList();
var tenantsWithRoles = tenants.stream()
@@ -31,7 +31,7 @@ public class TenantRoleMaintainer extends ControllerMaintainer {
.filter(this::hasProductionDeployment)
.collect(Collectors.toList());
roleService.maintainRoles(tenantsWithRoles);
- return true;
+ return 1.0;
}
private boolean hasProductionDeployment(TenantName tenant) {
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java
index fbe9faa9754..0af0d01478b 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java
@@ -36,30 +36,34 @@ public class TrafficShareUpdater extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
- boolean success = false;
+ protected double maintain() {
Exception lastException = null;
+ int attempts = 0;
+ int failures = 0;
for (var application : applications.asList()) {
for (var instance : application.instances().values()) {
for (var deployment : instance.deployments().values()) {
if ( ! deployment.zone().environment().isProduction()) continue;
try {
- success |= updateTrafficFraction(instance, deployment);
+ attempts++;
+ updateTrafficFraction(instance, deployment);
}
catch (Exception e) {
// Some failures due to locked applications are expected and benign
+ failures++;
lastException = e;
}
}
}
}
- if ( ! success && lastException != null) // log on complete failure
+ double successFactor = asSuccessFactor(attempts, failures);
+ if ( lastException != null && failures == attempts ) // log on complete failure only; exact integer test instead of comparing a double to 0
log.log(Level.WARNING, "Could not update traffic share on any applications", lastException);
- return success;
+ return successFactor;
}
- private boolean updateTrafficFraction(Instance instance, Deployment deployment) {
+ private void updateTrafficFraction(Instance instance, Deployment deployment) {
double qpsInZone = deployment.metrics().queriesPerSecond();
double totalQps = instance.deployments().values().stream()
.filter(i -> i.zone().environment().isProduction())
@@ -73,7 +77,6 @@ public class TrafficShareUpdater extends ControllerMaintainer {
maxReadShare = currentReadShare; // distribution can be incorrect
nodeRepository.patchApplication(deployment.zone(), instance.id(), currentReadShare, maxReadShare);
- return true;
}
}
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
index 8d5019904fa..2326f7b66ee 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java
@@ -51,7 +51,7 @@ public class Upgrader extends ControllerMaintainer {
* Schedule application upgrades. Note that this implementation must be idempotent.
*/
@Override
- public boolean maintain() {
+ public double maintain() {
// Determine target versions for each upgrade policy
VersionStatus versionStatus = controller().readVersionStatus();
Version canaryTarget = controller().systemVersion(versionStatus);
@@ -91,7 +91,7 @@ public class Upgrader extends ControllerMaintainer {
upgrade(instances.with(UpgradePolicy.canary), canaryTarget, targetMajorVersion, instances.size());
defaultTargets.forEach(target -> upgrade(instances.with(UpgradePolicy.defaultPolicy), target, targetMajorVersion, numberOfApplicationsToUpgrade()));
conservativeTargets.forEach(target -> upgrade(instances.with(UpgradePolicy.conservative), target, targetMajorVersion, numberOfApplicationsToUpgrade()));
- return true;
+ return 1.0;
}
/** Returns the target versions for given confidence, one per major version in the system */
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainer.java
index fedf3d90760..4cd24289676 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainer.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainer.java
@@ -57,7 +57,7 @@ public class VCMRMaintainer extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
+ protected double maintain() {
var changeRequests = curator.readChangeRequests()
.stream()
.filter(shouldUpdate())
@@ -81,7 +81,7 @@ public class VCMRMaintainer extends ControllerMaintainer {
});
}
});
- return true;
+ return 1.0;
}
/**
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java
index a3e9672b715..e4866c43f13 100644
--- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java
+++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java
@@ -29,7 +29,7 @@ public class VersionStatusUpdater extends ControllerMaintainer {
}
@Override
- protected boolean maintain() {
+ protected double maintain() {
try {
VersionStatus newStatus = VersionStatus.compute(controller());
controller().updateVersionStatus(newStatus);
@@ -37,12 +37,12 @@ public class VersionStatusUpdater extends ControllerMaintainer {
controller().serviceRegistry().systemMonitor().reportSystemVersion(version.versionNumber(),
convert(version.confidence()));
});
- return true;
+ return 1.0;
} catch (Exception e) {
log.log(Level.WARNING, "Failed to compute version status: " + Exceptions.toMessageString(e) +
". Retrying in " + interval());
}
- return false;
+ return 0.0;
}
static SystemMonitor.Confidence convert(VespaVersion.Confidence confidence) {
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java
index 27b4f3744e7..7dc5cb34818 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java
@@ -39,13 +39,16 @@ public class ControllerMaintainerTest {
TestControllerMaintainer maintainer = new TestControllerMaintainer(tester.controller(), SystemName.main, new AtomicInteger());
maintainer.run();
assertEquals(0L, consecutiveFailuresMetric());
+ assertEquals(1.0, successFactorMetric(), 0.0000001);
maintainer.success = false;
maintainer.run();
maintainer.run();
assertEquals(2L, consecutiveFailuresMetric());
+ assertEquals(0.0, successFactorMetric(), 0.0000001);
maintainer.success = true;
maintainer.run();
assertEquals(0, consecutiveFailuresMetric());
+ assertEquals(1.0, successFactorMetric(), 0.0000001);
}
private long consecutiveFailuresMetric() {
@@ -54,6 +57,12 @@ public class ControllerMaintainerTest {
"maintenance.consecutiveFailures").get().longValue();
}
+ private double successFactorMetric() { // the success factor is a double; returning long would truncate fractional values
+ MetricsMock metrics = (MetricsMock) tester.controller().metric();
+ return metrics.getMetric((context) -> "TestControllerMaintainer".equals(context.get("job")),
+ "maintenance.successFactor").get().doubleValue();
+ }
+
private static class TestControllerMaintainer extends ControllerMaintainer {
private final AtomicInteger executions;
@@ -65,9 +74,9 @@ public class ControllerMaintainerTest {
}
@Override
- protected boolean maintain() {
+ protected double maintain() {
executions.incrementAndGet();
- return success;
+ return success ? 1.0 : 0.0;
}
}
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainerTest.java
index 66bda66bbf9..ce219b8beed 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainerTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainerTest.java
@@ -33,7 +33,7 @@ public class EndpointCertificateMaintainerTest {
@Test
public void old_and_unused_cert_is_deleted() {
tester.curator().writeEndpointCertificateMetadata(ApplicationId.defaultId(), exampleMetadata);
- assertTrue(maintainer.maintain());
+ assertEquals(1.0, maintainer.maintain(), 0.0000001);
assertTrue(tester.curator().readEndpointCertificateMetadata(ApplicationId.defaultId()).isEmpty());
}
@@ -41,7 +41,7 @@ public class EndpointCertificateMaintainerTest {
public void unused_but_recently_used_cert_is_not_deleted() {
EndpointCertificateMetadata recentlyRequestedCert = exampleMetadata.withLastRequested(tester.clock().instant().minusSeconds(3600).getEpochSecond());
tester.curator().writeEndpointCertificateMetadata(ApplicationId.defaultId(), recentlyRequestedCert);
- assertTrue(maintainer.maintain());
+ assertEquals(1.0, maintainer.maintain(), 0.0000001);
assertEquals(Optional.of(recentlyRequestedCert), tester.curator().readEndpointCertificateMetadata(ApplicationId.defaultId()));
}
@@ -53,7 +53,7 @@ public class EndpointCertificateMaintainerTest {
secretStore.setSecret(exampleMetadata.keyName(), "foo", 1);
secretStore.setSecret(exampleMetadata.certName(), "bar", 1);
- assertTrue(maintainer.maintain());
+ assertEquals(1.0, maintainer.maintain(), 0.0000001);
var updatedCert = Optional.of(recentlyRequestedCert.withLastRefreshed(tester.clock().instant().getEpochSecond()).withVersion(1));
@@ -77,7 +77,7 @@ public class EndpointCertificateMaintainerTest {
tester.curator().writeEndpointCertificateMetadata(appId, exampleMetadata);
- assertTrue(maintainer.maintain());
+ assertEquals(1.0, maintainer.maintain(), 0.0000001);
assertTrue(tester.curator().readEndpointCertificateMetadata(appId).isPresent()); // cert should not be deleted, the app is deployed!
}
@@ -97,7 +97,7 @@ public class EndpointCertificateMaintainerTest {
tester.curator().writeEndpointCertificateMetadata(appId, exampleMetadata);
- assertTrue(maintainer.maintain());
+ assertEquals(1.0, maintainer.maintain(), 0.0000001);
assertTrue(tester.curator().readEndpointCertificateMetadata(appId).isPresent()); // cert should not be deleted, the app is deployed!
tester.clock().advance(Duration.ofDays(3));
diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java
index 2afa3a0faea..7b4882de3ff 100644
--- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java
+++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java
@@ -39,7 +39,7 @@ public class TrafficShareUpdaterTest {
// Single zone
setQpsMetric(50.0, application.application().id().defaultInstance(), prod1, tester);
deploymentMetricsMaintainer.maintain();
- assertTrue(updater.maintain());
+ assertEquals(1.0, updater.maintain(), 0.0000001);
assertTrafficFraction(1.0, 1.0, application.instanceId(), prod1, tester);
// Two zones
@@ -48,14 +48,14 @@ public class TrafficShareUpdaterTest {
setQpsMetric(50.0, application.application().id().defaultInstance(), prod1, tester);
setQpsMetric(0.0, application.application().id().defaultInstance(), prod2, tester);
deploymentMetricsMaintainer.maintain();
- assertTrue(updater.maintain());
+ assertEquals(1.0, updater.maintain(), 0.0000001);
assertTrafficFraction(1.0, 1.0, application.instanceId(), prod1, tester);
assertTrafficFraction(0.0, 1.0, application.instanceId(), prod2, tester);
// - both hot
setQpsMetric(53.0, application.application().id().defaultInstance(), prod1, tester);
setQpsMetric(47.0, application.application().id().defaultInstance(), prod2, tester);
deploymentMetricsMaintainer.maintain();
- assertTrue(updater.maintain());
+ assertEquals(1.0, updater.maintain(), 0.0000001);
assertTrafficFraction(0.53, 1.0, application.instanceId(), prod1, tester);
assertTrafficFraction(0.47, 1.0, application.instanceId(), prod2, tester);
@@ -66,7 +66,7 @@ public class TrafficShareUpdaterTest {
setQpsMetric(47.0, application.application().id().defaultInstance(), prod2, tester);
setQpsMetric(0.0, application.application().id().defaultInstance(), prod3, tester);
deploymentMetricsMaintainer.maintain();
- assertTrue(updater.maintain());
+ assertEquals(1.0, updater.maintain(), 0.0000001);
assertTrafficFraction(0.53, 0.53, application.instanceId(), prod1, tester);
assertTrafficFraction(0.47, 0.50, application.instanceId(), prod2, tester);
assertTrafficFraction(0.00, 0.50, application.instanceId(), prod3, tester);
@@ -75,7 +75,7 @@ public class TrafficShareUpdaterTest {
setQpsMetric(25.0, application.application().id().defaultInstance(), prod2, tester);
setQpsMetric(25.0, application.application().id().defaultInstance(), prod3, tester);
deploymentMetricsMaintainer.maintain();
- assertTrue(updater.maintain());
+ assertEquals(1.0, updater.maintain(), 0.0000001);
assertTrafficFraction(0.50, 0.5, application.instanceId(), prod1, tester);
assertTrafficFraction(0.25, 0.5, application.instanceId(), prod2, tester);
assertTrafficFraction(0.25, 0.5, application.instanceId(), prod3, tester);