diff options
Diffstat (limited to 'controller-server')
22 files changed, 102 insertions, 48 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java index 786819d9442..1f20e48edf5 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java @@ -17,6 +17,7 @@ import com.yahoo.yolean.Exceptions; import java.time.Duration; import java.util.HashMap; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.logging.Level; /** @@ -38,14 +39,15 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer { } @Override - protected void maintain() { - confirmApplicationOwnerships(); - ensureConfirmationResponses(); - updateConfirmedApplicationOwners(); + protected boolean maintain() { + return confirmApplicationOwnerships() & + ensureConfirmationResponses() & + updateConfirmedApplicationOwners(); } /** File an ownership issue with the owners of all applications we know about. */ - private void confirmApplicationOwnerships() { + private boolean confirmApplicationOwnerships() { + AtomicBoolean success = new AtomicBoolean(true); applications() .withProjectId() .withProductionDeployment() @@ -63,10 +65,11 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer { }).ifPresent(newIssueId -> store(newIssueId, application.id())); } catch (RuntimeException e) { // Catch errors due to wrong data in the controller, or issues client timeout. + success.set(false); log.log(Level.INFO, "Exception caught when attempting to file an issue for '" + application.id() + "': " + Exceptions.toMessageString(e)); } }); - + return success.get(); } private ApplicationSummary summaryOf(TenantAndApplicationId application) { @@ -85,7 +88,8 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer { } /** Escalate ownership issues which have not been closed before a defined amount of time has passed. */ - private void ensureConfirmationResponses() { + private boolean ensureConfirmationResponses() { + AtomicBoolean success = new AtomicBoolean(true); for (Application application : applications()) application.ownershipIssueId().ifPresent(issueId -> { try { @@ -93,12 +97,14 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer { ownershipIssues.ensureResponse(issueId, tenant.contact()); } catch (RuntimeException e) { + success.set(false); log.log(Level.INFO, "Exception caught when attempting to escalate issue with id '" + issueId + "': " + Exceptions.toMessageString(e)); } }); + return success.get(); } - private void updateConfirmedApplicationOwners() { + private boolean updateConfirmedApplicationOwners() { applications() .withProjectId() .withProductionDeployment() @@ -112,6 +118,7 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer { controller().applications().store(lockedApplication.withOwner(owner))); }); }); + return true; } private ApplicationList applications() { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java index 4b96bd404ee..10e5431dac1 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java @@ -24,6 +24,7 @@ import java.util.stream.Collectors; * Automatically fetches and handles scheduled events from AWS: * 1. Deprovisions the affected hosts if applicable * 2. Submits an issue detailing the event if some hosts are not processed by 1. + * * @author mgimle */ public class CloudEventReporter extends ControllerMaintainer { @@ -44,8 +45,7 @@ public class CloudEventReporter extends ControllerMaintainer { } @Override - protected void maintain() { - log.log(Level.INFO, "Fetching events for cloud hosts."); + protected boolean maintain() { for (var awsRegion : zonesByCloudNativeRegion.keySet()) { List<CloudEvent> events = eventFetcher.getEvents(awsRegion); for (var event : events) { @@ -56,6 +56,7 @@ public class CloudEventReporter extends ControllerMaintainer { submitIssue(event, deprovisionedHosts); } } + return true; } private List<String> deprovisionHosts(String awsRegion, CloudEvent event) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java index 4aba8d881bf..e19f3b4f9a2 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java @@ -35,8 +35,9 @@ public class ContactInformationMaintainer extends ControllerMaintainer { } @Override - protected void maintain() { + protected boolean maintain() { TenantController tenants = controller().tenants(); + boolean success = true; for (Tenant tenant : tenants.asList()) { log.log(INFO, "Updating contact information for " + tenant); try { @@ -55,11 +56,13 @@ public class ContactInformationMaintainer extends ControllerMaintainer { throw new IllegalArgumentException("Unexpected tenant type '" + tenant.type() + "'."); } } catch (Exception e) { + success = false; log.log(Level.WARNING, "Failed to update contact information for " + tenant + ": " + Exceptions.toMessageString(e) + ". Retrying in " + interval()); } } + return success; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java index 2b7c78f96d0..76003a873fe 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java @@ -1,12 +1,16 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.maintenance; +import com.yahoo.concurrent.maintenance.JobMetrics; import com.yahoo.concurrent.maintenance.Maintainer; import com.yahoo.config.provision.SystemName; +import com.yahoo.jdisc.Metric; import com.yahoo.vespa.hosted.controller.Controller; +import java.time.Clock; import java.time.Duration; import java.util.EnumSet; +import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.logging.Logger; @@ -30,7 +34,8 @@ public abstract class ControllerMaintainer extends Maintainer { } public ControllerMaintainer(Controller controller, Duration interval, String name, Set<SystemName> activeSystems) { - super(name, interval, controller.clock().instant(), controller.jobControl(), controller.curator().cluster()); + super(name, interval, controller.clock().instant(), controller.jobControl(), + jobMetrics(controller.clock(), controller.metric()), controller.curator().cluster()); this.controller = controller; this.activeSystems = Set.copyOf(Objects.requireNonNull(activeSystems)); } @@ -43,4 +48,11 @@ public abstract class ControllerMaintainer extends Maintainer { super.run(); } + private static JobMetrics jobMetrics(Clock clock, Metric metric) { + return new JobMetrics(clock, (job, instant) -> { + Duration sinceSuccess = Duration.between(instant, clock.instant()); + metric.set("maintenance.secondsSinceSuccess", sinceSuccess.getSeconds(), metric.createContext(Map.of("job", job))); + }); + } + } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java index d028a88fb92..28b64b5bfe0 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java @@ -31,9 +31,10 @@ public class CostReportMaintainer extends ControllerMaintainer { } @Override - protected void maintain() { + protected boolean maintain() { var csv = CostCalculator.resourceShareByPropertyToCsv(nodeRepository, controller(), clock, consumer.fixedAllocations()); consumer.consume(csv); + return true; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java index bb2161bca1d..7bd2c737fcb 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java @@ -1,7 +1,6 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.maintenance; -import com.yahoo.concurrent.maintenance.JobControl; import com.yahoo.vespa.hosted.controller.Application; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.Instance; @@ -24,7 +23,8 @@ public class DeploymentExpirer extends ControllerMaintainer { } @Override - protected void maintain() { + protected boolean maintain() { + boolean success = true; for (Application application : controller().applications().readable()) for (Instance instance : application.instances().values()) for (Deployment deployment : instance.deployments().values()) { @@ -34,11 +34,13 @@ public class DeploymentExpirer extends ControllerMaintainer { log.log(Level.INFO, "Expiring deployment of " + instance.id() + " in " + deployment.zone()); controller().applications().deactivate(instance.id(), deployment.zone()); } catch (Exception e) { + success = false; log.log(Level.WARNING, "Could not expire " + deployment + " of " + instance + ": " + Exceptions.toMessageString(e) + ". Retrying in " + interval()); } } + return success; } /** Returns whether given deployment has expired according to its TTL */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java index 89f1e0fe840..a94e7407898 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java @@ -2,7 +2,6 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.component.Version; -import com.yahoo.concurrent.maintenance.JobControl; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.SystemName; import com.yahoo.vespa.hosted.controller.Application; @@ -20,6 +19,7 @@ import java.time.Duration; import java.util.Collection; import java.util.List; import java.util.Optional; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.logging.Level; import static com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence.broken; @@ -44,10 +44,10 @@ public class DeploymentIssueReporter extends ControllerMaintainer { } @Override - protected void maintain() { - maintainDeploymentIssues(applications()); - maintainPlatformIssue(applications()); - escalateInactiveDeploymentIssues(applications()); + protected boolean maintain() { + return maintainDeploymentIssues(applications()) & + maintainPlatformIssue(applications()) & + escalateInactiveDeploymentIssues(applications()); } /** Returns the applications to maintain issue status for. */ @@ -62,7 +62,7 @@ public class DeploymentIssueReporter extends ControllerMaintainer { * and store the issue id for the filed issues. Also, clear the issueIds of applications * where deployment has not failed for this amount of time. */ - private void maintainDeploymentIssues(List<Application> applications) { + private boolean maintainDeploymentIssues(List<Application> applications) { List<TenantAndApplicationId> failingApplications = controller().jobController().deploymentStatuses(ApplicationList.from(applications)) .failingApplicationChangeSince(controller().clock().instant().minus(maxFailureAge)) .mapToList(status -> status.application().id()); @@ -72,6 +72,7 @@ public class DeploymentIssueReporter extends ControllerMaintainer { fileDeploymentIssueFor(application); else store(application.id(), null); + return true; } /** @@ -79,24 +80,26 @@ public class DeploymentIssueReporter extends ControllerMaintainer { * applications that have been failing the upgrade to the system version for * longer than the set grace period, or update this list if the issue already exists. */ - private void maintainPlatformIssue(List<Application> applications) { + private boolean maintainPlatformIssue(List<Application> applications) { + boolean success = true; if (controller().system() == SystemName.cd) - return; + return success; Version systemVersion = controller().systemVersion(); if ((controller().versionStatus().version(systemVersion).confidence() != broken)) - return; + return success; DeploymentStatusList statuses = controller().jobController().deploymentStatuses(ApplicationList.from(applications)); if (statuses.failingUpgradeToVersionSince(systemVersion, controller().clock().instant().minus(upgradeGracePeriod)).isEmpty()) - return; + return success; List<ApplicationId> failingApplications = statuses.failingUpgradeToVersionSince(systemVersion, controller().clock().instant()) .mapToList(status -> status.application().id().defaultInstance()); // TODO jonmv: Send only tenant and application, here and elsewhere in this. deploymentIssues.fileUnlessOpen(failingApplications, systemVersion); + return success; } private Tenant ownerOf(TenantAndApplicationId applicationId) { @@ -121,7 +124,8 @@ public class DeploymentIssueReporter extends ControllerMaintainer { } /** Escalate issues for which there has been no activity for a certain amount of time. */ - private void escalateInactiveDeploymentIssues(Collection<Application> applications) { + private boolean escalateInactiveDeploymentIssues(Collection<Application> applications) { + AtomicBoolean success = new AtomicBoolean(true); applications.forEach(application -> application.deploymentIssueId().ifPresent(issueId -> { try { Tenant tenant = ownerOf(application.id()); @@ -130,9 +134,11 @@ public class DeploymentIssueReporter extends ControllerMaintainer { tenant.type() == Tenant.Type.athenz ? tenant.contact() : Optional.empty()); } catch (RuntimeException e) { + success.set(false); log.log(Level.INFO, "Exception caught when attempting to escalate issue with id '" + issueId + "': " + Exceptions.toMessageString(e)); } })); + return success.get(); } private void store(TenantAndApplicationId id, IssueId issueId) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java index c03be2ca1d1..c8416578932 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java @@ -1,7 +1,6 @@ // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.maintenance; -import com.yahoo.concurrent.maintenance.JobControl; import com.yahoo.config.provision.SystemName; import com.yahoo.vespa.hosted.controller.ApplicationController; import com.yahoo.vespa.hosted.controller.Controller; @@ -39,7 +38,7 @@ public class DeploymentMetricsMaintainer extends ControllerMaintainer { } @Override - protected void maintain() { + protected boolean maintain() { AtomicInteger failures = new AtomicInteger(0); AtomicInteger attempts = new AtomicInteger(0); AtomicReference<Exception> lastException = new AtomicReference<>(null); @@ -91,6 +90,7 @@ public class DeploymentMetricsMaintainer extends ControllerMaintainer { } catch (InterruptedException e) { throw new RuntimeException(e); } + return lastException.get() == null; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java index 7006458538d..7952355d5fb 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java @@ -35,8 +35,9 @@ public abstract class InfrastructureUpgrader<VERSION> extends ControllerMaintain } @Override - protected void maintain() { + protected boolean maintain() { targetVersion().ifPresent(target -> upgradeAll(target, SystemApplication.all())); + return true; } /** Deploy a list of system applications until they converge on the given version */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java index cfe9257bdf8..e0f2f0718ef 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java @@ -48,9 +48,10 @@ public class JobRunner extends ControllerMaintainer { } @Override - protected void maintain() { + protected boolean maintain() { jobs.active().forEach(this::advance); jobs.collectGarbage(); + return true; } @Override diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java index cc4a8c628eb..0c5ef123eef 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java @@ -68,12 +68,13 @@ public class MetricsReporter extends ControllerMaintainer { } @Override - public void maintain() { + public boolean maintain() { reportDeploymentMetrics(); reportRemainingRotations(); reportQueuedNameServiceRequests(); reportInfrastructureUpgradeMetrics(); reportAuditLog(); + return true; } private void reportAuditLog() { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/NameServiceDispatcher.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/NameServiceDispatcher.java index 9febc73a5a7..e223809a211 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/NameServiceDispatcher.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/NameServiceDispatcher.java @@ -38,12 +38,13 @@ public class NameServiceDispatcher extends ControllerMaintainer { } @Override - protected void maintain() { + protected boolean maintain() { + boolean success = true; try (var lock = db.lockNameServiceQueue()) { var queue = db.readNameServiceQueue(); var instant = clock.instant(); var remaining = queue.dispatchTo(nameService, requestCount); - if (queue == remaining) return; // Queue unchanged + if (queue == remaining) return success; // Queue unchanged var dispatched = queue.first(requestCount); if (!dispatched.requests().isEmpty()) { @@ -53,6 +54,7 @@ public class NameServiceDispatcher extends ControllerMaintainer { } db.writeNameServiceQueue(remaining); } + return success; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java index a62b1745145..20febfaea1d 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java @@ -1,7 +1,6 @@ // Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.maintenance; -import com.yahoo.concurrent.maintenance.JobControl; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.versions.OsVersionStatus; import com.yahoo.yolean.Exceptions; @@ -19,14 +18,16 @@ public class OsVersionStatusUpdater extends ControllerMaintainer { } @Override - protected void maintain() { + protected boolean maintain() { try { OsVersionStatus newStatus = OsVersionStatus.compute(controller()); controller().updateOsVersionStatus(newStatus); + return true; } catch (Exception e) { log.log(Level.WARNING, "Failed to compute version status: " + Exceptions.toMessageString(e) + ". Retrying in " + interval()); } + return false; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java index 5dd62251759..a032f266de5 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java @@ -19,12 +19,13 @@ public class OutstandingChangeDeployer extends ControllerMaintainer { } @Override - protected void maintain() { + protected boolean maintain() { for (Application application : ApplicationList.from(controller().applications().readable()) .withProductionDeployment() .withDeploymentSpec() .asList()) controller().applications().deploymentTrigger().triggerNewRevision(application.id()); + return true; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java index 32b65f05cac..a626f21359a 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java @@ -1,7 +1,6 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.maintenance; -import com.yahoo.concurrent.maintenance.JobControl; import com.yahoo.vespa.hosted.controller.Controller; import java.time.Duration; @@ -18,8 +17,9 @@ public class ReadyJobsTrigger extends ControllerMaintainer { } @Override - public void maintain() { + public boolean maintain() { controller().applications().deploymentTrigger().triggerReadyJobs(); + return true; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java index 76a186a2f6b..f460561df08 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java @@ -50,13 +50,15 @@ public class ResourceMeterMaintainer extends ControllerMaintainer { } @Override - protected void maintain() { + protected boolean maintain() { try { collectResourceSnapshots(); + return true; } catch (Exception e) { log.log(Level.WARNING, "Failed to collect resource snapshots. Retrying in " + interval() + ". Error: " + Exceptions.toMessageString(e)); } + return false; } private void collectResourceSnapshots() { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java index 31434de472d..863302223ac 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java @@ -1,7 +1,6 @@ // Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.maintenance; -import com.yahoo.concurrent.maintenance.JobControl; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.CloudName; import com.yahoo.config.provision.HostName; @@ -27,7 +26,7 @@ public class ResourceTagMaintainer extends ControllerMaintainer { } @Override - public void maintain() { + public boolean maintain() { controller().zoneRegistry().zones() .ofCloud(CloudName.from("aws")) .reachable() @@ -37,8 +36,7 @@ public class ResourceTagMaintainer extends ControllerMaintainer { if (taggedResources > 0) log.log(Level.INFO, "Tagged " + taggedResources + " resources in " + zone.getId()); }); - - + return true; } private Map<HostName, ApplicationId> getTenantOfParentHosts(ZoneId zoneId) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/RotationStatusUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/RotationStatusUpdater.java index 245747a882f..935bcbec597 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/RotationStatusUpdater.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/RotationStatusUpdater.java @@ -41,7 +41,7 @@ public class RotationStatusUpdater extends ControllerMaintainer { } @Override - protected void maintain() { + protected boolean maintain() { var failures = new AtomicInteger(0); var attempts = new AtomicInteger(0); var lastException = new AtomicReference<Exception>(null); @@ -78,6 +78,7 @@ public class RotationStatusUpdater extends ControllerMaintainer { } catch (InterruptedException e) { throw new RuntimeException(e); } + return lastException.get() == null; } private RotationStatus getStatus(Instance instance) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainer.java index 0fe6f7e0bfb..3b0a1fca4af 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainer.java @@ -21,13 +21,14 @@ public class SystemRoutingPolicyMaintainer extends ControllerMaintainer { } @Override - protected void maintain() { + protected boolean maintain() { for (var zone : controller().zoneRegistry().zones().all().ids()) { for (var application : SystemApplication.values()) { if (!application.hasEndpoint()) continue; controller().routing().policies().refresh(application.id(), DeploymentSpec.empty, zone); } } + return true; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java index 5f0f2e4ba4e..9ab2b0e77e8 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java @@ -51,7 +51,7 @@ public class Upgrader extends ControllerMaintainer { * Schedule application upgrades. Note that this implementation must be idempotent. */ @Override - public void maintain() { + public boolean maintain() { // Determine target versions for each upgrade policy Version canaryTarget = controller().systemVersion(); Collection<Version> defaultTargets = targetVersions(Confidence.normal); @@ -89,6 +89,7 @@ public class Upgrader extends ControllerMaintainer { upgrade(instances.with(UpgradePolicy.canary), canaryTarget, instances.size()); defaultTargets.forEach(target -> upgrade(instances.with(UpgradePolicy.defaultPolicy), target, numberOfApplicationsToUpgrade())); conservativeTargets.forEach(target -> upgrade(instances.with(UpgradePolicy.conservative), target, numberOfApplicationsToUpgrade())); + return true; } /** Returns the target versions for given confidence, one per major version in the system */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java index d8b74a4ae99..a3e9672b715 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java @@ -29,7 +29,7 @@ public class VersionStatusUpdater extends ControllerMaintainer { } @Override - protected void maintain() { + protected boolean maintain() { try { VersionStatus newStatus = VersionStatus.compute(controller()); controller().updateVersionStatus(newStatus); @@ -37,10 +37,12 @@ public class VersionStatusUpdater extends ControllerMaintainer { controller().serviceRegistry().systemMonitor().reportSystemVersion(version.versionNumber(), convert(version.confidence())); }); + return true; } catch (Exception e) { log.log(Level.WARNING, "Failed to compute version status: " + Exceptions.toMessageString(e) + ". Retrying in " + interval()); } + return false; } static SystemMonitor.Confidence convert(VespaVersion.Confidence confidence) { diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java index 1151fdd07f0..4218e66703f 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.hosted.controller.maintenance; import com.yahoo.config.provision.SystemName; import com.yahoo.vespa.hosted.controller.ControllerTester; +import com.yahoo.vespa.hosted.controller.integration.MetricsMock; import org.junit.Before; import org.junit.Test; @@ -32,12 +33,21 @@ public class ControllerMaintainerTest { assertEquals(1, executions.get()); } + @Test + public void records_metric() { + maintainerIn(SystemName.main, new AtomicInteger()).run(); + MetricsMock metrics = (MetricsMock) tester.controller().metric(); + assertEquals(0L, metrics.getMetric((context) -> "MockMaintainer".equals(context.get("job")), + "maintenance.secondsSinceSuccess").get()); + } + private ControllerMaintainer maintainerIn(SystemName system, AtomicInteger executions) { return new ControllerMaintainer(tester.controller(), Duration.ofDays(1), "MockMaintainer", EnumSet.of(system)) { @Override - protected void maintain() { + protected boolean maintain() { executions.incrementAndGet(); + return true; } }; } |