diff options
67 files changed, 255 insertions, 378 deletions
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ApplicationPackageMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ApplicationPackageMaintainer.java index 003b4fbb345..5519ffc1bdc 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ApplicationPackageMaintainer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ApplicationPackageMaintainer.java @@ -48,9 +48,8 @@ public class ApplicationPackageMaintainer extends ConfigServerMaintainer { } @Override - protected double maintain() { - int attempts = 0; - int failures = 0; + protected boolean maintain() { + boolean success = true; try (var fileDownloader = new FileDownloader(connectionPool, downloadDirectory)) { for (var applicationId : applicationRepository.listApplications()) { @@ -63,12 +62,11 @@ public class ApplicationPackageMaintainer extends ConfigServerMaintainer { log.fine(() -> "Verifying application package file reference " + applicationPackage + " for session " + sessionId); if (applicationPackage != null) { - attempts++; if (! fileReferenceExistsOnDisk(downloadDirectory, applicationPackage)) { log.fine(() -> "Downloading missing application package for application " + applicationId + " - session " + sessionId); if (fileDownloader.getFile(applicationPackage).isEmpty()) { - failures++; + success = false; log.warning("Failed to download application package for application " + applicationId + " - session " + sessionId); continue; } @@ -77,7 +75,7 @@ public class ApplicationPackageMaintainer extends ConfigServerMaintainer { } } } - return asSuccessFactor(attempts, failures); + return success; } @Override diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java index e0f0a4b4099..4938f34131e 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ConfigServerMaintainer.java @@ -35,24 +35,14 @@ public abstract class ConfigServerMaintainer extends Maintainer { ConfigServerMaintainer(ApplicationRepository applicationRepository, Curator curator, FlagSource flagSource, Instant now, Duration interval) { super(null, interval, now, new JobControl(new JobControlFlags(curator, flagSource)), - new ConfigServerJobMetrics(applicationRepository.metric()), cluster(curator), false); + jobMetrics(applicationRepository.metric()), cluster(curator), false); this.applicationRepository = applicationRepository; } - private static class ConfigServerJobMetrics extends JobMetrics { - - private final Metric metric; - - public ConfigServerJobMetrics(Metric metric) { - this.metric = metric; - } - - @Override - protected void recordCompletion(String job, Long consecutiveFailures, double successFactor) { + private static JobMetrics jobMetrics(Metric metric) { + return new JobMetrics((job, consecutiveFailures) -> { metric.set("maintenance.consecutiveFailures", consecutiveFailures, metric.createContext(Map.of("job", job))); - metric.set("maintenance.successFactor", successFactor, metric.createContext(Map.of("job", job))); - } - + }); } private static class JobControlFlags implements JobControlState { diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/FileDistributionMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/FileDistributionMaintainer.java index ca8db30c21f..b0876fb57e8 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/FileDistributionMaintainer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/FileDistributionMaintainer.java @@ -33,9 +33,9 @@ public class FileDistributionMaintainer extends ConfigServerMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { applicationRepository.deleteUnusedFiledistributionReferences(fileReferencesDir, maxUnusedFileReferenceAge); - return 1.0; + return true; } } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java index af9ea917aaf..971c2c20ae9 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/ReindexingMaintainer.java @@ -22,7 +22,6 @@ import java.util.Comparator; import java.util.Map; import java.util.Optional; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Supplier; import java.util.logging.Level; @@ -52,9 +51,8 @@ public class ReindexingMaintainer extends ConfigServerMaintainer { } @Override - protected double maintain() { - AtomicInteger attempts = new AtomicInteger(0); - AtomicInteger failures = new AtomicInteger(0); + protected boolean maintain() { + AtomicBoolean success = new AtomicBoolean(true); for (Tenant tenant : applicationRepository.tenantRepository().getAllTenants()) { ApplicationCuratorDatabase database = tenant.getApplicationRepo().database(); for (ApplicationId id : database.activeApplications()) @@ -62,7 +60,6 @@ public class ReindexingMaintainer extends ConfigServerMaintainer { .map(application -> application.getForVersionOrLatest(Optional.empty(), clock.instant())) .ifPresent(application -> { try { - attempts.incrementAndGet(); applicationRepository.modifyReindexing(id, reindexing -> { reindexing = withNewReady(reindexing, lazyGeneration(application), clock.instant()); reindexing = withOnlyCurrentData(reindexing, application); @@ -71,11 +68,11 @@ public class ReindexingMaintainer extends ConfigServerMaintainer { } catch (RuntimeException e) { log.log(Level.INFO, "Failed to update reindexing status for " + id + ": " + Exceptions.toMessageString(e)); - failures.incrementAndGet(); + success.set(false); } }); } - return asSuccessFactor(attempts.get(), failures.get()); + return success.get(); } private Supplier<Long> lazyGeneration(Application application) { diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java index 1f85dd4579d..7482980e221 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/SessionsMaintainer.java @@ -25,7 +25,7 @@ public class SessionsMaintainer extends ConfigServerMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { if (iteration % 10 == 0) log.log(Level.INFO, () -> "Running " + SessionsMaintainer.class.getSimpleName() + ", iteration " + iteration); @@ -38,7 +38,7 @@ public class SessionsMaintainer extends ConfigServerMaintainer { } iteration++; - return 1.0; + return true; } } diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/TenantsMaintainer.java b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/TenantsMaintainer.java index 0a7df2c9d21..7c01045ee72 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/TenantsMaintainer.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/maintenance/TenantsMaintainer.java @@ -31,12 +31,12 @@ public class TenantsMaintainer extends ConfigServerMaintainer { } @Override - protected double maintain() { - if ( ! applicationRepository.configserverConfig().hostedVespa()) return 1.0; + protected boolean maintain() { + if ( ! applicationRepository.configserverConfig().hostedVespa()) return true; Set<TenantName> tenants = applicationRepository.deleteUnusedTenants(ttlForUnusedTenant, clock.instant()); if (tenants.size() > 0) log.log(Level.INFO, "Deleted tenants " + tenants); - return 1.0; + return true; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationMetaDataGarbageCollector.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationMetaDataGarbageCollector.java index 9ec8e4d1a2d..7d94a4c728f 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationMetaDataGarbageCollector.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationMetaDataGarbageCollector.java @@ -19,14 +19,14 @@ public class ApplicationMetaDataGarbageCollector extends ControllerMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { try { controller().applications().applicationStore().pruneMeta(controller().clock().instant().minus(Duration.ofDays(365))); - return 1.0; + return true; } catch (Exception e) { log.log(Level.WARNING, "Exception pruning old application meta data", e); - return 0.0; + return false; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java index 69e0eb26f16..1f20e48edf5 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ApplicationOwnershipConfirmer.java @@ -18,7 +18,6 @@ import com.yahoo.yolean.Exceptions; import java.time.Duration; import java.util.HashMap; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; import java.util.logging.Level; /** @@ -40,17 +39,15 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer { } @Override - protected double maintain() { - return ( confirmApplicationOwnerships() + - ensureConfirmationResponses() + - updateConfirmedApplicationOwners() ) - / 3; + protected boolean maintain() { + return confirmApplicationOwnerships() & + ensureConfirmationResponses() & + updateConfirmedApplicationOwners(); } /** File an ownership issue with the owners of all applications we know about. */ - private double confirmApplicationOwnerships() { - AtomicInteger attempts = new AtomicInteger(0); - AtomicInteger failures = new AtomicInteger(0); + private boolean confirmApplicationOwnerships() { + AtomicBoolean success = new AtomicBoolean(true); applications() .withProjectId() .withProductionDeployment() @@ -59,7 +56,6 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer { .filter(application -> application.createdAt().isBefore(controller().clock().instant().minus(Duration.ofDays(90)))) .forEach(application -> { try { - attempts.incrementAndGet(); // TODO jvenstad: Makes sense to require, and run this only in main? tenantOf(application.id()).contact().flatMap(contact -> { return ownershipIssues.confirmOwnership(application.ownershipIssueId(), @@ -69,17 +65,17 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer { }).ifPresent(newIssueId -> store(newIssueId, application.id())); } catch (RuntimeException e) { // Catch errors due to wrong data in the controller, or issues client timeout. - failures.incrementAndGet(); + success.set(false); log.log(Level.INFO, "Exception caught when attempting to file an issue for '" + application.id() + "': " + Exceptions.toMessageString(e)); } }); - return asSuccessFactor(attempts.get(), failures.get()); + return success.get(); } private ApplicationSummary summaryOf(TenantAndApplicationId application) { var app = applications.requireApplication(application); var metrics = new HashMap<ZoneId, ApplicationSummary.Metric>(); - for (Instance instance : app.instances().values()) { + for (Instance instance : app.instances().values()) for (var kv : instance.deployments().entrySet()) { var zone = kv.getKey(); var deploymentMetrics = kv.getValue().metrics(); @@ -87,31 +83,28 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer { deploymentMetrics.queriesPerSecond(), deploymentMetrics.writesPerSecond())); } - } return new ApplicationSummary(app.id().defaultInstance(), app.activity().lastQueried(), app.activity().lastWritten(), app.latestVersion().flatMap(version -> version.buildTime()), metrics); } /** Escalate ownership issues which have not been closed before a defined amount of time has passed. */ - private double ensureConfirmationResponses() { - AtomicInteger attempts = new AtomicInteger(0); - AtomicInteger failures = new AtomicInteger(0); + private boolean ensureConfirmationResponses() { + AtomicBoolean success = new AtomicBoolean(true); for (Application application : applications()) application.ownershipIssueId().ifPresent(issueId -> { try { - attempts.incrementAndGet(); Tenant tenant = tenantOf(application.id()); ownershipIssues.ensureResponse(issueId, tenant.contact()); } catch (RuntimeException e) { - failures.incrementAndGet(); + success.set(false); log.log(Level.INFO, "Exception caught when attempting to escalate issue with id '" + issueId + "': " + Exceptions.toMessageString(e)); } }); - return asSuccessFactor(attempts.get(), failures.get()); + return success.get(); } - private double updateConfirmedApplicationOwners() { + private boolean updateConfirmedApplicationOwners() { applications() .withProjectId() .withProductionDeployment() @@ -125,7 +118,7 @@ public class ApplicationOwnershipConfirmer extends ControllerMaintainer { controller().applications().store(lockedApplication.withOwner(owner))); }); }); - return 1.0; + return true; } private ApplicationList applications() { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveAccessMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveAccessMaintainer.java index b096a853541..1a9889284e1 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveAccessMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveAccessMaintainer.java @@ -37,7 +37,8 @@ public class ArchiveAccessMaintainer extends ControllerMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { + // Count buckets - so we can alert if we get close to the account limit of 1000 zoneRegistry.zones().all().ids().forEach(zoneId -> metric.set(bucketCountMetricName, archiveBucketDb.buckets(zoneId).size(), @@ -58,7 +59,6 @@ public class ArchiveAccessMaintainer extends ControllerMaintainer { ) ); - return 1.0; + return true; } - } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java index ab8e5efa0bd..d2141b097b3 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ArchiveUriUpdater.java @@ -38,7 +38,7 @@ public class ArchiveUriUpdater extends ControllerMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { Map<ZoneId, Set<TenantName>> tenantsByZone = new HashMap<>(); for (var application : applications.asList()) { for (var instance : application.instances().values()) { @@ -63,7 +63,7 @@ public class ArchiveUriUpdater extends ControllerMaintainer { .forEach(tenant -> nodeRepository.removeArchiveUri(zone, tenant)); }); - return 1.0; + return true; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java index 14e3e685a8a..1f360c477b9 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java @@ -43,14 +43,14 @@ public class ChangeRequestMaintainer extends ControllerMaintainer { @Override - protected double maintain() { + protected boolean maintain() { var currentChangeRequests = pruneOldChangeRequests(); var changeRequests = changeRequestClient.getChangeRequests(currentChangeRequests); logger.fine(() -> "Found requests: " + changeRequests); storeChangeRequests(changeRequests); - return 1.0; + return true; } private void storeChangeRequests(List<ChangeRequest> changeRequests) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java index 5acd0c63670..d923db936cb 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java @@ -38,7 +38,7 @@ public class CloudEventReporter extends ControllerMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { for (var region : zonesByCloudNativeRegion.keySet()) { List<CloudEvent> events = eventFetcher.getEvents(region); for (var event : events) { @@ -48,7 +48,7 @@ public class CloudEventReporter extends ControllerMaintainer { deprovisionAffectedHosts(region, event); } } - return 1.0; + return true; } /** Deprovision any host affected by given event */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java index 5ee39f7c8f2..7b846fa288c 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContactInformationMaintainer.java @@ -35,14 +35,12 @@ public class ContactInformationMaintainer extends ControllerMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { TenantController tenants = controller().tenants(); - int attempts = 0; - int failures = 0; + boolean success = true; for (Tenant tenant : tenants.asList()) { log.log(FINE, () -> "Updating contact information for " + tenant); try { - attempts++; switch (tenant.type()) { case athenz: tenants.lockIfPresent(tenant.name(), LockedTenant.Athenz.class, lockedTenant -> { @@ -58,13 +56,13 @@ public class ContactInformationMaintainer extends ControllerMaintainer { throw new IllegalArgumentException("Unexpected tenant type '" + tenant.type() + "'."); } } catch (Exception e) { - failures++; + success = false; log.log(Level.WARNING, "Failed to update contact information for " + tenant + ": " + Exceptions.toMessageString(e) + ". Retrying in " + interval()); } } - return asSuccessFactor(attempts, failures); + return success; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContainerImageExpirer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContainerImageExpirer.java index f1574381a3d..ff5fc4d2051 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContainerImageExpirer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ContainerImageExpirer.java @@ -34,7 +34,7 @@ public class ContainerImageExpirer extends ControllerMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { Instant now = controller().clock().instant(); VersionStatus versionStatus = controller().readVersionStatus(); List<ContainerImage> imagesToExpire = controller().serviceRegistry().containerRegistry().list().stream() @@ -44,7 +44,7 @@ public class ContainerImageExpirer extends ControllerMaintainer { log.log(Level.INFO, "Expiring " + imagesToExpire.size() + " container images: " + imagesToExpire); controller().serviceRegistry().containerRegistry().deleteAll(imagesToExpire); } - return 1.0; + return true; } /** Returns whether given image is expired */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java index 810c412fcc0..03a6268397e 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainer.java @@ -34,7 +34,7 @@ public abstract class ControllerMaintainer extends Maintainer { public ControllerMaintainer(Controller controller, Duration interval, String name, Set<SystemName> activeSystems) { super(name, interval, controller.clock().instant(), controller.jobControl(), - new ControllerJobMetrics(controller.metric()), controller.curator().cluster(), true); + jobMetrics(controller.metric()), controller.curator().cluster(), true); this.controller = controller; this.activeSystems = Set.copyOf(Objects.requireNonNull(activeSystems)); } @@ -47,20 +47,10 @@ public abstract class ControllerMaintainer extends Maintainer { super.run(); } - private static class ControllerJobMetrics extends JobMetrics { - - private final Metric metric; - - public ControllerJobMetrics(Metric metric) { - this.metric = metric; - } - - @Override - protected void recordCompletion(String job, Long incompleteRuns, double successFactor) { - metric.set("maintenance.consecutiveFailures", incompleteRuns, metric.createContext(Map.of("job", job))); - metric.set("maintenance.successFactor", successFactor, metric.createContext(Map.of("job", job))); - } - + private static JobMetrics jobMetrics(Metric metric) { + return new JobMetrics((job, consecutiveFailures) -> { + metric.set("maintenance.consecutiveFailures", consecutiveFailures, metric.createContext(Map.of("job", job))); + }); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java index 21cda09d92a..28b64b5bfe0 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CostReportMaintainer.java @@ -31,10 +31,10 @@ public class CostReportMaintainer extends ControllerMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { var csv = CostCalculator.resourceShareByPropertyToCsv(nodeRepository, controller(), clock, consumer.fixedAllocations()); consumer.consume(csv); - return 1.0; + return true; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java index 9e3da506ca8..e5316788802 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentExpirer.java @@ -28,9 +28,8 @@ public class DeploymentExpirer extends ControllerMaintainer { } @Override - protected double maintain() { - int attempts = 0; - int failures = 0; + protected boolean maintain() { + boolean success = true; for (Application application : controller().applications().readable()) { for (Instance instance : application.instances().values()) for (Deployment deployment : instance.deployments().values()) { @@ -38,17 +37,16 @@ public class DeploymentExpirer extends ControllerMaintainer { try { log.log(Level.INFO, "Expiring deployment of " + instance.id() + " in " + deployment.zone()); - attempts++; controller().applications().deactivate(instance.id(), deployment.zone()); } catch (Exception e) { - failures++; + success = false; log.log(Level.WARNING, "Could not expire " + deployment + " of " + instance + ": " + Exceptions.toMessageString(e) + ". Retrying in " + interval()); } } } - return asSuccessFactor(attempts, failures); + return success; } /** Returns whether given deployment has expired according to its TTL */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java index 4e53e07f5af..a3070ef55a0 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentIssueReporter.java @@ -21,7 +21,6 @@ import java.util.Collection; import java.util.List; import java.util.Optional; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; import java.util.logging.Level; import static com.yahoo.vespa.hosted.controller.versions.VespaVersion.Confidence.broken; @@ -46,11 +45,10 @@ public class DeploymentIssueReporter extends ControllerMaintainer { } @Override - protected double maintain() { - return ( maintainDeploymentIssues(applications()) + - maintainPlatformIssue(applications()) + - escalateInactiveDeploymentIssues(applications())) - / 3; + protected boolean maintain() { + return maintainDeploymentIssues(applications()) & + maintainPlatformIssue(applications()) & + escalateInactiveDeploymentIssues(applications()); } /** Returns the applications to maintain issue status for. */ @@ -65,7 +63,7 @@ public class DeploymentIssueReporter extends ControllerMaintainer { * and store the issue id for the filed issues. Also, clear the issueIds of applications * where deployment has not failed for this amount of time. */ - private double maintainDeploymentIssues(List<Application> applications) { + private boolean maintainDeploymentIssues(List<Application> applications) { List<TenantAndApplicationId> failingApplications = controller().jobController().deploymentStatuses(ApplicationList.from(applications)) .failingApplicationChangeSince(controller().clock().instant().minus(maxFailureAge)) .mapToList(status -> status.application().id()); @@ -75,7 +73,7 @@ public class DeploymentIssueReporter extends ControllerMaintainer { fileDeploymentIssueFor(application); else store(application.id(), null); - return 1.0; + return true; } /** @@ -83,26 +81,27 @@ public class DeploymentIssueReporter extends ControllerMaintainer { * applications that have been failing the upgrade to the system version for * longer than the set grace period, or update this list if the issue already exists. */ - private double maintainPlatformIssue(List<Application> applications) { + private boolean maintainPlatformIssue(List<Application> applications) { + boolean success = true; if (controller().system() == SystemName.cd) - return 1.0; + return success; VersionStatus versionStatus = controller().readVersionStatus(); Version systemVersion = controller().systemVersion(versionStatus); if (versionStatus.version(systemVersion).confidence() != broken) - return 1.0; + return success; DeploymentStatusList statuses = controller().jobController().deploymentStatuses(ApplicationList.from(applications)); if (statuses.failingUpgradeToVersionSince(systemVersion, controller().clock().instant().minus(upgradeGracePeriod)).isEmpty()) - return 1.0; + return success; List<ApplicationId> failingApplications = statuses.failingUpgradeToVersionSince(systemVersion, controller().clock().instant()) .mapToList(status -> status.application().id().defaultInstance()); // TODO jonmv: Send only tenant and application, here and elsewhere in this. deploymentIssues.fileUnlessOpen(failingApplications, systemVersion); - return 1.0; + return success; } private Tenant ownerOf(TenantAndApplicationId applicationId) { @@ -127,23 +126,21 @@ public class DeploymentIssueReporter extends ControllerMaintainer { } /** Escalate issues for which there has been no activity for a certain amount of time. */ - private double escalateInactiveDeploymentIssues(Collection<Application> applications) { - AtomicInteger attempts = new AtomicInteger(0); - AtomicInteger failures = new AtomicInteger(0); + private boolean escalateInactiveDeploymentIssues(Collection<Application> applications) { + AtomicBoolean success = new AtomicBoolean(true); applications.forEach(application -> application.deploymentIssueId().ifPresent(issueId -> { try { - attempts.incrementAndGet(); Tenant tenant = ownerOf(application.id()); deploymentIssues.escalateIfInactive(issueId, maxInactivity, tenant.type() == Tenant.Type.athenz ? tenant.contact() : Optional.empty()); } catch (RuntimeException e) { - failures.incrementAndGet(); + success.set(false); log.log(Level.INFO, "Exception caught when attempting to escalate issue with id '" + issueId + "': " + Exceptions.toMessageString(e)); } })); - return asSuccessFactor(attempts.get(), failures.get()); + return success.get(); } private void store(TenantAndApplicationId id, IssueId issueId) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java index 20154c4f122..a8214ac8a09 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainer.java @@ -44,7 +44,7 @@ public class DeploymentMetricsMaintainer extends ControllerMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { AtomicInteger failures = new AtomicInteger(0); AtomicInteger attempts = new AtomicInteger(0); AtomicReference<Exception> lastException = new AtomicReference<>(null); @@ -92,7 +92,7 @@ public class DeploymentMetricsMaintainer extends ControllerMaintainer { } catch (InterruptedException e) { throw new RuntimeException(e); } - return asSuccessFactor(attempts.get(), failures.get()); + return lastException.get() == null; } static DeploymentMetrics updateDeploymentMetrics(DeploymentMetrics current, List<ClusterMetrics> metrics) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java index 85a69b0f338..55a957f0247 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainer.java @@ -54,7 +54,7 @@ public class EndpointCertificateMaintainer extends ControllerMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { try { // In order of importance deployRefreshedCertificates(); @@ -62,10 +62,10 @@ public class EndpointCertificateMaintainer extends ControllerMaintainer { deleteUnusedCertificates(); } catch (Exception e) { log.log(LogLevel.ERROR, "Exception caught while maintaining endpoint certificates", e); - return 0.0; + return false; } - return 1.0; + return true; } private void updateRefreshedCertificates() { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdater.java index 10e6f9eb039..83ccda422e6 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdater.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdater.java @@ -38,7 +38,7 @@ public class HostInfoUpdater extends ControllerMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { Map<String, NodeEntity> nodeEntities = controller().serviceRegistry().entityService().listNodes().stream() .collect(Collectors.toMap(NodeEntity::hostname, Function.identity())); @@ -62,7 +62,7 @@ public class HostInfoUpdater extends ControllerMaintainer { LOG.info("Updated information for " + hostsUpdated + " hosts(s)"); } } - return 1.0; + return true; } private static Optional<String> buildModelName(NodeEntity nodeEntity) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java index 5101de73a33..9859d12510a 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/InfrastructureUpgrader.java @@ -39,28 +39,23 @@ public abstract class InfrastructureUpgrader<VERSION> extends ControllerMaintain } @Override - protected double maintain() { - if (targetVersion().isEmpty()) return 1.0; - return upgradeAll(targetVersion().get(), managedApplications); + protected boolean maintain() { + targetVersion().ifPresent(target -> upgradeAll(target, managedApplications)); + return true; } /** Deploy a list of system applications until they converge on the given version */ - private double upgradeAll(VERSION target, List<SystemApplication> applications) { - int attempts = 0; - int failures = 0; + private void upgradeAll(VERSION target, List<SystemApplication> applications) { for (List<ZoneApi> zones : upgradePolicy.asList()) { boolean converged = true; for (ZoneApi zone : zones) { try { - attempts++; converged &= upgradeAll(target, applications, zone); } catch (UnreachableNodeRepositoryException e) { - failures++; converged = false; log.warning(String.format("%s: Failed to communicate with node repository in %s, continuing with next parallel zone: %s", this, zone, Exceptions.toMessageString(e))); } catch (Exception e) { - failures++; converged = false; log.warning(String.format("%s: Failed to upgrade zone: %s, continuing with next parallel zone: %s", this, zone, Exceptions.toMessageString(e))); @@ -70,7 +65,6 @@ public abstract class InfrastructureUpgrader<VERSION> extends ControllerMaintain break; } } - return asSuccessFactor(attempts, failures); } /** Returns whether all applications have converged to the target version in zone */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java index 25207b733f0..b84cfe5af9b 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/JobRunner.java @@ -49,10 +49,10 @@ public class JobRunner extends ControllerMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { jobs.active().forEach(this::advance); jobs.collectGarbage(); - return 1.0; + return true; } @Override diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java index 3f65c2e49cd..b26b94f0b28 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporter.java @@ -73,7 +73,7 @@ public class MetricsReporter extends ControllerMaintainer { } @Override - public double maintain() { + public boolean maintain() { reportDeploymentMetrics(); reportRemainingRotations(); reportQueuedNameServiceRequests(); @@ -82,7 +82,7 @@ public class MetricsReporter extends ControllerMaintainer { reportAuditLog(); reportBrokenSystemVersion(versionStatus); reportTenantMetrics(); - return 1.0; + return true; } private void reportBrokenSystemVersion(VersionStatus versionStatus) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/NameServiceDispatcher.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/NameServiceDispatcher.java index fe20db00e05..e57affdc15d 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/NameServiceDispatcher.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/NameServiceDispatcher.java @@ -37,12 +37,13 @@ public class NameServiceDispatcher extends ControllerMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { + boolean success = true; try (var lock = db.lockNameServiceQueue()) { var queue = db.readNameServiceQueue(); var instant = clock.instant(); var remaining = queue.dispatchTo(nameService, requestCount); - if (queue == remaining) return 1.0; // Queue unchanged + if (queue == remaining) return success; // Queue unchanged var dispatched = queue.first(requestCount); if (!dispatched.requests().isEmpty()) { @@ -53,7 +54,7 @@ public class NameServiceDispatcher extends ControllerMaintainer { } db.writeNameServiceQueue(remaining); } - return 1.0; + return success; } private static int requestCount(SystemName system) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java index 666d1c3b23a..e1618f05a7d 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsUpgradeScheduler.java @@ -42,13 +42,13 @@ public class OsUpgradeScheduler extends ControllerMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { for (var cloud : supportedClouds()) { Optional<Version> newTarget = newTargetIn(cloud); if (newTarget.isEmpty()) continue; controller().upgradeOsIn(cloud, newTarget.get(), upgradeBudget(), false); } - return 1.0; + return true; } /** Returns the new target version for given cloud, if any */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java index 271dd277e1c..cbd9207fda4 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OsVersionStatusUpdater.java @@ -18,16 +18,16 @@ public class OsVersionStatusUpdater extends ControllerMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { try { OsVersionStatus newStatus = OsVersionStatus.compute(controller()); controller().updateOsVersionStatus(newStatus); - return 1.0; + return true; } catch (Exception e) { log.log(Level.WARNING, "Failed to compute OS version status: " + Exceptions.toMessageString(e) + ". Retrying in " + interval()); } - return 0.0; + return false; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java index 9d93ac719b7..a032f266de5 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/OutstandingChangeDeployer.java @@ -19,13 +19,13 @@ public class OutstandingChangeDeployer extends ControllerMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { for (Application application : ApplicationList.from(controller().applications().readable()) .withProductionDeployment() .withDeploymentSpec() .asList()) controller().applications().deploymentTrigger().triggerNewRevision(application.id()); - return 1.0; + return true; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java index ffe958cb63a..a626f21359a 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReadyJobsTrigger.java @@ -17,9 +17,9 @@ public class ReadyJobsTrigger extends ControllerMaintainer { } @Override - public double maintain() { + public boolean maintain() { controller().applications().deploymentTrigger().triggerReadyJobs(); - return 1.0; + return true; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java index 0bd74c844ae..263a33cf266 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ReindexingTriggerer.java @@ -40,7 +40,7 @@ public class ReindexingTriggerer extends ControllerMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { try { Instant now = controller().clock().instant(); for (Application application : controller().applications().asList()) @@ -51,11 +51,11 @@ public class ReindexingTriggerer extends ControllerMaintainer { && reindexingIsReady(controller().applications().applicationReindexing(id, deployment.zone()), now)) controller().applications().reindex(id, deployment.zone(), List.of(), List.of(), true); }); - return 1.0; + return true; } catch (RuntimeException e) { log.log(Level.WARNING, "Failed to trigger reindexing: " + Exceptions.toMessageString(e)); - return 0.0; + return false; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java index 39ad233ce46..aed2e637e4b 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java @@ -79,19 +79,19 @@ public class ResourceMeterMaintainer extends ControllerMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { Collection<ResourceSnapshot> resourceSnapshots; try { resourceSnapshots = getAllResourceSnapshots(); } catch (Exception e) { log.log(Level.WARNING, "Failed to collect resource snapshots. Retrying in " + interval() + ". Error: " + Exceptions.toMessageString(e)); - return 0.0; + return false; } if (systemName.isPublic()) reportResourceSnapshots(resourceSnapshots); updateDeploymentCost(resourceSnapshots); - return 1.0; + return true; } void updateDeploymentCost(Collection<ResourceSnapshot> resourceSnapshots) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java index ab988bcf0ac..c7bf7e765ed 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java @@ -28,7 +28,7 @@ public class ResourceTagMaintainer extends ControllerMaintainer { } @Override - public double maintain() { + public boolean maintain() { controller().zoneRegistry().zones() .ofCloud(CloudName.from("aws")) .reachable() @@ -38,7 +38,7 @@ public class ResourceTagMaintainer extends ControllerMaintainer { if (taggedResources > 0) log.log(Level.INFO, "Tagged " + taggedResources + " resources in " + zone.getId()); }); - return 1.0; + return true; } private Map<HostName, Optional<ApplicationId>> getTenantOfParentHosts(ZoneId zoneId) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainer.java index e40d772a673..3b0a1fca4af 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainer.java @@ -21,14 +21,14 @@ public class SystemRoutingPolicyMaintainer extends ControllerMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { for (var zone : controller().zoneRegistry().zones().all().ids()) { for (var application : SystemApplication.values()) { if (!application.hasEndpoint()) continue; controller().routing().policies().refresh(application.id(), DeploymentSpec.empty, zone); } } - return 1.0; + return true; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java index 637ae10bcc6..1265d687850 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TenantRoleMaintainer.java @@ -23,7 +23,7 @@ public class TenantRoleMaintainer extends ControllerMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { var roleService = controller().serviceRegistry().roleService(); var tenants = controller().tenants().asList(); var tenantsWithRoles = tenants.stream() @@ -31,7 +31,7 @@ public class TenantRoleMaintainer extends ControllerMaintainer { .filter(this::hasProductionDeployment) .collect(Collectors.toList()); roleService.maintainRoles(tenantsWithRoles); - return 1.0; + return true; } private boolean hasProductionDeployment(TenantName tenant) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java index 0af0d01478b..fbe9faa9754 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdater.java @@ -36,34 +36,30 @@ public class TrafficShareUpdater extends ControllerMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { + boolean success = false; Exception lastException = null; - int attempts = 0; - int failures = 0; for (var application : applications.asList()) { for (var instance : application.instances().values()) { for (var deployment : instance.deployments().values()) { if ( ! deployment.zone().environment().isProduction()) continue; try { - attempts++; - updateTrafficFraction(instance, deployment); + success |= updateTrafficFraction(instance, deployment); } catch (Exception e) { // Some failures due to locked applications are expected and benign - failures++; lastException = e; } } } } - double successFactor = asSuccessFactor(attempts, failures); - if ( successFactor == 0 ) + if ( ! success && lastException != null) // log on complete failure log.log(Level.WARNING, "Could not update traffic share on any applications", lastException); - return successFactor; + return success; } - private void updateTrafficFraction(Instance instance, Deployment deployment) { + private boolean updateTrafficFraction(Instance instance, Deployment deployment) { double qpsInZone = deployment.metrics().queriesPerSecond(); double totalQps = instance.deployments().values().stream() .filter(i -> i.zone().environment().isProduction()) @@ -77,6 +73,7 @@ public class TrafficShareUpdater extends ControllerMaintainer { maxReadShare = currentReadShare; // distribution can be incorrect nodeRepository.patchApplication(deployment.zone(), instance.id(), currentReadShare, maxReadShare); + return true; } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java index 2326f7b66ee..8d5019904fa 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/Upgrader.java @@ -51,7 +51,7 @@ public class Upgrader extends ControllerMaintainer { * Schedule application upgrades. Note that this implementation must be idempotent. */ @Override - public double maintain() { + public boolean maintain() { // Determine target versions for each upgrade policy VersionStatus versionStatus = controller().readVersionStatus(); Version canaryTarget = controller().systemVersion(versionStatus); @@ -91,7 +91,7 @@ public class Upgrader extends ControllerMaintainer { upgrade(instances.with(UpgradePolicy.canary), canaryTarget, targetMajorVersion, instances.size()); defaultTargets.forEach(target -> upgrade(instances.with(UpgradePolicy.defaultPolicy), target, targetMajorVersion, numberOfApplicationsToUpgrade())); conservativeTargets.forEach(target -> upgrade(instances.with(UpgradePolicy.conservative), target, targetMajorVersion, numberOfApplicationsToUpgrade())); - return 1.0; + return true; } /** Returns the target versions for given confidence, one per major version in the system */ diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainer.java index 4cd24289676..fedf3d90760 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainer.java @@ -57,7 +57,7 @@ public class VCMRMaintainer extends ControllerMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { var changeRequests = curator.readChangeRequests() .stream() .filter(shouldUpdate()) @@ -81,7 +81,7 @@ public class VCMRMaintainer extends ControllerMaintainer { }); } }); - return 1.0; + return true; } /** diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java index e4866c43f13..a3e9672b715 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VersionStatusUpdater.java @@ -29,7 +29,7 @@ public class VersionStatusUpdater extends ControllerMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { try { VersionStatus newStatus = VersionStatus.compute(controller()); controller().updateVersionStatus(newStatus); @@ -37,12 +37,12 @@ public class VersionStatusUpdater extends ControllerMaintainer { controller().serviceRegistry().systemMonitor().reportSystemVersion(version.versionNumber(), convert(version.confidence())); }); - return 1.0; + return true; } catch (Exception e) { log.log(Level.WARNING, "Failed to compute version status: " + Exceptions.toMessageString(e) + ". Retrying in " + interval()); } - return 0.0; + return false; } static SystemMonitor.Confidence convert(VespaVersion.Confidence confidence) { diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java index 7dc5cb34818..27b4f3744e7 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintainerTest.java @@ -39,16 +39,13 @@ public class ControllerMaintainerTest { TestControllerMaintainer maintainer = new TestControllerMaintainer(tester.controller(), SystemName.main, new AtomicInteger()); maintainer.run(); assertEquals(0L, consecutiveFailuresMetric()); - assertEquals(1.0, successFactorMetric(), 0.0000001); maintainer.success = false; maintainer.run(); maintainer.run(); assertEquals(2L, consecutiveFailuresMetric()); - assertEquals(0.0, successFactorMetric(), 0.0000001); maintainer.success = true; maintainer.run(); assertEquals(0, consecutiveFailuresMetric()); - assertEquals(1.0, successFactorMetric(), 0.0000001); } private long consecutiveFailuresMetric() { @@ -57,12 +54,6 @@ public class ControllerMaintainerTest { "maintenance.consecutiveFailures").get().longValue(); } - private long successFactorMetric() { - MetricsMock metrics = (MetricsMock) tester.controller().metric(); - return metrics.getMetric((context) -> "TestControllerMaintainer".equals(context.get("job")), - "maintenance.successFactor").get().longValue(); - } - private static class TestControllerMaintainer extends ControllerMaintainer { private final AtomicInteger executions; @@ -74,9 +65,9 @@ public class ControllerMaintainerTest { } @Override - protected double maintain() { + protected boolean maintain() { executions.incrementAndGet(); - return success ? 1.0 : 0.0; + return success; } } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainerTest.java index ce219b8beed..66bda66bbf9 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/EndpointCertificateMaintainerTest.java @@ -33,7 +33,7 @@ public class EndpointCertificateMaintainerTest { @Test public void old_and_unused_cert_is_deleted() { tester.curator().writeEndpointCertificateMetadata(ApplicationId.defaultId(), exampleMetadata); - assertEquals(1.0, maintainer.maintain(), 0.0000001); + assertTrue(maintainer.maintain()); assertTrue(tester.curator().readEndpointCertificateMetadata(ApplicationId.defaultId()).isEmpty()); } @@ -41,7 +41,7 @@ public class EndpointCertificateMaintainerTest { public void unused_but_recently_used_cert_is_not_deleted() { EndpointCertificateMetadata recentlyRequestedCert = exampleMetadata.withLastRequested(tester.clock().instant().minusSeconds(3600).getEpochSecond()); tester.curator().writeEndpointCertificateMetadata(ApplicationId.defaultId(), recentlyRequestedCert); - assertEquals(1.0, maintainer.maintain(), 0.0000001); + assertTrue(maintainer.maintain()); assertEquals(Optional.of(recentlyRequestedCert), tester.curator().readEndpointCertificateMetadata(ApplicationId.defaultId())); } @@ -53,7 +53,7 @@ public class EndpointCertificateMaintainerTest { secretStore.setSecret(exampleMetadata.keyName(), "foo", 1); secretStore.setSecret(exampleMetadata.certName(), "bar", 1); - assertEquals(1.0, maintainer.maintain(), 0.0000001); + assertTrue(maintainer.maintain()); var updatedCert = Optional.of(recentlyRequestedCert.withLastRefreshed(tester.clock().instant().getEpochSecond()).withVersion(1)); @@ -77,7 +77,7 @@ public class EndpointCertificateMaintainerTest { tester.curator().writeEndpointCertificateMetadata(appId, exampleMetadata); - assertEquals(1.0, maintainer.maintain(), 0.0000001); + assertTrue(maintainer.maintain()); assertTrue(tester.curator().readEndpointCertificateMetadata(appId).isPresent()); // cert should not be deleted, the app is deployed! } @@ -97,7 +97,7 @@ public class EndpointCertificateMaintainerTest { tester.curator().writeEndpointCertificateMetadata(appId, exampleMetadata); - assertEquals(1.0, maintainer.maintain(), 0.0000001); + assertTrue(maintainer.maintain()); assertTrue(tester.curator().readEndpointCertificateMetadata(appId).isPresent()); // cert should not be deleted, the app is deployed! tester.clock().advance(Duration.ofDays(3)); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java index 7b4882de3ff..2afa3a0faea 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java @@ -39,7 +39,7 @@ public class TrafficShareUpdaterTest { // Single zone setQpsMetric(50.0, application.application().id().defaultInstance(), prod1, tester); deploymentMetricsMaintainer.maintain(); - assertEquals(1.0, updater.maintain(), 0.0000001); + assertTrue(updater.maintain()); assertTrafficFraction(1.0, 1.0, application.instanceId(), prod1, tester); // Two zones @@ -48,14 +48,14 @@ public class TrafficShareUpdaterTest { setQpsMetric(50.0, application.application().id().defaultInstance(), prod1, tester); setQpsMetric(0.0, application.application().id().defaultInstance(), prod2, tester); deploymentMetricsMaintainer.maintain(); - assertEquals(1.0, updater.maintain(), 0.0000001); + assertTrue(updater.maintain()); assertTrafficFraction(1.0, 1.0, application.instanceId(), prod1, tester); assertTrafficFraction(0.0, 1.0, application.instanceId(), prod2, tester); // - both hot setQpsMetric(53.0, application.application().id().defaultInstance(), prod1, tester); setQpsMetric(47.0, application.application().id().defaultInstance(), prod2, tester); deploymentMetricsMaintainer.maintain(); - assertEquals(1.0, updater.maintain(), 0.0000001); + assertTrue(updater.maintain()); assertTrafficFraction(0.53, 1.0, application.instanceId(), prod1, tester); assertTrafficFraction(0.47, 1.0, application.instanceId(), prod2, tester); @@ -66,7 +66,7 @@ public class TrafficShareUpdaterTest { setQpsMetric(47.0, application.application().id().defaultInstance(), prod2, tester); setQpsMetric(0.0, application.application().id().defaultInstance(), prod3, tester); deploymentMetricsMaintainer.maintain(); - assertEquals(1.0, updater.maintain(), 0.0000001); + assertTrue(updater.maintain()); assertTrafficFraction(0.53, 0.53, application.instanceId(), prod1, tester); assertTrafficFraction(0.47, 0.50, application.instanceId(), prod2, tester); assertTrafficFraction(0.00, 0.50, application.instanceId(), prod3, tester); @@ -75,7 +75,7 @@ public class TrafficShareUpdaterTest { setQpsMetric(25.0, application.application().id().defaultInstance(), prod2, tester); setQpsMetric(25.0, application.application().id().defaultInstance(), prod3, tester); deploymentMetricsMaintainer.maintain(); - assertEquals(1.0, updater.maintain(), 0.0000001); + assertTrue(updater.maintain()); assertTrafficFraction(0.50, 0.5, application.instanceId(), prod1, tester); assertTrafficFraction(0.25, 0.5, application.instanceId(), prod2, tester); assertTrafficFraction(0.25, 0.5, application.instanceId(), prod3, tester); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java index 24160c19dfa..9ac1ca2b4c1 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ApplicationMaintainer.java @@ -41,9 +41,9 @@ public abstract class ApplicationMaintainer extends NodeRepositoryMaintainer { } @Override - protected final double maintain() { + protected final boolean maintain() { applicationsNeedingMaintenance().forEach(this::deploy); - return 1.0; + return true; } /** Returns the number of deployments that are pending execution */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java index 3914a0c9f07..05eb878e1b1 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java @@ -44,13 +44,14 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { } @Override - protected double maintain() { - if ( ! nodeRepository().nodes().isWorking()) return 0.0; + protected boolean maintain() { + if ( ! nodeRepository().nodes().isWorking()) return false; - if ( ! nodeRepository().zone().environment().isAnyOf(Environment.dev, Environment.prod)) return 1.0; + boolean success = true; + if ( ! nodeRepository().zone().environment().isAnyOf(Environment.dev, Environment.prod)) return success; activeNodesByApplication().forEach(this::autoscale); - return 1.0; + return success; } private void autoscale(ApplicationId application, NodeList applicationNodes) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java index 0eb2038e233..af32c285d99 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java @@ -74,13 +74,13 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { try (Mutex lock = nodeRepository().nodes().lockUnallocated()) { NodeList nodes = nodeRepository().nodes().list(); resumeProvisioning(nodes, lock); convergeToCapacity(nodes); } - return 1.0; + return true; } /** Resume provisioning of already provisioned hosts and their children */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java index 25108425e6e..2443a12d198 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java @@ -40,7 +40,7 @@ public abstract class Expirer extends NodeRepositoryMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { NodeList expired = nodeRepository().nodes().list(fromState).matching(this::isExpired); if ( ! expired.isEmpty()) { @@ -49,7 +49,7 @@ public abstract class Expirer extends NodeRepositoryMaintainer { } metric.add("expired." + fromState, expired.size(), null); - return 1.0; + return true; } protected boolean isExpired(Node node) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java index 7505ce42668..e98da35aa6a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java @@ -66,7 +66,7 @@ public class FailedExpirer extends NodeRepositoryMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { List<Node> remainingNodes = new ArrayList<>(nodeRepository.nodes().list(Node.State.failed) .nodeType(NodeType.tenant, NodeType.host) .asList()); @@ -78,7 +78,7 @@ public class FailedExpirer extends NodeRepositoryMaintainer { recycleIf(remainingNodes, node -> node.allocation().get().membership().cluster().isStateful() && node.history().hasEventBefore(History.Event.Type.failed, clock().instant().minus(statefulExpiry))); - return 1.0; + return true; } /** Recycle the nodes matching condition, and remove those nodes from the nodes list. */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostEncrypter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostEncrypter.java index 80f74a011c0..caf20463f60 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostEncrypter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostEncrypter.java @@ -43,7 +43,7 @@ public class HostEncrypter extends NodeRepositoryMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { Instant now = nodeRepository().clock().instant(); NodeList allNodes = nodeRepository().nodes().list(); for (var nodeType : NodeType.values()) { @@ -51,7 +51,7 @@ public class HostEncrypter extends NodeRepositoryMaintainer { if (upgradingVespa(allNodes, nodeType)) continue; unencryptedHosts(allNodes, nodeType).forEach(host -> encrypt(host, now)); } - return 1.0; + return true; } /** Returns whether any node of given type is currently upgrading its Vespa version */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InfrastructureProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InfrastructureProvisioner.java index d9f5ea6a7a9..e317333135c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InfrastructureProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/InfrastructureProvisioner.java @@ -39,9 +39,9 @@ public class InfrastructureProvisioner extends NodeRepositoryMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { infraDeployer.activateAllSupportedInfraApplications(false); - return 1.0; + return true; } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java index ac9d8d6671a..10069fd1a18 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/LoadBalancerExpirer.java @@ -3,7 +3,6 @@ package com.yahoo.vespa.hosted.provision.maintenance; import com.google.common.collect.Sets; import com.yahoo.jdisc.Metric; -import com.yahoo.lang.MutableInteger; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.lb.LoadBalancer; @@ -55,9 +54,9 @@ public class LoadBalancerExpirer extends NodeRepositoryMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { expireReserved(); - return ( removeInactive() + pruneReals() ) / 2; + return removeInactive() & pruneReals(); } /** Move reserved load balancer that have expired to inactive */ @@ -69,8 +68,7 @@ public class LoadBalancerExpirer extends NodeRepositoryMaintainer { } /** Deprovision inactive load balancers that have expired */ - private double removeInactive() { - MutableInteger attempts = new MutableInteger(0); + private boolean removeInactive() { var failed = new ArrayList<LoadBalancerId>(); var lastException = new AtomicReference<Exception>(); var expiry = nodeRepository().clock().instant().minus(inactiveExpiry); @@ -78,7 +76,6 @@ public class LoadBalancerExpirer extends NodeRepositoryMaintainer { lb.changedAt().isBefore(expiry) && allocatedNodes(lb.id()).isEmpty(), lb -> { try { - attempts.add(1); log.log(Level.INFO, () -> "Removing expired inactive load balancer " + lb.id()); service.remove(lb.id().application(), lb.id().cluster()); db.removeLoadBalancer(lb.id()); @@ -95,12 +92,11 @@ public class LoadBalancerExpirer extends NodeRepositoryMaintainer { .collect(Collectors.joining(", ")), interval())); } - return asSuccessFactor(attempts.get(), failed.size()); + return lastException.get() == null; } /** Remove reals from inactive load balancers */ - private double pruneReals() { - var attempts = new MutableInteger(0); + private boolean pruneReals() { var failed = new ArrayList<LoadBalancerId>(); var lastException = new AtomicReference<Exception>(); patchLoadBalancers(lb -> lb.state() == State.inactive, lb -> { @@ -111,7 +107,6 @@ public class LoadBalancerExpirer extends NodeRepositoryMaintainer { reals.removeIf(real -> !allocatedNodes.contains(real.hostname().value())); if (reals.equals(lb.instance().get().reals())) return; // Nothing to remove try { - attempts.add(1); LOG.log(Level.INFO, () -> "Removing reals from inactive load balancer " + lb.id() + ": " + Sets.difference(lb.instance().get().reals(), reals)); service.create(new LoadBalancerSpec(lb.id().application(), lb.id().cluster(), reals), true); db.writeLoadBalancer(lb.with(lb.instance().map(instance -> instance.withReals(reals)))); @@ -129,7 +124,7 @@ public class LoadBalancerExpirer extends NodeRepositoryMaintainer { interval()), lastException.get()); } - return asSuccessFactor(attempts.get(), failed.size()); + return lastException.get() == null; } /** Patch load balancers matching given filter, while holding lock */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java index 3990c5099eb..85437b3e78a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java @@ -65,7 +65,7 @@ public class MetricsReporter extends NodeRepositoryMaintainer { } @Override - public double maintain() { + public boolean maintain() { NodeList nodes = nodeRepository().nodes().list(); ServiceModel serviceModel = serviceMonitor.getServiceModelSnapshot(); @@ -80,7 +80,7 @@ public class MetricsReporter extends NodeRepositoryMaintainer { updateRepairTicketMetrics(nodes); updateAllocationMetrics(nodes); updateExclusiveSwitchMetrics(nodes); - return 1.0; + return true; } private void updateAllocationMetrics(NodeList nodes) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java index f16459ee8b9..effa41dc69f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java @@ -72,21 +72,17 @@ public class NodeFailer extends NodeRepositoryMaintainer { } @Override - protected double maintain() { - if ( ! nodeRepository().nodes().isWorking()) return 0.0; + protected boolean maintain() { + if ( ! nodeRepository().nodes().isWorking()) return false; - int attempts = 0; - int failures = 0; int throttledHostFailures = 0; int throttledNodeFailures = 0; // Ready nodes try (Mutex lock = nodeRepository().nodes().lockUnallocated()) { for (Map.Entry<Node, String> entry : getReadyNodesByFailureReason().entrySet()) { - attempts++; Node node = entry.getKey(); if (throttle(node)) { - failures++; if (node.type().isHost()) throttledHostFailures++; else @@ -100,12 +96,10 @@ public class NodeFailer extends NodeRepositoryMaintainer { // Active nodes for (Map.Entry<Node, String> entry : getActiveNodesByFailureReason().entrySet()) { - attempts++; Node node = entry.getKey(); if (!failAllowedFor(node.type())) continue; if (throttle(node)) { - failures++; if (node.type().isHost()) throttledHostFailures++; else @@ -122,15 +116,11 @@ public class NodeFailer extends NodeRepositoryMaintainer { if ( ! activeNodes.childrenOf(host).isEmpty()) continue; Optional<NodeMutex> locked = Optional.empty(); try { - attempts++; locked = nodeRepository().nodes().lockAndGet(host); if (locked.isEmpty()) continue; nodeRepository().nodes().fail(List.of(locked.get().node()), Agent.NodeFailer, "Host should be failed and have no tenant nodes"); } - catch (Exception e) { - failures++; - } finally { locked.ifPresent(NodeMutex::close); } @@ -140,7 +130,7 @@ public class NodeFailer extends NodeRepositoryMaintainer { metric.set(throttlingActiveMetric, throttlingActive, null); metric.set(throttledHostFailuresMetric, throttledHostFailures, null); metric.set(throttledNodeFailuresMetric, throttledNodeFailures, null); - return asSuccessFactor(attempts, failures); + return throttlingActive == 0; } private Map<Node, String> getReadyNodesByFailureReason() { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java index 37969a30b81..fe2fb5229f9 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java @@ -5,7 +5,6 @@ import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ApplicationLockException; import com.yahoo.config.provision.HostLivenessTracker; import com.yahoo.jdisc.Metric; -import com.yahoo.lang.MutableInteger; import com.yahoo.transaction.Mutex; import com.yahoo.vespa.applicationmodel.ServiceInstance; import com.yahoo.vespa.applicationmodel.ServiceStatus; @@ -49,11 +48,13 @@ public class NodeHealthTracker extends NodeRepositoryMaintainer { } @Override - protected double maintain() { - return ( updateReadyNodeLivenessEvents() + updateActiveNodeDownState() ) / 2; + protected boolean maintain() { + updateReadyNodeLivenessEvents(); + updateActiveNodeDownState(); + return true; } - private double updateReadyNodeLivenessEvents() { + private void updateReadyNodeLivenessEvents() { // Update node last request events through ZooKeeper to collect request to all config servers. // We do this here ("lazily") to avoid writing to zk for each config request. try (Mutex lock = nodeRepository().nodes().lockUnallocated()) { @@ -68,16 +69,13 @@ public class NodeHealthTracker extends NodeRepositoryMaintainer { } } } - return 1.0; } /** * If the node is down (see {@link #allDown}), and there is no "down" history record, we add it. * Otherwise we remove any "down" history record. */ - private double updateActiveNodeDownState() { - var attempts = new MutableInteger(0); - var failures = new MutableInteger(0); + private void updateActiveNodeDownState() { NodeList activeNodes = nodeRepository().nodes().list(Node.State.active); serviceMonitor.getServiceModelSnapshot().getServiceInstancesByHostName().forEach((hostname, serviceInstances) -> { Optional<Node> node = activeNodes.node(hostname.toString()); @@ -92,7 +90,6 @@ public class NodeHealthTracker extends NodeRepositoryMaintainer { try (var lock = nodeRepository().nodes().lock(owner)) { node = getNode(hostname.toString(), owner, lock); // Re-get inside lock if (node.isEmpty()) return; // Node disappeared or changed allocation - attempts.add(1); if (isDown) { recordAsDown(node.get(), lock); } else { @@ -101,10 +98,8 @@ public class NodeHealthTracker extends NodeRepositoryMaintainer { } catch (ApplicationLockException e) { // Fine, carry on with other nodes. We'll try updating this one in the next run log.log(Level.WARNING, "Could not lock " + owner + ": " + Exceptions.toMessageString(e)); - failures.add(1); } }); - return asSuccessFactor(attempts.get(), failures.get()); } /** diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java index d671900d08c..1ea4577f7fe 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainer.java @@ -33,21 +33,19 @@ public class NodeMetricsDbMaintainer extends NodeRepositoryMaintainer { } @Override - protected double maintain() { - int attempts = 0; - var failures = new MutableInteger(0); + protected boolean maintain() { try { + var warnings = new MutableInteger(0); Set<ApplicationId> applications = activeNodesByApplication().keySet(); - if (applications.isEmpty()) return 1.0; + if (applications.isEmpty()) return true; long pauseMs = interval().toMillis() / applications.size() - 1; // spread requests over interval int done = 0; for (ApplicationId application : applications) { - attempts++; metricsFetcher.fetchMetrics(application) .whenComplete((metricsResponse, exception) -> handleResponse(metricsResponse, exception, - failures, + warnings, application)); if (++done < applications.size()) Thread.sleep(pauseMs); @@ -58,22 +56,23 @@ public class NodeMetricsDbMaintainer extends NodeRepositoryMaintainer { nodeRepository().metricsDb().gc(); - return asSuccessFactor(attempts, failures.get()); + // Suppress failures for manual zones for now to avoid noise + return nodeRepository().zone().environment().isManuallyDeployed() || warnings.get() == 0; } catch (InterruptedException e) { - return asSuccessFactor(attempts, failures.get()); + return false; } } private void handleResponse(MetricsResponse response, Throwable exception, - MutableInteger failures, + MutableInteger warnings, ApplicationId application) { if (exception != null) { - if (failures.get() < maxWarningsPerInvocation) + if (warnings.get() < maxWarningsPerInvocation) log.log(Level.WARNING, "Could not update metrics for " + application + ": " + Exceptions.toMessageString(exception)); - failures.add(1); + warnings.add(1); } else if (response != null) { nodeRepository().metricsDb().addNodeMetrics(response.nodeMetrics()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java index c282fcdb7fc..6ee657beadd 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java @@ -37,7 +37,7 @@ public class NodeRebooter extends NodeRepositoryMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { // Reboot candidates: Nodes in long-term states, where we know we can safely orchestrate a reboot List<Node> nodesToReboot = nodeRepository().nodes().list(Node.State.active, Node.State.ready).stream() .filter(node -> node.type().isHost()) @@ -46,7 +46,7 @@ public class NodeRebooter extends NodeRepositoryMaintainer { if (!nodesToReboot.isEmpty()) nodeRepository().nodes().reboot(NodeListFilter.from(nodesToReboot)); - return 1.0; + return true; } private boolean shouldReboot(Node node) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java index fe5cd419b31..0a1f6961f9f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintainer.java @@ -25,7 +25,7 @@ public abstract class NodeRepositoryMaintainer extends Maintainer { public NodeRepositoryMaintainer(NodeRepository nodeRepository, Duration interval, Metric metric) { super(null, interval, nodeRepository.clock().instant(), nodeRepository.jobControl(), - new NodeRepositoryJobMetrics(metric), nodeRepository.database().cluster(), true); + jobMetrics(metric), nodeRepository.database().cluster(), true); this.nodeRepository = nodeRepository; } @@ -48,20 +48,10 @@ public abstract class NodeRepositoryMaintainer extends Maintainer { .groupingBy(node -> node.allocation().get().owner()); } - private static class NodeRepositoryJobMetrics extends JobMetrics { - - private final Metric metric; - - public NodeRepositoryJobMetrics(Metric metric) { - this.metric = metric; - } - - @Override - protected void recordCompletion(String job, Long consecutiveFailures, double successFactor) { + private static JobMetrics jobMetrics(Metric metric) { + return new JobMetrics((job, consecutiveFailures) -> { metric.set("maintenance.consecutiveFailures", consecutiveFailures, metric.createContext(Map.of("job", job))); - metric.set("maintenance.successFactor", successFactor, metric.createContext(Map.of("job", job))); - } - + }); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java index 749603a373d..4eba15307cb 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/OsUpgradeActivator.java @@ -23,13 +23,13 @@ public class OsUpgradeActivator extends NodeRepositoryMaintainer { } @Override - protected double maintain() { + protected boolean maintain() { for (var nodeType : NodeType.values()) { if (!nodeType.isHost()) continue; boolean resume = canUpgradeOsOf(nodeType); nodeRepository().osVersions().resumeUpgradeOf(nodeType, resume); } - return 1.0; + return true; } /** Returns whether to allow OS upgrade of nodes of given type */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java index 7bb748c92c9..1543506a78e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java @@ -33,18 +33,19 @@ public class Rebalancer extends NodeMover<Rebalancer.Move> { } @Override - protected double maintain() { - if ( ! nodeRepository().nodes().isWorking()) return 0.0; + protected boolean maintain() { + if ( ! nodeRepository().nodes().isWorking()) return false; - if (nodeRepository().zone().getCloud().dynamicProvisioning()) return 1.0; // Rebalancing not necessary - if (nodeRepository().zone().environment().isTest()) return 1.0; // Short lived deployments; no need to rebalance + boolean success = true; + if (nodeRepository().zone().getCloud().dynamicProvisioning()) return success; // Rebalancing not necessary + if (nodeRepository().zone().environment().isTest()) return success; // Short lived deployments; no need to rebalance // Work with an unlocked snapshot as this can take a long time and full consistency is not needed NodeList allNodes = nodeRepository().nodes().list(); updateSkewMetric(allNodes); - if ( ! zoneIsStable(allNodes)) return 1.0; + if ( ! zoneIsStable(allNodes)) return success; findBestMove(allNodes).execute(true, Agent.Rebalancer, deployer, metric, nodeRepository()); - return 1.0; + return success; } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java index 3f5893b368a..f72daf1bc2b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java @@ -48,10 +48,7 @@ public class RetiredExpirer extends NodeRepositoryMaintainer { } @Override - protected double maintain() { - int attempts = 0; - int successes = 0; - + protected boolean maintain() { NodeList activeNodes = nodeRepository().nodes().list(Node.State.active); Map<ApplicationId, NodeList> retiredNodesByApplication = activeNodes.retired().groupingBy(node -> node.allocation().get().owner()); for (Map.Entry<ApplicationId, NodeList> entry : retiredNodesByApplication.entrySet()) { @@ -60,19 +57,17 @@ public class RetiredExpirer extends NodeRepositoryMaintainer { List<Node> nodesToRemove = retiredNodes.stream().filter(n -> canRemove(n, activeNodes)).collect(Collectors.toList()); if (nodesToRemove.isEmpty()) continue; - attempts++; try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository())) { if ( ! deployment.isValid()) continue; nodeRepository().nodes().setRemovable(application, nodesToRemove); boolean success = deployment.activate().isPresent(); - if ( ! success) continue; + if ( ! success) return success; String nodeList = nodesToRemove.stream().map(Node::hostname).collect(Collectors.joining(", ")); log.info("Redeployed " + application + " to deactivate retired nodes: " + nodeList); - successes++; } } - return attempts == 0 ? 1.0 : ((double)successes / attempts); + return true; } /** diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java index 888f06a5004..c217580872b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ScalingSuggestionsMaintainer.java @@ -36,16 +36,13 @@ public class ScalingSuggestionsMaintainer extends NodeRepositoryMaintainer { } @Override - protected double maintain() { - if ( ! nodeRepository().zone().environment().isProduction()) return 1.0; + protected boolean maintain() { + if ( ! nodeRepository().zone().environment().isProduction()) return true; - int attempts = 0; int successes = 0; - for (var application : activeNodesByApplication().entrySet()) { - attempts++; + for (var application : activeNodesByApplication().entrySet()) successes += suggest(application.getKey(), application.getValue()); - } - return attempts == 0 ? 1.0 : ((double)successes / attempts); + return successes > 0; } private int suggest(ApplicationId application, NodeList applicationNodes) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java index 0589571e9d8..0307ae13b24 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java @@ -66,11 +66,12 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer { } @Override - protected double maintain() { - if ( ! nodeRepository().nodes().isWorking()) return 0.0; + protected boolean maintain() { + if ( ! nodeRepository().nodes().isWorking()) return false; + boolean success = true; // Don't need to maintain spare capacity in dynamically provisioned zones; can provision more on demand. - if (nodeRepository().zone().getCloud().dynamicProvisioning()) return 1.0; + if (nodeRepository().zone().getCloud().dynamicProvisioning()) return success; NodeList allNodes = nodeRepository().nodes().list(); CapacityChecker capacityChecker = new CapacityChecker(allNodes); @@ -79,7 +80,6 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer { metric.set("overcommittedHosts", overcommittedHosts.size(), null); retireOvercommitedHosts(allNodes, overcommittedHosts); - boolean success = true; Optional<CapacityChecker.HostFailurePath> failurePath = capacityChecker.worstCaseHostLossLeadingToFailure(); if (failurePath.isPresent()) { int spareHostCapacity = failurePath.get().hostsCausingFailure.size() - 1; @@ -96,7 +96,7 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer { } metric.set("spareHostCapacity", spareHostCapacity, null); } - return success ? 1.0 : 0.0; + return success; } private boolean execute(List<Move> mitigation, CapacityChecker.HostFailurePath failurePath) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java index 44890f2f5af..cfab980570d 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancer.java @@ -33,14 +33,14 @@ public class SwitchRebalancer extends NodeMover<Move> { } @Override - protected double maintain() { - if (!nodeRepository().nodes().isWorking()) return 0.0; - if (!nodeRepository().zone().environment().isProduction()) return 1.0; + protected boolean maintain() { + if (!nodeRepository().nodes().isWorking()) return false; + if (!nodeRepository().zone().environment().isProduction()) return true; NodeList allNodes = nodeRepository().nodes().list(); // Lockless as strong consistency is not needed - if (!zoneIsStable(allNodes)) return 1.0; + if (!zoneIsStable(allNodes)) return true; findBestMove(allNodes).execute(false, Agent.SwitchRebalancer, deployer, metric, nodeRepository()); - return 1.0; + return true; } @Override diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java index cfe6e4d348d..5b2f7ce91e8 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeMetricsDbMaintainerTest.java @@ -43,7 +43,7 @@ public class NodeMetricsDbMaintainerTest { fetcher, Duration.ofHours(1), new TestMetric()); - assertEquals(maintainer.maintain(), 1.0, 0.0000001); + assertTrue(maintainer.maintain()); List<NodeTimeseries> timeseriesList = tester.nodeRepository().metricsDb().getNodeTimeseries(Duration.ofDays(1), Set.of("host-1.yahoo.com", "host-2.yahoo.com")); assertEquals(2, timeseriesList.size()); diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java index 73a5dc77743..fcc5b8e57a2 100644 --- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java +++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/JobMetrics.java @@ -2,18 +2,25 @@ package com.yahoo.concurrent.maintenance; import java.util.concurrent.ConcurrentHashMap; +import java.util.function.BiConsumer; /** * Tracks and forwards maintenance job metrics. * * @author mpolden */ -public abstract class JobMetrics { +public class JobMetrics { + + private final BiConsumer<String, Long> metricConsumer; private final ConcurrentHashMap<String, Long> incompleteRuns = new ConcurrentHashMap<>(); - /** Record starting of a run of a job */ - public void starting(String job) { + public JobMetrics(BiConsumer<String, Long> metricConsumer) { + this.metricConsumer = metricConsumer; + } + + /** Record a run for given job */ + public void recordRunOf(String job) { incompleteRuns.merge(job, 1L, Long::sum); } @@ -22,17 +29,12 @@ public abstract class JobMetrics { incompleteRuns.put(job, 0L); } - /** - * Records completion of a run of a job. - * This is guaranteed to always be called once whenever starting has been called. - */ - public void completed(String job, double successFactor) { + /** Forward metrics for given job to metric consumer */ + public void forward(String job) { Long incompleteRuns = this.incompleteRuns.get(job); if (incompleteRuns != null) { - recordCompletion(job, incompleteRuns, successFactor); + metricConsumer.accept(job, incompleteRuns); } } - protected abstract void recordCompletion(String job, Long incompleteRuns, double successFactor); - } diff --git a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java index 2a9e6dda6b6..734c46a2819 100644 --- a/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java +++ b/vespajlib/src/main/java/com/yahoo/concurrent/maintenance/Maintainer.java @@ -83,19 +83,8 @@ public abstract class Maintainer implements Runnable { @Override public final String toString() { return name(); } - /** - * Called once each time this maintenance job should run. - * - * @return the degree to which the run was successful - a number between 0 (no success), to 1 (complete success). - * Note that this indicates whether something is wrong, so e.g if the call did nothing because it should do - * nothing, 1.0 should be returned. - */ - protected abstract double maintain(); - - /** Convenience methods to convert attempts and failures into a success factor */ - protected final double asSuccessFactor(int attempts, int failures) { - return attempts == 0 ? 1.0 : 1 - (double)failures / attempts; - } + /** Called once each time this maintenance job should run. Returns whether the maintenance run was successful */ + protected abstract boolean maintain(); /** Returns the interval at which this job is set to run */ protected Duration interval() { return interval; } @@ -104,12 +93,9 @@ public abstract class Maintainer implements Runnable { public final void lockAndMaintain(boolean force) { if (!force && !jobControl.isActive(name())) return; log.log(Level.FINE, () -> "Running " + this.getClass().getSimpleName()); - jobMetrics.starting(name()); - double successFactor = 0; + jobMetrics.recordRunOf(name()); try (var lock = jobControl.lockJob(name())) { - successFactor = maintain(); - if (successFactor > 0.0) - jobMetrics.recordCompletionOf(name()); + if (maintain()) jobMetrics.recordCompletionOf(name()); } catch (UncheckedTimeoutException e) { if (ignoreCollision) { jobMetrics.recordCompletionOf(name()); @@ -119,7 +105,7 @@ public abstract class Maintainer implements Runnable { } catch (Throwable e) { log.log(Level.WARNING, this + " failed. Will retry in " + interval, e); } finally { - jobMetrics.completed(name(), successFactor); + jobMetrics.forward(name()); } log.log(Level.FINE, () -> "Finished " + this.getClass().getSimpleName()); } diff --git a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java index 01560c050ff..139a2901cd3 100644 --- a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java +++ b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/JobControlTest.java @@ -3,8 +3,6 @@ package com.yahoo.concurrent.maintenance; import org.junit.Test; -import java.util.concurrent.atomic.AtomicLong; - import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -21,8 +19,9 @@ public class JobControlTest { String job1 = "Job1"; String job2 = "Job2"; - TestMaintainer maintainer1 = new TestMaintainer(job1, jobControl, new NoopJobMetrics()); - TestMaintainer maintainer2 = new TestMaintainer(job2, jobControl, new NoopJobMetrics()); + JobMetrics metrics = new JobMetrics((job, instant) -> {}); + TestMaintainer maintainer1 = new TestMaintainer(job1, jobControl, metrics); + TestMaintainer maintainer2 = new TestMaintainer(job2, jobControl, metrics); assertEquals(2, jobControl.jobs().size()); assertTrue(jobControl.jobs().contains(job1)); assertTrue(jobControl.jobs().contains(job2)); @@ -63,7 +62,7 @@ public class JobControlTest { public void testJobControlMayDeactivateJobs() { JobControlStateMock state = new JobControlStateMock(); JobControl jobControl = new JobControl(state); - TestMaintainer mockMaintainer = new TestMaintainer(null, jobControl, new NoopJobMetrics()); + TestMaintainer mockMaintainer = new TestMaintainer(null, jobControl, new JobMetrics((job, instant) -> {})); assertTrue(jobControl.jobs().contains("TestMaintainer")); @@ -81,11 +80,4 @@ public class JobControlTest { assertEquals(2, mockMaintainer.totalRuns()); } - private static class NoopJobMetrics extends JobMetrics { - - @Override - protected void recordCompletion(String job, Long incompleteRuns, double successFactor) { } - - } - } diff --git a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java index d2db380f4a1..e881d4b3ff6 100644 --- a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java +++ b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/MaintainerTest.java @@ -42,45 +42,35 @@ public class MaintainerTest { @Test public void success_metric() { - TestJobMetrics jobMetrics = new TestJobMetrics(); + AtomicLong consecutiveFailures = new AtomicLong(); + JobMetrics jobMetrics = new JobMetrics((job, count) -> consecutiveFailures.set(count)); TestMaintainer maintainer = new TestMaintainer(null, jobControl, jobMetrics); // Maintainer fails twice in a row maintainer.successOnNextRun(false).run(); - assertEquals(1, jobMetrics.consecutiveFailures.get()); + assertEquals(1, consecutiveFailures.get()); maintainer.successOnNextRun(false).run(); - assertEquals(2, jobMetrics.consecutiveFailures.get()); + assertEquals(2, consecutiveFailures.get()); // Maintainer runs successfully maintainer.successOnNextRun(true).run(); - assertEquals(0, jobMetrics.consecutiveFailures.get()); + assertEquals(0, consecutiveFailures.get()); // Maintainer runs successfully again maintainer.run(); - assertEquals(0, jobMetrics.consecutiveFailures.get()); + assertEquals(0, consecutiveFailures.get()); // Maintainer throws maintainer.throwOnNextRun(new RuntimeException()).run(); - assertEquals(1, jobMetrics.consecutiveFailures.get()); + assertEquals(1, consecutiveFailures.get()); // Maintainer recovers maintainer.throwOnNextRun(null).run(); - assertEquals(0, jobMetrics.consecutiveFailures.get()); + assertEquals(0, consecutiveFailures.get()); // Lock exception is treated as a failure maintainer.throwOnNextRun(new UncheckedTimeoutException()).run(); - assertEquals(1, jobMetrics.consecutiveFailures.get()); - } - - private static class TestJobMetrics extends JobMetrics { - - AtomicLong consecutiveFailures = new AtomicLong(); - - @Override - protected void recordCompletion(String job, Long incompleteRuns, double successFactor) { - consecutiveFailures.set(incompleteRuns); - } - + assertEquals(1, consecutiveFailures.get()); } } diff --git a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java index 7424b17cab2..44a00a37a83 100644 --- a/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java +++ b/vespajlib/src/test/java/com/yahoo/concurrent/maintenance/TestMaintainer.java @@ -33,10 +33,10 @@ class TestMaintainer extends Maintainer { } @Override - protected double maintain() { + protected boolean maintain() { if (exceptionToThrow != null) throw exceptionToThrow; totalRuns++; - return success ? 1.0 : 0.0; + return success; } } |