diff options
author | Øyvind Grønnesby <oyving@verizonmedia.com> | 2021-04-28 12:21:57 +0200 |
---|---|---|
committer | Øyvind Grønnesby <oyving@verizonmedia.com> | 2021-04-28 12:21:57 +0200 |
commit | dd2ef6cfc4d3d6e3735d1cb553f7ae2560a7f1ff (patch) | |
tree | e1ba3a56439bb3c16022b60d2a7ab3534037827e /controller-server | |
parent | a0db2b1020ea53aa356a7547a23d4e1dfaa851c0 (diff) | |
parent | e79af49a3159e5505cd3e5f2605c299d38fe40cd (diff) |
Merge remote-tracking branch 'origin/master' into ogronnesby/billing-api-v2
Diffstat (limited to 'controller-server')
50 files changed, 1876 insertions, 154 deletions
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java index 69d723edbe8..32063bf9ba5 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/ApplicationController.java @@ -11,6 +11,7 @@ import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.InstanceName; import com.yahoo.config.provision.TenantName; import com.yahoo.config.provision.zone.ZoneId; +import com.yahoo.log.LogLevel; import com.yahoo.vespa.athenz.api.AthenzDomain; import com.yahoo.vespa.athenz.api.AthenzIdentity; import com.yahoo.vespa.athenz.api.AthenzPrincipal; @@ -21,7 +22,6 @@ import com.yahoo.vespa.flags.FetchVector; import com.yahoo.vespa.flags.FlagSource; import com.yahoo.vespa.flags.PermanentFlags; import com.yahoo.vespa.flags.StringFlag; -import com.yahoo.vespa.hosted.controller.application.ActivateResult; import com.yahoo.vespa.hosted.controller.api.application.v4.model.DeploymentData; import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; import com.yahoo.vespa.hosted.controller.api.identifiers.InstanceId; @@ -35,7 +35,6 @@ import com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServ import com.yahoo.vespa.hosted.controller.api.integration.configserver.ContainerEndpoint; import com.yahoo.vespa.hosted.controller.api.integration.configserver.Log; import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node; -import com.yahoo.vespa.hosted.controller.api.integration.configserver.NotFoundException; import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationStore; import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion; import com.yahoo.vespa.hosted.controller.api.integration.deployment.ArtifactRepository; @@ 
-43,6 +42,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobId; import com.yahoo.vespa.hosted.controller.api.integration.deployment.TesterId; import com.yahoo.vespa.hosted.controller.api.integration.noderepository.RestartFilter; import com.yahoo.vespa.hosted.controller.api.integration.secrets.TenantSecretStore; +import com.yahoo.vespa.hosted.controller.application.ActivateResult; import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; import com.yahoo.vespa.hosted.controller.application.ApplicationPackageValidator; import com.yahoo.vespa.hosted.controller.application.Deployment; @@ -58,6 +58,8 @@ import com.yahoo.vespa.hosted.controller.deployment.DeploymentTrigger; import com.yahoo.vespa.hosted.controller.deployment.JobStatus; import com.yahoo.vespa.hosted.controller.deployment.Run; import com.yahoo.vespa.hosted.controller.deployment.RunStatus; +import com.yahoo.vespa.hosted.controller.notification.Notification; +import com.yahoo.vespa.hosted.controller.notification.NotificationSource; import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; import com.yahoo.vespa.hosted.controller.security.AccessControl; import com.yahoo.vespa.hosted.controller.security.Credentials; @@ -85,6 +87,7 @@ import java.util.TreeMap; import java.util.function.Consumer; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.stream.Collectors; import static com.yahoo.vespa.hosted.controller.api.integration.configserver.Node.State.active; import static com.yahoo.vespa.hosted.controller.api.integration.configserver.Node.State.reserved; @@ -183,8 +186,7 @@ public class ApplicationController { /** Returns the reindexing status for the given application in the given zone. 
*/ public ApplicationReindexing applicationReindexing(ApplicationId id, ZoneId zoneId) { - return configServer.getReindexing(new DeploymentId(id, zoneId)) - .orElseThrow(() -> new NotExistsException("Reindexing status not found for " + id + " in " + zoneId)); + return configServer.getReindexing(new DeploymentId(id, zoneId)); } /** Enables reindexing for the given application in the given zone. */ @@ -392,6 +394,22 @@ public class ApplicationController { // Record the quota usage for this application var quotaUsage = deploymentQuotaUsage(zone, job.application()); + // For direct deployments use the full application ID, but otherwise use just the tenant and application as + // the source since it's the same application, so it should have the same warnings + NotificationSource source = zone.environment().isManuallyDeployed() ? + NotificationSource.from(job.application()) : NotificationSource.from(applicationId); + List<String> warnings = Optional.ofNullable(result.prepareResponse().log) + .map(logs -> logs.stream() + .filter(log -> log.applicationPackage) + .filter(log -> LogLevel.parse(log.level).intValue() >= Level.WARNING.intValue()) + .map(log -> log.message) + .sorted() + .distinct() + .collect(Collectors.toList())) + .orElseGet(List::of); + if (warnings.isEmpty()) controller.notificationsDb().removeNotification(source, Notification.Type.APPLICATION_PACKAGE_WARNING); + else controller.notificationsDb().setNotification(source, Notification.Type.APPLICATION_PACKAGE_WARNING, warnings); + lockApplicationOrThrow(applicationId, application -> store(application.with(job.application().instance(), instance -> instance.withNewDeployment(zone, revision, platform, @@ -562,6 +580,7 @@ public class ApplicationController { curator.removeApplication(id); controller.jobController().collectGarbage(); + controller.notificationsDb().removeNotifications(NotificationSource.from(id)); log.info("Deleted " + id); }); } @@ -589,6 +608,7 @@ public class ApplicationController { 
controller.routing().removeEndpointsInDns(application.get(), instanceId.instance()); curator.writeApplication(application.without(instanceId.instance()).get()); controller.jobController().collectGarbage(); + controller.notificationsDb().removeNotifications(NotificationSource.from(instanceId)); log.info("Deleted " + instanceId); }); } @@ -678,8 +698,6 @@ public class ApplicationController { DeploymentId id = new DeploymentId(application.get().id().instance(instanceName), zone); try { configServer.deactivate(id); - } catch (NotFoundException ignored) { - // ok; already gone } finally { controller.routing().policies().refresh(application.get().id().instance(instanceName), application.get().deploymentSpec(), zone); if (zone.environment().isManuallyDeployed()) diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java index 5b2c2d74d20..2de8fa6457a 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/Controller.java @@ -23,6 +23,7 @@ import com.yahoo.vespa.hosted.controller.config.ControllerConfig; import com.yahoo.vespa.hosted.controller.deployment.JobController; import com.yahoo.vespa.hosted.controller.dns.NameServiceForwarder; import com.yahoo.vespa.hosted.controller.metric.ConfigServerMetrics; +import com.yahoo.vespa.hosted.controller.notification.NotificationsDb; import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; import com.yahoo.vespa.hosted.controller.persistence.JobControlFlags; import com.yahoo.vespa.hosted.controller.security.AccessControl; @@ -82,6 +83,7 @@ public class Controller extends AbstractComponent { private final ControllerConfig controllerConfig; private final SecretStore secretStore; private final CuratorArchiveBucketDb archiveBucketDb; + private final NotificationsDb notificationsDb; /** * Creates a 
controller @@ -118,6 +120,7 @@ public class Controller extends AbstractComponent { auditLogger = new AuditLogger(curator, clock); jobControl = new JobControl(new JobControlFlags(curator, flagSource)); archiveBucketDb = new CuratorArchiveBucketDb(this); + notificationsDb = new NotificationsDb(this); this.controllerConfig = controllerConfig; this.secretStore = secretStore; @@ -306,4 +309,8 @@ public class Controller extends AbstractComponent { public CuratorArchiveBucketDb archiveBucketDb() { return archiveBucketDb; } + + public NotificationsDb notificationsDb() { + return notificationsDb; + } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/TenantController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/TenantController.java index f3e192aef90..4b102ef3077 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/TenantController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/TenantController.java @@ -10,6 +10,7 @@ import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.hosted.controller.api.identifiers.TenantId; import com.yahoo.vespa.hosted.controller.application.SystemApplication; import com.yahoo.vespa.hosted.controller.concurrent.Once; +import com.yahoo.vespa.hosted.controller.notification.NotificationSource; import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; import com.yahoo.vespa.hosted.controller.security.AccessControl; import com.yahoo.vespa.hosted.controller.security.Credentials; @@ -171,6 +172,7 @@ public class TenantController { curator.removeTenant(tenant); accessControl.deleteTenant(tenant, credentials); + controller.notificationsDb().removeNotifications(NotificationSource.from(tenant)); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java index 
548f6a9aaf2..0458a64c5a9 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java @@ -25,9 +25,9 @@ import com.yahoo.security.X509CertificateUtils; import com.yahoo.vespa.hosted.controller.Application; import com.yahoo.vespa.hosted.controller.Controller; import com.yahoo.vespa.hosted.controller.Instance; -import com.yahoo.vespa.hosted.controller.application.ActivateResult; import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; import com.yahoo.vespa.hosted.controller.api.integration.LogEntry; +import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCertificateException; import com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServerException; import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node; import com.yahoo.vespa.hosted.controller.api.integration.configserver.PrepareResponse; @@ -39,13 +39,15 @@ import com.yahoo.vespa.hosted.controller.api.integration.deployment.TesterCloud; import com.yahoo.vespa.hosted.controller.api.integration.deployment.TesterId; import com.yahoo.vespa.hosted.controller.api.integration.organization.DeploymentFailureMails; import com.yahoo.vespa.hosted.controller.api.integration.organization.Mail; +import com.yahoo.vespa.hosted.controller.application.ActivateResult; import com.yahoo.vespa.hosted.controller.application.ApplicationPackage; import com.yahoo.vespa.hosted.controller.application.Deployment; import com.yahoo.vespa.hosted.controller.application.Endpoint; import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId; -import com.yahoo.vespa.hosted.controller.api.integration.certificates.EndpointCertificateException; import com.yahoo.vespa.hosted.controller.config.ControllerConfig; import com.yahoo.vespa.hosted.controller.maintenance.JobRunner; +import 
com.yahoo.vespa.hosted.controller.notification.Notification; +import com.yahoo.vespa.hosted.controller.notification.NotificationSource; import com.yahoo.vespa.hosted.controller.routing.RoutingPolicyId; import com.yahoo.yolean.Exceptions; @@ -67,6 +69,7 @@ import java.util.Locale; import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.function.Consumer; import java.util.function.Supplier; import java.util.logging.Level; import java.util.logging.Logger; @@ -225,31 +228,31 @@ public class InternalStepRunner implements StepRunner { // Retry certain failures for up to one hour. Optional<RunStatus> result = startTime.isBefore(controller.clock().instant().minus(Duration.ofHours(1))) ? Optional.of(deploymentFailed) : Optional.empty(); - switch (e.getErrorCode()) { + switch (e.code()) { case CERTIFICATE_NOT_READY: logger.log("Waiting for certificate to become ready on config server: New application, or old one has expired"); if (startTime.plus(timeouts.endpointCertificate()).isBefore(controller.clock().instant())) { - logger.log("Certificate did not become available on config server within (" + timeouts.endpointCertificate() + ")"); + logger.log(WARNING, "Certificate did not become available on config server within (" + timeouts.endpointCertificate() + ")"); return Optional.of(RunStatus.endpointCertificateTimeout); } return result; case ACTIVATION_CONFLICT: case APPLICATION_LOCK_FAILURE: - logger.log("Deployment failed with possibly transient error " + e.getErrorCode() + + logger.log("Deployment failed with possibly transient error " + e.code() + ", will retry: " + e.getMessage()); return result; case LOAD_BALANCER_NOT_READY: case PARENT_HOST_NOT_READY: - logger.log(e.getServerMessage()); + logger.log(e.message()); return result; case OUT_OF_CAPACITY: - logger.log(e.getServerMessage()); + logger.log(e.message()); return controller.system().isCd() && startTime.plus(timeouts.capacity()).isAfter(controller.clock().instant()) ? 
Optional.empty() : Optional.of(outOfCapacity); case INVALID_APPLICATION_PACKAGE: case BAD_REQUEST: - logger.log(e.getMessage()); + logger.log(WARNING, e.getMessage()); return Optional.of(deploymentFailed); } @@ -261,7 +264,7 @@ public class InternalStepRunner implements StepRunner { // Same as CERTIFICATE_NOT_READY above, only from the controller logger.log("Waiting for certificate to become valid: New application, or old one has expired"); if (startTime.plus(timeouts.endpointCertificate()).isBefore(controller.clock().instant())) { - logger.log("Controller could not validate certificate within " + + logger.log(WARNING, "Controller could not validate certificate within " + timeouts.endpointCertificate() + ": " + Exceptions.toMessageString(e)); return Optional.of(RunStatus.endpointCertificateTimeout); } @@ -596,7 +599,7 @@ public class InternalStepRunner implements StepRunner { testerCertificate.get().checkValidity(Date.from(controller.clock().instant())); } catch (CertificateExpiredException | CertificateNotYetValidException e) { - logger.log(INFO, "Tester certificate expired before tests could complete."); + logger.log(WARNING, "Tester certificate expired before tests could complete."); return Optional.of(aborted); } } @@ -671,7 +674,8 @@ public class InternalStepRunner implements StepRunner { try { controller.jobController().active(id).ifPresent(run -> { if (run.hasFailed()) - sendNotification(run, logger); + sendEmailNotification(run, logger); + updateConsoleNotification(run); }); } catch (IllegalStateException e) { @@ -682,7 +686,7 @@ public class InternalStepRunner implements StepRunner { } /** Sends a mail with a notification of a failed run, if one should be sent. 
*/ - private void sendNotification(Run run, DualLogger logger) { + private void sendEmailNotification(Run run, DualLogger logger) { Application application = controller.applications().requireApplication(TenantAndApplicationId.from(run.id().application())); Notifications notifications = application.deploymentSpec().requireInstance(run.id().application().instance()).notifications(); boolean newCommit = application.require(run.id().application().instance()).change().application() @@ -702,8 +706,39 @@ public class InternalStepRunner implements StepRunner { mailOf(run, recipients).ifPresent(controller.serviceRegistry().mailer()::send); } catch (RuntimeException e) { - logger.log(INFO, "Exception trying to send mail for " + run.id(), e); + logger.log(WARNING, "Exception trying to send mail for " + run.id(), e); + } + } + + private void updateConsoleNotification(Run run) { + NotificationSource source = NotificationSource.from(run.id()); + Consumer<String> updater = msg -> controller.notificationsDb().setNotification(source, Notification.Type.DEPLOYMENT_FAILURE, msg); + switch (run.status()) { + case aborted: return; // wait and see how the next run goes. + case running: + case success: + controller.notificationsDb().removeNotification(source, Notification.Type.DEPLOYMENT_FAILURE); + return; + case outOfCapacity: + if ( ! run.id().type().environment().isTest()) updater.accept("lack of capacity. 
Please contact the Vespa team to request more!"); + return; + case deploymentFailed: + updater.accept("invalid application configuration, or timeout of other deployments of the same application"); + return; + case installationFailed: + updater.accept("nodes were not able to start the new Java containers"); + return; + case testFailure: + updater.accept("one or more verification tests against the deployment failed"); + return; + case error: + case endpointCertificateTimeout: + break; + default: + logger.log(WARNING, "Don't know what to set console notification to for run status '" + run.status() + "'"); } + updater.accept("something in the framework went wrong. Such errors are " + + "usually transient. Please contact the Vespa team if the problem persists!"); } private Optional<Mail> mailOf(Run run, List<String> recipients) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java index 65d3f666309..3dc88d5d6d2 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/JobController.java @@ -532,8 +532,6 @@ public class JobController { var zone = type.zone(controller.system()); try { controller.serviceRegistry().configServer().deactivate(new DeploymentId(id.id(), zone)); - } catch (NotFoundException ignored) { - // Already gone -- great! } finally { // Passing an empty DeploymentSpec here is fine as it's used for registering global endpoint names, and // tester instances have none. 
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeManagementAssessor.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeManagementAssessor.java index a84d1c3ad7e..95432e3acbc 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeManagementAssessor.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeManagementAssessor.java @@ -104,7 +104,7 @@ public class ChangeManagementAssessor { private List<String> toParentHosts(List<String> impactedHostnames, List<NodeRepositoryNode> allNodes) { return impactedHostnames.stream() - .map(hostname -> + .flatMap(hostname -> allNodes.stream() .filter(node -> List.of(NodeType.config, NodeType.proxy, NodeType.host).contains(node.getType())) .filter(node -> hostname.equals(node.getHostname()) || hostname.equals(node.getParentHostname())) @@ -112,7 +112,7 @@ public class ChangeManagementAssessor { if (node.getType() == NodeType.host) return node.getHostname(); return node.getParentHostname(); - }).findFirst().orElseThrow() + }).findFirst().stream() ) .collect(Collectors.toList()); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java index ca9ebe132fd..0ebf4cbc2d2 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainer.java @@ -1,13 +1,25 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
package com.yahoo.vespa.hosted.controller.maintenance; +import com.yahoo.config.provision.Environment; import com.yahoo.config.provision.SystemName; +import com.yahoo.config.provision.zone.ZoneId; import com.yahoo.vespa.hosted.controller.Controller; +import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeRepository; import com.yahoo.vespa.hosted.controller.api.integration.vcmr.ChangeRequest; import com.yahoo.vespa.hosted.controller.api.integration.vcmr.ChangeRequestClient; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.ChangeRequestSource; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.VespaChangeRequest; +import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; import java.time.Duration; +import java.time.ZonedDateTime; +import java.util.ArrayList; +import java.util.Collections; import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.function.Function; import java.util.function.Predicate; import java.util.logging.Logger; import java.util.stream.Collectors; @@ -20,27 +32,29 @@ public class ChangeRequestMaintainer extends ControllerMaintainer { private final Logger logger = Logger.getLogger(ChangeRequestMaintainer.class.getName()); private final ChangeRequestClient changeRequestClient; private final SystemName system; + private final CuratorDb curator; + private final NodeRepository nodeRepository; public ChangeRequestMaintainer(Controller controller, Duration interval) { super(controller, interval, null, SystemName.allOf(Predicate.not(SystemName::isPublic))); this.changeRequestClient = controller.serviceRegistry().changeRequestClient(); this.system = controller.system(); + this.curator = controller.curator(); + this.nodeRepository = controller.serviceRegistry().configServer().nodeRepository(); } @Override protected boolean maintain() { - var changeRequests = changeRequestClient.getUpcomingChangeRequests(); + var currentChangeRequests = pruneOldChangeRequests(); + var 
changeRequests = changeRequestClient.getChangeRequests(currentChangeRequests); - if (!changeRequests.isEmpty()) { - logger.info(() -> "Found the following upcoming change requests:"); - changeRequests.forEach(changeRequest -> logger.info(changeRequest::toString)); - } - - if (system.equals(SystemName.main)) + logger.fine(() -> "Found requests: " + changeRequests); + storeChangeRequests(changeRequests); + if (system.equals(SystemName.main)) { approveChanges(changeRequests); + } - // TODO: Store in curator? return true; } @@ -50,6 +64,75 @@ public class ChangeRequestMaintainer extends ControllerMaintainer { .filter(changeRequest -> changeRequest.getApproval() == ChangeRequest.Approval.REQUESTED) .collect(Collectors.toList()); + logger.fine(() -> "Approving " + unapprovedRequests); changeRequestClient.approveChangeRequests(unapprovedRequests); } + + private void storeChangeRequests(List<ChangeRequest> changeRequests) { + var existingChangeRequests = curator.readChangeRequests() + .stream() + .collect(Collectors.toMap(ChangeRequest::getId, Function.identity())); + + var hostsByZone = hostsByZone(); + // Create or update requests in curator + try (var lock = curator.lockChangeRequests()) { + changeRequests.forEach(changeRequest -> { + var optionalZone = inferZone(changeRequest, hostsByZone); + optionalZone.ifPresent(zone -> { + var vcmr = existingChangeRequests + .getOrDefault(changeRequest.getId(), new VespaChangeRequest(changeRequest, zone)) + .withSource(changeRequest.getChangeRequestSource()) + .withApproval(changeRequest.getApproval()); + logger.fine(() -> "Storing " + vcmr); + curator.writeChangeRequest(vcmr); + }); + }); + } + } + + // Deletes closed change requests older than 7 days, returns the current list of requests + private List<ChangeRequest> pruneOldChangeRequests() { + List<ChangeRequest> currentChangeRequests = new ArrayList<>(); + + try (var lock = curator.lockChangeRequests()) { + for (var changeRequest : curator.readChangeRequests()) { + if 
(shouldDeleteChangeRequest(changeRequest.getChangeRequestSource())) { + curator.deleteChangeRequest(changeRequest); + } else { + currentChangeRequests.add(changeRequest); + } + } + } + return currentChangeRequests; + } + + private Map<ZoneId, List<String>> hostsByZone() { + return controller().zoneRegistry() + .zones() + .reachable() + .in(Environment.prod) + .ids() + .stream() + .collect(Collectors.toMap( + zone -> zone, + zone -> nodeRepository.list(zone, false) + .stream() + .map(node -> node.hostname().value()) + .collect(Collectors.toList()) + )); + } + + private Optional<ZoneId> inferZone(ChangeRequest changeRequest, Map<ZoneId, List<String>> hostsByZone) { + return hostsByZone.entrySet().stream() + .filter(entry -> !Collections.disjoint(entry.getValue(), changeRequest.getImpactedHosts())) + .map(Map.Entry::getKey) + .findFirst(); + } + + private boolean shouldDeleteChangeRequest(ChangeRequestSource source) { + return source.isClosed() && + source.getPlannedStartTime() + .plus(Duration.ofDays(7)) + .isBefore(ZonedDateTime.now()); + } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java index 5ab2ca4a5d6..d923db936cb 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporter.java @@ -54,7 +54,7 @@ public class CloudEventReporter extends ControllerMaintainer { /** Deprovision any host affected by given event */ private void deprovisionAffectedHosts(String region, CloudEvent event) { for (var zone : zonesByCloudNativeRegion.get(region)) { - for (var node : nodeRepository.list(zone.getId())) { + for (var node : nodeRepository.list(zone.getId(), false)) { if (!affects(node, event)) continue; log.info("Retiring and deprovisioning " + 
node.hostname().value() + " in " + zone.getId() + ": Affected by maintenance event " + event.instanceEventId); diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java index 19199c5a281..015da1faae8 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ControllerMaintenance.java @@ -61,7 +61,7 @@ public class ControllerMaintenance extends AbstractComponent { maintainers.add(new SystemRoutingPolicyMaintainer(controller, intervals.systemRoutingPolicyMaintainer)); maintainers.add(new ApplicationMetaDataGarbageCollector(controller, intervals.applicationMetaDataGarbageCollector)); maintainers.add(new ContainerImageExpirer(controller, intervals.containerImageExpirer)); - maintainers.add(new HostSwitchUpdater(controller, intervals.hostSwitchUpdater)); + maintainers.add(new HostInfoUpdater(controller, intervals.hostSwitchUpdater)); maintainers.add(new ReindexingTriggerer(controller, intervals.reindexingTriggerer)); maintainers.add(new EndpointCertificateMaintainer(controller, intervals.endpointCertificateMaintainer)); maintainers.add(new TrafficShareUpdater(controller, intervals.trafficFractionUpdater)); @@ -69,6 +69,7 @@ public class ControllerMaintenance extends AbstractComponent { maintainers.add(new ArchiveAccessMaintainer(controller, metric, intervals.archiveAccessMaintainer)); maintainers.add(new TenantRoleMaintainer(controller, intervals.tenantRoleMaintainer)); maintainers.add(new ChangeRequestMaintainer(controller, intervals.changeRequestMaintainer)); + maintainers.add(new VCMRMaintainer(controller, intervals.vcmrMaintainer)); } public Upgrader upgrader() { return upgrader; } @@ -123,6 +124,7 @@ public class ControllerMaintenance extends AbstractComponent 
{ private final Duration archiveAccessMaintainer; private final Duration tenantRoleMaintainer; private final Duration changeRequestMaintainer; + private final Duration vcmrMaintainer; public Intervals(SystemName system) { this.system = Objects.requireNonNull(system); @@ -153,7 +155,8 @@ public class ControllerMaintenance extends AbstractComponent { this.archiveUriUpdater = duration(5, MINUTES); this.archiveAccessMaintainer = duration(10, MINUTES); this.tenantRoleMaintainer = duration(5, MINUTES); - this.changeRequestMaintainer = duration(12, HOURS); + this.changeRequestMaintainer = duration(1, HOURS); + this.vcmrMaintainer = duration(1, HOURS); } private Duration duration(long amount, TemporalUnit unit) { diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostSwitchUpdater.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdater.java index 8e7a364b5f3..83ccda422e6 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostSwitchUpdater.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdater.java @@ -12,6 +12,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.noderepository.NodeRepo import java.time.Duration; import java.util.EnumSet; import java.util.Map; +import java.util.Optional; import java.util.function.Function; import java.util.logging.Logger; import java.util.regex.Matcher; @@ -19,18 +20,19 @@ import java.util.regex.Pattern; import java.util.stream.Collectors; /** - * Ensures that the switch information for all hosts is up to date. + * Ensures that the host information for all hosts is up to date. 
* * @author mpolden + * @author bjormel */ -public class HostSwitchUpdater extends ControllerMaintainer { +public class HostInfoUpdater extends ControllerMaintainer { - private static final Logger LOG = Logger.getLogger(HostSwitchUpdater.class.getName()); + private static final Logger LOG = Logger.getLogger(HostInfoUpdater.class.getName()); private static final Pattern HOST_PATTERN = Pattern.compile("^(proxy|cfg|controller)host(.+)$"); private final NodeRepository nodeRepository; - public HostSwitchUpdater(Controller controller, Duration interval) { + public HostInfoUpdater(Controller controller, Duration interval) { super(controller, interval, null, EnumSet.of(SystemName.cd, SystemName.main)); this.nodeRepository = controller.serviceRegistry().configServer().nodeRepository(); } @@ -40,28 +42,35 @@ public class HostSwitchUpdater extends ControllerMaintainer { Map<String, NodeEntity> nodeEntities = controller().serviceRegistry().entityService().listNodes().stream() .collect(Collectors.toMap(NodeEntity::hostname, Function.identity())); - int nodesUpdated = 0; + int hostsUpdated = 0; try { for (var zone : controller().zoneRegistry().zones().controllerUpgraded().all().ids()) { - for (var node : nodeRepository.list(zone)) { + for (var node : nodeRepository.list(zone, false)) { if (!node.type().isHost()) continue; NodeEntity nodeEntity = nodeEntities.get(registeredHostnameOf(node)); - if (!shouldUpdate(node, nodeEntity)) continue; + if (!shouldUpdateSwitch(node, nodeEntity) && !shouldUpdateModel(node, nodeEntity)) continue; NodeRepositoryNode updatedNode = new NodeRepositoryNode(); - updatedNode.setSwitchHostname(nodeEntity.switchHostname().get()); + nodeEntity.switchHostname().ifPresent(updatedNode::setSwitchHostname); + buildModelName(nodeEntity).ifPresent(updatedNode::setModelName); nodeRepository.patchNode(zone, node.hostname().value(), updatedNode); - nodesUpdated++; + hostsUpdated++; } } } finally { - if (nodesUpdated > 0) { - LOG.info("Updated switch hostname for 
" + nodesUpdated + " node(s)"); + if (hostsUpdated > 0) { + LOG.info("Updated information for " + hostsUpdated + " hosts(s)"); } } return true; } + private static Optional<String> buildModelName(NodeEntity nodeEntity) { + if(nodeEntity.manufacturer().isEmpty() || nodeEntity.model().isEmpty()) + return Optional.empty(); + return Optional.of(nodeEntity.manufacturer().get() + " " + nodeEntity.model().get()); + } + /** Returns the hostname that given host is registered under in the {@link EntityService} */ private static String registeredHostnameOf(Node host) { String hostname = host.hostname().value(); @@ -71,10 +80,17 @@ public class HostSwitchUpdater extends ControllerMaintainer { return matcher.replaceFirst("$1$2"); } - private static boolean shouldUpdate(Node node, NodeEntity nodeEntity) { + private static boolean shouldUpdateSwitch(Node node, NodeEntity nodeEntity) { if (nodeEntity == null) return false; - if (nodeEntity.switchHostname().isEmpty()) return false; + if (nodeEntity.switchHostname().isEmpty()) return false; return !node.switchHostname().equals(nodeEntity.switchHostname()); } + private static boolean shouldUpdateModel(Node node, NodeEntity nodeEntity) { + if (nodeEntity == null) return false; + if (nodeEntity.model().isEmpty()) return false; + if (nodeEntity.manufacturer().isEmpty()) return false; + return !node.modelName().equals(buildModelName(nodeEntity)); + } + } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java index fd375c80218..b40f2232504 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceMeterMaintainer.java @@ -94,7 +94,7 @@ public class ResourceMeterMaintainer extends ControllerMaintainer { 
.ofCloud(CloudName.from("aws")) .reachable().zones().stream() .map(ZoneApi::getId) - .map(zoneId -> createResourceSnapshotsFromNodes(zoneId, nodeRepository.list(zoneId))) + .map(zoneId -> createResourceSnapshotsFromNodes(zoneId, nodeRepository.list(zoneId, false))) .flatMap(Collection::stream) .collect(Collectors.toList()); } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java index 634e5ba10ce..c7bf7e765ed 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/ResourceTagMaintainer.java @@ -43,7 +43,7 @@ public class ResourceTagMaintainer extends ControllerMaintainer { private Map<HostName, Optional<ApplicationId>> getTenantOfParentHosts(ZoneId zoneId) { return controller().serviceRegistry().configServer().nodeRepository() - .list(zoneId) + .list(zoneId, false) .stream() .filter(node -> node.type().isHost()) .collect(Collectors.toMap( diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainer.java new file mode 100644 index 00000000000..a8de70a56a2 --- /dev/null +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainer.java @@ -0,0 +1,256 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.controller.maintenance; + +import com.yahoo.config.provision.Environment; +import com.yahoo.config.provision.NodeType; +import com.yahoo.config.provision.SystemName; +import com.yahoo.config.provision.zone.ZoneId; +import com.yahoo.vespa.hosted.controller.Controller; +import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node; +import com.yahoo.vespa.hosted.controller.api.integration.configserver.NodeRepository; +import com.yahoo.vespa.hosted.controller.api.integration.noderepository.NodeRepositoryNode; +import com.yahoo.vespa.hosted.controller.api.integration.noderepository.NodeState; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.ChangeRequest.Impact; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.HostAction; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.HostAction.State; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.VespaChangeRequest; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.VespaChangeRequest.Status; +import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; +import com.yahoo.yolean.Exceptions; + +import java.time.Duration; +import java.time.Instant; +import java.time.ZonedDateTime; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.function.Predicate; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +/** + * @author olaa + * + * Maintains status and execution of VCMRs + * For now only retires all affected tenant hosts if zone capacity allows it + */ +public class VCMRMaintainer extends ControllerMaintainer { + + private final Logger logger = Logger.getLogger(VCMRMaintainer.class.getName()); + private final Duration ALLOWED_RETIREMENT_TIME = Duration.ofHours(60); + private final Duration ALLOWED_POSTPONEMENT_TIME = Duration.ofDays(7); + private final CuratorDb curator; + private final NodeRepository nodeRepository; + + public VCMRMaintainer(Controller 
controller, Duration interval) { + super(controller, interval, null, SystemName.allOf(Predicate.not(SystemName::isPublic))); + this.curator = controller.curator(); + this.nodeRepository = controller.serviceRegistry().configServer().nodeRepository(); + } + + @Override + protected boolean maintain() { + var changeRequests = curator.readChangeRequests() + .stream() + .filter(shouldUpdate()) + .collect(Collectors.toList()); + + var nodesByZone = nodesByZone(); + + changeRequests.forEach(changeRequest -> { + var nodes = impactedNodes(nodesByZone, changeRequest); + var nextActions = getNextActions(nodes, changeRequest); + var status = getStatus(nextActions, changeRequest); + + try (var lock = curator.lockChangeRequests()) { + // Read the vcmr again, in case the source status has been updated + curator.readChangeRequest(changeRequest.getId()) + .ifPresent(vcmr -> curator.writeChangeRequest(vcmr.withActionPlan(nextActions) + .withStatus(status))); + } + }); + + return true; + } + + /** + * Status is based on: + * 1. Whether the source has reportedly closed the request + * 2. Whether any host requires operator action + * 3. 
Whether any host has started/finished retiring + */ + private Status getStatus(List<HostAction> nextActions, VespaChangeRequest changeRequest) { + if (changeRequest.getChangeRequestSource().isClosed()) { + return Status.COMPLETED; + } + + var byActionState = nextActions.stream().collect(Collectors.groupingBy(HostAction::getState, Collectors.counting())); + + if (byActionState.getOrDefault(State.REQUIRES_OPERATOR_ACTION, 0L) > 0) { + return Status.REQUIRES_OPERATOR_ACTION; + } + + if (byActionState.getOrDefault(State.RETIRING, 0L) + byActionState.getOrDefault(State.RETIRED, 0L) > 0) { + return Status.IN_PROGRESS; + } + + if (byActionState.getOrDefault(State.PENDING_RETIREMENT, 0L) > 0) { + return Status.PENDING_ACTION; + } + + return Status.NOOP; + } + + private List<HostAction> getNextActions(List<Node> nodes, VespaChangeRequest changeRequest) { + var spareCapacity = hasSpareCapacity(changeRequest.getZoneId(), nodes); + return nodes.stream() + .map(node -> nextAction(node, changeRequest, spareCapacity)) + .collect(Collectors.toList()); + } + + // Get the superset of impacted hosts by looking at impacted switches + private List<Node> impactedNodes(Map<ZoneId, List<Node>> nodesByZone, VespaChangeRequest changeRequest) { + return nodesByZone.get(changeRequest.getZoneId()) + .stream() + .filter(isImpacted(changeRequest)) + .collect(Collectors.toList()); + } + + private Optional<HostAction> getPreviousAction(Node node, VespaChangeRequest changeRequest) { + return changeRequest.getHostActionPlan() + .stream() + .filter(hostAction -> hostAction.getHostname().equals(node.hostname().value())) + .findFirst(); + } + + private HostAction nextAction(Node node, VespaChangeRequest changeRequest, boolean spareCapacity) { + var hostAction = getPreviousAction(node, changeRequest) + .orElse(new HostAction(node.hostname().value(), State.NONE, Instant.now())); + + if (changeRequest.getChangeRequestSource().isClosed()) { + logger.fine(() -> changeRequest.getChangeRequestSource().getId() 
+ " is closed, recycling " + node.hostname()); + recycleNode(changeRequest.getZoneId(), node, hostAction); + return hostAction.withState(State.COMPLETE); + } + + if (isPostponed(changeRequest, hostAction)) { + logger.fine(() -> changeRequest.getChangeRequestSource().getId() + " is postponed, recycling " + node.hostname()); + recycleNode(changeRequest.getZoneId(), node, hostAction); + return hostAction.withState(State.PENDING_RETIREMENT); + } + + if (node.type() != NodeType.host || !spareCapacity) { + return hostAction.withState(State.REQUIRES_OPERATOR_ACTION); + } + + if (shouldRetire(changeRequest, hostAction)) { + if (!node.wantToRetire()) { + logger.info(String.format("Retiring %s due to %s", node.hostname().value(), changeRequest.getChangeRequestSource().getId())); + // TODO: Remove try/catch once retirement is stabilized + try { + setWantToRetire(changeRequest.getZoneId(), node, true); + } catch (Exception e) { + logger.warning("Failed to retire host " + node.hostname() + ": " + Exceptions.toMessageString(e)); + // Check if retirement actually failed + if (!nodeRepository.getNode(changeRequest.getZoneId(), node.hostname().value()).getWantToRetire()) { + return hostAction; + } + } + } + return hostAction.withState(State.RETIRING); + } + + if (hasRetired(node, hostAction)) { + logger.fine(() -> node.hostname() + " has retired"); + return hostAction.withState(State.RETIRED); + } + + if (pendingRetirement(node, hostAction)) { + logger.fine(() -> node.hostname() + " is pending retirement"); + return hostAction.withState(State.PENDING_RETIREMENT); + } + + return hostAction; + } + + // Dirty host iff the parked host was retired by this maintainer + private void recycleNode(ZoneId zoneId, Node node, HostAction hostAction) { + if (hostAction.getState() == State.RETIRED && + node.state() == Node.State.parked) { + logger.info("Setting " + node.hostname() + " to dirty"); + nodeRepository.setState(zoneId, NodeState.dirty, node.hostname().value()); + } + if 
(hostAction.getState() == State.RETIRING && node.wantToRetire()) { + try { + setWantToRetire(zoneId, node, false); + } catch (Exception ignored) {} + } + } + + private boolean isPostponed(VespaChangeRequest changeRequest, HostAction action) { + return List.of(State.RETIRED, State.RETIRING).contains(action.getState()) && + changeRequest.getChangeRequestSource().getPlannedStartTime() + .minus(ALLOWED_POSTPONEMENT_TIME) + .isAfter(ZonedDateTime.now()); + } + + private boolean shouldRetire(VespaChangeRequest changeRequest, HostAction action) { + return action.getState() == State.PENDING_RETIREMENT && + changeRequest.getChangeRequestSource().getPlannedStartTime() + .minus(ALLOWED_RETIREMENT_TIME) + .isBefore(ZonedDateTime.now()); + } + + private boolean hasRetired(Node node, HostAction hostAction) { + return hostAction.getState() == State.RETIRING && + node.state() == Node.State.parked; + } + + /** + * TODO: For now, we choose to retire any active host + */ + private boolean pendingRetirement(Node node, HostAction action) { + return action.getState() == State.NONE && node.state() == Node.State.active; + } + + private Map<ZoneId, List<Node>> nodesByZone() { + return controller().zoneRegistry() + .zones() + .reachable() + .in(Environment.prod) + .ids() + .stream() + .collect(Collectors.toMap( + zone -> zone, + zone -> nodeRepository.list(zone, false) + )); + } + + private Predicate<Node> isImpacted(VespaChangeRequest changeRequest) { + return node -> changeRequest.getImpactedHosts().contains(node.hostname().value()) || + node.switchHostname() + .map(switchHostname -> changeRequest.getImpactedSwitches().contains(switchHostname)) + .orElse(false); + } + private Predicate<VespaChangeRequest> shouldUpdate() { + return changeRequest -> changeRequest.getStatus() != Status.COMPLETED && + List.of(Impact.HIGH, Impact.VERY_HIGH) + .contains(changeRequest.getImpact()); + } + + private boolean hasSpareCapacity(ZoneId zoneId, List<Node> nodes) { + var tenantHosts = nodes.stream() + 
.filter(node -> node.type() == NodeType.host) + .map(Node::hostname) + .collect(Collectors.toList()); + + return tenantHosts.isEmpty() || + nodeRepository.isReplaceable(zoneId, tenantHosts); + } + + private void setWantToRetire(ZoneId zoneId, Node node, boolean wantToRetire) { + var newNode = new NodeRepositoryNode(); + newNode.setWantToRetire(wantToRetire); + nodeRepository.patchNode(zoneId, node.hostname().value(), newNode); + } +} diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/metric/CostCalculator.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/metric/CostCalculator.java index 5d92166d758..b6468464a0b 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/metric/CostCalculator.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/metric/CostCalculator.java @@ -48,7 +48,7 @@ public class CostCalculator { Map<Property, ResourceAllocation> allocationByProperty = new HashMap<>(); var nodes = controller.zoneRegistry().zones() .reachable().in(Environment.prod).ofCloud(cloudName).zones().stream() - .flatMap(zone -> uncheck(() -> nodeRepository.list(zone.getId()).stream())) + .flatMap(zone -> uncheck(() -> nodeRepository.list(zone.getId(), false).stream())) .filter(node -> node.owner().isPresent() && !node.owner().get().tenant().equals(SystemApplication.TENANT)) .collect(Collectors.toList()); var totalAllocation = ResourceAllocation.ZERO; diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/Notification.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/Notification.java new file mode 100644 index 00000000000..299ef3ef50d --- /dev/null +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/Notification.java @@ -0,0 +1,74 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.controller.notification; + +import java.time.Instant; +import java.util.List; +import java.util.Objects; + +/** + * @author freva + */ +public class Notification { + private final Instant at; + private final Type type; + private final NotificationSource source; + private final List<String> messages; + + public Notification(Instant at, Type type, NotificationSource source, List<String> messages) { + this.at = Objects.requireNonNull(at, "at cannot be null"); + this.type = Objects.requireNonNull(type, "type cannot be null"); + this.source = Objects.requireNonNull(source, "source cannot be null"); + this.messages = List.copyOf(Objects.requireNonNull(messages, "messages cannot be null")); + if (messages.size() < 1) throw new IllegalArgumentException("messages cannot be empty"); + } + + public Instant at() { return at; } + public Type type() { return type; } + public NotificationSource source() { return source; } + public List<String> messages() { return messages; } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Notification that = (Notification) o; + return at.equals(that.at) && type == that.type && source.equals(that.source) && messages.equals(that.messages); + } + + @Override + public int hashCode() { + return Objects.hash(at, type, source, messages); + } + + @Override + public String toString() { + return "Notification{" + + "at=" + at + + ", type=" + type + + ", source=" + source + + ", messages=" + messages + + '}'; + } + + public enum Level { + warning, error; + } + + public enum Type { + /** Warnings about usage of deprecated features in application package */ + APPLICATION_PACKAGE_WARNING(Level.warning), + + /** Failure to deploy application package */ + DEPLOYMENT_FAILURE(Level.error); + + private final Level level; + Type(Level level) { + this.level = level; + } + + public Level level() { + return level; + } + } + +} diff --git 
a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/NotificationSource.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/NotificationSource.java new file mode 100644 index 00000000000..827b5a71eb1 --- /dev/null +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/NotificationSource.java @@ -0,0 +1,152 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.controller.notification; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ApplicationName; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.Environment; +import com.yahoo.config.provision.InstanceName; +import com.yahoo.config.provision.TenantName; +import com.yahoo.config.provision.zone.ZoneId; +import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId; +import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId; + +import java.util.Objects; +import java.util.Optional; +import java.util.OptionalLong; + +/** + * Denotes the source of the notification. 
+ * + * @author freva + */ +public class NotificationSource { + private final TenantName tenant; + private final Optional<ApplicationName> application; + private final Optional<InstanceName> instance; + private final Optional<ZoneId> zoneId; + private final Optional<ClusterSpec.Id> clusterId; + private final Optional<JobType> jobType; + private final OptionalLong runNumber; + + public NotificationSource(TenantName tenant, Optional<ApplicationName> application, Optional<InstanceName> instance, + Optional<ZoneId> zoneId, Optional<ClusterSpec.Id> clusterId, Optional<JobType> jobType, OptionalLong runNumber) { + this.tenant = Objects.requireNonNull(tenant, "tenant cannot be null"); + this.application = Objects.requireNonNull(application, "application cannot be null"); + this.instance = Objects.requireNonNull(instance, "instance cannot be null"); + this.zoneId = Objects.requireNonNull(zoneId, "zoneId cannot be null"); + this.clusterId = Objects.requireNonNull(clusterId, "clusterId cannot be null"); + this.jobType = Objects.requireNonNull(jobType, "jobType cannot be null"); + this.runNumber = Objects.requireNonNull(runNumber, "runNumber cannot be null"); + + if (instance.isPresent() && application.isEmpty()) + throw new IllegalArgumentException("Application name must be present with instance name"); + if (zoneId.isPresent() && instance.isEmpty()) + throw new IllegalArgumentException("Instance name must be present with zone ID"); + if (clusterId.isPresent() && zoneId.isEmpty()) + throw new IllegalArgumentException("Zone ID must be present with cluster ID"); + if (clusterId.isPresent() && jobType.isPresent()) + throw new IllegalArgumentException("Cannot set both cluster ID and job type"); + if (jobType.isPresent() && instance.isEmpty()) + throw new IllegalArgumentException("Instance name must be present with job type"); + if (jobType.isPresent() != runNumber.isPresent()) + throw new IllegalArgumentException(String.format("Run number (%s) must be 1-to-1 with job type (%s)", 
+ runNumber.isPresent() ? "present" : "missing", jobType.map(i -> "present").orElse("missing"))); + } + + + public TenantName tenant() { return tenant; } + public Optional<ApplicationName> application() { return application; } + public Optional<InstanceName> instance() { return instance; } + public Optional<ZoneId> zoneId() { return zoneId; } + public Optional<ClusterSpec.Id> clusterId() { return clusterId; } + public Optional<JobType> jobType() { return jobType; } + public OptionalLong runNumber() { return runNumber; } + + /** + * Returns true iff this source contains the given source. A source contains the other source if + * all the set fields in this source are equal to the given source, while the fields not set + * in this source are ignored. + */ + public boolean contains(NotificationSource other) { + return tenant.equals(other.tenant) && + (application.isEmpty() || application.equals(other.application)) && + (instance.isEmpty() || instance.equals(other.instance)) && + (zoneId.isEmpty() || zoneId.equals(other.zoneId)) && + (clusterId.isEmpty() || clusterId.equals(other.clusterId)) && + (jobType.isEmpty() || jobType.equals(other.jobType)); // Do not consider run number (it's unique!) + } + + /** + * Returns whether this source from a production deployment or deployment related to prod deployment (e.g. to + * staging zone), or if this is at tenant or application level + */ + public boolean isProduction() { + if (instance.isEmpty()) return true; + return ! 
zoneId.map(ZoneId::environment) + .or(() -> jobType.map(JobType::environment)) + .map(Environment::isManuallyDeployed) + .orElse(true); // Assume that notification with full application ID concern dev deployments + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + NotificationSource that = (NotificationSource) o; + return tenant.equals(that.tenant) && application.equals(that.application) && instance.equals(that.instance) && + zoneId.equals(that.zoneId) && clusterId.equals(that.clusterId) && jobType.equals(that.jobType); // Do not consider run number (it's unique!) + } + + @Override + public int hashCode() { + return Objects.hash(tenant, application, instance, zoneId, clusterId, jobType, runNumber); + } + + @Override + public String toString() { + return "NotificationSource{" + + "tenant=" + tenant + + application.map(application -> ", application=" + application.value()).orElse("") + + instance.map(instance -> ", instance=" + instance.value()).orElse("") + + zoneId.map(zoneId -> ", zone=" + zoneId.value()).orElse("") + + clusterId.map(clusterId -> ", clusterId=" + clusterId.value()).orElse("") + + jobType.map(jobType -> ", job=" + jobType.jobName() + "#" + runNumber.getAsLong()).orElse("") + + '}'; + } + + private static NotificationSource from(TenantName tenant, ApplicationName application, InstanceName instance, ZoneId zoneId, + ClusterSpec.Id clusterId, JobType jobType, Long runNumber) { + return new NotificationSource(tenant, Optional.ofNullable(application), Optional.ofNullable(instance), Optional.ofNullable(zoneId), + Optional.ofNullable(clusterId), Optional.ofNullable(jobType), runNumber == null ? 
OptionalLong.empty() : OptionalLong.of(runNumber)); + } + + public static NotificationSource from(TenantName tenantName) { + return from(tenantName, null, null, null, null, null, null); + } + + public static NotificationSource from(TenantAndApplicationId id) { + return from(id.tenant(), id.application(), null, null, null, null, null); + } + + public static NotificationSource from(ApplicationId app) { + return from(app.tenant(), app.application(), app.instance(), null, null, null, null); + } + + public static NotificationSource from(DeploymentId deploymentId) { + ApplicationId app = deploymentId.applicationId(); + return from(app.tenant(), app.application(), app.instance(), deploymentId.zoneId(), null, null, null); + } + + public static NotificationSource from(DeploymentId deploymentId, ClusterSpec.Id clusterId) { + ApplicationId app = deploymentId.applicationId(); + return from(app.tenant(), app.application(), app.instance(), deploymentId.zoneId(), clusterId, null, null); + } + + public static NotificationSource from(RunId runId) { + ApplicationId app = runId.application(); + return from(app.tenant(), app.application(), app.instance(), null, null, runId.job().type(), runId.number()); + } +} diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/NotificationsDb.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/NotificationsDb.java new file mode 100644 index 00000000000..950dddfc056 --- /dev/null +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/notification/NotificationsDb.java @@ -0,0 +1,84 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.controller.notification; + +import com.yahoo.vespa.curator.Lock; +import com.yahoo.vespa.hosted.controller.Controller; +import com.yahoo.vespa.hosted.controller.persistence.CuratorDb; + +import java.time.Clock; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +/** + * Adds, updates and removes tenant notifications in ZK + * + * @author freva + */ +public class NotificationsDb { + + private final Clock clock; + private final CuratorDb curatorDb; + + public NotificationsDb(Controller controller) { + this(controller.clock(), controller.curator()); + } + + NotificationsDb(Clock clock, CuratorDb curatorDb) { + this.clock = clock; + this.curatorDb = curatorDb; + } + + public List<Notification> listNotifications(NotificationSource source, boolean productionOnly) { + return curatorDb.readNotifications(source.tenant()).stream() + .filter(notification -> source.contains(notification.source()) && (!productionOnly || notification.source().isProduction())) + .collect(Collectors.toUnmodifiableList()); + } + + public void setNotification(NotificationSource source, Notification.Type type, String message) { + setNotification(source, type, List.of(message)); + } + + /** + * Add a notification with given source and type. 
If a notification with same source and type + * already exists, it'll be replaced by this one instead + */ + public void setNotification(NotificationSource source, Notification.Type type, List<String> messages) { + try (Lock lock = curatorDb.lockNotifications(source.tenant())) { + List<Notification> notifications = curatorDb.readNotifications(source.tenant()).stream() + .filter(notification -> !source.equals(notification.source()) || type != notification.type()) + .collect(Collectors.toCollection(ArrayList::new)); + notifications.add(new Notification(clock.instant(), type, source, messages)); + curatorDb.writeNotifications(source.tenant(), notifications); + } + } + + /** Remove the notification with the given source and type */ + public void removeNotification(NotificationSource source, Notification.Type type) { + try (Lock lock = curatorDb.lockNotifications(source.tenant())) { + List<Notification> initial = curatorDb.readNotifications(source.tenant()); + List<Notification> filtered = initial.stream() + .filter(notification -> !source.equals(notification.source()) || type != notification.type()) + .collect(Collectors.toUnmodifiableList()); + if (initial.size() > filtered.size()) + curatorDb.writeNotifications(source.tenant(), filtered); + } + } + + /** Remove all notifications for this source or sources contained by this source */ + public void removeNotifications(NotificationSource source) { + try (Lock lock = curatorDb.lockNotifications(source.tenant())) { + if (source.application().isEmpty()) { // Source is tenant + curatorDb.deleteNotifications(source.tenant()); + return; + } + + List<Notification> initial = curatorDb.readNotifications(source.tenant()); + List<Notification> filtered = initial.stream() + .filter(notification -> !source.contains(notification.source())) + .collect(Collectors.toUnmodifiableList()); + if (initial.size() > filtered.size()) + curatorDb.writeNotifications(source.tenant(), filtered); + } + } +} diff --git 
a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ChangeRequestSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ChangeRequestSerializer.java new file mode 100644 index 00000000000..407eb5ad5ab --- /dev/null +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/ChangeRequestSerializer.java @@ -0,0 +1,150 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.controller.persistence; + +import com.yahoo.config.provision.zone.ZoneId; +import com.yahoo.slime.ArrayTraverser; +import com.yahoo.slime.Cursor; +import com.yahoo.slime.Inspector; +import com.yahoo.slime.Slime; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.ChangeRequest; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.ChangeRequestSource; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.HostAction; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.VespaChangeRequest; + +import java.time.Instant; +import java.time.ZonedDateTime; +import java.util.ArrayList; +import java.util.List; + +/** + * @author olaa + */ +public class ChangeRequestSerializer { + + // WARNING: Since there are multiple servers in a ZooKeeper cluster and they upgrade one by one + // (and rewrite all nodes on startup), changes to the serialized format must be made + // such that what is serialized on version N+1 can be read by version N: + // - ADDING FIELDS: Always ok + // - REMOVING FIELDS: Stop reading the field first. Stop writing it on a later version. + // - CHANGING THE FORMAT OF A FIELD: Don't do it bro. 
+ + private static final String ID_FIELD = "id"; + private static final String SOURCE_FIELD = "source"; + private static final String SOURCE_SYSTEM_FIELD = "system"; + private static final String STATUS_FIELD = "status"; + private static final String URL_FIELD = "url"; + private static final String ZONE_FIELD = "zoneId"; + private static final String START_TIME_FIELD = "plannedStartTime"; + private static final String END_TIME_FIELD = "plannedEndTime"; + private static final String APPROVAL_FIELD = "approval"; + private static final String IMPACT_FIELD = "impact"; + private static final String IMPACTED_HOSTS_FIELD = "impactedHosts"; + private static final String IMPACTED_SWITCHES_FIELD = "impactedSwitches"; + private static final String ACTION_PLAN_FIELD = "actionPlan"; + private static final String HOST_FIELD = "hostname"; + private static final String ACTION_STATE_FIELD = "state"; + private static final String LAST_UPDATED_FIELD = "lastUpdated"; + private static final String HOSTS_FIELD = "hosts"; + + + public static VespaChangeRequest fromSlime(Slime slime) { + var inspector = slime.get(); + var id = inspector.field(ID_FIELD).asString(); + var zoneId = ZoneId.from(inspector.field(ZONE_FIELD).asString()); + var changeRequestSource = readChangeRequestSource(inspector.field(SOURCE_FIELD)); + var actionPlan = readHostActionPlan(inspector.field(ACTION_PLAN_FIELD)); + var status = VespaChangeRequest.Status.valueOf(inspector.field(STATUS_FIELD).asString()); + var impact = ChangeRequest.Impact.valueOf(inspector.field(IMPACT_FIELD).asString()); + var approval = ChangeRequest.Approval.valueOf(inspector.field(APPROVAL_FIELD).asString()); + + var impactedHosts = new ArrayList<String>(); + inspector.field(IMPACTED_HOSTS_FIELD) + .traverse((ArrayTraverser) (i, hostname) -> impactedHosts.add(hostname.asString())); + var impactedSwitches = new ArrayList<String>(); + inspector.field(IMPACTED_SWITCHES_FIELD) + .traverse((ArrayTraverser) (i, switchName) -> 
impactedSwitches.add(switchName.asString())); + + return new VespaChangeRequest( + id, + changeRequestSource, + impactedSwitches, + impactedHosts, + approval, + impact, + status, + actionPlan, + zoneId); + } + + public static Slime toSlime(VespaChangeRequest changeRequest) { + var slime = new Slime(); + writeChangeRequest(slime.setObject(), changeRequest); + return slime; + } + + public static void writeChangeRequest(Cursor cursor, VespaChangeRequest changeRequest) { + cursor.setString(ID_FIELD, changeRequest.getId()); + cursor.setString(STATUS_FIELD, changeRequest.getStatus().name()); + cursor.setString(IMPACT_FIELD, changeRequest.getImpact().name()); + cursor.setString(APPROVAL_FIELD, changeRequest.getApproval().name()); + cursor.setString(ZONE_FIELD, changeRequest.getZoneId().value()); + writeChangeRequestSource(cursor.setObject(SOURCE_FIELD), changeRequest.getChangeRequestSource()); + writeActionPlan(cursor.setObject(ACTION_PLAN_FIELD), changeRequest); + + var impactedHosts = cursor.setArray(IMPACTED_HOSTS_FIELD); + changeRequest.getImpactedHosts().forEach(impactedHosts::addString); + var impactedSwitches = cursor.setArray(IMPACTED_SWITCHES_FIELD); + changeRequest.getImpactedSwitches().forEach(impactedSwitches::addString); + } + + private static void writeActionPlan(Cursor cursor, VespaChangeRequest changeRequest) { + var hostsCursor = cursor.setArray(HOSTS_FIELD); + + changeRequest.getHostActionPlan().forEach(action -> { + var actionCursor = hostsCursor.addObject(); + actionCursor.setString(HOST_FIELD, action.getHostname()); + actionCursor.setString(ACTION_STATE_FIELD, action.getState().name()); + actionCursor.setString(LAST_UPDATED_FIELD, action.getLastUpdated().toString()); + }); + + // TODO: Add action plan per application + } + + private static void writeChangeRequestSource(Cursor cursor, ChangeRequestSource source) { + cursor.setString(SOURCE_SYSTEM_FIELD, source.getSystem()); + cursor.setString(ID_FIELD, source.getId()); + cursor.setString(URL_FIELD, 
source.getUrl()); + cursor.setString(START_TIME_FIELD, source.getPlannedStartTime().toString()); + cursor.setString(END_TIME_FIELD, source.getPlannedEndTime().toString()); + cursor.setString(STATUS_FIELD, source.getStatus().name()); + } + + private static ChangeRequestSource readChangeRequestSource(Inspector inspector) { + return new ChangeRequestSource( + inspector.field(SOURCE_SYSTEM_FIELD).asString(), + inspector.field(ID_FIELD).asString(), + inspector.field(URL_FIELD).asString(), + ChangeRequestSource.Status.valueOf(inspector.field(STATUS_FIELD).asString()), + ZonedDateTime.parse(inspector.field(START_TIME_FIELD).asString()), + ZonedDateTime.parse(inspector.field(END_TIME_FIELD).asString()) + ); + } + + private static List<HostAction> readHostActionPlan(Inspector inspector) { + if (!inspector.valid()) + return List.of(); + + var actionPlan = new ArrayList<HostAction>(); + inspector.field(HOSTS_FIELD).traverse((ArrayTraverser) (index, hostObject) -> + actionPlan.add( + new HostAction( + hostObject.field(HOST_FIELD).asString(), + HostAction.State.valueOf(hostObject.field(ACTION_STATE_FIELD).asString()), + Instant.parse(hostObject.field(LAST_UPDATED_FIELD).asString()) + ) + ) + ); + return actionPlan; + } + +} diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java index 34741bcaedf..3d6cb45aeb1 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/CuratorDb.java @@ -24,6 +24,8 @@ import com.yahoo.vespa.hosted.controller.auditlog.AuditLog; import com.yahoo.vespa.hosted.controller.deployment.Run; import com.yahoo.vespa.hosted.controller.deployment.Step; import com.yahoo.vespa.hosted.controller.dns.NameServiceQueue; +import 
com.yahoo.vespa.hosted.controller.api.integration.vcmr.VespaChangeRequest; +import com.yahoo.vespa.hosted.controller.notification.Notification; import com.yahoo.vespa.hosted.controller.routing.GlobalRouting; import com.yahoo.vespa.hosted.controller.routing.RoutingPolicy; import com.yahoo.vespa.hosted.controller.routing.RoutingPolicyId; @@ -39,7 +41,6 @@ import java.io.IOException; import java.io.UncheckedIOException; import java.nio.ByteBuffer; import java.time.Duration; -import java.time.Instant; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -54,7 +55,6 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeoutException; import java.util.function.Function; import java.util.function.Predicate; -import java.util.function.Supplier; import java.util.logging.Level; import java.util.logging.Logger; import java.util.stream.Collectors; @@ -89,6 +89,8 @@ public class CuratorDb { private static final Path zoneRoutingPoliciesRoot = root.append("zoneRoutingPolicies"); private static final Path endpointCertificateRoot = root.append("applicationCertificates"); private static final Path archiveBucketsRoot = root.append("archiveBuckets"); + private static final Path changeRequestsRoot = root.append("changeRequests"); + private static final Path notificationsRoot = root.append("notifications"); private final NodeVersionSerializer nodeVersionSerializer = new NodeVersionSerializer(); private final VersionStatusSerializer versionStatusSerializer = new VersionStatusSerializer(nodeVersionSerializer); @@ -204,6 +206,14 @@ public class CuratorDb { return curator.lock(lockRoot.append("archiveBuckets").append(zoneId.value()), defaultLockTimeout); } + public Lock lockChangeRequests() { + return curator.lock(lockRoot.append("changeRequests"), defaultLockTimeout); + } + + public Lock lockNotifications(TenantName tenantName) { + return curator.lock(lockRoot.append("notifications").append(tenantName.value()), 
defaultLockTimeout); + } + // -------------- Helpers ------------------------------------------ /** Try locking with a low timeout, meaning it is OK to fail lock acquisition. @@ -563,6 +573,43 @@ public class CuratorDb { curator.set(archiveBucketsPath(zoneid), asJson(ArchiveBucketsSerializer.toSlime(archiveBuckets))); } + // -------------- VCMRs --------------------------------------------------- + + public Optional<VespaChangeRequest> readChangeRequest(String changeRequestId) { + return readSlime(changeRequestPath(changeRequestId)).map(ChangeRequestSerializer::fromSlime); + } + + public List<VespaChangeRequest> readChangeRequests() { + return curator.getChildren(changeRequestsRoot) + .stream() + .map(this::readChangeRequest) + .flatMap(Optional::stream) + .collect(Collectors.toList()); + } + + public void writeChangeRequest(VespaChangeRequest changeRequest) { + curator.set(changeRequestPath(changeRequest.getId()), asJson(ChangeRequestSerializer.toSlime(changeRequest))); + } + + public void deleteChangeRequest(VespaChangeRequest changeRequest) { + curator.delete(changeRequestPath(changeRequest.getId())); + } + + // -------------- Notifications --------------------------------------------------- + + public List<Notification> readNotifications(TenantName tenantName) { + return readSlime(notificationsPath(tenantName)) + .map(slime -> NotificationsSerializer.fromSlime(tenantName, slime)).orElseGet(List::of); + } + + public void writeNotifications(TenantName tenantName, List<Notification> notifications) { + curator.set(notificationsPath(tenantName), asJson(NotificationsSerializer.toSlime(notifications))); + } + + public void deleteNotifications(TenantName tenantName) { + curator.delete(notificationsPath(tenantName)); + } + // -------------- Paths --------------------------------------------------- private Path lockPath(TenantName tenant) { @@ -688,4 +735,12 @@ public class CuratorDb { return archiveBucketsRoot.append(zoneId.value()); } + private static Path 
changeRequestPath(String id) { + return changeRequestsRoot.append(id); + } + + private static Path notificationsPath(TenantName tenantName) { + return notificationsRoot.append(tenantName.value()); + } + } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/NotificationsSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/NotificationsSerializer.java new file mode 100644 index 00000000000..dcb485b9016 --- /dev/null +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/NotificationsSerializer.java @@ -0,0 +1,104 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.controller.persistence; + +import com.yahoo.config.provision.ApplicationName; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.InstanceName; +import com.yahoo.config.provision.TenantName; +import com.yahoo.config.provision.zone.ZoneId; +import com.yahoo.slime.Cursor; +import com.yahoo.slime.Inspector; +import com.yahoo.slime.Slime; +import com.yahoo.slime.SlimeUtils; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; +import com.yahoo.vespa.hosted.controller.notification.Notification; +import com.yahoo.vespa.hosted.controller.notification.NotificationSource; + +import java.util.List; +import java.util.function.Function; +import java.util.stream.Collectors; + +/** + * (de)serializes notifications for a tenant + * + * @author freva + */ +public class NotificationsSerializer { + + // WARNING: Since there are multiple servers in a ZooKeeper cluster and they upgrade one by one + // (and rewrite all nodes on startup), changes to the serialized format must be made + // such that what is serialized on version N+1 can be read by version N: + // - ADDING FIELDS: Always ok + // - REMOVING FIELDS: Stop reading the field first. 
Stop writing it on a later version. + // - CHANGING THE FORMAT OF A FIELD: Don't do it bro. + + private static final String notificationsFieldName = "notifications"; + private static final String atFieldName = "at"; + private static final String typeField = "type"; + private static final String messagesField = "messages"; + private static final String applicationField = "application"; + private static final String instanceField = "instance"; + private static final String zoneField = "zone"; + private static final String clusterIdField = "clusterId"; + private static final String jobTypeField = "jobId"; + private static final String runNumberField = "runNumber"; + + public static Slime toSlime(List<Notification> notifications) { + Slime slime = new Slime(); + Cursor notificationsArray = slime.setObject().setArray(notificationsFieldName); + + for (Notification notification : notifications) { + Cursor notificationObject = notificationsArray.addObject(); + notificationObject.setLong(atFieldName, notification.at().toEpochMilli()); + notificationObject.setString(typeField, asString(notification.type())); + Cursor messagesArray = notificationObject.setArray(messagesField); + notification.messages().forEach(messagesArray::addString); + + notification.source().application().ifPresent(application -> notificationObject.setString(applicationField, application.value())); + notification.source().instance().ifPresent(instance -> notificationObject.setString(instanceField, instance.value())); + notification.source().zoneId().ifPresent(zoneId -> notificationObject.setString(zoneField, zoneId.value())); + notification.source().clusterId().ifPresent(clusterId -> notificationObject.setString(clusterIdField, clusterId.value())); + notification.source().jobType().ifPresent(jobType -> notificationObject.setString(jobTypeField, jobType.jobName())); + notification.source().runNumber().ifPresent(runNumber -> notificationObject.setLong(runNumberField, runNumber)); + } + + return slime; + } + 
+ public static List<Notification> fromSlime(TenantName tenantName, Slime slime) { + return SlimeUtils.entriesStream(slime.get().field(notificationsFieldName)) + .map(inspector -> fromInspector(tenantName, inspector)) + .collect(Collectors.toUnmodifiableList()); + } + + private static Notification fromInspector(TenantName tenantName, Inspector inspector) { + return new Notification( + Serializers.instant(inspector.field(atFieldName)), + typeFrom(inspector.field(typeField)), + new NotificationSource( + tenantName, + Serializers.optionalString(inspector.field(applicationField)).map(ApplicationName::from), + Serializers.optionalString(inspector.field(instanceField)).map(InstanceName::from), + Serializers.optionalString(inspector.field(zoneField)).map(ZoneId::from), + Serializers.optionalString(inspector.field(clusterIdField)).map(ClusterSpec.Id::from), + Serializers.optionalString(inspector.field(jobTypeField)).map(JobType::fromJobName), + Serializers.optionalLong(inspector.field(runNumberField))), + SlimeUtils.entriesStream(inspector.field(messagesField)).map(Inspector::asString).collect(Collectors.toUnmodifiableList())); + } + + private static String asString(Notification.Type type) { + switch (type) { + case APPLICATION_PACKAGE_WARNING: return "APPLICATION_PACKAGE_WARNING"; + case DEPLOYMENT_FAILURE: return "DEPLOYMENT_FAILURE"; + default: throw new IllegalArgumentException("No serialization defined for notification type " + type); + } + } + + private static Notification.Type typeFrom(Inspector field) { + switch (field.asString()) { + case "APPLICATION_PACKAGE_WARNING": return Notification.Type.APPLICATION_PACKAGE_WARNING; + case "DEPLOYMENT_FAILURE": return Notification.Type.DEPLOYMENT_FAILURE; + default: throw new IllegalArgumentException("Unknown serialized notification type value '" + field.asString() + "'"); + } + } +} diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java 
b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java index ffb5e040517..994dc877182 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiHandler.java @@ -80,6 +80,7 @@ import com.yahoo.vespa.hosted.controller.application.EndpointList; import com.yahoo.vespa.hosted.controller.application.QuotaUsage; import com.yahoo.vespa.hosted.controller.application.SystemApplication; import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId; +import com.yahoo.vespa.hosted.controller.auditlog.AuditLoggingRequestHandler; import com.yahoo.vespa.hosted.controller.deployment.DeploymentStatus; import com.yahoo.vespa.hosted.controller.deployment.DeploymentSteps; import com.yahoo.vespa.hosted.controller.deployment.DeploymentTrigger; @@ -87,6 +88,8 @@ import com.yahoo.vespa.hosted.controller.deployment.DeploymentTrigger.ChangesToC import com.yahoo.vespa.hosted.controller.deployment.JobStatus; import com.yahoo.vespa.hosted.controller.deployment.Run; import com.yahoo.vespa.hosted.controller.deployment.TestConfigSerializer; +import com.yahoo.vespa.hosted.controller.notification.Notification; +import com.yahoo.vespa.hosted.controller.notification.NotificationSource; import com.yahoo.vespa.hosted.controller.rotation.RotationId; import com.yahoo.vespa.hosted.controller.rotation.RotationState; import com.yahoo.vespa.hosted.controller.rotation.RotationStatus; @@ -136,8 +139,6 @@ import java.util.stream.Stream; import static com.yahoo.jdisc.Response.Status.BAD_REQUEST; import static com.yahoo.jdisc.Response.Status.CONFLICT; -import static com.yahoo.jdisc.Response.Status.INTERNAL_SERVER_ERROR; -import static com.yahoo.jdisc.Response.Status.NOT_FOUND; import static java.util.Map.Entry.comparingByKey; import static 
java.util.stream.Collectors.joining; import static java.util.stream.Collectors.toList; @@ -151,7 +152,7 @@ import static java.util.stream.Collectors.toUnmodifiableList; * @author mpolden */ @SuppressWarnings("unused") // created by injection -public class ApplicationApiHandler extends LoggingRequestHandler { +public class ApplicationApiHandler extends AuditLoggingRequestHandler { private static final ObjectMapper jsonMapper = new ObjectMapper(); @@ -163,7 +164,7 @@ public class ApplicationApiHandler extends LoggingRequestHandler { public ApplicationApiHandler(LoggingRequestHandler.Context parentCtx, Controller controller, AccessControlRequests accessControlRequests) { - super(parentCtx); + super(parentCtx, controller.auditLogger()); this.controller = controller; this.accessControlRequests = accessControlRequests; this.testConfigSerializer = new TestConfigSerializer(controller.system()); @@ -175,7 +176,7 @@ public class ApplicationApiHandler extends LoggingRequestHandler { } @Override - public HttpResponse handle(HttpRequest request) { + public HttpResponse auditAndHandle(HttpRequest request) { try { Path path = new Path(request.getUri()); switch (request.getMethod()) { @@ -201,15 +202,15 @@ public class ApplicationApiHandler extends LoggingRequestHandler { return ErrorResponse.badRequest(Exceptions.toMessageString(e)); } catch (ConfigServerException e) { - switch (e.getErrorCode()) { + switch (e.code()) { case NOT_FOUND: - return new ErrorResponse(NOT_FOUND, e.getErrorCode().name(), Exceptions.toMessageString(e)); + return ErrorResponse.notFoundError(Exceptions.toMessageString(e)); case ACTIVATION_CONFLICT: - return new ErrorResponse(CONFLICT, e.getErrorCode().name(), Exceptions.toMessageString(e)); + return new ErrorResponse(CONFLICT, e.code().name(), Exceptions.toMessageString(e)); case INTERNAL_SERVER_ERROR: - return new ErrorResponse(INTERNAL_SERVER_ERROR, e.getErrorCode().name(), Exceptions.toMessageString(e)); + return 
ErrorResponse.internalServerError(Exceptions.toMessageString(e)); default: - return new ErrorResponse(BAD_REQUEST, e.getErrorCode().name(), Exceptions.toMessageString(e)); + return new ErrorResponse(BAD_REQUEST, e.code().name(), Exceptions.toMessageString(e)); } } catch (RuntimeException e) { @@ -223,6 +224,7 @@ public class ApplicationApiHandler extends LoggingRequestHandler { if (path.matches("/application/v4/tenant")) return tenants(request); if (path.matches("/application/v4/tenant/{tenant}")) return tenant(path.get("tenant"), request); if (path.matches("/application/v4/tenant/{tenant}/info")) return tenantInfo(path.get("tenant"), request); + if (path.matches("/application/v4/tenant/{tenant}/notifications")) return notifications(path.get("tenant"), request); if (path.matches("/application/v4/tenant/{tenant}/secret-store/{name}/validate")) return validateSecretStore(path.get("tenant"), path.get("name"), request); if (path.matches("/application/v4/tenant/{tenant}/application")) return applications(path.get("tenant"), Optional.empty(), request); if (path.matches("/application/v4/tenant/{tenant}/application/{application}")) return application(path.get("tenant"), path.get("application"), request); @@ -480,6 +482,53 @@ public class ApplicationApiHandler extends LoggingRequestHandler { .withAddress(updateTenantInfoAddress(insp.field("address"), oldContact.address())); } + private HttpResponse notifications(String tenantName, HttpRequest request) { + NotificationSource notificationSource = new NotificationSource(TenantName.from(tenantName), + Optional.ofNullable(request.getProperty("application")).map(ApplicationName::from), + Optional.ofNullable(request.getProperty("instance")).map(InstanceName::from), + Optional.empty(), Optional.empty(), Optional.empty(), OptionalLong.empty()); + + Slime slime = new Slime(); + Cursor notificationsArray = slime.setObject().setArray("notifications"); + controller.notificationsDb().listNotifications(notificationSource, 
showOnlyProductionInstances(request)) + .forEach(notification -> toSlime(notificationsArray.addObject(), notification)); + return new SlimeJsonResponse(slime); + } + + private static void toSlime(Cursor cursor, Notification notification) { + cursor.setLong("at", notification.at().toEpochMilli()); + cursor.setString("level", notificatioLevelAsString(notification.type().level())); + cursor.setString("type", notificationTypeAsString(notification.type())); + Cursor messagesArray = cursor.setArray("messages"); + notification.messages().forEach(messagesArray::addString); + + notification.source().application().ifPresent(application -> cursor.setString("application", application.value())); + notification.source().instance().ifPresent(instance -> cursor.setString("instance", instance.value())); + notification.source().zoneId().ifPresent(zoneId -> { + cursor.setString("environment", zoneId.environment().value()); + cursor.setString("region", zoneId.region().value()); + }); + notification.source().clusterId().ifPresent(clusterId -> cursor.setString("clusterId", clusterId.value())); + notification.source().jobType().ifPresent(jobType -> cursor.setString("jobName", jobType.jobName())); + notification.source().runNumber().ifPresent(runNumber -> cursor.setLong("runNumber", runNumber)); + } + + private static String notificationTypeAsString(Notification.Type type) { + switch (type) { + case APPLICATION_PACKAGE_WARNING: return "APPLICATION_PACKAGE_WARNING"; + case DEPLOYMENT_FAILURE: return "DEPLOYMENT_FAILURE"; + default: throw new IllegalArgumentException("No serialization defined for notification type " + type); + } + } + + private static String notificatioLevelAsString(Notification.Level level) { + switch (level) { + case warning: return "warning"; + case error: return "error"; + default: throw new IllegalArgumentException("No serialization defined for notification level " + level); + } + } + private HttpResponse applications(String tenantName, Optional<String> 
applicationName, HttpRequest request) { TenantName tenant = TenantName.from(tenantName); if (controller.tenants().get(tenantName).isEmpty()) @@ -685,10 +734,10 @@ public class ApplicationApiHandler extends LoggingRequestHandler { var tenantSecretStore = new TenantSecretStore(name, awsId, role); if (!tenantSecretStore.isValid()) { - return ErrorResponse.badRequest(String.format("Secret store " + tenantSecretStore + " is invalid")); + return ErrorResponse.badRequest("Secret store " + tenantSecretStore + " is invalid"); } if (tenant.tenantSecretStores().contains(tenantSecretStore)) { - return ErrorResponse.badRequest(String.format("Secret store " + tenantSecretStore + " is already configured")); + return ErrorResponse.badRequest("Secret store " + tenantSecretStore + " is already configured"); } controller.serviceRegistry().roleService().createTenantPolicy(TenantName.from(tenantName), name, awsId, role); @@ -1631,7 +1680,6 @@ public class ApplicationApiHandler extends LoggingRequestHandler { /** Trigger deployment of the given Vespa version if a valid one is given, e.g., "7.8.9". */ private HttpResponse deployPlatform(String tenantName, String applicationName, String instanceName, boolean pin, HttpRequest request) { - request = controller.auditLogger().log(request); String versionString = readToString(request.getData()); ApplicationId id = ApplicationId.from(tenantName, applicationName, instanceName); StringBuilder response = new StringBuilder(); @@ -1660,7 +1708,6 @@ public class ApplicationApiHandler extends LoggingRequestHandler { /** Trigger deployment to the last known application package for the given application. 
*/ private HttpResponse deployApplication(String tenantName, String applicationName, String instanceName, HttpRequest request) { - controller.auditLogger().log(request); ApplicationId id = ApplicationId.from(tenantName, applicationName, instanceName); StringBuilder response = new StringBuilder(); controller.applications().lockApplicationOrThrow(TenantAndApplicationId.from(id), application -> { @@ -2050,6 +2097,7 @@ public class ApplicationApiHandler extends LoggingRequestHandler { toSlime(scalingEvent.from(), scalingEventObject.setObject("from")); toSlime(scalingEvent.to(), scalingEventObject.setObject("to")); scalingEventObject.setLong("at", scalingEvent.at().toEpochMilli()); + scalingEvent.completion().ifPresent(completion -> scalingEventObject.setLong("completion", completion.toEpochMilli())); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/changemanagement/ChangeManagementApiHandler.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/changemanagement/ChangeManagementApiHandler.java index 2077278ee0c..5973cc3fcf3 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/changemanagement/ChangeManagementApiHandler.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/changemanagement/ChangeManagementApiHandler.java @@ -20,6 +20,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node; import com.yahoo.vespa.hosted.controller.api.integration.vcmr.ChangeRequest; import com.yahoo.vespa.hosted.controller.auditlog.AuditLoggingRequestHandler; import com.yahoo.vespa.hosted.controller.maintenance.ChangeManagementAssessor; +import com.yahoo.vespa.hosted.controller.persistence.ChangeRequestSerializer; import com.yahoo.yolean.Exceptions; import javax.ws.rs.BadRequestException; @@ -63,6 +64,7 @@ public class ChangeManagementApiHandler extends AuditLoggingRequestHandler { private HttpResponse get(HttpRequest request) { Path 
path = new Path(request.getUri()); if (path.matches("/changemanagement/v1/assessment/{changeRequestId}")) return changeRequestAssessment(path.get("changeRequestId")); + if (path.matches("/changemanagement/v1/vcmr")) return getVCMRs(); return ErrorResponse.notFoundError("Nothing at " + path); } @@ -87,8 +89,7 @@ public class ChangeManagementApiHandler extends AuditLoggingRequestHandler { } private HttpResponse changeRequestAssessment(String changeRequestId) { - var optionalChangeRequest = controller.serviceRegistry().changeRequestClient() - .getUpcomingChangeRequests() + var optionalChangeRequest = controller.curator().readChangeRequests() .stream() .filter(request -> changeRequestId.equals(request.getChangeRequestSource().getId())) .findFirst(); @@ -171,6 +172,17 @@ public class ChangeManagementApiHandler extends AuditLoggingRequestHandler { return new SlimeJsonResponse(slime); } + private HttpResponse getVCMRs() { + var changeRequests = controller.curator().readChangeRequests(); + var slime = new Slime(); + var cursor = slime.setObject().setArray("vcmrs"); + changeRequests.forEach(changeRequest -> { + var changeCursor = cursor.addObject(); + ChangeRequestSerializer.writeChangeRequest(changeCursor, changeRequest); + }); + return new SlimeJsonResponse(slime); + } + private Optional<ZoneId> affectedZone(List<String> hosts) { var affectedHosts = hosts.stream() .map(HostName::from) diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/routing/RoutingPolicies.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/routing/RoutingPolicies.java index 0356e11ae36..898b2531460 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/routing/RoutingPolicies.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/routing/RoutingPolicies.java @@ -188,9 +188,10 @@ public class RoutingPolicies { private void storePoliciesOf(LoadBalancerAllocation allocation, @SuppressWarnings("unused") Lock 
lock) { var policies = new LinkedHashMap<>(get(allocation.deployment.applicationId())); for (LoadBalancer loadBalancer : allocation.loadBalancers) { + if (loadBalancer.hostname().isEmpty()) continue; var policyId = new RoutingPolicyId(loadBalancer.application(), loadBalancer.cluster(), allocation.deployment.zoneId()); var existingPolicy = policies.get(policyId); - var newPolicy = new RoutingPolicy(policyId, loadBalancer.hostname(), loadBalancer.dnsZone(), + var newPolicy = new RoutingPolicy(policyId, loadBalancer.hostname().get(), loadBalancer.dnsZone(), allocation.endpointIdsOf(loadBalancer), new Status(isActive(loadBalancer), GlobalRouting.DEFAULT_STATUS)); // Preserve global routing status for existing policy diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java index 4a4159180b5..976cdb5c674 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/DeploymentContext.java @@ -275,11 +275,9 @@ public class DeploymentContext { /** Fail current deployment in given job */ public DeploymentContext outOfCapacity(JobType type) { return failDeployment(type, - new ConfigServerException(URI.create("https://config.server"), - "Failed to deploy application", + new ConfigServerException(ConfigServerException.ErrorCode.OUT_OF_CAPACITY, "Out of capacity", - ConfigServerException.ErrorCode.OUT_OF_CAPACITY, - new RuntimeException("Out of capacity from test code"))); + "Failed to deploy application")); } /** Fail current deployment in given job */ diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java index 
6bd7feb8d96..14244d7bdda 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunnerTest.java @@ -99,11 +99,9 @@ public class InternalStepRunnerTest { @Test public void retriesDeploymentForOneHour() { - RuntimeException exception = new ConfigServerException(URI.create("https://server"), - "test failure", + RuntimeException exception = new ConfigServerException(ConfigServerException.ErrorCode.APPLICATION_LOCK_FAILURE, "Exception to retry", - ConfigServerException.ErrorCode.APPLICATION_LOCK_FAILURE, - new RuntimeException("Retry me")); + "test failure"); tester.configServer().throwOnNextPrepare(exception); tester.jobs().deploy(app.instanceId(), JobType.devUsEast1, Optional.empty(), applicationPackage()); assertEquals(unfinished, tester.jobs().last(app.instanceId(), JobType.devUsEast1).get().stepStatuses().get(Step.deployReal)); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java index 70651ada473..9e15f2ec788 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/ConfigServerMock.java @@ -30,7 +30,6 @@ import com.yahoo.vespa.hosted.controller.api.integration.configserver.ContainerE import com.yahoo.vespa.hosted.controller.api.integration.configserver.LoadBalancer; import com.yahoo.vespa.hosted.controller.api.integration.configserver.Log; import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node; -import com.yahoo.vespa.hosted.controller.api.integration.configserver.NotFoundException; import com.yahoo.vespa.hosted.controller.api.integration.configserver.PrepareResponse; import 
com.yahoo.vespa.hosted.controller.api.integration.configserver.ProxyResponse; import com.yahoo.vespa.hosted.controller.api.integration.configserver.QuotaUsage; @@ -121,7 +120,8 @@ public class ConfigServerMock extends AbstractComponent implements ConfigServer new Cluster.Utilization(0.1, 0.2, 0.3, 0.4, 0.5, 0.6), List.of(new Cluster.ScalingEvent(new ClusterResources(0, 0, NodeResources.unspecified()), current, - Instant.ofEpochMilli(1234))), + Instant.ofEpochMilli(1234), + Optional.of(Instant.ofEpochMilli(2234)))), "the autoscaling status", Duration.ofMinutes(6), 0.7, @@ -390,7 +390,7 @@ public class ConfigServerMock extends AbstractComponent implements ConfigServer putLoadBalancers(id.zoneId(), List.of(new LoadBalancer(UUID.randomUUID().toString(), id.applicationId(), cluster, - HostName.from("lb-0--" + id.applicationId().serializedForm() + "--" + id.zoneId().toString()), + Optional.of(HostName.from("lb-0--" + id.applicationId().serializedForm() + "--" + id.zoneId().toString())), LoadBalancer.State.active, Optional.of("dns-zone-1")))); } @@ -432,21 +432,18 @@ public class ConfigServerMock extends AbstractComponent implements ConfigServer public void reindex(DeploymentId deployment, List<String> clusterNames, List<String> documentTypes, boolean indexedOnly) { } @Override - public Optional<ApplicationReindexing> getReindexing(DeploymentId deployment) { - return Optional.of(new ApplicationReindexing(true, - Map.of("cluster", - new ApplicationReindexing.Cluster(Map.of("type", 100L), - Map.of("type", new Status(Instant.ofEpochMilli(345), - Instant.ofEpochMilli(456), - Instant.ofEpochMilli(567), - ApplicationReindexing.State.FAILED, - "(#`д´)ノ", - 0.1)))))); - - + public ApplicationReindexing getReindexing(DeploymentId deployment) { + return new ApplicationReindexing(true, + Map.of("cluster", + new ApplicationReindexing.Cluster(Map.of("type", 100L), + Map.of("type", new Status(Instant.ofEpochMilli(345), + Instant.ofEpochMilli(456), + Instant.ofEpochMilli(567), + 
ApplicationReindexing.State.FAILED, + "(#`д´)ノ", + 0.1))))); } - @Override public void disableReindexing(DeploymentId deployment) { } @@ -464,12 +461,13 @@ public class ConfigServerMock extends AbstractComponent implements ConfigServer } @Override - public void deactivate(DeploymentId deployment) throws NotFoundException { + public void deactivate(DeploymentId deployment) { ApplicationId applicationId = deployment.applicationId(); nodeRepository().removeNodes(deployment.zoneId(), nodeRepository().list(deployment.zoneId(), applicationId)); if ( ! applications.containsKey(deployment)) - throw new NotFoundException("No application with id " + applicationId + " exists, cannot deactivate"); + return; + applications.remove(deployment); serviceStatus.remove(deployment); removeLoadBalancers(deployment.applicationId(), deployment.zoneId()); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java index 2eabc5dc21b..fe241976d13 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/integration/NodeRepositoryMock.java @@ -33,7 +33,6 @@ import java.util.Map; import java.util.NoSuchElementException; import java.util.Objects; import java.util.Optional; -import java.util.Set; import java.util.function.Function; import java.util.function.UnaryOperator; import java.util.stream.Collectors; @@ -56,6 +55,7 @@ public class NodeRepositoryMock implements NodeRepository { private final Map<ZoneId, List<NodeRepositoryNode>> nodeRepoNodes = new HashMap<>(); private boolean allowPatching = false; + private boolean hasSpareCapacity = false; /** Add or update given nodes in zone */ public void putNodes(ZoneId zone, List<Node> nodes) { @@ -162,8 +162,14 @@ public class NodeRepositoryMock implements 
NodeRepository { } @Override - public void setState(ZoneId zone, NodeState nodeState, String nodename) { - throw new UnsupportedOperationException(); + public void setState(ZoneId zone, NodeState nodeState, String hostName) { + var existing = list(zone, List.of(HostName.from(hostName))); + if (existing.size() != 1) throw new IllegalArgumentException("Node " + hostName + " not found in " + zone); + + var node = new Node.Builder(existing.get(0)) + .state(Node.State.valueOf(nodeState.name())) + .build(); + putNodes(zone, node); } @Override @@ -177,17 +183,7 @@ public class NodeRepositoryMock implements NodeRepository { } @Override - public NodeList listNodes(ZoneId zone, ApplicationId application) { - throw new UnsupportedOperationException(); - } - - @Override - public NodeList listNodes(ZoneId zone, List<HostName> hostnames) { - throw new UnsupportedOperationException(); - } - - @Override - public List<Node> list(ZoneId zone) { + public List<Node> list(ZoneId zone, boolean includeDeprovisioned) { return List.copyOf(nodeRepository.getOrDefault(zone, Map.of()).values()); } @@ -288,10 +284,16 @@ public class NodeRepositoryMock implements NodeRepository { List<Node> existing = list(zoneId, List.of(HostName.from(hostName))); if (existing.size() != 1) throw new IllegalArgumentException("Node " + hostName + " not found in " + zoneId); - // Note: Only supports switchHostname - Node newNode = new Node.Builder(existing.get(0)).switchHostname(node.getSwitchHostname()) - .build(); - putNodes(zoneId, newNode); + // Note: Only supports switchHostname, modelName and wantToRetire + Node.Builder newNode = new Node.Builder(existing.get(0)); + if (node.getSwitchHostname() != null) + newNode.switchHostname(node.getSwitchHostname()); + if (node.getModelName() != null) + newNode.modelName(node.getModelName()); + if (node.getWantToRetire() != null) + newNode.wantToRetire(node.getWantToRetire()); + + putNodes(zoneId, newNode.build()); } @Override @@ -301,7 +303,7 @@ public class 
NodeRepositoryMock implements NodeRepository { @Override public boolean isReplaceable(ZoneId zoneId, List<HostName> hostNames) { - return false; + return hasSpareCapacity; } public Optional<Duration> osUpgradeBudget(ZoneId zone, NodeType type, Version version) { @@ -351,4 +353,8 @@ public class NodeRepositoryMock implements NodeRepository { return this; } + public void hasSpareCapacity(boolean hasSpareCapacity) { + this.hasSpareCapacity = hasSpareCapacity; + } + } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeManagementAssessorTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeManagementAssessorTest.java index 575a38cd637..476d2465202 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeManagementAssessorTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeManagementAssessorTest.java @@ -69,7 +69,7 @@ public class ChangeManagementAssessorTest { @Test public void one_of_two_groups_in_one_of_two_clusters() { ZoneId zone = ZoneId.from("prod", "eu-trd"); - List<String> hostNames = Arrays.asList("host1", "host2"); + List<String> hostNames = Arrays.asList("host1", "host2", "host5"); List<NodeRepositoryNode> allNodesInZone = new ArrayList<>(); // Two impacted nodes on host1 diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainerTest.java index 1ce59587d6c..290e08ca47b 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/ChangeRequestMaintainerTest.java @@ -1,10 +1,13 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. package com.yahoo.vespa.hosted.controller.maintenance; +import com.yahoo.config.provision.zone.ZoneId; import com.yahoo.vespa.hosted.controller.ControllerTester; import com.yahoo.vespa.hosted.controller.api.integration.vcmr.ChangeRequest; import com.yahoo.vespa.hosted.controller.api.integration.vcmr.ChangeRequestSource; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.ChangeRequestSource.Status; import com.yahoo.vespa.hosted.controller.api.integration.vcmr.MockChangeRequestClient; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.VespaChangeRequest; import org.junit.Test; import java.time.Duration; @@ -24,10 +27,11 @@ public class ChangeRequestMaintainerTest { @Test public void only_approve_requests_pending_approval() { - + var changeRequest1 = newChangeRequest("id1", ChangeRequest.Approval.APPROVED); + var changeRequest2 = newChangeRequest("id2", ChangeRequest.Approval.REQUESTED); var upcomingChangeRequests = List.of( - newChangeRequest("id1", ChangeRequest.Approval.APPROVED), - newChangeRequest("id2", ChangeRequest.Approval.REQUESTED) + changeRequest1, + changeRequest2 ); changeRequestClient.setUpcomingChangeRequests(upcomingChangeRequests); @@ -37,24 +41,72 @@ public class ChangeRequestMaintainerTest { assertEquals(1, approvedChangeRequests.size()); assertEquals("id2", approvedChangeRequests.get(0).getId()); + var writtenChangeRequests = tester.curator().readChangeRequests(); + assertEquals(2, writtenChangeRequests.size()); + + var expectedChangeRequest = new VespaChangeRequest(changeRequest1, ZoneId.from("prod.us-east-3")); + assertEquals(expectedChangeRequest, writtenChangeRequests.get(0)); + } + + @Test + public void updates_status_time_and_approval() { + var time = ZonedDateTime.now(); + var persistedChangeRequest = persistedChangeRequest("some-id", time.minusDays(5), Status.WAITING_FOR_APPROVAL); + tester.curator().writeChangeRequest(persistedChangeRequest); + + var updatedChangeRequest = 
newChangeRequest("some-id", ChangeRequest.Approval.APPROVED, time, Status.CANCELED); + changeRequestClient.setUpcomingChangeRequests(List.of(updatedChangeRequest)); + changeRequestMaintainer.maintain(); + + persistedChangeRequest = tester.curator().readChangeRequest("some-id").get(); + assertEquals(Status.CANCELED, persistedChangeRequest.getChangeRequestSource().getStatus()); + assertEquals(ChangeRequest.Approval.APPROVED, persistedChangeRequest.getApproval()); + assertEquals(time, persistedChangeRequest.getChangeRequestSource().getPlannedStartTime()); + } + + @Test + public void deletes_old_change_requests() { + var now = ZonedDateTime.now(); + var before = now.minus(Duration.ofDays(8)); + var newChangeRequest = persistedChangeRequest("new", now, Status.CLOSED); + var oldChangeRequest = persistedChangeRequest("old", before, Status.CLOSED); + + tester.curator().writeChangeRequest(newChangeRequest); + tester.curator().writeChangeRequest(oldChangeRequest); + + changeRequestMaintainer.maintain(); + + var persistedChangeRequests = tester.curator().readChangeRequests(); + assertEquals(1, persistedChangeRequests.size()); + assertEquals(newChangeRequest, persistedChangeRequests.get(0)); } private ChangeRequest newChangeRequest(String id, ChangeRequest.Approval approval) { + return newChangeRequest(id, approval, ZonedDateTime.now(), Status.CLOSED); + } + + private ChangeRequest newChangeRequest(String id, ChangeRequest.Approval approval, ZonedDateTime time, Status status) { return new ChangeRequest.Builder() .id(id) .approval(approval) .impact(ChangeRequest.Impact.VERY_HIGH) .impactedSwitches(List.of()) - .impactedHosts(List.of()) + .impactedHosts(List.of("node-1-tenant-host-prod.us-east-3")) .changeRequestSource(new ChangeRequestSource.Builder() - .plannedStartTime(ZonedDateTime.now()) - .plannedEndTime(ZonedDateTime.now()) + .plannedStartTime(time) + .plannedEndTime(time) .id("some-id") .url("some-url") .system("some-system") - .status(ChangeRequestSource.Status.CLOSED) 
+ .status(status) .build()) .build(); + } + private VespaChangeRequest persistedChangeRequest(String id, ZonedDateTime time, Status status) { + return new VespaChangeRequest( + newChangeRequest(id, ChangeRequest.Approval.REQUESTED, time, status), + ZoneId.from("prod.us-east-3") + ); } } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporterTest.java index d14d4014b48..680743055c9 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporterTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/CloudEventReporterTest.java @@ -128,7 +128,7 @@ public class CloudEventReporterTest { } private Set<String> getHostnames(ZoneId zoneId) { - return tester.configServer().nodeRepository().list(zoneId) + return tester.configServer().nodeRepository().list(zoneId, false) .stream() .map(node -> node.hostname().value()) .collect(Collectors.toSet()); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainerTest.java index a1fb6463074..84f4f3d9b7c 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/DeploymentMetricsMaintainerTest.java @@ -114,10 +114,7 @@ public class DeploymentMetricsMaintainerTest { } private void setMetrics(ApplicationId application, Map<String, Double> metrics) { - var clusterMetrics = new ClusterMetrics("default", "container"); - for (var kv : metrics.entrySet()) { - clusterMetrics = clusterMetrics.addMetric(kv.getKey(), kv.getValue()); - } + var clusterMetrics = new 
ClusterMetrics("default", "container", metrics); tester.controllerTester().serviceRegistry().configServerMock().setMetrics(new DeploymentId(application, ZoneId.from("dev", "us-east-1")), clusterMetrics); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/HostSwitchUpdaterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdaterTest.java index 4dcacb3934b..0baee28143c 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/HostSwitchUpdaterTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/HostInfoUpdaterTest.java @@ -19,8 +19,9 @@ import static org.junit.Assert.assertTrue; /** * @author mpolden + * @author bjormel */ -public class HostSwitchUpdaterTest { +public class HostInfoUpdaterTest { @Test public void maintain() { @@ -29,7 +30,7 @@ public class HostSwitchUpdaterTest { addNodeEntities(tester); // First iteration patches all hosts - HostSwitchUpdater maintainer = new HostSwitchUpdater(tester.controller(), Duration.ofDays(1)); + HostInfoUpdater maintainer = new HostInfoUpdater(tester.controller(), Duration.ofDays(1)); maintainer.maintain(); List<Node> nodes = allNodes(tester); assertFalse(nodes.isEmpty()); @@ -48,7 +49,7 @@ public class HostSwitchUpdaterTest { // One host is moved to a different switch Node host = allNodes(tester).stream().filter(node -> node.type().isHost()).findFirst().get(); String newSwitch = "tor2-" + host.hostname().value(); - NodeEntity nodeEntity = new NodeEntity(host.hostname().value(), "", "", newSwitch); + NodeEntity nodeEntity = new NodeEntity(host.hostname().value(), "RD350G", "Lenovo", newSwitch); tester.serviceRegistry().entityService().addNodeEntity(nodeEntity); // Host is updated @@ -56,12 +57,29 @@ public class HostSwitchUpdaterTest { maintainer.maintain(); assertEquals(newSwitch, getNode(host.hostname(), tester).switchHostname().get()); + // Host has updated 
model + String newModel = "Quanta q801"; + String manufacturer = "quanta computer"; + nodeEntity = new NodeEntity(host.hostname().value(), newModel, manufacturer, newSwitch); + tester.serviceRegistry().entityService().addNodeEntity(nodeEntity); + + // Host is updated + tester.serviceRegistry().configServer().nodeRepository().allowPatching(true); + maintainer.maintain(); + assertEquals(manufacturer + " " + newModel, getNode(host.hostname(), tester).modelName().get()); + // Host keeps old switch hostname if removed from the node entity - nodeEntity = new NodeEntity(host.hostname().value(), "", "", ""); + nodeEntity = new NodeEntity(host.hostname().value(), newModel, manufacturer, ""); tester.serviceRegistry().entityService().addNodeEntity(nodeEntity); maintainer.maintain(); assertEquals(newSwitch, getNode(host.hostname(), tester).switchHostname().get()); + // Host keeps old model name if removed from the node entity + nodeEntity = new NodeEntity(host.hostname().value(), "", "", newSwitch); + tester.serviceRegistry().entityService().addNodeEntity(nodeEntity); + maintainer.maintain(); + assertEquals(manufacturer + " " + newModel, getNode(host.hostname(), tester).modelName().get()); + // Updates node registered under a different hostname ZoneId zone = tester.zoneRegistry().zones().controllerUpgraded().all().ids().get(0); String hostnameSuffix = ".prod." 
+ zone.value(); @@ -73,7 +91,7 @@ public class HostSwitchUpdaterTest { .build(); tester.serviceRegistry().configServer().nodeRepository().putNodes(zone, List.of(configNode, configHost)); String switchHostname = switchHostname(configHost); - NodeEntity configNodeEntity = new NodeEntity("cfg3" + hostnameSuffix, "", "", switchHostname); + NodeEntity configNodeEntity = new NodeEntity("cfg3" + hostnameSuffix, "RD350G", "Lenovo", switchHostname); tester.serviceRegistry().entityService().addNodeEntity(configNodeEntity); maintainer.maintain(); assertEquals(switchHostname, getNode(configHost.hostname(), tester).switchHostname().get()); @@ -90,7 +108,7 @@ public class HostSwitchUpdaterTest { private static List<Node> allNodes(ControllerTester tester) { List<Node> nodes = new ArrayList<>(); for (var zone : tester.zoneRegistry().zones().controllerUpgraded().all().ids()) { - nodes.addAll(tester.serviceRegistry().configServer().nodeRepository().list(zone)); + nodes.addAll(tester.serviceRegistry().configServer().nodeRepository().list(zone, false)); } return nodes; } @@ -102,7 +120,7 @@ public class HostSwitchUpdaterTest { private static void addNodeEntities(ControllerTester tester) { for (var node : allNodes(tester)) { if (!node.type().isHost()) continue; - NodeEntity nodeEntity = new NodeEntity(node.hostname().value(), "", "", switchHostname(node)); + NodeEntity nodeEntity = new NodeEntity(node.hostname().value(), "RD350G", "Lenovo", switchHostname(node)); tester.serviceRegistry().entityService().addNodeEntity(nodeEntity); } } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java index 1b43fe2aaa5..d42342b57fb 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java +++ 
b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/MetricsReporterTest.java @@ -357,7 +357,7 @@ public class MetricsReporterTest { tester.configServer().setOsVersion(version0, SystemApplication.tenantHost.id(), zone); tester.configServer().setOsVersion(version0, SystemApplication.configServerHost.id(), zone); runAll(statusUpdater, reporter); - List<Node> hosts = tester.configServer().nodeRepository().list(zone); + List<Node> hosts = tester.configServer().nodeRepository().list(zone, false); assertOsChangeDuration(Duration.ZERO, hosts); var targets = List.of(Version.fromString("8.1"), Version.fromString("8.2")); @@ -381,7 +381,7 @@ public class MetricsReporterTest { tester.configServer().nodeRepository().list(zone, SystemApplication.tenantHost.id()).stream() .map(Node::wantedOsVersion).min(Comparator.naturalOrder()).get()); assertTrue("No nodes are suspended", tester.controller().serviceRegistry().configServer() - .nodeRepository().list(zone).stream() + .nodeRepository().list(zone, false).stream() .noneMatch(node -> node.serviceState() == Node.ServiceState.allowedDown)); // Another 30 minutes pass diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainerTest.java index d7440a706ea..1eadae18668 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/SystemRoutingPolicyMaintainerTest.java @@ -35,7 +35,7 @@ public class SystemRoutingPolicyMaintainerTest { tester.configServer().putLoadBalancers(zone, List.of(new LoadBalancer("lb1", SystemApplication.configServer.id(), ClusterSpec.Id.from("config"), - HostName.from("lb1.example.com"), + Optional.of(HostName.from("lb1.example.com")), LoadBalancer.State.active, 
Optional.of("dns-zone-1")))); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java index 2f24d3e6eee..2afa3a0faea 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/TrafficShareUpdaterTest.java @@ -13,6 +13,7 @@ import com.yahoo.vespa.hosted.controller.integration.NodeRepositoryMock; import org.junit.Test; import java.time.Duration; +import java.util.Map; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @@ -81,8 +82,7 @@ public class TrafficShareUpdaterTest { } private void setQpsMetric(double qps, ApplicationId application, ZoneId zone, DeploymentTester tester) { - var clusterMetrics = new ClusterMetrics("default", "container"); - clusterMetrics = clusterMetrics.addMetric(ClusterMetrics.QUERIES_PER_SECOND, qps); + var clusterMetrics = new ClusterMetrics("default", "container", Map.of(ClusterMetrics.QUERIES_PER_SECOND, qps)); tester.controllerTester().serviceRegistry().configServerMock().setMetrics(new DeploymentId(application, zone), clusterMetrics); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainerTest.java new file mode 100644 index 00000000000..d5c35f806f4 --- /dev/null +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/maintenance/VCMRMaintainerTest.java @@ -0,0 +1,214 @@ +package com.yahoo.vespa.hosted.controller.maintenance; + +import com.yahoo.config.provision.HostName; +import com.yahoo.config.provision.NodeType; +import com.yahoo.config.provision.zone.ZoneId; +import 
com.yahoo.vespa.hosted.controller.ControllerTester; +import com.yahoo.vespa.hosted.controller.api.integration.configserver.Node; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.ChangeRequest; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.ChangeRequestSource; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.HostAction; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.HostAction.State; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.VespaChangeRequest; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.VespaChangeRequest.Status; +import com.yahoo.vespa.hosted.controller.integration.NodeRepositoryMock; +import org.junit.Test; + +import java.time.Duration; +import java.time.Instant; +import java.time.ZonedDateTime; +import java.util.List; + +import static org.junit.Assert.*; + +/** + * @author olaa + */ +public class VCMRMaintainerTest { + + private final ControllerTester tester = new ControllerTester(); + private final VCMRMaintainer maintainer = new VCMRMaintainer(tester.controller(), Duration.ofMinutes(1)); + private final NodeRepositoryMock nodeRepo = tester.serviceRegistry().configServer().nodeRepository(); + private final ZoneId zoneId = ZoneId.from("prod.us-east-3"); + private final HostName host1 = HostName.from("host1"); + private final HostName host2 = HostName.from("host2"); + private final String changeRequestId = "id123"; + + @Test + public void recycle_hosts_after_completion() { + var parkedNode = createNode(host1, NodeType.host, Node.State.parked, true); + var failedNode = createNode(host2, NodeType.host, Node.State.failed, false); + nodeRepo.putNodes(zoneId, List.of(parkedNode, failedNode)); + + tester.curator().writeChangeRequest(canceledChangeRequest()); + maintainer.maintain(); + + // Only the parked node is recycled + var nodeList = nodeRepo.list(zoneId, List.of(host1, host2)); + assertEquals(Node.State.dirty, nodeList.get(0).state()); + 
assertEquals(Node.State.failed, nodeList.get(1).state()); + var writtenChangeRequest = tester.curator().readChangeRequest(changeRequestId).get(); + assertEquals(Status.COMPLETED, writtenChangeRequest.getStatus()); + } + + @Test + public void infrastructure_hosts_require_maunal_intervention() { + var configNode = createNode(host1, NodeType.config, Node.State.active, false); + var activeNode = createNode(host2, NodeType.host, Node.State.active, false); + nodeRepo.putNodes(zoneId, List.of(configNode, activeNode)); + nodeRepo.hasSpareCapacity(true); + + tester.curator().writeChangeRequest(futureChangeRequest()); + maintainer.maintain(); + + var writtenChangeRequest = tester.curator().readChangeRequest(changeRequestId).get(); + var configAction = writtenChangeRequest.getHostActionPlan().get(0); + var tenantHostAction = writtenChangeRequest.getHostActionPlan().get(1); + assertEquals(State.REQUIRES_OPERATOR_ACTION, configAction.getState()); + assertEquals(State.PENDING_RETIREMENT, tenantHostAction.getState()); + assertEquals(Status.REQUIRES_OPERATOR_ACTION, writtenChangeRequest.getStatus()); + } + + @Test + public void retires_hosts_when_near_vcmr() { + var activeNode = createNode(host1, NodeType.host, Node.State.active, false); + var failedNode = createNode(host2, NodeType.host, Node.State.failed, false); + nodeRepo.putNodes(zoneId, List.of(activeNode, failedNode)); + nodeRepo.allowPatching(true).hasSpareCapacity(true); + + tester.curator().writeChangeRequest(startingChangeRequest()); + maintainer.maintain(); + + var writtenChangeRequest = tester.curator().readChangeRequest(changeRequestId).orElseThrow(); + var parkedNodeAction = writtenChangeRequest.getHostActionPlan().get(0); + var failedNodeAction = writtenChangeRequest.getHostActionPlan().get(1); + assertEquals(State.RETIRING, parkedNodeAction.getState()); + assertEquals(State.NONE, failedNodeAction.getState()); + assertEquals(Status.IN_PROGRESS, writtenChangeRequest.getStatus()); + + activeNode = 
nodeRepo.list(zoneId, List.of(activeNode.hostname())).get(0); + assertTrue(activeNode.wantToRetire()); + + } + + @Test + public void no_spare_capacity_requires_operator_action() { + var activeNode = createNode(host1, NodeType.host, Node.State.active, false); + var failedNode = createNode(host2, NodeType.host, Node.State.failed, false); + nodeRepo.putNodes(zoneId, List.of(activeNode, failedNode)); + nodeRepo.hasSpareCapacity(false); + + tester.curator().writeChangeRequest(startingChangeRequest()); + maintainer.maintain(); + + var writtenChangeRequest = tester.curator().readChangeRequest(changeRequestId).orElseThrow(); + var parkedNodeAction = writtenChangeRequest.getHostActionPlan().get(0); + var failedNodeAction = writtenChangeRequest.getHostActionPlan().get(1); + assertEquals(State.REQUIRES_OPERATOR_ACTION, parkedNodeAction.getState()); + assertEquals(State.REQUIRES_OPERATOR_ACTION, failedNodeAction.getState()); + assertEquals(Status.REQUIRES_OPERATOR_ACTION, writtenChangeRequest.getStatus()); + } + + @Test + public void updates_status_when_retiring_host_is_parked() { + var parkedNode = createNode(host1, NodeType.host, Node.State.parked, true); + nodeRepo.putNodes(zoneId, parkedNode); + nodeRepo.hasSpareCapacity(true); + + tester.curator().writeChangeRequest(inProgressChangeRequest()); + maintainer.maintain(); + + var writtenChangeRequest = tester.curator().readChangeRequest(changeRequestId).orElseThrow(); + var parkedNodeAction = writtenChangeRequest.getHostActionPlan().get(0); + assertEquals(State.RETIRED, parkedNodeAction.getState()); + assertEquals(Status.IN_PROGRESS, writtenChangeRequest.getStatus()); + } + + @Test + public void pending_retirement_when_vcmr_is_far_ahead() { + var activeNode = createNode(host2, NodeType.host, Node.State.active, false); + nodeRepo.putNodes(zoneId, List.of(activeNode)); + nodeRepo.hasSpareCapacity(true); + + tester.curator().writeChangeRequest(futureChangeRequest()); + maintainer.maintain(); + + var writtenChangeRequest = 
tester.curator().readChangeRequest(changeRequestId).get(); + var tenantHostAction = writtenChangeRequest.getHostActionPlan().get(0); + assertEquals(State.PENDING_RETIREMENT, tenantHostAction.getState()); + assertEquals(Status.PENDING_ACTION, writtenChangeRequest.getStatus()); + } + + @Test + public void recycles_nodes_if_vcmr_is_postponed() { + var parkedNode = createNode(host1, NodeType.host, Node.State.parked, false); + var retiringNode = createNode(host2, NodeType.host, Node.State.active, true); + nodeRepo.putNodes(zoneId, List.of(parkedNode, retiringNode)); + nodeRepo.allowPatching(true).hasSpareCapacity(true); + + tester.curator().writeChangeRequest(postponedChangeRequest()); + maintainer.maintain(); + + var writtenChangeRequest = tester.curator().readChangeRequest(changeRequestId).get(); + var hostAction = writtenChangeRequest.getHostActionPlan().get(0); + assertEquals(State.PENDING_RETIREMENT, hostAction.getState()); + + parkedNode = nodeRepo.list(zoneId, List.of(parkedNode.hostname())).get(0); + assertEquals(Node.State.dirty, parkedNode.state()); + assertFalse(parkedNode.wantToRetire()); + + retiringNode = nodeRepo.list(zoneId, List.of(retiringNode.hostname())).get(0); + assertEquals(Node.State.active, retiringNode.state()); + assertFalse(retiringNode.wantToRetire()); + } + + + private VespaChangeRequest canceledChangeRequest() { + return newChangeRequest(ChangeRequestSource.Status.CANCELED, State.RETIRED, State.RETIRING, ZonedDateTime.now()); + } + + private VespaChangeRequest futureChangeRequest() { + return newChangeRequest(ChangeRequestSource.Status.WAITING_FOR_APPROVAL, State.NONE, State.NONE, ZonedDateTime.now().plus(Duration.ofDays(5L))); + } + + private VespaChangeRequest startingChangeRequest() { + return newChangeRequest(ChangeRequestSource.Status.STARTED, State.PENDING_RETIREMENT, State.NONE, ZonedDateTime.now()); + } + + private VespaChangeRequest inProgressChangeRequest() { + return newChangeRequest(ChangeRequestSource.Status.STARTED, 
State.RETIRING, State.RETIRING, ZonedDateTime.now()); + } + + private VespaChangeRequest postponedChangeRequest() { + return newChangeRequest(ChangeRequestSource.Status.STARTED, State.RETIRED, State.RETIRING, ZonedDateTime.now().plus(Duration.ofDays(8))); + } + + + private VespaChangeRequest newChangeRequest(ChangeRequestSource.Status sourceStatus, State state1, State state2, ZonedDateTime startTime) { + var source = new ChangeRequestSource("aws", changeRequestId, "url", sourceStatus , startTime, ZonedDateTime.now()); + var actionPlan = List.of( + new HostAction(host1.value(), state1, Instant.now()), + new HostAction(host2.value(), state2, Instant.now()) + ); + return new VespaChangeRequest( + changeRequestId, + source, + List.of("switch1"), + List.of("host1", "host2"), + ChangeRequest.Approval.APPROVED, + ChangeRequest.Impact.VERY_HIGH, + VespaChangeRequest.Status.IN_PROGRESS, + actionPlan, + ZoneId.from("prod.us-east-3") + ); + } + + private Node createNode(HostName hostname, NodeType nodeType, Node.State state, boolean wantToRetire) { + return new Node.Builder() + .hostname(hostname) + .type(nodeType) + .state(state) + .wantToRetire(wantToRetire) + .build(); + } +}
\ No newline at end of file diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/metric/ConfigServerMetricsTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/metric/ConfigServerMetricsTest.java index 3d1cb3eba86..33b043bc93d 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/metric/ConfigServerMetricsTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/metric/ConfigServerMetricsTest.java @@ -12,6 +12,7 @@ import org.junit.Before; import org.junit.Test; import java.util.List; +import java.util.Map; import static org.junit.Assert.assertEquals; @@ -44,15 +45,9 @@ public class ConfigServerMetricsTest { // var deploymentId = new DeploymentId(applicationId, zoneId); - var clusterMetrics1 = new ClusterMetrics("niceCluster", "container") {{ - addMetric("queriesPerSecond", 23.0); - addMetric("queryLatency", 1337.0); - }}; + var clusterMetrics1 = new ClusterMetrics("niceCluster", "container", Map.of("queriesPerSecond", 23.0, "queryLatency", 1337.0)); - var clusterMetrics2 = new ClusterMetrics("alsoNiceCluster", "container") {{ - addMetric("queriesPerSecond", 11.0); - addMetric("queryLatency", 12.0); - }}; + var clusterMetrics2 = new ClusterMetrics("alsoNiceCluster", "container", Map.of("queriesPerSecond", 11.0, "queryLatency", 12.0)); var response = List.of(clusterMetrics1, clusterMetrics2); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/notification/NotificationsDbTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/notification/NotificationsDbTest.java new file mode 100644 index 00000000000..90d1ecb2f20 --- /dev/null +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/notification/NotificationsDbTest.java @@ -0,0 +1,107 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.controller.notification; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.ClusterSpec; +import com.yahoo.config.provision.TenantName; +import com.yahoo.config.provision.zone.ZoneId; +import com.yahoo.path.Path; +import com.yahoo.test.ManualClock; +import com.yahoo.vespa.hosted.controller.api.identifiers.DeploymentId; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId; +import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId; +import com.yahoo.vespa.hosted.controller.persistence.MockCuratorDb; +import org.junit.Before; +import org.junit.Test; + +import java.time.Instant; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +/** + * @author freva + */ +public class NotificationsDbTest { + + private static final TenantName tenant = TenantName.from("tenant1"); + private static final List<Notification> notifications = List.of( + notification(1001, Notification.Type.DEPLOYMENT_FAILURE, NotificationSource.from(tenant), "tenant msg"), + notification(1101, Notification.Type.DEPLOYMENT_FAILURE, NotificationSource.from(TenantAndApplicationId.from(tenant.value(), "app1")), "app msg"), + notification(1201, Notification.Type.DEPLOYMENT_FAILURE, NotificationSource.from(ApplicationId.from(tenant.value(), "app2", "instance2")), "instance msg"), + notification(1301, Notification.Type.DEPLOYMENT_FAILURE, NotificationSource.from(new DeploymentId(ApplicationId.from(tenant.value(), "app2", "instance2"), ZoneId.from("prod", "us-north-2"))), "deployment msg"), + notification(1401, Notification.Type.DEPLOYMENT_FAILURE, NotificationSource.from(new DeploymentId(ApplicationId.from(tenant.value(), "app1", 
"instance1"), ZoneId.from("dev", "us-south-1")), ClusterSpec.Id.from("cluster1")), "cluster msg"), + notification(1501, Notification.Type.DEPLOYMENT_FAILURE, NotificationSource.from(new RunId(ApplicationId.from(tenant.value(), "app1", "instance1"), JobType.devUsEast1, 4)), "run id msg")); + + private final ManualClock clock = new ManualClock(Instant.ofEpochSecond(12345)); + private final MockCuratorDb curatorDb = new MockCuratorDb(); + private final NotificationsDb notificationsDb = new NotificationsDb(clock, curatorDb); + + @Test + public void list_test() { + assertEquals(notifications, notificationsDb.listNotifications(NotificationSource.from(tenant), false)); + assertEquals(notificationIndices(0, 1, 3), notificationsDb.listNotifications(NotificationSource.from(tenant), true)); + assertEquals(notificationIndices(2, 3), notificationsDb.listNotifications(NotificationSource.from(TenantAndApplicationId.from(tenant.value(), "app2")), false)); + assertEquals(notificationIndices(4, 5), notificationsDb.listNotifications(NotificationSource.from(ApplicationId.from(tenant.value(), "app1", "instance1")), false)); + assertEquals(notificationIndices(5), notificationsDb.listNotifications(NotificationSource.from(new RunId(ApplicationId.from(tenant.value(), "app1", "instance1"), JobType.devUsEast1, 5)), false)); + assertEquals(List.of(), notificationsDb.listNotifications(NotificationSource.from(new RunId(ApplicationId.from(tenant.value(), "app1", "instance1"), JobType.productionUsEast3, 4)), false)); + } + + @Test + public void add_test() { + Notification notification1 = notification(12345, Notification.Type.DEPLOYMENT_FAILURE, NotificationSource.from(ApplicationId.from(tenant.value(), "app2", "instance2")), "instance msg #2"); + Notification notification2 = notification(12345, Notification.Type.DEPLOYMENT_FAILURE, NotificationSource.from(ApplicationId.from(tenant.value(), "app3", "instance2")), "instance msg #3"); + + // Replace the 3rd notification + 
notificationsDb.setNotification(notification1.source(), notification1.type(), notification1.messages()); + + // Notification for a new app, add without replacement + notificationsDb.setNotification(notification2.source(), notification2.type(), notification2.messages()); + + List<Notification> expected = notificationIndices(0, 1, 3, 4, 5); + expected.addAll(List.of(notification1, notification2)); + assertEquals(expected, curatorDb.readNotifications(tenant)); + } + + @Test + public void remove_single_test() { + // Remove the 3rd notification + notificationsDb.removeNotification(NotificationSource.from(ApplicationId.from(tenant.value(), "app2", "instance2")), Notification.Type.DEPLOYMENT_FAILURE); + + // Removing something that doesn't exist is OK + notificationsDb.removeNotification(NotificationSource.from(ApplicationId.from(tenant.value(), "app3", "instance2")), Notification.Type.DEPLOYMENT_FAILURE); + + assertEquals(notificationIndices(0, 1, 3, 4, 5), curatorDb.readNotifications(tenant)); + } + + @Test + public void remove_multiple_test() { + // Remove the 5th and 6th notifications (all notifications for app1.instance1) + notificationsDb.removeNotifications(NotificationSource.from(ApplicationId.from(tenant.value(), "app1", "instance1"))); + assertEquals(notificationIndices(0, 1, 2, 3), curatorDb.readNotifications(tenant)); + assertTrue(curatorDb.curator().exists(Path.fromString("/controller/v1/notifications/" + tenant.value()))); + + notificationsDb.removeNotifications(NotificationSource.from(tenant)); + assertEquals(List.of(), curatorDb.readNotifications(tenant)); + assertFalse(curatorDb.curator().exists(Path.fromString("/controller/v1/notifications/" + tenant.value()))); + } + + @Before + public void init() { + curatorDb.writeNotifications(tenant, notifications); + } + + private static List<Notification> notificationIndices(int... 
indices) { + return Arrays.stream(indices).mapToObj(notifications::get).collect(Collectors.toCollection(ArrayList::new)); + } + + private static Notification notification(long secondsSinceEpoch, Notification.Type type, NotificationSource source, String... messages) { + return new Notification(Instant.ofEpochSecond(secondsSinceEpoch), type, source, List.of(messages)); + } +} diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ChangeRequestSerializerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ChangeRequestSerializerTest.java new file mode 100644 index 00000000000..40a045c44cf --- /dev/null +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/ChangeRequestSerializerTest.java @@ -0,0 +1,45 @@ +package com.yahoo.vespa.hosted.controller.persistence; + +import com.yahoo.config.provision.zone.ZoneId; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.ChangeRequest; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.ChangeRequestSource; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.HostAction; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.VespaChangeRequest; +import org.junit.Test; + +import java.time.Instant; +import java.time.ZonedDateTime; +import java.util.List; + +import static org.junit.Assert.*; + +/** + * @author olaa + */ +public class ChangeRequestSerializerTest { + + @Test + public void reserialization_equality() { + var source = new ChangeRequestSource("aws", "id321", "url", ChangeRequestSource.Status.STARTED, ZonedDateTime.now(), ZonedDateTime.now()); + var actionPlan = List.of( + new HostAction("host1", HostAction.State.RETIRING, Instant.now()), + new HostAction("host2", HostAction.State.RETIRED, Instant.now()) + ); + + var changeRequest = new VespaChangeRequest( + "id123", + source, + List.of("switch1"), + List.of("host1", "host2"), + ChangeRequest.Approval.APPROVED, + 
ChangeRequest.Impact.VERY_HIGH, + VespaChangeRequest.Status.IN_PROGRESS, + actionPlan, + ZoneId.defaultId() + ); + + var reserialized = ChangeRequestSerializer.fromSlime(ChangeRequestSerializer.toSlime(changeRequest)); + assertEquals(changeRequest, reserialized); + } + +}
\ No newline at end of file diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/NotificationsSerializerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/NotificationsSerializerTest.java new file mode 100644 index 00000000000..f3f2d10cfd0 --- /dev/null +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/NotificationsSerializerTest.java @@ -0,0 +1,59 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.controller.persistence; + +import com.yahoo.config.provision.ApplicationId; +import com.yahoo.config.provision.TenantName; +import com.yahoo.slime.Slime; +import com.yahoo.slime.SlimeUtils; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId; +import com.yahoo.vespa.hosted.controller.application.TenantAndApplicationId; +import com.yahoo.vespa.hosted.controller.notification.Notification; +import com.yahoo.vespa.hosted.controller.notification.NotificationSource; +import org.junit.Test; + +import java.io.IOException; +import java.time.Instant; +import java.util.List; + +import static org.junit.Assert.assertEquals; + +/** + * @author freva + */ +public class NotificationsSerializerTest { + + @Test + public void serialization_test() throws IOException { + TenantName tenantName = TenantName.from("tenant1"); + List<Notification> notifications = List.of( + new Notification(Instant.ofEpochSecond(1234), + Notification.Type.APPLICATION_PACKAGE_WARNING, + NotificationSource.from(TenantAndApplicationId.from(tenantName.value(), "app1")), + List.of("Something something deprecated...")), + new Notification(Instant.ofEpochSecond(2345), + Notification.Type.DEPLOYMENT_FAILURE, + NotificationSource.from(new RunId(ApplicationId.from(tenantName.value(), "app1", "instance1"), 
JobType.systemTest, 12)), + List.of("Failed to deploy: Out of capacity"))); + + Slime serialized = NotificationsSerializer.toSlime(notifications); + assertEquals("{\"notifications\":[" + + "{" + + "\"at\":1234000," + + "\"type\":\"APPLICATION_PACKAGE_WARNING\"," + + "\"messages\":[\"Something something deprecated...\"]," + + "\"application\":\"app1\"" + + "},{" + + "\"at\":2345000," + + "\"type\":\"DEPLOYMENT_FAILURE\"," + + "\"messages\":[\"Failed to deploy: Out of capacity\"]," + + "\"application\":\"app1\"," + + "\"instance\":\"instance1\"," + + "\"jobId\":\"system-test\"," + + "\"runNumber\":12" + + "}]}", new String(SlimeUtils.toJsonBytes(serialized))); + + List<Notification> deserialized = NotificationsSerializer.fromSlime(tenantName, serialized); + assertEquals(notifications, deserialized); + } +}
\ No newline at end of file diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java index b1b1c7ffe7a..0137ea7eeba 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/ApplicationApiTest.java @@ -39,6 +39,7 @@ import com.yahoo.vespa.hosted.controller.api.integration.athenz.AthenzDbMock; import com.yahoo.vespa.hosted.controller.api.integration.configserver.ConfigServerException; import com.yahoo.vespa.hosted.controller.api.integration.deployment.ApplicationVersion; import com.yahoo.vespa.hosted.controller.api.integration.deployment.JobType; +import com.yahoo.vespa.hosted.controller.api.integration.deployment.RunId; import com.yahoo.vespa.hosted.controller.api.integration.organization.Contact; import com.yahoo.vespa.hosted.controller.api.integration.organization.IssueId; import com.yahoo.vespa.hosted.controller.api.integration.organization.User; @@ -58,6 +59,8 @@ import com.yahoo.vespa.hosted.controller.deployment.DeploymentTrigger; import com.yahoo.vespa.hosted.controller.integration.ConfigServerMock; import com.yahoo.vespa.hosted.controller.integration.ZoneApiMock; import com.yahoo.vespa.hosted.controller.metric.ApplicationMetrics; +import com.yahoo.vespa.hosted.controller.notification.Notification; +import com.yahoo.vespa.hosted.controller.notification.NotificationSource; import com.yahoo.vespa.hosted.controller.restapi.ContainerTester; import com.yahoo.vespa.hosted.controller.restapi.ControllerContainerTest; import com.yahoo.vespa.hosted.controller.routing.GlobalRouting; @@ -801,6 +804,13 @@ public class ApplicationApiTest extends ControllerContainerTest { .userIdentity(USER_ID), ""); + 
addNotifications(TenantName.from("tenant1")); + tester.assertResponse(request("/application/v4/tenant/tenant1/notifications", GET).userIdentity(USER_ID), + new File("notifications-tenant1.json")); + tester.assertResponse(request("/application/v4/tenant/tenant1/notifications", GET) + .properties(Map.of("application", "app2")).userIdentity(USER_ID), + new File("notifications-tenant1-app2.json")); + // DELETE the application which no longer has any deployments tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1", DELETE) .userIdentity(USER_ID) @@ -1117,7 +1127,7 @@ public class ApplicationApiTest extends ControllerContainerTest { 400); ConfigServerMock configServer = tester.serviceRegistry().configServerMock(); - configServer.throwOnNextPrepare(new ConfigServerException(new URI("server-url"), "Failed to prepare application", "Invalid application package", ConfigServerException.ErrorCode.INVALID_APPLICATION_PACKAGE, null)); + configServer.throwOnNextPrepare(new ConfigServerException(ConfigServerException.ErrorCode.INVALID_APPLICATION_PACKAGE, "Failed to prepare application", "Invalid application package")); // GET non-existent application package tester.assertResponse(request("/application/v4/tenant/tenant1/application/application1/package", GET).userIdentity(HOSTED_VESPA_OPERATOR), @@ -1628,6 +1638,17 @@ public class ApplicationApiTest extends ControllerContainerTest { )); } + private void addNotifications(TenantName tenantName) { + tester.controller().notificationsDb().setNotification( + NotificationSource.from(TenantAndApplicationId.from(tenantName.value(), "app1")), + Notification.Type.APPLICATION_PACKAGE_WARNING, + "Something something deprecated..."); + tester.controller().notificationsDb().setNotification( + NotificationSource.from(new RunId(ApplicationId.from(tenantName.value(), "app2", "instance1"), JobType.systemTest, 12)), + Notification.Type.DEPLOYMENT_FAILURE, + "Failed to deploy: Out of capacity"); + } + private void 
assertGlobalRouting(DeploymentId deployment, GlobalRouting.Status status, GlobalRouting.Agent agent) { var changedAt = tester.controller().clock().instant(); var westPolicies = tester.controller().routing().policies().get(deployment); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelperTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelperTest.java index f574d6bc3f1..1be7f16e85f 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelperTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelperTest.java @@ -81,7 +81,7 @@ public class JobControllerApiHandlerHelperTest { tester.triggerJobs(); // us-east-3 eats the deployment failure and fails before deployment, while us-west-1 fails after. - tester.configServer().throwOnNextPrepare(new ConfigServerException(URI.create("url"), "Failed to deploy application", "ERROR!", INVALID_APPLICATION_PACKAGE, null)); + tester.configServer().throwOnNextPrepare(new ConfigServerException(INVALID_APPLICATION_PACKAGE, "ERROR!", "Failed to deploy application")); tester.runner().run(); assertEquals(deploymentFailed, tester.jobs().last(app.instanceId(), productionUsEast3).get().status()); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application-clusters.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application-clusters.json index 499a425087d..9df83cb2089 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application-clusters.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/application-clusters.json @@ -90,7 +90,8 @@ }, 
"cost": "(ignore)" }, - "at": 1234 + "at": 1234, + "completion": 2234 } ], "autoscalingStatus": "the autoscaling status", diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/notifications-tenant1-app2.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/notifications-tenant1-app2.json new file mode 100644 index 00000000000..ab8262e26bd --- /dev/null +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/notifications-tenant1-app2.json @@ -0,0 +1,16 @@ +{ + "notifications": [ + { + "at": "(ignore)", + "level": "error", + "type": "DEPLOYMENT_FAILURE", + "messages": [ + "Failed to deploy: Out of capacity" + ], + "application": "app2", + "instance": "instance1", + "jobName": "system-test", + "runNumber": 12 + } + ] +} diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/notifications-tenant1.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/notifications-tenant1.json new file mode 100644 index 00000000000..2b2c03bb75a --- /dev/null +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/notifications-tenant1.json @@ -0,0 +1,25 @@ +{ + "notifications": [ + { + "at": "(ignore)", + "level": "warning", + "type": "APPLICATION_PACKAGE_WARNING", + "messages": [ + "Something something deprecated..." 
+ ], + "application": "app1" + }, + { + "at": "(ignore)", + "level": "error", + "type": "DEPLOYMENT_FAILURE", + "messages": [ + "Failed to deploy: Out of capacity" + ], + "application": "app2", + "instance": "instance1", + "jobName": "system-test", + "runNumber": 12 + } + ] +} diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/us-east-3-log-without-first.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/us-east-3-log-without-first.json index 6c9315ca64b..588f8839ab7 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/us-east-3-log-without-first.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/us-east-3-log-without-first.json @@ -5,7 +5,7 @@ "deployReal": [ { "at": 1000, - "type": "info", + "type": "warning", "message": "Failed to deploy application: ERROR!" } ] diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/changemanagement/ChangeManagementApiHandlerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/changemanagement/ChangeManagementApiHandlerTest.java index cd815a2064b..c4412531f80 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/changemanagement/ChangeManagementApiHandlerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/changemanagement/ChangeManagementApiHandlerTest.java @@ -12,6 +12,10 @@ import com.yahoo.vespa.hosted.controller.api.integration.noderepository.NodeOwne import com.yahoo.vespa.hosted.controller.api.integration.noderepository.NodeRepositoryNode; import com.yahoo.vespa.hosted.controller.api.integration.noderepository.NodeState; import com.yahoo.vespa.hosted.controller.api.integration.noderepository.NodeType; +import 
com.yahoo.vespa.hosted.controller.api.integration.vcmr.ChangeRequest; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.ChangeRequestSource; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.HostAction; +import com.yahoo.vespa.hosted.controller.api.integration.vcmr.VespaChangeRequest; import com.yahoo.vespa.hosted.controller.restapi.ContainerTester; import com.yahoo.vespa.hosted.controller.restapi.ControllerContainerTest; import org.intellij.lang.annotations.Language; @@ -19,6 +23,8 @@ import org.junit.Before; import org.junit.Test; import java.io.File; +import java.time.Instant; +import java.time.ZonedDateTime; import java.util.ArrayList; import java.util.List; @@ -35,11 +41,14 @@ public class ChangeManagementApiHandlerTest extends ControllerContainerTest { addUserToHostedOperatorRole(operator); tester.serviceRegistry().configServer().nodeRepository().addNodes(ZoneId.from("prod.us-east-3"), createNodes()); tester.serviceRegistry().configServer().nodeRepository().putNodes(ZoneId.from("prod.us-east-3"), createNode()); + tester.controller().curator().writeChangeRequest(createChangeRequest()); + } @Test public void test_api() { assertFile(new Request("http://localhost:8080/changemanagement/v1/assessment", "{\"zone\":\"prod.us-east-3\", \"hosts\": [\"host1\"]}", Request.Method.POST), "initial.json"); + assertFile(new Request("http://localhost:8080/changemanagement/v1/vcmr"), "vcmrs.json"); } private void assertResponse(Request request, @Language("JSON") String body, int statusCode) { @@ -58,6 +67,28 @@ public class ChangeManagementApiHandlerTest extends ControllerContainerTest { .build(); } + private VespaChangeRequest createChangeRequest() { + var instant = Instant.ofEpochMilli(9001); + var date = ZonedDateTime.ofInstant(instant, java.time.ZoneId.of("UTC")); + var source = new ChangeRequestSource("aws", "id321", "url", ChangeRequestSource.Status.STARTED, date, date); + var actionPlan = List.of( + new HostAction("host1", 
HostAction.State.RETIRING, instant), + new HostAction("host2", HostAction.State.RETIRED, instant) + ); + + return new VespaChangeRequest( + "id123", + source, + List.of("switch1"), + List.of("host1", "host2"), + ChangeRequest.Approval.APPROVED, + ChangeRequest.Impact.VERY_HIGH, + VespaChangeRequest.Status.IN_PROGRESS, + actionPlan, + ZoneId.defaultId() + ); + } + private List<NodeRepositoryNode> createNodes() { List<NodeRepositoryNode> nodes = new ArrayList<>(); nodes.add(createNode("node1", "host1", "default", 0 )); diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/changemanagement/responses/vcmrs.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/changemanagement/responses/vcmrs.json new file mode 100644 index 00000000000..54d4ea8bcbd --- /dev/null +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/changemanagement/responses/vcmrs.json @@ -0,0 +1,40 @@ +{ + "vcmrs": [ + { + "id": "id123", + "status": "IN_PROGRESS", + "impact": "VERY_HIGH", + "approval": "APPROVED", + "zoneId": "prod.default", + "source": { + "system": "aws", + "id": "id321", + "url": "url", + "plannedStartTime": "1970-01-01T00:00:09.001Z[UTC]", + "plannedEndTime": "1970-01-01T00:00:09.001Z[UTC]", + "status": "STARTED" + }, + "actionPlan": { + "hosts": [ + { + "hostname": "host1", + "state": "RETIRING", + "lastUpdated": "1970-01-01T00:00:09.001Z" + }, + { + "hostname": "host2", + "state": "RETIRED", + "lastUpdated": "1970-01-01T00:00:09.001Z" + } + ] + }, + "impactedHosts": [ + "host1", + "host2" + ], + "impactedSwitches": [ + "switch1" + ] + } + ] +}
\ No newline at end of file diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json index 17c93c070fb..3cf79977fb8 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/controller/responses/maintenance.json @@ -43,7 +43,7 @@ "name": "EndpointCertificateMaintainer" }, { - "name": "HostSwitchUpdater" + "name": "HostInfoUpdater" }, { "name": "JobRunner" @@ -91,6 +91,9 @@ "name": "Upgrader" }, { + "name": "VCMRMaintainer" + }, + { "name": "VersionStatusUpdater" } ], diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/routing/RoutingPoliciesTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/routing/RoutingPoliciesTest.java index e96af475216..d03dec06753 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/routing/RoutingPoliciesTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/routing/RoutingPoliciesTest.java @@ -416,7 +416,7 @@ public class RoutingPoliciesTest { var loadBalancer = new LoadBalancer("LB-0-Z-" + zone1.value(), context.instanceId(), ClusterSpec.Id.from("c0"), - newHostname, + Optional.of(newHostname), LoadBalancer.State.active, Optional.of("dns-zone-1")); tester.controllerTester().configServer().putLoadBalancers(zone1, List.of(loadBalancer)); @@ -705,7 +705,7 @@ public class RoutingPoliciesTest { new LoadBalancer("LB-" + i + "-Z-" + zone.value(), application, ClusterSpec.Id.from("c" + i), - lbHostname, + Optional.of(lbHostname), LoadBalancer.State.active, Optional.of("dns-zone-1"))); } |