diff options
author | Håkon Hallingstad <hakon@verizonmedia.com> | 2020-09-18 17:00:02 +0200 |
---|---|---|
committer | Håkon Hallingstad <hakon@verizonmedia.com> | 2020-09-18 17:00:02 +0200 |
commit | 8ef29392f487ec917c597635a27b82f9a7d88342 (patch) | |
tree | 80c5cbbf073bd7f2776dba43cdbde617bc0196e9 | |
parent | 0327c08270ee4d538eaba6f0b5a94cf3edd72eca (diff) |
30s down-moratorium before allowing suspension
22 files changed, 389 insertions, 91 deletions
diff --git a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ApplicationInstanceId.java b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ApplicationInstanceId.java index 6bdc7a949e7..801213dcf40 100644 --- a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ApplicationInstanceId.java +++ b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ApplicationInstanceId.java @@ -12,6 +12,10 @@ import java.util.Objects; public class ApplicationInstanceId { public static final ApplicationInstanceId CONFIG_SERVER = new ApplicationInstanceId("zone-config-servers"); public static final ApplicationInstanceId CONTROLLER = new ApplicationInstanceId("controller"); + // Unfortunately, for config server host the ApplicationInstanceId is: configserver-host:prod:cd-us-central-1:default + public boolean isConfigServerHost() { return id.startsWith("configserver-host:"); } + public static final ApplicationInstanceId CONTROLLER_HOST = new ApplicationInstanceId("controller-host:prod:default:default"); + public boolean isTenantHost() { return id.startsWith("tenant-host:"); } private final String id; diff --git a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ApplicationInstanceReference.java b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ApplicationInstanceReference.java index e761e14caa4..f0b2c46d460 100644 --- a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ApplicationInstanceReference.java +++ b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ApplicationInstanceReference.java @@ -10,7 +10,7 @@ import java.util.Objects; * @author bjorncs */ // TODO: Remove this and use ApplicationId instead (if you need it for the JSON stuff move it to that layer and don't let it leak) -public class ApplicationInstanceReference { +public class ApplicationInstanceReference implements Comparable<ApplicationInstanceReference> { private final TenantId tenantId; private final ApplicationInstanceId applicationInstanceId; @@ -43,6 +43,11 @@ public class ApplicationInstanceReference { } @Override + public int compareTo(ApplicationInstanceReference o) { + return this.asString().compareTo(o.asString()); + } + + @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; diff --git a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ClusterId.java b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ClusterId.java index 96be7090114..8af47a796f6 100644 --- a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ClusterId.java +++ b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ClusterId.java @@ -12,6 +12,9 @@ public class ClusterId { public static final ClusterId CONFIG_SERVER = new ClusterId("zone-config-servers"); public static final ClusterId CONTROLLER = new ClusterId("controller"); + public static final ClusterId CONFIG_SERVER_HOST = new ClusterId("configserver-host"); + public static final ClusterId CONTROLLER_HOST = new ClusterId("configserver-host"); + public static final ClusterId TENANT_HOST = new ClusterId("tenant-host"); private final String id; diff --git a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceCluster.java b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceCluster.java index 1faefcb7c61..43f161cfec9 100644 --- a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceCluster.java +++ b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceCluster.java @@ -7,6 +7,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.function.Predicate; /** * Represents a collection of service instances that together make up a service with a single cluster id. @@ -51,24 +52,53 @@ public class ServiceCluster { return applicationInstance.get(); } - public boolean isConfigServerClusterLike() { - // config server - if (Objects.equals(applicationInstance.map(ApplicationInstance::tenantId), Optional.of(TenantId.HOSTED_VESPA)) && - Objects.equals(applicationInstance.map(ApplicationInstance::applicationInstanceId), Optional.of(ApplicationInstanceId.CONFIG_SERVER)) && - Objects.equals(clusterId, ClusterId.CONFIG_SERVER) && - Objects.equals(serviceType, ServiceType.CONFIG_SERVER)) { - return true; - } + public boolean isConfigServerLike() { + return isConfigServer() || isController(); + } - // controller - if (Objects.equals(applicationInstance.map(ApplicationInstance::tenantId), Optional.of(TenantId.HOSTED_VESPA)) && - Objects.equals(applicationInstance.map(ApplicationInstance::applicationInstanceId), Optional.of(ApplicationInstanceId.CONTROLLER)) && + public boolean isController() { + return isHostedVespaApplicationWithId(ApplicationInstanceId.CONTROLLER) && Objects.equals(clusterId, ClusterId.CONTROLLER) && - Objects.equals(serviceType, ServiceType.CONTROLLER)) { - return true; - } + Objects.equals(serviceType, ServiceType.CONTROLLER); + } + + /** Is a config server (and not controller!) */ + public boolean isConfigServer() { + return isHostedVespaApplicationWithId(ApplicationInstanceId.CONFIG_SERVER) && + Objects.equals(clusterId, ClusterId.CONFIG_SERVER) && + Objects.equals(serviceType, ServiceType.CONFIG_SERVER); + } + + public boolean isConfigServerHost() { + return isHostedVespaApplicationWithPredicate(ApplicationInstanceId::isConfigServerHost) && + Objects.equals(clusterId, ClusterId.CONFIG_SERVER_HOST) && + Objects.equals(serviceType, ServiceType.HOST_ADMIN); + } + + public boolean isControllerHost() { + return isHostedVespaApplicationWithId(ApplicationInstanceId.CONTROLLER_HOST) && + Objects.equals(clusterId, ClusterId.CONTROLLER_HOST) && + Objects.equals(serviceType, ServiceType.HOST_ADMIN); + } + + public boolean isTenantHost() { + return isHostedVespaApplicationWithPredicate(ApplicationInstanceId::isTenantHost) && + Objects.equals(clusterId, ClusterId.TENANT_HOST) && + Objects.equals(serviceType, ServiceType.HOST_ADMIN); + } + + private boolean isHostedVespaApplicationWithId(ApplicationInstanceId id) { + return isHostedVespaTenant() && + applicationInstance.map(app -> Objects.equals(app.applicationInstanceId(), id)).orElse(false); + } + + private boolean isHostedVespaApplicationWithPredicate(Predicate<ApplicationInstanceId> predicate) { + return isHostedVespaTenant() && + applicationInstance.map(app -> predicate.test(app.applicationInstanceId())).orElse(false); + } - return false; + private boolean isHostedVespaTenant() { + return applicationInstance.map(a -> Objects.equals(a.tenantId(), TenantId.HOSTED_VESPA)).orElse(false); } @Override diff --git a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceInstance.java b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceInstance.java index b4fce878b0d..d75d3abd5da 100644 --- a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceInstance.java +++ b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceInstance.java @@ -66,6 +66,27 @@ public class ServiceInstance { '}'; } + /** + * Get a name that can be used in e.g. config server logs that makes it easy to understand which + * service instance this is. + */ + public String descriptiveName() { + if (getServiceCluster().isController() || getServiceCluster().isConfigServer()) { + return getHostnamePrefix(); + } else if (getServiceCluster().isControllerHost() || getServiceCluster().isConfigServerHost()) { + return "host-admin on " + getHostnamePrefix(); + } else if (getServiceCluster().isTenantHost()) { + return "host-admin on " + hostName.s(); + } else { + return configId.s(); + } + } + + private String getHostnamePrefix() { + int dotIndex = hostName.s().indexOf('.'); + return dotIndex == -1 ? hostName().s() : hostName.s().substring(0, dotIndex); + } + @Override public boolean equals(Object o) { if (this == o) return true; diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java index baa98790d28..333edd4b3f9 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java @@ -29,6 +29,7 @@ import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException; import com.yahoo.vespa.orchestrator.policy.HostedVespaClusterPolicy; import com.yahoo.vespa.orchestrator.policy.HostedVespaPolicy; import com.yahoo.vespa.orchestrator.policy.Policy; +import com.yahoo.vespa.orchestrator.policy.SuspensionReasons; import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus; import com.yahoo.vespa.orchestrator.status.ApplicationLock; import com.yahoo.vespa.orchestrator.status.HostInfo; @@ -76,13 +77,13 @@ public class OrchestratorImpl implements Orchestrator { { this(new HostedVespaPolicy(new HostedVespaClusterPolicy(), clusterControllerClientFactory, - new ApplicationApiFactory(configServerConfig.zookeeperserver().size())), + new ApplicationApiFactory(configServerConfig.zookeeperserver().size(), Clock.systemUTC())), clusterControllerClientFactory, statusService, serviceMonitor, orchestratorConfig.serviceMonitorConvergenceLatencySeconds(), Clock.systemUTC(), - new ApplicationApiFactory(configServerConfig.zookeeperserver().size()), + new ApplicationApiFactory(configServerConfig.zookeeperserver().size(), Clock.systemUTC()), flagSource); } @@ -227,6 +228,7 @@ public class OrchestratorImpl implements Orchestrator { void suspendGroup(OrchestratorContext context, NodeGroup nodeGroup) throws HostStateChangeDeniedException { ApplicationInstanceReference applicationReference = nodeGroup.getApplicationReference(); + final SuspensionReasons suspensionReasons; try (ApplicationLock lock = statusService.lockApplication(context, applicationReference)) { ApplicationInstanceStatus appStatus = lock.getApplicationInstanceStatus(); if (appStatus == ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN) { @@ -235,8 +237,10 @@ public class OrchestratorImpl implements Orchestrator { ApplicationApi applicationApi = applicationApiFactory.create( nodeGroup, lock, clusterControllerClientFactory); - policy.grantSuspensionRequest(context.createSubcontextWithinLock(), applicationApi); + suspensionReasons = policy.grantSuspensionRequest(context.createSubcontextWithinLock(), applicationApi); } + + suspensionReasons.makeLogMessage().ifPresent(log::info); } @Override diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiFactory.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiFactory.java index fb52eed2048..d913a1791ba 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiFactory.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiFactory.java @@ -4,21 +4,26 @@ package com.yahoo.vespa.orchestrator.model; import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory; import com.yahoo.vespa.orchestrator.status.ApplicationLock; +import java.time.Clock; + /** * @author mpolden */ public class ApplicationApiFactory { private final int numberOfConfigServers; + private final Clock clock; - public ApplicationApiFactory(int numberOfConfigServers) { + public ApplicationApiFactory(int numberOfConfigServers, Clock clock) { this.numberOfConfigServers = numberOfConfigServers; + this.clock = clock; } public ApplicationApi create(NodeGroup nodeGroup, ApplicationLock lock, ClusterControllerClientFactory clusterControllerClientFactory) { - return new ApplicationApiImpl(nodeGroup, lock, clusterControllerClientFactory, numberOfConfigServers); + return new ApplicationApiImpl(nodeGroup, lock, clusterControllerClientFactory, + numberOfConfigServers, clock); } } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java index ccc89fa9191..7ee1395b7e7 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java @@ -14,6 +14,7 @@ import com.yahoo.vespa.orchestrator.status.HostInfos; import com.yahoo.vespa.orchestrator.status.HostStatus; import com.yahoo.vespa.orchestrator.status.ApplicationLock; +import java.time.Clock; import java.util.Collection; import java.util.Comparator; import java.util.HashSet; @@ -33,16 +34,18 @@ public class ApplicationApiImpl implements ApplicationApi { private final ApplicationInstance applicationInstance; private final NodeGroup nodeGroup; private final ApplicationLock lock; + private final Clock clock; private final List<ClusterApi> clusterInOrder; private final HostInfos hostInfos; public ApplicationApiImpl(NodeGroup nodeGroup, ApplicationLock lock, ClusterControllerClientFactory clusterControllerClientFactory, - int numberOfConfigServers) { + int numberOfConfigServers, Clock clock) { this.applicationInstance = nodeGroup.getApplication(); this.nodeGroup = nodeGroup; this.lock = lock; + this.clock = clock; Collection<HostName> hosts = getHostsUsedByApplicationInstance(applicationInstance); this.hostInfos = lock.getHostInfos(); this.clusterInOrder = makeClustersInOrder(nodeGroup, hostInfos, clusterControllerClientFactory, numberOfConfigServers); @@ -122,7 +125,8 @@ public class ApplicationApiImpl implements ApplicationApi { nodeGroup, hostInfos, clusterControllerClientFactory, - numberOfConfigServers)) + numberOfConfigServers, + clock)) .sorted(ApplicationApiImpl::compareClusters) .collect(Collectors.toList()); } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java index 65c45c8df76..2e7a63ddb2f 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.orchestrator.model; import com.yahoo.vespa.applicationmodel.ClusterId; import com.yahoo.vespa.applicationmodel.ServiceType; +import com.yahoo.vespa.orchestrator.policy.SuspensionReasons; import java.util.Optional; @@ -17,7 +18,8 @@ public interface ClusterApi { ServiceType serviceType(); boolean isStorageCluster(); - boolean noServicesInGroupIsUp(); + /** Returns the reasons no services are up in the implied group, or empty if some services are up. */ + Optional<SuspensionReasons> reasonsForNoServicesInGroupIsUp(); boolean noServicesOutsideGroupIsDown(); int percentageOfServicesDown(); diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java index ae3136543a7..7417318c572 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java @@ -8,9 +8,13 @@ import com.yahoo.vespa.applicationmodel.ServiceInstance; import com.yahoo.vespa.applicationmodel.ServiceStatus; import com.yahoo.vespa.applicationmodel.ServiceType; import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory; +import com.yahoo.vespa.orchestrator.policy.SuspensionReasons; import com.yahoo.vespa.orchestrator.status.HostInfos; import com.yahoo.vespa.orchestrator.status.HostStatus; +import java.time.Clock; +import java.time.Duration; +import java.time.Instant; import java.util.Collections; import java.util.HashSet; import java.util.Map; @@ -24,12 +28,14 @@ import java.util.stream.Stream; * @author hakonhall */ class ClusterApiImpl implements ClusterApi { + static final Duration downMoratorium = Duration.ofSeconds(30); private final ApplicationApi applicationApi; private final ServiceCluster serviceCluster; private final NodeGroup nodeGroup; private final HostInfos hostInfos; private final ClusterControllerClientFactory clusterControllerClientFactory; + private final Clock clock; private final Set<ServiceInstance> servicesInGroup; private final Set<ServiceInstance> servicesDownInGroup; private final Set<ServiceInstance> servicesNotInGroup; @@ -53,12 +59,14 @@ class ClusterApiImpl implements ClusterApi { NodeGroup nodeGroup, HostInfos hostInfos, ClusterControllerClientFactory clusterControllerClientFactory, - int numberOfConfigServers) { + int numberOfConfigServers, + Clock clock) { this.applicationApi = applicationApi; this.serviceCluster = serviceCluster; this.nodeGroup = nodeGroup; this.hostInfos = hostInfos; this.clusterControllerClientFactory = clusterControllerClientFactory; + this.clock = clock; Map<Boolean, Set<ServiceInstance>> serviceInstancesByLocality = serviceCluster.serviceInstances().stream() @@ -73,7 +81,7 @@ class ClusterApiImpl implements ClusterApi { servicesDownAndNotInGroup = servicesNotInGroup.stream().filter(this::serviceEffectivelyDown).collect(Collectors.toSet()); int serviceInstances = serviceCluster.serviceInstances().size(); - if (serviceCluster.isConfigServerClusterLike() && serviceInstances < numberOfConfigServers) { + if (serviceCluster.isConfigServerLike() && serviceInstances < numberOfConfigServers) { missingServices = numberOfConfigServers - serviceInstances; descriptionOfMissingServices = missingServices + " missing config server" + (missingServices > 1 ? "s" : ""); } else { @@ -108,8 +116,36 @@ class ClusterApiImpl implements ClusterApi { } @Override - public boolean noServicesInGroupIsUp() { - return servicesDownInGroup.size() == servicesInGroup.size(); + public Optional<SuspensionReasons> reasonsForNoServicesInGroupIsUp() { + SuspensionReasons reasons = new SuspensionReasons(); + + for (ServiceInstance service : servicesInGroup) { + if (hostStatus(service.hostName()).isSuspended()) { + reasons.mergeWith(SuspensionReasons.nothingNoteworthy()); + continue; + } + + if (service.serviceStatus() == ServiceStatus.DOWN) { + Optional<Instant> since = service.serviceStatusInfo().since(); + if (since.isEmpty()) { + reasons.mergeWith(SuspensionReasons.isDown(service)); + continue; + } + + // Make sure services truly are down for some period of time before we allow suspension. + // On the other hand, a service coming down and up repeatedly should probably + // also be allowed... difficult without keeping track of history in a better way. + final Duration downDuration = Duration.between(since.get(), clock.instant()); + if (downDuration.compareTo(downMoratorium) > 0) { + reasons.mergeWith(SuspensionReasons.downSince(service, since.get(), downDuration)); + continue; + } + } + + return Optional.empty(); + } + + return Optional.of(reasons); } int missingServices() { return missingServices; } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ClusterPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ClusterPolicy.java index 7c2daf0b597..a0738b725ec 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ClusterPolicy.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ClusterPolicy.java @@ -8,8 +8,10 @@ public interface ClusterPolicy { * There's an implicit group of nodes known to clusterApi. This method answers whether * it would be fine, just looking at this cluster (and disregarding Cluster Controller/storage * which is handled separately), to allow all services on all the nodes in the group to go down. + * + * @return notable reasons for why this group is fine with going down. */ - void verifyGroupGoingDownIsFine(ClusterApi clusterApi) throws HostStateChangeDeniedException; + SuspensionReasons verifyGroupGoingDownIsFine(ClusterApi clusterApi) throws HostStateChangeDeniedException; /** * There's an implicit group of nodes known to clusterApi. This method answers whether diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java index ccb0bb57186..4e11961cb1b 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java @@ -4,23 +4,26 @@ package com.yahoo.vespa.orchestrator.policy; import com.yahoo.vespa.orchestrator.model.ClusterApi; import com.yahoo.vespa.orchestrator.model.VespaModelUtil; +import java.util.Optional; + import static com.yahoo.vespa.orchestrator.policy.HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT; public class HostedVespaClusterPolicy implements ClusterPolicy { @Override - public void verifyGroupGoingDownIsFine(ClusterApi clusterApi) throws HostStateChangeDeniedException { + public SuspensionReasons verifyGroupGoingDownIsFine(ClusterApi clusterApi) throws HostStateChangeDeniedException { if (clusterApi.noServicesOutsideGroupIsDown()) { - return; - } - - if (clusterApi.noServicesInGroupIsUp()) { - return; + return SuspensionReasons.nothingNoteworthy(); } int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi).asPercentage(); if (clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown() <= percentageOfServicesAllowedToBeDown) { - return; + return SuspensionReasons.nothingNoteworthy(); + } + + Optional<SuspensionReasons> suspensionReasons = clusterApi.reasonsForNoServicesInGroupIsUp(); + if (suspensionReasons.isPresent()) { + return suspensionReasons.get(); } throw new HostStateChangeDeniedException( @@ -34,7 +37,8 @@ public class HostedVespaClusterPolicy implements ClusterPolicy { } @Override - public void verifyGroupGoingDownPermanentlyIsFine(ClusterApi clusterApi) throws HostStateChangeDeniedException { + public void verifyGroupGoingDownPermanentlyIsFine(ClusterApi clusterApi) + throws HostStateChangeDeniedException { // This policy is similar to verifyGroupGoingDownIsFine, except that services being down in the group // is no excuse to allow suspension (like it is for verifyGroupGoingDownIsFine), since if we grant // suspension in this case they will permanently be down/removed. diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java index 8b74d8a40ef..f9b914e391e 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java @@ -39,11 +39,13 @@ public class HostedVespaPolicy implements Policy { } @Override - public void grantSuspensionRequest(OrchestratorContext context, ApplicationApi application) + public SuspensionReasons grantSuspensionRequest(OrchestratorContext context, ApplicationApi application) throws HostStateChangeDeniedException { + var suspensionReasons = new SuspensionReasons(); + // Apply per-cluster policy for (ClusterApi cluster : application.getClusters()) { - clusterPolicy.verifyGroupGoingDownIsFine(cluster); + suspensionReasons.mergeWith(clusterPolicy.verifyGroupGoingDownIsFine(cluster)); } // Ask Cluster Controller to set UP storage nodes in maintenance. @@ -56,6 +58,8 @@ public class HostedVespaPolicy implements Policy { for (HostName hostName : application.getNodesInGroupWithStatus(HostStatus.NO_REMARKS)) { application.setHostState(context, hostName, HostStatus.ALLOWED_TO_BE_DOWN); } + + return suspensionReasons; } @Override diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java index c410cda23a8..86538450782 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java @@ -15,7 +15,7 @@ public interface Policy { /** * Decide whether to grant a request for temporarily suspending the services on all hosts in the group. */ - void grantSuspensionRequest(OrchestratorContext context, ApplicationApi applicationApi) throws HostStateChangeDeniedException; + SuspensionReasons grantSuspensionRequest(OrchestratorContext context, ApplicationApi applicationApi) throws HostStateChangeDeniedException; void releaseSuspensionGrant(OrchestratorContext context, ApplicationApi application) throws HostStateChangeDeniedException; diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/SuspensionReasons.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/SuspensionReasons.java new file mode 100644 index 00000000000..c043396497b --- /dev/null +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/SuspensionReasons.java @@ -0,0 +1,83 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.orchestrator.policy; + +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.applicationmodel.ServiceInstance; + +import java.time.Duration; +import java.time.Instant; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; + +/** + * Information worth logging when/if suspending a host. + * + * @author hakon + */ +public class SuspensionReasons { + private final Map<HostName, List<String>> reasons = new HashMap<>(); + + public static SuspensionReasons nothingNoteworthy() { return new SuspensionReasons(); } + public static SuspensionReasons isDown(ServiceInstance service) { + return new SuspensionReasons().addReason( + service.hostName(), + service.descriptiveName() + " is down"); + } + + public static SuspensionReasons downSince(ServiceInstance service, Instant instant, Duration downDuration) { + return new SuspensionReasons().addReason( + service.hostName(), + service.descriptiveName() + " has been down since " + + // Round to whole second + Instant.ofEpochSecond(instant.getEpochSecond()).toString() + + " (" + downDuration.getSeconds() + " seconds)"); + } + + public SuspensionReasons() {} + + /** An ordered list of all messages, typically useful for testing. */ + public List<String> getMessagesInOrder() { + return reasons.values().stream().flatMap(Collection::stream).sorted().collect(Collectors.toList()); + } + + public SuspensionReasons mergeWith(SuspensionReasons that) { + for (var entry : that.reasons.entrySet()) { + for (var reason : entry.getValue()) { + addReason(entry.getKey(), reason); + } + } + + return this; + } + + /** + * Makes a log message, if there is anything worth logging about the decision to allow suspension, + * that can be directly fed to {@link java.util.logging.Logger#info(String) Logger.info(String)}. + */ + public Optional<String> makeLogMessage() { + if (reasons.isEmpty()) { + return Optional.empty(); + } + + return Optional.of(reasons.entrySet().stream() + .map(entry -> entry.getKey().s() + " suspended because " + String.join(", ", entry.getValue())) + .sorted() + .collect(Collectors.joining("; "))); + } + + /** Package-private for testing. */ + SuspensionReasons addReason(HostName hostname, String message) { + reasons.computeIfAbsent(hostname, h -> new ArrayList<>()).add(message); + return this; + } + + @Override + public String toString() { + return makeLogMessage().orElse(""); + } +} diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java index b32a6aafa56..3580b305c2a 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java @@ -26,6 +26,7 @@ import com.yahoo.vespa.orchestrator.policy.BatchHostStateChangeDeniedException; import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException; import com.yahoo.vespa.orchestrator.policy.HostedVespaClusterPolicy; import com.yahoo.vespa.orchestrator.policy.HostedVespaPolicy; +import com.yahoo.vespa.orchestrator.policy.SuspensionReasons; import com.yahoo.vespa.orchestrator.status.ApplicationLock; import com.yahoo.vespa.orchestrator.status.HostStatus; import com.yahoo.vespa.orchestrator.status.StatusService; @@ -73,7 +74,8 @@ import static org.mockito.internal.verification.VerificationModeFactory.atLeastO */ public class OrchestratorImplTest { - private final ApplicationApiFactory applicationApiFactory = new ApplicationApiFactory(3); + private final ManualClock clock = new ManualClock(); + private final ApplicationApiFactory applicationApiFactory = new ApplicationApiFactory(3, clock); private final InMemoryFlagSource flagSource = new InMemoryFlagSource(); private final MockCurator curator = new MockCurator(); private ZkStatusService statusService = new ZkStatusService( @@ -344,6 +346,7 @@ public class OrchestratorImplTest { var applicationApiFactory = mock(ApplicationApiFactory.class); var lock = mock(ApplicationLock.class); + when(policy.grantSuspensionRequest(any(), any())).thenReturn(SuspensionReasons.nothingNoteworthy()); when(serviceMonitor.getApplication(any(HostName.class))).thenReturn(Optional.of(applicationInstance)); when(applicationInstance.reference()).thenReturn(applicationInstanceReference); when(zookeeperStatusService.lockApplication(any(), any())).thenReturn(lock); diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java index a5cb5cfa630..14d35902738 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java @@ -1,6 +1,7 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.orchestrator.model; +import com.yahoo.test.ManualClock; import com.yahoo.vespa.applicationmodel.ApplicationInstance; import com.yahoo.vespa.applicationmodel.ApplicationInstanceId; import com.yahoo.vespa.applicationmodel.ClusterId; @@ -8,15 +9,19 @@ import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.applicationmodel.ServiceCluster; import com.yahoo.vespa.applicationmodel.ServiceInstance; import com.yahoo.vespa.applicationmodel.ServiceStatus; +import com.yahoo.vespa.applicationmodel.ServiceStatusInfo; import com.yahoo.vespa.applicationmodel.ServiceType; import com.yahoo.vespa.applicationmodel.TenantId; import com.yahoo.vespa.orchestrator.OrchestratorUtil; import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException; import com.yahoo.vespa.orchestrator.policy.HostedVespaClusterPolicy; +import com.yahoo.vespa.orchestrator.policy.SuspensionReasons; import com.yahoo.vespa.orchestrator.status.HostInfos; import com.yahoo.vespa.orchestrator.status.HostStatus; import org.junit.Test; +import java.time.Duration; +import java.time.Instant; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -43,6 +48,7 @@ public class ClusterApiImplTest { private final ApplicationApi applicationApi = mock(ApplicationApi.class); private final ModelTestUtils modelUtils = new ModelTestUtils(); + private final ManualClock clock = new ManualClock(Instant.ofEpochSecond(1600436659)); @Test public void testServicesDownAndNotInGroup() { @@ -76,7 +82,7 @@ public class ClusterApiImplTest { serviceCluster, new NodeGroup(modelUtils.createApplicationInstance(new ArrayList<>()), hostName5), modelUtils.getHostInfos(), - modelUtils.getClusterControllerClientFactory(), ModelTestUtils.NUMBER_OF_CONFIG_SERVERS); + modelUtils.getClusterControllerClientFactory(), ModelTestUtils.NUMBER_OF_CONFIG_SERVERS, clock); assertEquals("{ clusterId=cluster, serviceType=service-type }", clusterApi.clusterInfo()); assertFalse(clusterApi.isStorageCluster()); @@ -139,16 +145,30 @@ public class ClusterApiImplTest { HostName hostName3 = new HostName("host3"); HostName hostName4 = new HostName("host4"); HostName hostName5 = new HostName("host5"); + HostName hostName6 = new HostName("host6"); + + ServiceInstance service2 = modelUtils.createServiceInstance("service-2", hostName2, ServiceStatus.DOWN); + + // Within down moratorium + Instant downSince5 = clock.instant().minus(ClusterApiImpl.downMoratorium).plus(Duration.ofSeconds(5)); + ServiceInstance service5 = modelUtils.createServiceInstance("service-5", hostName5, + new ServiceStatusInfo(ServiceStatus.DOWN, downSince5, downSince5, Optional.empty(), Optional.empty())); + + // After down moratorium + Instant downSince6 = clock.instant().minus(ClusterApiImpl.downMoratorium).minus(Duration.ofSeconds(5)); + ServiceInstance service6 = modelUtils.createServiceInstance("service-6", hostName6, + new ServiceStatusInfo(ServiceStatus.DOWN, downSince6, downSince6, Optional.empty(), Optional.empty())); ServiceCluster serviceCluster = modelUtils.createServiceCluster( "cluster", new ServiceType("service-type"), Arrays.asList( modelUtils.createServiceInstance("service-1", hostName1, ServiceStatus.UP), - modelUtils.createServiceInstance("service-2", hostName2, ServiceStatus.DOWN), + service2, modelUtils.createServiceInstance("service-3", hostName3, ServiceStatus.UP), modelUtils.createServiceInstance("service-4", hostName4, ServiceStatus.DOWN), - modelUtils.createServiceInstance("service-5", hostName5, ServiceStatus.UP) + service5, + service6 ) ); modelUtils.createApplicationInstance(Collections.singletonList(serviceCluster)); @@ -158,21 +178,33 @@ public class ClusterApiImplTest { modelUtils.createNode(hostName3, HostStatus.ALLOWED_TO_BE_DOWN); modelUtils.createNode(hostName4, HostStatus.ALLOWED_TO_BE_DOWN); modelUtils.createNode(hostName5, HostStatus.NO_REMARKS); - - verifyNoServices(serviceCluster, false, false, hostName1); - verifyNoServices(serviceCluster, true, false, hostName2); - verifyNoServices(serviceCluster, true, false, hostName3); - verifyNoServices(serviceCluster, true, false, hostName4); - verifyNoServices(serviceCluster, false, false, hostName5); - - verifyNoServices(serviceCluster, false, false, hostName1, hostName2); - verifyNoServices(serviceCluster, true, false, hostName2, hostName3); - verifyNoServices(serviceCluster, true, true, hostName2, hostName3, hostName4); - verifyNoServices(serviceCluster, false, true, hostName1, hostName2, hostName3, hostName4); + modelUtils.createNode(hostName6, HostStatus.NO_REMARKS); + + var reason2 = SuspensionReasons.isDown(service2); + var reason6 = SuspensionReasons.downSince(service6, downSince6, Duration.ofSeconds(35)); + var reasons2and6 = new SuspensionReasons().mergeWith(reason2).mergeWith(reason6); + + verifyNoServices(serviceCluster, Optional.empty(), false, hostName1); + verifyNoServices(serviceCluster, Optional.of(reason2), false, hostName2); + verifyNoServices(serviceCluster, Optional.of(SuspensionReasons.nothingNoteworthy()), false, hostName3); + verifyNoServices(serviceCluster, Optional.of(SuspensionReasons.nothingNoteworthy()), false, hostName4); + verifyNoServices(serviceCluster, Optional.empty(), false, hostName5); + verifyNoServices(serviceCluster, Optional.of(reason6), false, hostName6); + + verifyNoServices(serviceCluster, Optional.empty(), false, hostName1, hostName2); + verifyNoServices(serviceCluster, Optional.of(reasons2and6), false, hostName2, hostName3, hostName6); + verifyNoServices(serviceCluster, Optional.of(reasons2and6), false, + hostName2, hostName3, hostName4, hostName6); + verifyNoServices(serviceCluster, Optional.empty(), true, + hostName2, hostName3, hostName4, hostName5, hostName6); + verifyNoServices(serviceCluster, Optional.empty(), false, + hostName1, hostName2, hostName3, hostName4, hostName6); + verifyNoServices(serviceCluster, Optional.empty(), true, + hostName1, hostName2, hostName3, hostName4, hostName5, hostName6); } private void verifyNoServices(ServiceCluster serviceCluster, - boolean expectedNoServicesInGroupIsUp, + Optional<SuspensionReasons> expectedNoServicesInGroupIsUp, boolean expectedNoServicesOutsideGroupIsDown, HostName... groupNodes) { ClusterApiImpl clusterApi = new ClusterApiImpl( @@ -180,9 +212,10 @@ public class ClusterApiImplTest { serviceCluster, new NodeGroup(modelUtils.createApplicationInstance(new ArrayList<>()), groupNodes), modelUtils.getHostInfos(), - modelUtils.getClusterControllerClientFactory(), ModelTestUtils.NUMBER_OF_CONFIG_SERVERS); + modelUtils.getClusterControllerClientFactory(), ModelTestUtils.NUMBER_OF_CONFIG_SERVERS, clock); - assertEquals(expectedNoServicesInGroupIsUp, clusterApi.noServicesInGroupIsUp()); + assertEquals(expectedNoServicesInGroupIsUp.map(SuspensionReasons::getMessagesInOrder), + clusterApi.reasonsForNoServicesInGroupIsUp().map(SuspensionReasons::getMessagesInOrder)); assertEquals(expectedNoServicesOutsideGroupIsDown, clusterApi.noServicesOutsideGroupIsDown()); } @@ -210,7 +243,7 @@ public class ClusterApiImplTest { serviceCluster, new NodeGroup(applicationInstance, hostName1, hostName3), new HostInfos(), - modelUtils.getClusterControllerClientFactory(), ModelTestUtils.NUMBER_OF_CONFIG_SERVERS); + modelUtils.getClusterControllerClientFactory(), ModelTestUtils.NUMBER_OF_CONFIG_SERVERS, clock); assertTrue(clusterApi.isStorageCluster()); assertEquals(Optional.of(hostName1), clusterApi.storageNodeInGroup().map(storageNode -> storageNode.hostName())); @@ -233,6 +266,9 @@ public class ClusterApiImplTest { ServiceType.CONFIG_SERVER, instances ); + for (var instance : instances) { + instance.setServiceCluster(serviceCluster); + } Set<ServiceCluster> serviceClusterSet = Set.of(serviceCluster); @@ -250,7 +286,7 @@ public class ClusterApiImplTest { serviceCluster, new NodeGroup(application, hostnames.get(0)), modelUtils.getHostInfos(), - modelUtils.getClusterControllerClientFactory(), clusterSize); + modelUtils.getClusterControllerClientFactory(), clusterSize, clock); assertEquals(clusterSize - serviceStatusList.size(), clusterApi.missingServices()); assertEquals(clusterSize == serviceStatusList.size(), clusterApi.noServicesOutsideGroupIsDown()); diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java index 8162542e540..c4087daeb94 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java @@ -13,6 +13,7 @@ import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.applicationmodel.ServiceCluster; import com.yahoo.vespa.applicationmodel.ServiceInstance; import com.yahoo.vespa.applicationmodel.ServiceStatus; +import com.yahoo.vespa.applicationmodel.ServiceStatusInfo; import com.yahoo.vespa.applicationmodel.ServiceType; import com.yahoo.vespa.applicationmodel.TenantId; import com.yahoo.vespa.curator.mock.MockCurator; @@ -72,9 +73,10 @@ class ModelTestUtils { new ManualClock(), applicationApiFactory(), flagSource); + private final ManualClock clock = new ManualClock(); ApplicationApiFactory applicationApiFactory() { - return new ApplicationApiFactory(NUMBER_OF_CONFIG_SERVERS); + return new ApplicationApiFactory(NUMBER_OF_CONFIG_SERVERS, clock); } HostInfos getHostInfos() { @@ -142,21 +144,19 @@ class ModelTestUtils { ServiceType serviceType, List<ServiceInstance> serviceInstances) { Set<ServiceInstance> serviceInstanceSet = new HashSet<>(serviceInstances); + var cluster = new ServiceCluster(new ClusterId(clusterId), serviceType, serviceInstanceSet); + for (var service : serviceInstanceSet) { + service.setServiceCluster(cluster); + } + return cluster; + } - return new ServiceCluster( - new ClusterId(clusterId), - serviceType, - serviceInstanceSet); + ServiceInstance createServiceInstance(String configId, HostName hostName, ServiceStatus status) { + return new ServiceInstance(new ConfigId(configId), hostName, status); } - ServiceInstance createServiceInstance( - String configId, - HostName hostName, - ServiceStatus status) { - return new ServiceInstance( - new ConfigId(configId), - hostName, - status); + ServiceInstance createServiceInstance(String configId, HostName hostName, ServiceStatusInfo statusInfo) { + return new ServiceInstance(new ConfigId(configId), hostName, statusInfo); } ClusterControllerClientFactory getClusterControllerClientFactory() { diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java index 4462e886d1b..e5be87ba043 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.orchestrator.policy; import com.yahoo.config.provision.ApplicationId; import com.yahoo.vespa.applicationmodel.ClusterId; +import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.applicationmodel.ServiceType; import com.yahoo.vespa.orchestrator.model.ApplicationApi; import com.yahoo.vespa.orchestrator.model.ClusterApi; @@ -11,6 +12,8 @@ import com.yahoo.vespa.orchestrator.model.VespaModelUtil; import org.junit.Before; import org.junit.Test; +import java.util.Optional; + import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; import static org.mockito.Mockito.doReturn; @@ -71,30 +74,31 @@ public class HostedVespaClusterPolicyTest { @Test public void verifyGroupGoingDownIsFine_noServicesOutsideGroupIsDownIsFine() { - verifyGroupGoingDownIsFine(true, false, 13, true); + verifyGroupGoingDownIsFine(true, Optional.empty(), 13, true); } @Test public void verifyGroupGoingDownIsFine_noServicesInGroupIsUp() { - verifyGroupGoingDownIsFine(false, true, 13, true); + var reasons = new SuspensionReasons().addReason(new HostName("host1"), "supension reason 1"); + verifyGroupGoingDownIsFine(false, Optional.of(reasons), 13, true); } @Test public void verifyGroupGoingDownIsFine_percentageIsFine() { - verifyGroupGoingDownIsFine(false, false, 9, true); + verifyGroupGoingDownIsFine(false, Optional.empty(), 9, true); } @Test public void verifyGroupGoingDownIsFine_fails() { - verifyGroupGoingDownIsFine(false, false, 13, false); + verifyGroupGoingDownIsFine(false, Optional.empty(), 13, false); } private void verifyGroupGoingDownIsFine(boolean noServicesOutsideGroupIsDown, - boolean noServicesInGroupIsUp, + Optional<SuspensionReasons> noServicesInGroupIsUp, int percentageOfServicesDownIfGroupIsAllowedToBeDown, boolean expectSuccess) { when(clusterApi.noServicesOutsideGroupIsDown()).thenReturn(noServicesOutsideGroupIsDown); - when(clusterApi.noServicesInGroupIsUp()).thenReturn(noServicesInGroupIsUp); + when(clusterApi.reasonsForNoServicesInGroupIsUp()).thenReturn(noServicesInGroupIsUp); when(clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()).thenReturn(20); doReturn(ConcurrentSuspensionLimitForCluster.TEN_PERCENT).when(policy).getConcurrentSuspensionLimit(clusterApi); @@ -107,12 +111,15 @@ public class HostedVespaClusterPolicyTest { when(clusterApi.getNodeGroup()).thenReturn(nodeGroup); when(nodeGroup.toCommaSeparatedString()).thenReturn("node-group"); - when(clusterApi.noServicesInGroupIsUp()).thenReturn(false); try { - policy.verifyGroupGoingDownIsFine(clusterApi); + SuspensionReasons reasons = policy.verifyGroupGoingDownIsFine(clusterApi); if (!expectSuccess) { fail(); } + + if (noServicesInGroupIsUp.isPresent()) { + assertEquals(noServicesInGroupIsUp.get().getMessagesInOrder(), reasons.getMessagesInOrder()); + } } catch (HostStateChangeDeniedException e) { if (!expectSuccess) { assertEquals("Changing the state of node-group would violate enough-services-up: " + diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java index ed6917a3a4e..599b50548b7 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java @@ -3,6 +3,7 @@ package com.yahoo.vespa.orchestrator.policy; import com.yahoo.config.provision.ApplicationId; +import com.yahoo.test.ManualClock; import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.orchestrator.OrchestrationException; import com.yahoo.vespa.orchestrator.OrchestratorContext; @@ -34,7 +35,8 @@ public class HostedVespaPolicyTest { private final ClusterControllerClientFactory clientFactory = mock(ClusterControllerClientFactory.class); private final ClusterControllerClient client = mock(ClusterControllerClient.class); - private final ApplicationApiFactory applicationApiFactory = new ApplicationApiFactory(3); + private final ManualClock clock = new ManualClock(); + private final ApplicationApiFactory applicationApiFactory = new ApplicationApiFactory(3, clock); @Before public void setUp() { @@ -44,6 +46,7 @@ public class HostedVespaPolicyTest { @Test public void testGrantSuspension() throws HostStateChangeDeniedException { final HostedVespaClusterPolicy clusterPolicy = mock(HostedVespaClusterPolicy.class); + when(clusterPolicy.verifyGroupGoingDownIsFine(any())).thenReturn(SuspensionReasons.nothingNoteworthy()); final HostedVespaPolicy policy = new HostedVespaPolicy(clusterPolicy, clientFactory, applicationApiFactory); final ApplicationApi applicationApi = mock(ApplicationApi.class); when(applicationApi.applicationId()).thenReturn(ApplicationId.fromSerializedForm("tenant:app:default")); diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/SuspensionReasonsTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/SuspensionReasonsTest.java new file mode 100644 index 00000000000..11274dc5d75 --- /dev/null +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/SuspensionReasonsTest.java @@ -0,0 +1,40 @@ +package com.yahoo.vespa.orchestrator.policy;// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +import com.yahoo.test.ManualClock; +import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.applicationmodel.ServiceInstance; +import org.junit.Test; + +import java.time.Duration; +import java.time.Instant; +import java.util.Optional; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class SuspensionReasonsTest { + private final ManualClock clock = new ManualClock(Instant.ofEpochMilli(1600440708123L)); + private final ServiceInstance service = mock(ServiceInstance.class); + private final ServiceInstance service2 = mock(ServiceInstance.class); + + @Test + public void test() { + assertEquals(Optional.empty(), new SuspensionReasons().makeLogMessage()); + assertEquals(Optional.empty(), SuspensionReasons.nothingNoteworthy().makeLogMessage()); + + when(service.hostName()).thenReturn(new HostName("host1")); + when(service.descriptiveName()).thenReturn("service1"); + when(service2.hostName()).thenReturn(new HostName("host2")); + when(service2.descriptiveName()).thenReturn("service2"); + + var reasons = SuspensionReasons.downSince(service, clock.instant(), Duration.ofSeconds(30)); + reasons.mergeWith(SuspensionReasons.isDown(service2)); + assertEquals(Optional.of( + "host1 suspended because service1 has been down since 2020-09-18T14:51:48Z (30 seconds); " + + "host2 suspended because service2 is down"), + reasons.makeLogMessage()); + + } + +}
\ No newline at end of file diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java index 0605fcb8a0a..46346e7de7f 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java @@ -32,6 +32,7 @@ import com.yahoo.vespa.orchestrator.model.ApplicationApiFactory; import com.yahoo.vespa.orchestrator.policy.BatchHostStateChangeDeniedException; import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException; import com.yahoo.vespa.orchestrator.policy.Policy; +import com.yahoo.vespa.orchestrator.policy.SuspensionReasons; import com.yahoo.vespa.orchestrator.restapi.wire.BatchOperationResult; import com.yahoo.vespa.orchestrator.restapi.wire.GetHostResponse; import com.yahoo.vespa.orchestrator.restapi.wire.PatchHostRequest; @@ -83,7 +84,7 @@ public class HostResourceTest { private static final ServiceMonitor serviceMonitor = mock(ServiceMonitor.class); private static final StatusService EVERY_HOST_IS_UP_HOST_STATUS_SERVICE = new ZkStatusService( new MockCurator(), mock(Metric.class), new TestTimer(), new DummyAntiServiceMonitor()); - private static final ApplicationApiFactory applicationApiFactory = new ApplicationApiFactory(3); + private static final ApplicationApiFactory applicationApiFactory = new ApplicationApiFactory(3, clock); static { when(serviceMonitor.getApplication(any(HostName.class))) @@ -107,7 +108,8 @@ public class HostResourceTest { private static class AlwaysAllowPolicy implements Policy { @Override - public void grantSuspensionRequest(OrchestratorContext context, ApplicationApi applicationApi) { + public SuspensionReasons grantSuspensionRequest(OrchestratorContext context, ApplicationApi applicationApi) { + return SuspensionReasons.nothingNoteworthy(); } @Override @@ -208,18 +210,18 @@ public class HostResourceTest { private static class AlwaysFailPolicy implements Policy { @Override - public void grantSuspensionRequest(OrchestratorContext context, ApplicationApi applicationApi) throws HostStateChangeDeniedException { - doThrow(); + public SuspensionReasons grantSuspensionRequest(OrchestratorContext context, ApplicationApi applicationApi) throws HostStateChangeDeniedException { + throw newHostStateChangeDeniedException(); } @Override public void releaseSuspensionGrant(OrchestratorContext context, ApplicationApi application) throws HostStateChangeDeniedException { - doThrow(); + throw newHostStateChangeDeniedException(); } @Override public void acquirePermissionToRemove(OrchestratorContext context, ApplicationApi applicationApi) throws HostStateChangeDeniedException { - doThrow(); + throw newHostStateChangeDeniedException(); } @Override @@ -227,11 +229,11 @@ public class HostResourceTest { OrchestratorContext context, ApplicationInstance applicationInstance, HostName hostName, ApplicationLock hostStatusRegistry) throws HostStateChangeDeniedException { - doThrow(); + throw newHostStateChangeDeniedException(); } - private static void doThrow() throws HostStateChangeDeniedException { - throw new HostStateChangeDeniedException( + private static HostStateChangeDeniedException newHostStateChangeDeniedException() { + return new HostStateChangeDeniedException( new HostName("some-host"), "impossible-policy", "This policy rejects all requests"); |