summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHåkon Hallingstad <hakon@verizonmedia.com>2020-09-18 17:00:02 +0200
committerHåkon Hallingstad <hakon@verizonmedia.com>2020-09-18 17:00:02 +0200
commit8ef29392f487ec917c597635a27b82f9a7d88342 (patch)
tree80c5cbbf073bd7f2776dba43cdbde617bc0196e9
parent0327c08270ee4d538eaba6f0b5a94cf3edd72eca (diff)
30s down-moratorium before allowing suspension
-rw-r--r--application-model/src/main/java/com/yahoo/vespa/applicationmodel/ApplicationInstanceId.java4
-rw-r--r--application-model/src/main/java/com/yahoo/vespa/applicationmodel/ApplicationInstanceReference.java7
-rw-r--r--application-model/src/main/java/com/yahoo/vespa/applicationmodel/ClusterId.java3
-rw-r--r--application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceCluster.java60
-rw-r--r--application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceInstance.java21
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java10
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiFactory.java9
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java8
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java4
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java44
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ClusterPolicy.java4
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java20
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java8
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java2
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/SuspensionReasons.java83
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java5
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java74
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java26
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java23
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java5
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/SuspensionReasonsTest.java40
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java20
22 files changed, 389 insertions, 91 deletions
diff --git a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ApplicationInstanceId.java b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ApplicationInstanceId.java
index 6bdc7a949e7..801213dcf40 100644
--- a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ApplicationInstanceId.java
+++ b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ApplicationInstanceId.java
@@ -12,6 +12,10 @@ import java.util.Objects;
public class ApplicationInstanceId {
public static final ApplicationInstanceId CONFIG_SERVER = new ApplicationInstanceId("zone-config-servers");
public static final ApplicationInstanceId CONTROLLER = new ApplicationInstanceId("controller");
+ // Unfortunately, for config server host the ApplicationInstanceId is: configserver-host:prod:cd-us-central-1:default
+ public boolean isConfigServerHost() { return id.startsWith("configserver-host:"); }
+ public static final ApplicationInstanceId CONTROLLER_HOST = new ApplicationInstanceId("controller-host:prod:default:default");
+ public boolean isTenantHost() { return id.startsWith("tenant-host:"); }
private final String id;
diff --git a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ApplicationInstanceReference.java b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ApplicationInstanceReference.java
index e761e14caa4..f0b2c46d460 100644
--- a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ApplicationInstanceReference.java
+++ b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ApplicationInstanceReference.java
@@ -10,7 +10,7 @@ import java.util.Objects;
* @author bjorncs
*/
// TODO: Remove this and use ApplicationId instead (if you need it for the JSON stuff move it to that layer and don't let it leak)
-public class ApplicationInstanceReference {
+public class ApplicationInstanceReference implements Comparable<ApplicationInstanceReference> {
private final TenantId tenantId;
private final ApplicationInstanceId applicationInstanceId;
@@ -43,6 +43,11 @@ public class ApplicationInstanceReference {
}
@Override
+ public int compareTo(ApplicationInstanceReference o) {
+ return this.asString().compareTo(o.asString());
+ }
+
+ @Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
diff --git a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ClusterId.java b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ClusterId.java
index 96be7090114..8af47a796f6 100644
--- a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ClusterId.java
+++ b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ClusterId.java
@@ -12,6 +12,9 @@ public class ClusterId {
public static final ClusterId CONFIG_SERVER = new ClusterId("zone-config-servers");
public static final ClusterId CONTROLLER = new ClusterId("controller");
+ public static final ClusterId CONFIG_SERVER_HOST = new ClusterId("configserver-host");
+ public static final ClusterId CONTROLLER_HOST = new ClusterId("configserver-host");
+ public static final ClusterId TENANT_HOST = new ClusterId("tenant-host");
private final String id;
diff --git a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceCluster.java b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceCluster.java
index 1faefcb7c61..43f161cfec9 100644
--- a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceCluster.java
+++ b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceCluster.java
@@ -7,6 +7,7 @@ import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
+import java.util.function.Predicate;
/**
* Represents a collection of service instances that together make up a service with a single cluster id.
@@ -51,24 +52,53 @@ public class ServiceCluster {
return applicationInstance.get();
}
- public boolean isConfigServerClusterLike() {
- // config server
- if (Objects.equals(applicationInstance.map(ApplicationInstance::tenantId), Optional.of(TenantId.HOSTED_VESPA)) &&
- Objects.equals(applicationInstance.map(ApplicationInstance::applicationInstanceId), Optional.of(ApplicationInstanceId.CONFIG_SERVER)) &&
- Objects.equals(clusterId, ClusterId.CONFIG_SERVER) &&
- Objects.equals(serviceType, ServiceType.CONFIG_SERVER)) {
- return true;
- }
+ public boolean isConfigServerLike() {
+ return isConfigServer() || isController();
+ }
- // controller
- if (Objects.equals(applicationInstance.map(ApplicationInstance::tenantId), Optional.of(TenantId.HOSTED_VESPA)) &&
- Objects.equals(applicationInstance.map(ApplicationInstance::applicationInstanceId), Optional.of(ApplicationInstanceId.CONTROLLER)) &&
+ public boolean isController() {
+ return isHostedVespaApplicationWithId(ApplicationInstanceId.CONTROLLER) &&
Objects.equals(clusterId, ClusterId.CONTROLLER) &&
- Objects.equals(serviceType, ServiceType.CONTROLLER)) {
- return true;
- }
+ Objects.equals(serviceType, ServiceType.CONTROLLER);
+ }
+
+ /** Is a config server (and not controller!) */
+ public boolean isConfigServer() {
+ return isHostedVespaApplicationWithId(ApplicationInstanceId.CONFIG_SERVER) &&
+ Objects.equals(clusterId, ClusterId.CONFIG_SERVER) &&
+ Objects.equals(serviceType, ServiceType.CONFIG_SERVER);
+ }
+
+ public boolean isConfigServerHost() {
+ return isHostedVespaApplicationWithPredicate(ApplicationInstanceId::isConfigServerHost) &&
+ Objects.equals(clusterId, ClusterId.CONFIG_SERVER_HOST) &&
+ Objects.equals(serviceType, ServiceType.HOST_ADMIN);
+ }
+
+ public boolean isControllerHost() {
+ return isHostedVespaApplicationWithId(ApplicationInstanceId.CONTROLLER_HOST) &&
+ Objects.equals(clusterId, ClusterId.CONTROLLER_HOST) &&
+ Objects.equals(serviceType, ServiceType.HOST_ADMIN);
+ }
+
+ public boolean isTenantHost() {
+ return isHostedVespaApplicationWithPredicate(ApplicationInstanceId::isTenantHost) &&
+ Objects.equals(clusterId, ClusterId.TENANT_HOST) &&
+ Objects.equals(serviceType, ServiceType.HOST_ADMIN);
+ }
+
+ private boolean isHostedVespaApplicationWithId(ApplicationInstanceId id) {
+ return isHostedVespaTenant() &&
+ applicationInstance.map(app -> Objects.equals(app.applicationInstanceId(), id)).orElse(false);
+ }
+
+ private boolean isHostedVespaApplicationWithPredicate(Predicate<ApplicationInstanceId> predicate) {
+ return isHostedVespaTenant() &&
+ applicationInstance.map(app -> predicate.test(app.applicationInstanceId())).orElse(false);
+ }
- return false;
+ private boolean isHostedVespaTenant() {
+ return applicationInstance.map(a -> Objects.equals(a.tenantId(), TenantId.HOSTED_VESPA)).orElse(false);
}
@Override
diff --git a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceInstance.java b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceInstance.java
index b4fce878b0d..d75d3abd5da 100644
--- a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceInstance.java
+++ b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceInstance.java
@@ -66,6 +66,27 @@ public class ServiceInstance {
'}';
}
+ /**
+ * Get a name that can be used in e.g. config server logs that makes it easy to understand which
+ * service instance this is.
+ */
+ public String descriptiveName() {
+ if (getServiceCluster().isController() || getServiceCluster().isConfigServer()) {
+ return getHostnamePrefix();
+ } else if (getServiceCluster().isControllerHost() || getServiceCluster().isConfigServerHost()) {
+ return "host-admin on " + getHostnamePrefix();
+ } else if (getServiceCluster().isTenantHost()) {
+ return "host-admin on " + hostName.s();
+ } else {
+ return configId.s();
+ }
+ }
+
+ private String getHostnamePrefix() {
+ int dotIndex = hostName.s().indexOf('.');
+ return dotIndex == -1 ? hostName().s() : hostName.s().substring(0, dotIndex);
+ }
+
@Override
public boolean equals(Object o) {
if (this == o) return true;
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java
index baa98790d28..333edd4b3f9 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java
@@ -29,6 +29,7 @@ import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException;
import com.yahoo.vespa.orchestrator.policy.HostedVespaClusterPolicy;
import com.yahoo.vespa.orchestrator.policy.HostedVespaPolicy;
import com.yahoo.vespa.orchestrator.policy.Policy;
+import com.yahoo.vespa.orchestrator.policy.SuspensionReasons;
import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus;
import com.yahoo.vespa.orchestrator.status.ApplicationLock;
import com.yahoo.vespa.orchestrator.status.HostInfo;
@@ -76,13 +77,13 @@ public class OrchestratorImpl implements Orchestrator {
{
this(new HostedVespaPolicy(new HostedVespaClusterPolicy(),
clusterControllerClientFactory,
- new ApplicationApiFactory(configServerConfig.zookeeperserver().size())),
+ new ApplicationApiFactory(configServerConfig.zookeeperserver().size(), Clock.systemUTC())),
clusterControllerClientFactory,
statusService,
serviceMonitor,
orchestratorConfig.serviceMonitorConvergenceLatencySeconds(),
Clock.systemUTC(),
- new ApplicationApiFactory(configServerConfig.zookeeperserver().size()),
+ new ApplicationApiFactory(configServerConfig.zookeeperserver().size(), Clock.systemUTC()),
flagSource);
}
@@ -227,6 +228,7 @@ public class OrchestratorImpl implements Orchestrator {
void suspendGroup(OrchestratorContext context, NodeGroup nodeGroup) throws HostStateChangeDeniedException {
ApplicationInstanceReference applicationReference = nodeGroup.getApplicationReference();
+ final SuspensionReasons suspensionReasons;
try (ApplicationLock lock = statusService.lockApplication(context, applicationReference)) {
ApplicationInstanceStatus appStatus = lock.getApplicationInstanceStatus();
if (appStatus == ApplicationInstanceStatus.ALLOWED_TO_BE_DOWN) {
@@ -235,8 +237,10 @@ public class OrchestratorImpl implements Orchestrator {
ApplicationApi applicationApi = applicationApiFactory.create(
nodeGroup, lock, clusterControllerClientFactory);
- policy.grantSuspensionRequest(context.createSubcontextWithinLock(), applicationApi);
+ suspensionReasons = policy.grantSuspensionRequest(context.createSubcontextWithinLock(), applicationApi);
}
+
+ suspensionReasons.makeLogMessage().ifPresent(log::info);
}
@Override
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiFactory.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiFactory.java
index fb52eed2048..d913a1791ba 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiFactory.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiFactory.java
@@ -4,21 +4,26 @@ package com.yahoo.vespa.orchestrator.model;
import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory;
import com.yahoo.vespa.orchestrator.status.ApplicationLock;
+import java.time.Clock;
+
/**
* @author mpolden
*/
public class ApplicationApiFactory {
private final int numberOfConfigServers;
+ private final Clock clock;
- public ApplicationApiFactory(int numberOfConfigServers) {
+ public ApplicationApiFactory(int numberOfConfigServers, Clock clock) {
this.numberOfConfigServers = numberOfConfigServers;
+ this.clock = clock;
}
public ApplicationApi create(NodeGroup nodeGroup,
ApplicationLock lock,
ClusterControllerClientFactory clusterControllerClientFactory) {
- return new ApplicationApiImpl(nodeGroup, lock, clusterControllerClientFactory, numberOfConfigServers);
+ return new ApplicationApiImpl(nodeGroup, lock, clusterControllerClientFactory,
+ numberOfConfigServers, clock);
}
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java
index ccc89fa9191..7ee1395b7e7 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java
@@ -14,6 +14,7 @@ import com.yahoo.vespa.orchestrator.status.HostInfos;
import com.yahoo.vespa.orchestrator.status.HostStatus;
import com.yahoo.vespa.orchestrator.status.ApplicationLock;
+import java.time.Clock;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashSet;
@@ -33,16 +34,18 @@ public class ApplicationApiImpl implements ApplicationApi {
private final ApplicationInstance applicationInstance;
private final NodeGroup nodeGroup;
private final ApplicationLock lock;
+ private final Clock clock;
private final List<ClusterApi> clusterInOrder;
private final HostInfos hostInfos;
public ApplicationApiImpl(NodeGroup nodeGroup,
ApplicationLock lock,
ClusterControllerClientFactory clusterControllerClientFactory,
- int numberOfConfigServers) {
+ int numberOfConfigServers, Clock clock) {
this.applicationInstance = nodeGroup.getApplication();
this.nodeGroup = nodeGroup;
this.lock = lock;
+ this.clock = clock;
Collection<HostName> hosts = getHostsUsedByApplicationInstance(applicationInstance);
this.hostInfos = lock.getHostInfos();
this.clusterInOrder = makeClustersInOrder(nodeGroup, hostInfos, clusterControllerClientFactory, numberOfConfigServers);
@@ -122,7 +125,8 @@ public class ApplicationApiImpl implements ApplicationApi {
nodeGroup,
hostInfos,
clusterControllerClientFactory,
- numberOfConfigServers))
+ numberOfConfigServers,
+ clock))
.sorted(ApplicationApiImpl::compareClusters)
.collect(Collectors.toList());
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java
index 65c45c8df76..2e7a63ddb2f 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java
@@ -3,6 +3,7 @@ package com.yahoo.vespa.orchestrator.model;
import com.yahoo.vespa.applicationmodel.ClusterId;
import com.yahoo.vespa.applicationmodel.ServiceType;
+import com.yahoo.vespa.orchestrator.policy.SuspensionReasons;
import java.util.Optional;
@@ -17,7 +18,8 @@ public interface ClusterApi {
ServiceType serviceType();
boolean isStorageCluster();
- boolean noServicesInGroupIsUp();
+ /** Returns the reasons no services are up in the implied group, or empty if some services are up. */
+ Optional<SuspensionReasons> reasonsForNoServicesInGroupIsUp();
boolean noServicesOutsideGroupIsDown();
int percentageOfServicesDown();
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java
index ae3136543a7..7417318c572 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java
@@ -8,9 +8,13 @@ import com.yahoo.vespa.applicationmodel.ServiceInstance;
import com.yahoo.vespa.applicationmodel.ServiceStatus;
import com.yahoo.vespa.applicationmodel.ServiceType;
import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory;
+import com.yahoo.vespa.orchestrator.policy.SuspensionReasons;
import com.yahoo.vespa.orchestrator.status.HostInfos;
import com.yahoo.vespa.orchestrator.status.HostStatus;
+import java.time.Clock;
+import java.time.Duration;
+import java.time.Instant;
import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
@@ -24,12 +28,14 @@ import java.util.stream.Stream;
* @author hakonhall
*/
class ClusterApiImpl implements ClusterApi {
+ static final Duration downMoratorium = Duration.ofSeconds(30);
private final ApplicationApi applicationApi;
private final ServiceCluster serviceCluster;
private final NodeGroup nodeGroup;
private final HostInfos hostInfos;
private final ClusterControllerClientFactory clusterControllerClientFactory;
+ private final Clock clock;
private final Set<ServiceInstance> servicesInGroup;
private final Set<ServiceInstance> servicesDownInGroup;
private final Set<ServiceInstance> servicesNotInGroup;
@@ -53,12 +59,14 @@ class ClusterApiImpl implements ClusterApi {
NodeGroup nodeGroup,
HostInfos hostInfos,
ClusterControllerClientFactory clusterControllerClientFactory,
- int numberOfConfigServers) {
+ int numberOfConfigServers,
+ Clock clock) {
this.applicationApi = applicationApi;
this.serviceCluster = serviceCluster;
this.nodeGroup = nodeGroup;
this.hostInfos = hostInfos;
this.clusterControllerClientFactory = clusterControllerClientFactory;
+ this.clock = clock;
Map<Boolean, Set<ServiceInstance>> serviceInstancesByLocality =
serviceCluster.serviceInstances().stream()
@@ -73,7 +81,7 @@ class ClusterApiImpl implements ClusterApi {
servicesDownAndNotInGroup = servicesNotInGroup.stream().filter(this::serviceEffectivelyDown).collect(Collectors.toSet());
int serviceInstances = serviceCluster.serviceInstances().size();
- if (serviceCluster.isConfigServerClusterLike() && serviceInstances < numberOfConfigServers) {
+ if (serviceCluster.isConfigServerLike() && serviceInstances < numberOfConfigServers) {
missingServices = numberOfConfigServers - serviceInstances;
descriptionOfMissingServices = missingServices + " missing config server" + (missingServices > 1 ? "s" : "");
} else {
@@ -108,8 +116,36 @@ class ClusterApiImpl implements ClusterApi {
}
@Override
- public boolean noServicesInGroupIsUp() {
- return servicesDownInGroup.size() == servicesInGroup.size();
+ public Optional<SuspensionReasons> reasonsForNoServicesInGroupIsUp() {
+ SuspensionReasons reasons = new SuspensionReasons();
+
+ for (ServiceInstance service : servicesInGroup) {
+ if (hostStatus(service.hostName()).isSuspended()) {
+ reasons.mergeWith(SuspensionReasons.nothingNoteworthy());
+ continue;
+ }
+
+ if (service.serviceStatus() == ServiceStatus.DOWN) {
+ Optional<Instant> since = service.serviceStatusInfo().since();
+ if (since.isEmpty()) {
+ reasons.mergeWith(SuspensionReasons.isDown(service));
+ continue;
+ }
+
+ // Make sure services truly are down for some period of time before we allow suspension.
+ // On the other hand, a service coming down and up repeatedly should probably
+ // also be allowed... difficult without keeping track of history in a better way.
+ final Duration downDuration = Duration.between(since.get(), clock.instant());
+ if (downDuration.compareTo(downMoratorium) > 0) {
+ reasons.mergeWith(SuspensionReasons.downSince(service, since.get(), downDuration));
+ continue;
+ }
+ }
+
+ return Optional.empty();
+ }
+
+ return Optional.of(reasons);
}
int missingServices() { return missingServices; }
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ClusterPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ClusterPolicy.java
index 7c2daf0b597..a0738b725ec 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ClusterPolicy.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/ClusterPolicy.java
@@ -8,8 +8,10 @@ public interface ClusterPolicy {
* There's an implicit group of nodes known to clusterApi. This method answers whether
* it would be fine, just looking at this cluster (and disregarding Cluster Controller/storage
* which is handled separately), to allow all services on all the nodes in the group to go down.
+ *
+ * @return notable reasons for why this group is fine with going down.
*/
- void verifyGroupGoingDownIsFine(ClusterApi clusterApi) throws HostStateChangeDeniedException;
+ SuspensionReasons verifyGroupGoingDownIsFine(ClusterApi clusterApi) throws HostStateChangeDeniedException;
/**
* There's an implicit group of nodes known to clusterApi. This method answers whether
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java
index ccb0bb57186..4e11961cb1b 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java
@@ -4,23 +4,26 @@ package com.yahoo.vespa.orchestrator.policy;
import com.yahoo.vespa.orchestrator.model.ClusterApi;
import com.yahoo.vespa.orchestrator.model.VespaModelUtil;
+import java.util.Optional;
+
import static com.yahoo.vespa.orchestrator.policy.HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT;
public class HostedVespaClusterPolicy implements ClusterPolicy {
@Override
- public void verifyGroupGoingDownIsFine(ClusterApi clusterApi) throws HostStateChangeDeniedException {
+ public SuspensionReasons verifyGroupGoingDownIsFine(ClusterApi clusterApi) throws HostStateChangeDeniedException {
if (clusterApi.noServicesOutsideGroupIsDown()) {
- return;
- }
-
- if (clusterApi.noServicesInGroupIsUp()) {
- return;
+ return SuspensionReasons.nothingNoteworthy();
}
int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi).asPercentage();
if (clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown() <= percentageOfServicesAllowedToBeDown) {
- return;
+ return SuspensionReasons.nothingNoteworthy();
+ }
+
+ Optional<SuspensionReasons> suspensionReasons = clusterApi.reasonsForNoServicesInGroupIsUp();
+ if (suspensionReasons.isPresent()) {
+ return suspensionReasons.get();
}
throw new HostStateChangeDeniedException(
@@ -34,7 +37,8 @@ public class HostedVespaClusterPolicy implements ClusterPolicy {
}
@Override
- public void verifyGroupGoingDownPermanentlyIsFine(ClusterApi clusterApi) throws HostStateChangeDeniedException {
+ public void verifyGroupGoingDownPermanentlyIsFine(ClusterApi clusterApi)
+ throws HostStateChangeDeniedException {
// This policy is similar to verifyGroupGoingDownIsFine, except that services being down in the group
// is no excuse to allow suspension (like it is for verifyGroupGoingDownIsFine), since if we grant
// suspension in this case they will permanently be down/removed.
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java
index 8b74d8a40ef..f9b914e391e 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java
@@ -39,11 +39,13 @@ public class HostedVespaPolicy implements Policy {
}
@Override
- public void grantSuspensionRequest(OrchestratorContext context, ApplicationApi application)
+ public SuspensionReasons grantSuspensionRequest(OrchestratorContext context, ApplicationApi application)
throws HostStateChangeDeniedException {
+ var suspensionReasons = new SuspensionReasons();
+
// Apply per-cluster policy
for (ClusterApi cluster : application.getClusters()) {
- clusterPolicy.verifyGroupGoingDownIsFine(cluster);
+ suspensionReasons.mergeWith(clusterPolicy.verifyGroupGoingDownIsFine(cluster));
}
// Ask Cluster Controller to set UP storage nodes in maintenance.
@@ -56,6 +58,8 @@ public class HostedVespaPolicy implements Policy {
for (HostName hostName : application.getNodesInGroupWithStatus(HostStatus.NO_REMARKS)) {
application.setHostState(context, hostName, HostStatus.ALLOWED_TO_BE_DOWN);
}
+
+ return suspensionReasons;
}
@Override
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java
index c410cda23a8..86538450782 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/Policy.java
@@ -15,7 +15,7 @@ public interface Policy {
/**
* Decide whether to grant a request for temporarily suspending the services on all hosts in the group.
*/
- void grantSuspensionRequest(OrchestratorContext context, ApplicationApi applicationApi) throws HostStateChangeDeniedException;
+ SuspensionReasons grantSuspensionRequest(OrchestratorContext context, ApplicationApi applicationApi) throws HostStateChangeDeniedException;
void releaseSuspensionGrant(OrchestratorContext context, ApplicationApi application) throws HostStateChangeDeniedException;
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/SuspensionReasons.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/SuspensionReasons.java
new file mode 100644
index 00000000000..c043396497b
--- /dev/null
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/SuspensionReasons.java
@@ -0,0 +1,83 @@
+// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.orchestrator.policy;
+
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.applicationmodel.ServiceInstance;
+
+import java.time.Duration;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+/**
+ * Information worth logging when/if suspending a host.
+ *
+ * @author hakon
+ */
+public class SuspensionReasons {
+ private final Map<HostName, List<String>> reasons = new HashMap<>();
+
+ public static SuspensionReasons nothingNoteworthy() { return new SuspensionReasons(); }
+ public static SuspensionReasons isDown(ServiceInstance service) {
+ return new SuspensionReasons().addReason(
+ service.hostName(),
+ service.descriptiveName() + " is down");
+ }
+
+ public static SuspensionReasons downSince(ServiceInstance service, Instant instant, Duration downDuration) {
+ return new SuspensionReasons().addReason(
+ service.hostName(),
+ service.descriptiveName() + " has been down since " +
+ // Round to whole second
+ Instant.ofEpochSecond(instant.getEpochSecond()).toString() +
+ " (" + downDuration.getSeconds() + " seconds)");
+ }
+
+ public SuspensionReasons() {}
+
+ /** An ordered list of all messages, typically useful for testing. */
+ public List<String> getMessagesInOrder() {
+ return reasons.values().stream().flatMap(Collection::stream).sorted().collect(Collectors.toList());
+ }
+
+ public SuspensionReasons mergeWith(SuspensionReasons that) {
+ for (var entry : that.reasons.entrySet()) {
+ for (var reason : entry.getValue()) {
+ addReason(entry.getKey(), reason);
+ }
+ }
+
+ return this;
+ }
+
+ /**
+ * Makes a log message, if there is anything worth logging about the decision to allow suspension,
+ * that can be directly fed to {@link java.util.logging.Logger#info(String) Logger.info(String)}.
+ */
+ public Optional<String> makeLogMessage() {
+ if (reasons.isEmpty()) {
+ return Optional.empty();
+ }
+
+ return Optional.of(reasons.entrySet().stream()
+ .map(entry -> entry.getKey().s() + " suspended because " + String.join(", ", entry.getValue()))
+ .sorted()
+ .collect(Collectors.joining("; ")));
+ }
+
+ /** Package-private for testing. */
+ SuspensionReasons addReason(HostName hostname, String message) {
+ reasons.computeIfAbsent(hostname, h -> new ArrayList<>()).add(message);
+ return this;
+ }
+
+ @Override
+ public String toString() {
+ return makeLogMessage().orElse("");
+ }
+}
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java
index b32a6aafa56..3580b305c2a 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java
@@ -26,6 +26,7 @@ import com.yahoo.vespa.orchestrator.policy.BatchHostStateChangeDeniedException;
import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException;
import com.yahoo.vespa.orchestrator.policy.HostedVespaClusterPolicy;
import com.yahoo.vespa.orchestrator.policy.HostedVespaPolicy;
+import com.yahoo.vespa.orchestrator.policy.SuspensionReasons;
import com.yahoo.vespa.orchestrator.status.ApplicationLock;
import com.yahoo.vespa.orchestrator.status.HostStatus;
import com.yahoo.vespa.orchestrator.status.StatusService;
@@ -73,7 +74,8 @@ import static org.mockito.internal.verification.VerificationModeFactory.atLeastO
*/
public class OrchestratorImplTest {
- private final ApplicationApiFactory applicationApiFactory = new ApplicationApiFactory(3);
+ private final ManualClock clock = new ManualClock();
+ private final ApplicationApiFactory applicationApiFactory = new ApplicationApiFactory(3, clock);
private final InMemoryFlagSource flagSource = new InMemoryFlagSource();
private final MockCurator curator = new MockCurator();
private ZkStatusService statusService = new ZkStatusService(
@@ -344,6 +346,7 @@ public class OrchestratorImplTest {
var applicationApiFactory = mock(ApplicationApiFactory.class);
var lock = mock(ApplicationLock.class);
+ when(policy.grantSuspensionRequest(any(), any())).thenReturn(SuspensionReasons.nothingNoteworthy());
when(serviceMonitor.getApplication(any(HostName.class))).thenReturn(Optional.of(applicationInstance));
when(applicationInstance.reference()).thenReturn(applicationInstanceReference);
when(zookeeperStatusService.lockApplication(any(), any())).thenReturn(lock);
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java
index a5cb5cfa630..14d35902738 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.orchestrator.model;
+import com.yahoo.test.ManualClock;
import com.yahoo.vespa.applicationmodel.ApplicationInstance;
import com.yahoo.vespa.applicationmodel.ApplicationInstanceId;
import com.yahoo.vespa.applicationmodel.ClusterId;
@@ -8,15 +9,19 @@ import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.applicationmodel.ServiceCluster;
import com.yahoo.vespa.applicationmodel.ServiceInstance;
import com.yahoo.vespa.applicationmodel.ServiceStatus;
+import com.yahoo.vespa.applicationmodel.ServiceStatusInfo;
import com.yahoo.vespa.applicationmodel.ServiceType;
import com.yahoo.vespa.applicationmodel.TenantId;
import com.yahoo.vespa.orchestrator.OrchestratorUtil;
import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException;
import com.yahoo.vespa.orchestrator.policy.HostedVespaClusterPolicy;
+import com.yahoo.vespa.orchestrator.policy.SuspensionReasons;
import com.yahoo.vespa.orchestrator.status.HostInfos;
import com.yahoo.vespa.orchestrator.status.HostStatus;
import org.junit.Test;
+import java.time.Duration;
+import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -43,6 +48,7 @@ public class ClusterApiImplTest {
private final ApplicationApi applicationApi = mock(ApplicationApi.class);
private final ModelTestUtils modelUtils = new ModelTestUtils();
+ private final ManualClock clock = new ManualClock(Instant.ofEpochSecond(1600436659));
@Test
public void testServicesDownAndNotInGroup() {
@@ -76,7 +82,7 @@ public class ClusterApiImplTest {
serviceCluster,
new NodeGroup(modelUtils.createApplicationInstance(new ArrayList<>()), hostName5),
modelUtils.getHostInfos(),
- modelUtils.getClusterControllerClientFactory(), ModelTestUtils.NUMBER_OF_CONFIG_SERVERS);
+ modelUtils.getClusterControllerClientFactory(), ModelTestUtils.NUMBER_OF_CONFIG_SERVERS, clock);
assertEquals("{ clusterId=cluster, serviceType=service-type }", clusterApi.clusterInfo());
assertFalse(clusterApi.isStorageCluster());
@@ -139,16 +145,30 @@ public class ClusterApiImplTest {
HostName hostName3 = new HostName("host3");
HostName hostName4 = new HostName("host4");
HostName hostName5 = new HostName("host5");
+ HostName hostName6 = new HostName("host6");
+
+ ServiceInstance service2 = modelUtils.createServiceInstance("service-2", hostName2, ServiceStatus.DOWN);
+
+ // Within down moratorium
+ Instant downSince5 = clock.instant().minus(ClusterApiImpl.downMoratorium).plus(Duration.ofSeconds(5));
+ ServiceInstance service5 = modelUtils.createServiceInstance("service-5", hostName5,
+ new ServiceStatusInfo(ServiceStatus.DOWN, downSince5, downSince5, Optional.empty(), Optional.empty()));
+
+ // After down moratorium
+ Instant downSince6 = clock.instant().minus(ClusterApiImpl.downMoratorium).minus(Duration.ofSeconds(5));
+ ServiceInstance service6 = modelUtils.createServiceInstance("service-6", hostName6,
+ new ServiceStatusInfo(ServiceStatus.DOWN, downSince6, downSince6, Optional.empty(), Optional.empty()));
ServiceCluster serviceCluster = modelUtils.createServiceCluster(
"cluster",
new ServiceType("service-type"),
Arrays.asList(
modelUtils.createServiceInstance("service-1", hostName1, ServiceStatus.UP),
- modelUtils.createServiceInstance("service-2", hostName2, ServiceStatus.DOWN),
+ service2,
modelUtils.createServiceInstance("service-3", hostName3, ServiceStatus.UP),
modelUtils.createServiceInstance("service-4", hostName4, ServiceStatus.DOWN),
- modelUtils.createServiceInstance("service-5", hostName5, ServiceStatus.UP)
+ service5,
+ service6
)
);
modelUtils.createApplicationInstance(Collections.singletonList(serviceCluster));
@@ -158,21 +178,33 @@ public class ClusterApiImplTest {
modelUtils.createNode(hostName3, HostStatus.ALLOWED_TO_BE_DOWN);
modelUtils.createNode(hostName4, HostStatus.ALLOWED_TO_BE_DOWN);
modelUtils.createNode(hostName5, HostStatus.NO_REMARKS);
-
- verifyNoServices(serviceCluster, false, false, hostName1);
- verifyNoServices(serviceCluster, true, false, hostName2);
- verifyNoServices(serviceCluster, true, false, hostName3);
- verifyNoServices(serviceCluster, true, false, hostName4);
- verifyNoServices(serviceCluster, false, false, hostName5);
-
- verifyNoServices(serviceCluster, false, false, hostName1, hostName2);
- verifyNoServices(serviceCluster, true, false, hostName2, hostName3);
- verifyNoServices(serviceCluster, true, true, hostName2, hostName3, hostName4);
- verifyNoServices(serviceCluster, false, true, hostName1, hostName2, hostName3, hostName4);
+ modelUtils.createNode(hostName6, HostStatus.NO_REMARKS);
+
+ var reason2 = SuspensionReasons.isDown(service2);
+ var reason6 = SuspensionReasons.downSince(service6, downSince6, Duration.ofSeconds(35));
+ var reasons2and6 = new SuspensionReasons().mergeWith(reason2).mergeWith(reason6);
+
+ verifyNoServices(serviceCluster, Optional.empty(), false, hostName1);
+ verifyNoServices(serviceCluster, Optional.of(reason2), false, hostName2);
+ verifyNoServices(serviceCluster, Optional.of(SuspensionReasons.nothingNoteworthy()), false, hostName3);
+ verifyNoServices(serviceCluster, Optional.of(SuspensionReasons.nothingNoteworthy()), false, hostName4);
+ verifyNoServices(serviceCluster, Optional.empty(), false, hostName5);
+ verifyNoServices(serviceCluster, Optional.of(reason6), false, hostName6);
+
+ verifyNoServices(serviceCluster, Optional.empty(), false, hostName1, hostName2);
+ verifyNoServices(serviceCluster, Optional.of(reasons2and6), false, hostName2, hostName3, hostName6);
+ verifyNoServices(serviceCluster, Optional.of(reasons2and6), false,
+ hostName2, hostName3, hostName4, hostName6);
+ verifyNoServices(serviceCluster, Optional.empty(), true,
+ hostName2, hostName3, hostName4, hostName5, hostName6);
+ verifyNoServices(serviceCluster, Optional.empty(), false,
+ hostName1, hostName2, hostName3, hostName4, hostName6);
+ verifyNoServices(serviceCluster, Optional.empty(), true,
+ hostName1, hostName2, hostName3, hostName4, hostName5, hostName6);
}
private void verifyNoServices(ServiceCluster serviceCluster,
- boolean expectedNoServicesInGroupIsUp,
+ Optional<SuspensionReasons> expectedNoServicesInGroupIsUp,
boolean expectedNoServicesOutsideGroupIsDown,
HostName... groupNodes) {
ClusterApiImpl clusterApi = new ClusterApiImpl(
@@ -180,9 +212,10 @@ public class ClusterApiImplTest {
serviceCluster,
new NodeGroup(modelUtils.createApplicationInstance(new ArrayList<>()), groupNodes),
modelUtils.getHostInfos(),
- modelUtils.getClusterControllerClientFactory(), ModelTestUtils.NUMBER_OF_CONFIG_SERVERS);
+ modelUtils.getClusterControllerClientFactory(), ModelTestUtils.NUMBER_OF_CONFIG_SERVERS, clock);
- assertEquals(expectedNoServicesInGroupIsUp, clusterApi.noServicesInGroupIsUp());
+ assertEquals(expectedNoServicesInGroupIsUp.map(SuspensionReasons::getMessagesInOrder),
+ clusterApi.reasonsForNoServicesInGroupIsUp().map(SuspensionReasons::getMessagesInOrder));
assertEquals(expectedNoServicesOutsideGroupIsDown, clusterApi.noServicesOutsideGroupIsDown());
}
@@ -210,7 +243,7 @@ public class ClusterApiImplTest {
serviceCluster,
new NodeGroup(applicationInstance, hostName1, hostName3),
new HostInfos(),
- modelUtils.getClusterControllerClientFactory(), ModelTestUtils.NUMBER_OF_CONFIG_SERVERS);
+ modelUtils.getClusterControllerClientFactory(), ModelTestUtils.NUMBER_OF_CONFIG_SERVERS, clock);
assertTrue(clusterApi.isStorageCluster());
assertEquals(Optional.of(hostName1), clusterApi.storageNodeInGroup().map(storageNode -> storageNode.hostName()));
@@ -233,6 +266,9 @@ public class ClusterApiImplTest {
ServiceType.CONFIG_SERVER,
instances
);
+ for (var instance : instances) {
+ instance.setServiceCluster(serviceCluster);
+ }
Set<ServiceCluster> serviceClusterSet = Set.of(serviceCluster);
@@ -250,7 +286,7 @@ public class ClusterApiImplTest {
serviceCluster,
new NodeGroup(application, hostnames.get(0)),
modelUtils.getHostInfos(),
- modelUtils.getClusterControllerClientFactory(), clusterSize);
+ modelUtils.getClusterControllerClientFactory(), clusterSize, clock);
assertEquals(clusterSize - serviceStatusList.size(), clusterApi.missingServices());
assertEquals(clusterSize == serviceStatusList.size(), clusterApi.noServicesOutsideGroupIsDown());
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java
index 8162542e540..c4087daeb94 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java
@@ -13,6 +13,7 @@ import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.applicationmodel.ServiceCluster;
import com.yahoo.vespa.applicationmodel.ServiceInstance;
import com.yahoo.vespa.applicationmodel.ServiceStatus;
+import com.yahoo.vespa.applicationmodel.ServiceStatusInfo;
import com.yahoo.vespa.applicationmodel.ServiceType;
import com.yahoo.vespa.applicationmodel.TenantId;
import com.yahoo.vespa.curator.mock.MockCurator;
@@ -72,9 +73,10 @@ class ModelTestUtils {
new ManualClock(),
applicationApiFactory(),
flagSource);
+ private final ManualClock clock = new ManualClock();
ApplicationApiFactory applicationApiFactory() {
- return new ApplicationApiFactory(NUMBER_OF_CONFIG_SERVERS);
+ return new ApplicationApiFactory(NUMBER_OF_CONFIG_SERVERS, clock);
}
HostInfos getHostInfos() {
@@ -142,21 +144,19 @@ class ModelTestUtils {
ServiceType serviceType,
List<ServiceInstance> serviceInstances) {
Set<ServiceInstance> serviceInstanceSet = new HashSet<>(serviceInstances);
+ var cluster = new ServiceCluster(new ClusterId(clusterId), serviceType, serviceInstanceSet);
+ for (var service : serviceInstanceSet) {
+ service.setServiceCluster(cluster);
+ }
+ return cluster;
+ }
- return new ServiceCluster(
- new ClusterId(clusterId),
- serviceType,
- serviceInstanceSet);
+ ServiceInstance createServiceInstance(String configId, HostName hostName, ServiceStatus status) {
+ return new ServiceInstance(new ConfigId(configId), hostName, status);
}
- ServiceInstance createServiceInstance(
- String configId,
- HostName hostName,
- ServiceStatus status) {
- return new ServiceInstance(
- new ConfigId(configId),
- hostName,
- status);
+ ServiceInstance createServiceInstance(String configId, HostName hostName, ServiceStatusInfo statusInfo) {
+ return new ServiceInstance(new ConfigId(configId), hostName, statusInfo);
}
ClusterControllerClientFactory getClusterControllerClientFactory() {
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java
index 4462e886d1b..e5be87ba043 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java
@@ -3,6 +3,7 @@ package com.yahoo.vespa.orchestrator.policy;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.vespa.applicationmodel.ClusterId;
+import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.applicationmodel.ServiceType;
import com.yahoo.vespa.orchestrator.model.ApplicationApi;
import com.yahoo.vespa.orchestrator.model.ClusterApi;
@@ -11,6 +12,8 @@ import com.yahoo.vespa.orchestrator.model.VespaModelUtil;
import org.junit.Before;
import org.junit.Test;
+import java.util.Optional;
+
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import static org.mockito.Mockito.doReturn;
@@ -71,30 +74,31 @@ public class HostedVespaClusterPolicyTest {
@Test
public void verifyGroupGoingDownIsFine_noServicesOutsideGroupIsDownIsFine() {
- verifyGroupGoingDownIsFine(true, false, 13, true);
+ verifyGroupGoingDownIsFine(true, Optional.empty(), 13, true);
}
@Test
public void verifyGroupGoingDownIsFine_noServicesInGroupIsUp() {
- verifyGroupGoingDownIsFine(false, true, 13, true);
+ var reasons = new SuspensionReasons().addReason(new HostName("host1"), "supension reason 1");
+ verifyGroupGoingDownIsFine(false, Optional.of(reasons), 13, true);
}
@Test
public void verifyGroupGoingDownIsFine_percentageIsFine() {
- verifyGroupGoingDownIsFine(false, false, 9, true);
+ verifyGroupGoingDownIsFine(false, Optional.empty(), 9, true);
}
@Test
public void verifyGroupGoingDownIsFine_fails() {
- verifyGroupGoingDownIsFine(false, false, 13, false);
+ verifyGroupGoingDownIsFine(false, Optional.empty(), 13, false);
}
private void verifyGroupGoingDownIsFine(boolean noServicesOutsideGroupIsDown,
- boolean noServicesInGroupIsUp,
+ Optional<SuspensionReasons> noServicesInGroupIsUp,
int percentageOfServicesDownIfGroupIsAllowedToBeDown,
boolean expectSuccess) {
when(clusterApi.noServicesOutsideGroupIsDown()).thenReturn(noServicesOutsideGroupIsDown);
- when(clusterApi.noServicesInGroupIsUp()).thenReturn(noServicesInGroupIsUp);
+ when(clusterApi.reasonsForNoServicesInGroupIsUp()).thenReturn(noServicesInGroupIsUp);
when(clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()).thenReturn(20);
doReturn(ConcurrentSuspensionLimitForCluster.TEN_PERCENT).when(policy).getConcurrentSuspensionLimit(clusterApi);
@@ -107,12 +111,15 @@ public class HostedVespaClusterPolicyTest {
when(clusterApi.getNodeGroup()).thenReturn(nodeGroup);
when(nodeGroup.toCommaSeparatedString()).thenReturn("node-group");
- when(clusterApi.noServicesInGroupIsUp()).thenReturn(false);
try {
- policy.verifyGroupGoingDownIsFine(clusterApi);
+ SuspensionReasons reasons = policy.verifyGroupGoingDownIsFine(clusterApi);
if (!expectSuccess) {
fail();
}
+
+ if (noServicesInGroupIsUp.isPresent()) {
+ assertEquals(noServicesInGroupIsUp.get().getMessagesInOrder(), reasons.getMessagesInOrder());
+ }
} catch (HostStateChangeDeniedException e) {
if (!expectSuccess) {
assertEquals("Changing the state of node-group would violate enough-services-up: " +
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java
index ed6917a3a4e..599b50548b7 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java
@@ -3,6 +3,7 @@ package com.yahoo.vespa.orchestrator.policy;
import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.test.ManualClock;
import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.orchestrator.OrchestrationException;
import com.yahoo.vespa.orchestrator.OrchestratorContext;
@@ -34,7 +35,8 @@ public class HostedVespaPolicyTest {
private final ClusterControllerClientFactory clientFactory = mock(ClusterControllerClientFactory.class);
private final ClusterControllerClient client = mock(ClusterControllerClient.class);
- private final ApplicationApiFactory applicationApiFactory = new ApplicationApiFactory(3);
+ private final ManualClock clock = new ManualClock();
+ private final ApplicationApiFactory applicationApiFactory = new ApplicationApiFactory(3, clock);
@Before
public void setUp() {
@@ -44,6 +46,7 @@ public class HostedVespaPolicyTest {
@Test
public void testGrantSuspension() throws HostStateChangeDeniedException {
final HostedVespaClusterPolicy clusterPolicy = mock(HostedVespaClusterPolicy.class);
+ when(clusterPolicy.verifyGroupGoingDownIsFine(any())).thenReturn(SuspensionReasons.nothingNoteworthy());
final HostedVespaPolicy policy = new HostedVespaPolicy(clusterPolicy, clientFactory, applicationApiFactory);
final ApplicationApi applicationApi = mock(ApplicationApi.class);
when(applicationApi.applicationId()).thenReturn(ApplicationId.fromSerializedForm("tenant:app:default"));
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/SuspensionReasonsTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/SuspensionReasonsTest.java
new file mode 100644
index 00000000000..11274dc5d75
--- /dev/null
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/SuspensionReasonsTest.java
@@ -0,0 +1,40 @@
+package com.yahoo.vespa.orchestrator.policy;// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+import com.yahoo.test.ManualClock;
+import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.applicationmodel.ServiceInstance;
+import org.junit.Test;
+
+import java.time.Duration;
+import java.time.Instant;
+import java.util.Optional;
+
+import static org.junit.Assert.assertEquals;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class SuspensionReasonsTest {
+ private final ManualClock clock = new ManualClock(Instant.ofEpochMilli(1600440708123L));
+ private final ServiceInstance service = mock(ServiceInstance.class);
+ private final ServiceInstance service2 = mock(ServiceInstance.class);
+
+ @Test
+ public void test() {
+ assertEquals(Optional.empty(), new SuspensionReasons().makeLogMessage());
+ assertEquals(Optional.empty(), SuspensionReasons.nothingNoteworthy().makeLogMessage());
+
+ when(service.hostName()).thenReturn(new HostName("host1"));
+ when(service.descriptiveName()).thenReturn("service1");
+ when(service2.hostName()).thenReturn(new HostName("host2"));
+ when(service2.descriptiveName()).thenReturn("service2");
+
+ var reasons = SuspensionReasons.downSince(service, clock.instant(), Duration.ofSeconds(30));
+ reasons.mergeWith(SuspensionReasons.isDown(service2));
+ assertEquals(Optional.of(
+ "host1 suspended because service1 has been down since 2020-09-18T14:51:48Z (30 seconds); " +
+ "host2 suspended because service2 is down"),
+ reasons.makeLogMessage());
+
+ }
+
+} \ No newline at end of file
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java
index 0605fcb8a0a..46346e7de7f 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java
@@ -32,6 +32,7 @@ import com.yahoo.vespa.orchestrator.model.ApplicationApiFactory;
import com.yahoo.vespa.orchestrator.policy.BatchHostStateChangeDeniedException;
import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException;
import com.yahoo.vespa.orchestrator.policy.Policy;
+import com.yahoo.vespa.orchestrator.policy.SuspensionReasons;
import com.yahoo.vespa.orchestrator.restapi.wire.BatchOperationResult;
import com.yahoo.vespa.orchestrator.restapi.wire.GetHostResponse;
import com.yahoo.vespa.orchestrator.restapi.wire.PatchHostRequest;
@@ -83,7 +84,7 @@ public class HostResourceTest {
private static final ServiceMonitor serviceMonitor = mock(ServiceMonitor.class);
private static final StatusService EVERY_HOST_IS_UP_HOST_STATUS_SERVICE = new ZkStatusService(
new MockCurator(), mock(Metric.class), new TestTimer(), new DummyAntiServiceMonitor());
- private static final ApplicationApiFactory applicationApiFactory = new ApplicationApiFactory(3);
+ private static final ApplicationApiFactory applicationApiFactory = new ApplicationApiFactory(3, clock);
static {
when(serviceMonitor.getApplication(any(HostName.class)))
@@ -107,7 +108,8 @@ public class HostResourceTest {
private static class AlwaysAllowPolicy implements Policy {
@Override
- public void grantSuspensionRequest(OrchestratorContext context, ApplicationApi applicationApi) {
+ public SuspensionReasons grantSuspensionRequest(OrchestratorContext context, ApplicationApi applicationApi) {
+ return SuspensionReasons.nothingNoteworthy();
}
@Override
@@ -208,18 +210,18 @@ public class HostResourceTest {
private static class AlwaysFailPolicy implements Policy {
@Override
- public void grantSuspensionRequest(OrchestratorContext context, ApplicationApi applicationApi) throws HostStateChangeDeniedException {
- doThrow();
+ public SuspensionReasons grantSuspensionRequest(OrchestratorContext context, ApplicationApi applicationApi) throws HostStateChangeDeniedException {
+ throw newHostStateChangeDeniedException();
}
@Override
public void releaseSuspensionGrant(OrchestratorContext context, ApplicationApi application) throws HostStateChangeDeniedException {
- doThrow();
+ throw newHostStateChangeDeniedException();
}
@Override
public void acquirePermissionToRemove(OrchestratorContext context, ApplicationApi applicationApi) throws HostStateChangeDeniedException {
- doThrow();
+ throw newHostStateChangeDeniedException();
}
@Override
@@ -227,11 +229,11 @@ public class HostResourceTest {
OrchestratorContext context, ApplicationInstance applicationInstance,
HostName hostName,
ApplicationLock hostStatusRegistry) throws HostStateChangeDeniedException {
- doThrow();
+ throw newHostStateChangeDeniedException();
}
- private static void doThrow() throws HostStateChangeDeniedException {
- throw new HostStateChangeDeniedException(
+ private static HostStateChangeDeniedException newHostStateChangeDeniedException() {
+ return new HostStateChangeDeniedException(
new HostName("some-host"),
"impossible-policy",
"This policy rejects all requests");