aboutsummaryrefslogtreecommitdiffstats
path: root/orchestrator
diff options
context:
space:
mode:
author: Håkon Hallingstad <hakon@yahooinc.com> 2021-09-23 11:34:34 +0200
committer: Håkon Hallingstad <hakon@yahooinc.com> 2021-09-23 11:34:34 +0200
commit: bf5f2567e351e527d8fe573beee17d18326b90f3 (patch)
tree: 7119bae07f03bc9d01f8d291a6551ffbb6d539e5 /orchestrator
parent: 13af00d9970476ed82233cf6592f76e0f163264e (diff)
Avoid unknown service status on suspended host
Avoid denying suspension with an unknown-service-status error when the service is on a host that is already suspended: in that case the service is effectively down, so its reported status does not matter.
Diffstat (limited to 'orchestrator')
-rw-r--r-- orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestrationException.java | 2
-rw-r--r-- orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java | 51
-rw-r--r-- orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java | 3
3 files changed, 27 insertions, 29 deletions
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestrationException.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestrationException.java
index 95d862d9e72..f2eae278150 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestrationException.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestrationException.java
@@ -3,7 +3,7 @@ package com.yahoo.vespa.orchestrator;
import java.util.Arrays;
-public class OrchestrationException extends Exception {
+public class OrchestrationException extends RuntimeException {
public OrchestrationException(Throwable cause) {
super(cause);
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java
index 00cb65a09b0..42f8a187e98 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java
@@ -19,7 +19,6 @@ import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
import java.util.Collections;
-import java.util.Comparator;
import java.util.HashSet;
import java.util.Map;
import java.util.Optional;
@@ -41,9 +40,10 @@ class ClusterApiImpl implements ClusterApi {
private final ClusterControllerClientFactory clusterControllerClientFactory;
private final Clock clock;
private final Set<ServiceInstance> servicesInGroup;
- private final Set<ServiceInstance> servicesDownInGroup;
private final Set<ServiceInstance> servicesNotInGroup;
- private final Set<ServiceInstance> servicesDownAndNotInGroup;
+
+ /** Lazily initialized in servicesDownAndNotInGroup(), do not access directly. */
+ private Set<ServiceInstance> servicesDownAndNotInGroup = null;
/*
* There are two sources for the number of config servers in a cluster. The config server config and the node
@@ -81,9 +81,6 @@ class ClusterApiImpl implements ClusterApi {
servicesInGroup = serviceInstancesByLocality.getOrDefault(true, Collections.emptySet());
servicesNotInGroup = serviceInstancesByLocality.getOrDefault(false, Collections.emptySet());
- servicesDownInGroup = servicesInGroup.stream().filter(this::serviceEffectivelyDown).collect(Collectors.toSet());
- servicesDownAndNotInGroup = servicesNotInGroup.stream().filter(this::serviceEffectivelyDown).collect(Collectors.toSet());
-
int serviceInstances = serviceCluster.serviceInstances().size();
if (clusterParams.size().isPresent() && serviceInstances < clusterParams.size().getAsInt()) {
missingServices = clusterParams.size().getAsInt() - serviceInstances;
@@ -156,29 +153,19 @@ class ClusterApiImpl implements ClusterApi {
@Override
public boolean noServicesOutsideGroupIsDown() throws HostStateChangeDeniedException {
- Optional<ServiceInstance> serviceWithUnknownStatus = servicesNotInGroup
- .stream()
- .filter(serviceInstance -> serviceInstance.serviceStatus() == ServiceStatus.UNKNOWN)
- .min(Comparator.comparing(ServiceInstance::descriptiveName));
- if (serviceWithUnknownStatus.isPresent()) {
- throw new HostStateChangeDeniedException(
- nodeGroup,
- HostedVespaPolicy.UNKNOWN_SERVICE_STATUS,
- "Service status of " + serviceWithUnknownStatus.get().descriptiveName() + " is not yet known");
- }
-
- return servicesDownAndNotInGroup.size() + missingServices == 0;
+ return servicesDownAndNotInGroup().size() + missingServices == 0;
}
@Override
public int percentageOfServicesDown() {
- int numberOfServicesDown = servicesDownAndNotInGroup.size() + missingServices + servicesDownInGroup.size();
+ int servicesDownInGroupCount = (int) servicesInGroup.stream().filter(this::serviceEffectivelyDown).count();
+ int numberOfServicesDown = servicesDownAndNotInGroup().size() + missingServices + servicesDownInGroupCount;
return numberOfServicesDown * 100 / (serviceCluster.serviceInstances().size() + missingServices);
}
@Override
public int percentageOfServicesDownIfGroupIsAllowedToBeDown() {
- int numberOfServicesDown = servicesDownAndNotInGroup.size() + missingServices + servicesInGroup.size();
+ int numberOfServicesDown = servicesDownAndNotInGroup().size() + missingServices + servicesInGroup.size();
return numberOfServicesDown * 100 / (serviceCluster.serviceInstances().size() + missingServices);
}
@@ -208,7 +195,7 @@ class ClusterApiImpl implements ClusterApi {
description.append(".");
}
- Set<ServiceInstance> downElsewhere = servicesDownAndNotInGroup.stream()
+ Set<ServiceInstance> downElsewhere = servicesDownAndNotInGroup().stream()
.filter(serviceInstance -> !suspended.contains(serviceInstance.hostName()))
.collect(Collectors.toSet());
@@ -288,19 +275,31 @@ class ClusterApiImpl implements ClusterApi {
return "{ clusterId=" + clusterId() + ", serviceType=" + serviceType() + " }";
}
+ private Set<ServiceInstance> servicesDownAndNotInGroup() {
+ if (servicesDownAndNotInGroup == null) {
+ servicesDownAndNotInGroup = servicesNotInGroup.stream().filter(this::serviceEffectivelyDown).collect(Collectors.toSet());
+ }
+ return servicesDownAndNotInGroup;
+ }
+
private HostStatus hostStatus(HostName hostName) {
return hostInfos.getOrNoRemarks(hostName).status();
}
- private boolean serviceEffectivelyDown(ServiceInstance service) {
+ private boolean serviceEffectivelyDown(ServiceInstance service) throws HostStateChangeDeniedException {
if (hostStatus(service.hostName()).isSuspended()) {
return true;
}
- if (service.serviceStatus() == ServiceStatus.DOWN) {
- return true;
+ switch (service.serviceStatus()) {
+ case DOWN: return true;
+ case UNKNOWN:
+ throw new HostStateChangeDeniedException(
+ nodeGroup,
+ HostedVespaPolicy.UNKNOWN_SERVICE_STATUS,
+ "Service status of " + service.descriptiveName() + " is not yet known");
+ default:
+ return false;
}
-
- return false;
}
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java
index 208e12690ff..3a6c24a05e3 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java
@@ -58,8 +58,7 @@ public class HostedVespaClusterPolicy implements ClusterPolicy {
return;
}
- int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi)
- .asPercentage();
+ int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi).asPercentage();
if (clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown() <= percentageOfServicesAllowedToBeDown) {
return;
}