diff options
author | Håkon Hallingstad <hakon@yahooinc.com> | 2021-09-23 11:34:34 +0200 |
---|---|---|
committer | Håkon Hallingstad <hakon@yahooinc.com> | 2021-09-23 11:34:34 +0200 |
commit | bf5f2567e351e527d8fe573beee17d18326b90f3 (patch) | |
tree | 7119bae07f03bc9d01f8d291a6551ffbb6d539e5 /orchestrator | |
parent | 13af00d9970476ed82233cf6592f76e0f163264e (diff) |
Avoid unknown service status on suspended host
Avoid throwing the unknown-service-status suspension denial if the service is on a
host that is suspended: in that case the status is effectively down.
Diffstat (limited to 'orchestrator')
3 files changed, 27 insertions, 29 deletions
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestrationException.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestrationException.java index 95d862d9e72..f2eae278150 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestrationException.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestrationException.java @@ -3,7 +3,7 @@ package com.yahoo.vespa.orchestrator; import java.util.Arrays; -public class OrchestrationException extends Exception { +public class OrchestrationException extends RuntimeException { public OrchestrationException(Throwable cause) { super(cause); diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java index 00cb65a09b0..42f8a187e98 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java @@ -19,7 +19,6 @@ import java.time.Clock; import java.time.Duration; import java.time.Instant; import java.util.Collections; -import java.util.Comparator; import java.util.HashSet; import java.util.Map; import java.util.Optional; @@ -41,9 +40,10 @@ class ClusterApiImpl implements ClusterApi { private final ClusterControllerClientFactory clusterControllerClientFactory; private final Clock clock; private final Set<ServiceInstance> servicesInGroup; - private final Set<ServiceInstance> servicesDownInGroup; private final Set<ServiceInstance> servicesNotInGroup; - private final Set<ServiceInstance> servicesDownAndNotInGroup; + + /** Lazily initialized in servicesDownAndNotInGroup(), do not access directly. */ + private Set<ServiceInstance> servicesDownAndNotInGroup = null; /* * There are two sources for the number of config servers in a cluster. 
The config server config and the node @@ -81,9 +81,6 @@ class ClusterApiImpl implements ClusterApi { servicesInGroup = serviceInstancesByLocality.getOrDefault(true, Collections.emptySet()); servicesNotInGroup = serviceInstancesByLocality.getOrDefault(false, Collections.emptySet()); - servicesDownInGroup = servicesInGroup.stream().filter(this::serviceEffectivelyDown).collect(Collectors.toSet()); - servicesDownAndNotInGroup = servicesNotInGroup.stream().filter(this::serviceEffectivelyDown).collect(Collectors.toSet()); - int serviceInstances = serviceCluster.serviceInstances().size(); if (clusterParams.size().isPresent() && serviceInstances < clusterParams.size().getAsInt()) { missingServices = clusterParams.size().getAsInt() - serviceInstances; @@ -156,29 +153,19 @@ class ClusterApiImpl implements ClusterApi { @Override public boolean noServicesOutsideGroupIsDown() throws HostStateChangeDeniedException { - Optional<ServiceInstance> serviceWithUnknownStatus = servicesNotInGroup - .stream() - .filter(serviceInstance -> serviceInstance.serviceStatus() == ServiceStatus.UNKNOWN) - .min(Comparator.comparing(ServiceInstance::descriptiveName)); - if (serviceWithUnknownStatus.isPresent()) { - throw new HostStateChangeDeniedException( - nodeGroup, - HostedVespaPolicy.UNKNOWN_SERVICE_STATUS, - "Service status of " + serviceWithUnknownStatus.get().descriptiveName() + " is not yet known"); - } - - return servicesDownAndNotInGroup.size() + missingServices == 0; + return servicesDownAndNotInGroup().size() + missingServices == 0; } @Override public int percentageOfServicesDown() { - int numberOfServicesDown = servicesDownAndNotInGroup.size() + missingServices + servicesDownInGroup.size(); + int servicesDownInGroupCount = (int) servicesInGroup.stream().filter(this::serviceEffectivelyDown).count(); + int numberOfServicesDown = servicesDownAndNotInGroup().size() + missingServices + servicesDownInGroupCount; return numberOfServicesDown * 100 / (serviceCluster.serviceInstances().size() + 
missingServices); } @Override public int percentageOfServicesDownIfGroupIsAllowedToBeDown() { - int numberOfServicesDown = servicesDownAndNotInGroup.size() + missingServices + servicesInGroup.size(); + int numberOfServicesDown = servicesDownAndNotInGroup().size() + missingServices + servicesInGroup.size(); return numberOfServicesDown * 100 / (serviceCluster.serviceInstances().size() + missingServices); } @@ -208,7 +195,7 @@ class ClusterApiImpl implements ClusterApi { description.append("."); } - Set<ServiceInstance> downElsewhere = servicesDownAndNotInGroup.stream() + Set<ServiceInstance> downElsewhere = servicesDownAndNotInGroup().stream() .filter(serviceInstance -> !suspended.contains(serviceInstance.hostName())) .collect(Collectors.toSet()); @@ -288,19 +275,31 @@ class ClusterApiImpl implements ClusterApi { return "{ clusterId=" + clusterId() + ", serviceType=" + serviceType() + " }"; } + private Set<ServiceInstance> servicesDownAndNotInGroup() { + if (servicesDownAndNotInGroup == null) { + servicesDownAndNotInGroup = servicesNotInGroup.stream().filter(this::serviceEffectivelyDown).collect(Collectors.toSet()); + } + return servicesDownAndNotInGroup; + } + private HostStatus hostStatus(HostName hostName) { return hostInfos.getOrNoRemarks(hostName).status(); } - private boolean serviceEffectivelyDown(ServiceInstance service) { + private boolean serviceEffectivelyDown(ServiceInstance service) throws HostStateChangeDeniedException { if (hostStatus(service.hostName()).isSuspended()) { return true; } - if (service.serviceStatus() == ServiceStatus.DOWN) { - return true; + switch (service.serviceStatus()) { + case DOWN: return true; + case UNKNOWN: + throw new HostStateChangeDeniedException( + nodeGroup, + HostedVespaPolicy.UNKNOWN_SERVICE_STATUS, + "Service status of " + service.descriptiveName() + " is not yet known"); + default: + return false; } - - return false; } } diff --git 
a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java index 208e12690ff..3a6c24a05e3 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java @@ -58,8 +58,7 @@ public class HostedVespaClusterPolicy implements ClusterPolicy { return; } - int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi) - .asPercentage(); + int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi).asPercentage(); if (clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown() <= percentageOfServicesAllowedToBeDown) { return; } |