diff options
| field | value | date |
|---|---|---|
| author | Håkon Hallingstad <hakon@yahooinc.com> | 2021-09-14 13:33:21 +0200 |
| committer | Håkon Hallingstad <hakon@yahooinc.com> | 2021-09-14 13:33:21 +0200 |
| commit | 19b4e2f9aef020f520adae5b6f3ff0175b3b94ad (patch) | |
| tree | 7bb65ff937dfe4e32199914a85e765af362702d1 /orchestrator/src/main/java | |
| parent | 6cb447d48d363e736effb6e57b82f0b9193ed077 (diff) | |
Deny suspension with special constraint on unknown status
Diffstat (limited to 'orchestrator/src/main/java')
4 files changed, 21 insertions, 11 deletions
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java index 78373282df8..fd115702588 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java @@ -1,9 +1,9 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.orchestrator.model; -import com.yahoo.config.provision.Zone; import com.yahoo.vespa.applicationmodel.ClusterId; import com.yahoo.vespa.applicationmodel.ServiceType; +import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException; import com.yahoo.vespa.orchestrator.policy.SuspensionReasons; import java.util.Optional; @@ -21,7 +21,7 @@ public interface ClusterApi { /** Returns the reasons no services are up in the implied group, or empty if some services are up. 
*/ Optional<SuspensionReasons> reasonsForNoServicesInGroupIsUp(); - boolean noServicesOutsideGroupIsDown(); + boolean noServicesOutsideGroupIsDown() throws HostStateChangeDeniedException; int percentageOfServicesDown(); int percentageOfServicesDownIfGroupIsAllowedToBeDown(); @@ -30,5 +30,4 @@ public interface ClusterApi { Optional<StorageNode> upStorageNodeInGroup(); String downDescription(); - } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java index b8538079194..00cb65a09b0 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java @@ -9,6 +9,8 @@ import com.yahoo.vespa.applicationmodel.ServiceStatus; import com.yahoo.vespa.applicationmodel.ServiceType; import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory; import com.yahoo.vespa.orchestrator.policy.ClusterParams; +import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException; +import com.yahoo.vespa.orchestrator.policy.HostedVespaPolicy; import com.yahoo.vespa.orchestrator.policy.SuspensionReasons; import com.yahoo.vespa.orchestrator.status.HostInfos; import com.yahoo.vespa.orchestrator.status.HostStatus; @@ -17,6 +19,7 @@ import java.time.Clock; import java.time.Duration; import java.time.Instant; import java.util.Collections; +import java.util.Comparator; import java.util.HashSet; import java.util.Map; import java.util.Optional; @@ -126,10 +129,7 @@ class ClusterApiImpl implements ClusterApi { continue; } - if (service.serviceStatus() == ServiceStatus.UNKNOWN) { - reasons.mergeWith(SuspensionReasons.unknownStatus(service)); - continue; - } else if (service.serviceStatus() == ServiceStatus.DOWN) { + if (service.serviceStatus() == ServiceStatus.DOWN) { Optional<Instant> since = service.serviceStatusInfo().since(); if 
(since.isEmpty()) { reasons.mergeWith(SuspensionReasons.isDown(service)); @@ -155,7 +155,18 @@ class ClusterApiImpl implements ClusterApi { int missingServices() { return missingServices; } @Override - public boolean noServicesOutsideGroupIsDown() { + public boolean noServicesOutsideGroupIsDown() throws HostStateChangeDeniedException { + Optional<ServiceInstance> serviceWithUnknownStatus = servicesNotInGroup + .stream() + .filter(serviceInstance -> serviceInstance.serviceStatus() == ServiceStatus.UNKNOWN) + .min(Comparator.comparing(ServiceInstance::descriptiveName)); + if (serviceWithUnknownStatus.isPresent()) { + throw new HostStateChangeDeniedException( + nodeGroup, + HostedVespaPolicy.UNKNOWN_SERVICE_STATUS, + "Service status of " + serviceWithUnknownStatus.get().descriptiveName() + " is not yet known"); + } + return servicesDownAndNotInGroup.size() + missingServices == 0; } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java index fd5e8c33c1b..d183e863500 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java @@ -56,9 +56,8 @@ public class HostedVespaClusterPolicy implements ClusterPolicy { @Override public void verifyGroupGoingDownPermanentlyIsFine(ClusterApi clusterApi) throws HostStateChangeDeniedException { - // This policy is similar to verifyGroupGoingDownIsFine, except that services being down in the group - // is no excuse to allow suspension (like it is for verifyGroupGoingDownIsFine), since if we grant - // suspension in this case they will permanently be down/removed. + // This policy is similar to verifyGroupGoingDownIsFine, except that having no services up in the group will + // not allow the suspension: We are a bit more cautious when removing nodes. 
if (clusterApi.noServicesOutsideGroupIsDown()) { return; diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java index d9fc2a989de..ad37100fa16 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java @@ -22,6 +22,7 @@ public class HostedVespaPolicy implements Policy { public static final String APPLICATION_SUSPENDED_CONSTRAINT = "application-suspended"; public static final String ENOUGH_SERVICES_UP_CONSTRAINT = "enough-services-up"; + public static final String UNKNOWN_SERVICE_STATUS = "unknown-service-status"; public static final String SET_NODE_STATE_CONSTRAINT = "controller-set-node-state"; public static final String CLUSTER_CONTROLLER_AVAILABLE_CONSTRAINT = "controller-available"; public static final String DEADLINE_CONSTRAINT = "deadline"; |