From 19b4e2f9aef020f520adae5b6f3ff0175b3b94ad Mon Sep 17 00:00:00 2001 From: HÃ¥kon Hallingstad Date: Tue, 14 Sep 2021 13:33:21 +0200 Subject: Deny suspension with special constraint on unknown status --- .../yahoo/vespa/orchestrator/model/ClusterApi.java | 5 +- .../vespa/orchestrator/model/ClusterApiImpl.java | 21 ++++-- .../policy/HostedVespaClusterPolicy.java | 5 +- .../orchestrator/policy/HostedVespaPolicy.java | 1 + .../orchestrator/model/ClusterApiImplTest.java | 81 +++++++++++++++++++--- .../policy/HostedVespaClusterPolicyTest.java | 34 ++++++--- 6 files changed, 119 insertions(+), 28 deletions(-) (limited to 'orchestrator/src') diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java index 78373282df8..fd115702588 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java @@ -1,9 +1,9 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.orchestrator.model; -import com.yahoo.config.provision.Zone; import com.yahoo.vespa.applicationmodel.ClusterId; import com.yahoo.vespa.applicationmodel.ServiceType; +import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException; import com.yahoo.vespa.orchestrator.policy.SuspensionReasons; import java.util.Optional; @@ -21,7 +21,7 @@ public interface ClusterApi { /** Returns the reasons no services are up in the implied group, or empty if some services are up. */ Optional reasonsForNoServicesInGroupIsUp(); - boolean noServicesOutsideGroupIsDown(); + boolean noServicesOutsideGroupIsDown() throws HostStateChangeDeniedException; int percentageOfServicesDown(); int percentageOfServicesDownIfGroupIsAllowedToBeDown(); @@ -30,5 +30,4 @@ public interface ClusterApi { Optional upStorageNodeInGroup(); String downDescription(); - } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java index b8538079194..00cb65a09b0 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java @@ -9,6 +9,8 @@ import com.yahoo.vespa.applicationmodel.ServiceStatus; import com.yahoo.vespa.applicationmodel.ServiceType; import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory; import com.yahoo.vespa.orchestrator.policy.ClusterParams; +import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException; +import com.yahoo.vespa.orchestrator.policy.HostedVespaPolicy; import com.yahoo.vespa.orchestrator.policy.SuspensionReasons; import com.yahoo.vespa.orchestrator.status.HostInfos; import com.yahoo.vespa.orchestrator.status.HostStatus; @@ -17,6 +19,7 @@ import java.time.Clock; import java.time.Duration; import java.time.Instant; import java.util.Collections; +import java.util.Comparator; import java.util.HashSet; import java.util.Map; import java.util.Optional; @@ -126,10 +129,7 @@ class ClusterApiImpl implements ClusterApi { continue; } - if (service.serviceStatus() == ServiceStatus.UNKNOWN) { - reasons.mergeWith(SuspensionReasons.unknownStatus(service)); - continue; - } else if (service.serviceStatus() == ServiceStatus.DOWN) { + if (service.serviceStatus() == ServiceStatus.DOWN) { Optional since = service.serviceStatusInfo().since(); if (since.isEmpty()) { reasons.mergeWith(SuspensionReasons.isDown(service)); @@ -155,7 +155,18 @@ class ClusterApiImpl implements ClusterApi { int missingServices() { return missingServices; } @Override - public boolean noServicesOutsideGroupIsDown() { + public boolean noServicesOutsideGroupIsDown() throws HostStateChangeDeniedException { + Optional serviceWithUnknownStatus = servicesNotInGroup + .stream() + .filter(serviceInstance -> serviceInstance.serviceStatus() == ServiceStatus.UNKNOWN) + .min(Comparator.comparing(ServiceInstance::descriptiveName)); + if (serviceWithUnknownStatus.isPresent()) { + throw new HostStateChangeDeniedException( + nodeGroup, + HostedVespaPolicy.UNKNOWN_SERVICE_STATUS, + "Service status of " + serviceWithUnknownStatus.get().descriptiveName() + " is not yet known"); + } + return servicesDownAndNotInGroup.size() + missingServices == 0; } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java index fd5e8c33c1b..d183e863500 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java @@ -56,9 +56,8 @@ public class HostedVespaClusterPolicy implements ClusterPolicy { @Override public void verifyGroupGoingDownPermanentlyIsFine(ClusterApi clusterApi) throws HostStateChangeDeniedException { - // This policy is similar to verifyGroupGoingDownIsFine, except that services being down in the group - // is no excuse to allow suspension (like it is for verifyGroupGoingDownIsFine), since if we grant - // suspension in this case they will permanently be down/removed. + // This policy is similar to verifyGroupGoingDownIsFine, except that having no services up in the group will + // not allow the suspension: We are a bit more cautious when removing nodes. if (clusterApi.noServicesOutsideGroupIsDown()) { return; diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java index d9fc2a989de..ad37100fa16 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java @@ -22,6 +22,7 @@ public class HostedVespaPolicy implements Policy { public static final String APPLICATION_SUSPENDED_CONSTRAINT = "application-suspended"; public static final String ENOUGH_SERVICES_UP_CONSTRAINT = "enough-services-up"; + public static final String UNKNOWN_SERVICE_STATUS = "unknown-service-status"; public static final String SET_NODE_STATE_CONSTRAINT = "controller-set-node-state"; public static final String CLUSTER_CONTROLLER_AVAILABLE_CONSTRAINT = "controller-available"; public static final String DEADLINE_CONSTRAINT = "deadline"; diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java index 2946c82eab8..1e29f0ca5de 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java @@ -16,6 +16,7 @@ import com.yahoo.vespa.orchestrator.OrchestratorUtil; import com.yahoo.vespa.orchestrator.policy.ClusterParams; import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException; import com.yahoo.vespa.orchestrator.policy.HostedVespaClusterPolicy; +import com.yahoo.vespa.orchestrator.policy.HostedVespaPolicy; import com.yahoo.vespa.orchestrator.policy.SuspensionReasons; import com.yahoo.vespa.orchestrator.status.HostInfos; import com.yahoo.vespa.orchestrator.status.HostStatus; @@ -103,13 +104,75 @@ public class ClusterApiImplTest { assertEquals(80, clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()); } + @Test + public void testUnknownServiceStatusInGroup() throws HostStateChangeDeniedException { + ClusterApi clusterApi = makeClusterApiWithUnknownStatus(ServiceStatus.UNKNOWN, ServiceStatus.UP, ServiceStatus.UP); + clusterApi.noServicesOutsideGroupIsDown(); + } + + @Test + public void testUnknownServiceStatusOutsideGroup() { + ClusterApi clusterApi = makeClusterApiWithUnknownStatus(ServiceStatus.UP, ServiceStatus.UNKNOWN, ServiceStatus.UP); + + try { + clusterApi.noServicesOutsideGroupIsDown(); + fail(); + } catch (HostStateChangeDeniedException e) { + assertEquals(HostedVespaPolicy.UNKNOWN_SERVICE_STATUS, e.getConstraintName()); + } + } + + /** The first service status will be for the service IN the node group, the rest are OUTSIDE. */ + private ClusterApi makeClusterApiWithUnknownStatus(ServiceStatus... statuses) { + var serviceStatusList = List.of(statuses); + var infraApplication = new ConfigServerApplication(); + + var hostnames = IntStream.rangeClosed(1, serviceStatusList.size()) + .mapToObj(i -> new HostName("cfg" + i)) + .collect(Collectors.toList()); + var instances = new ArrayList(); + for (int i = 0; i < hostnames.size(); i++) { + instances.add(modelUtils.createServiceInstance("cs" + i + 1, hostnames.get(i), serviceStatusList.get(i))); + } + + ServiceCluster serviceCluster = modelUtils.createServiceCluster( + infraApplication.getClusterId().s(), + infraApplication.getServiceType(), + instances + ); + for (var instance : instances) { + instance.setServiceCluster(serviceCluster); + } + + Set serviceClusterSet = Set.of(serviceCluster); + + ApplicationInstance application = new ApplicationInstance( + infraApplication.getTenantId(), + infraApplication.getApplicationInstanceId(Zone.defaultZone()), + serviceClusterSet); + + serviceCluster.setApplicationInstance(application); + + when(applicationApi.applicationId()).thenReturn(OrchestratorUtil.toApplicationId(application.reference())); + + return new ClusterApiImpl( + applicationApi, + serviceCluster, + new NodeGroup(application, hostnames.get(0)), + modelUtils.getHostInfos(), + modelUtils.getClusterControllerClientFactory(), + new ClusterParams.Builder().setSize(serviceStatusList.size()).build(), + clock); + } + /** Make a ClusterApiImpl for the cfg1 config server, with cfg3 missing from the cluster (not provisioned). */ - private ClusterApiImpl makeCfg1ClusterApi(ServiceStatus cfg1ServiceStatus, ServiceStatus cfg2ServiceStatus) { + private ClusterApiImpl makeCfg1ClusterApi(ServiceStatus cfg1ServiceStatus, ServiceStatus cfg2ServiceStatus) + throws HostStateChangeDeniedException { return makeConfigClusterApi(ModelTestUtils.NUMBER_OF_CONFIG_SERVERS, cfg1ServiceStatus, cfg2ServiceStatus); } @Test - public void testCfg1SuspensionFailsWithMissingCfg3() { + public void testCfg1SuspensionFailsWithMissingCfg3() throws HostStateChangeDeniedException { ClusterApiImpl clusterApi = makeCfg1ClusterApi(ServiceStatus.UP, ServiceStatus.UP); HostedVespaClusterPolicy policy = new HostedVespaClusterPolicy(flagSource, zone); @@ -137,7 +200,7 @@ public class ClusterApiImplTest { } @Test - public void testCfghost1SuspensionFailsWithMissingCfghost3() { + public void testCfghost1SuspensionFailsWithMissingCfghost3() throws HostStateChangeDeniedException { ClusterApiImpl clusterApi = makeConfigClusterApi( ModelTestUtils.NUMBER_OF_CONFIG_SERVERS, new ConfigServerHostApplication(), @@ -191,7 +254,7 @@ public class ClusterApiImplTest { } @Test - public void testSingleConfigServerCanSuspend() { + public void testSingleConfigServerCanSuspend() throws HostStateChangeDeniedException { for (var status : EnumSet.of(ServiceStatus.UP, ServiceStatus.DOWN, ServiceStatus.UNKNOWN)) { var clusterApi = makeConfigClusterApi(1, status); var policy = new HostedVespaClusterPolicy(flagSource, zone); @@ -204,7 +267,7 @@ public class ClusterApiImplTest { } @Test - public void testNoServices() { + public void testNoServices() throws HostStateChangeDeniedException { HostName hostName1 = new HostName("host1"); HostName hostName2 = new HostName("host2"); HostName hostName3 = new HostName("host3"); @@ -271,7 +334,7 @@ public class ClusterApiImplTest { private void verifyNoServices(ServiceCluster serviceCluster, Optional expectedNoServicesInGroupIsUp, boolean expectedNoServicesOutsideGroupIsDown, - HostName... groupNodes) { + HostName... groupNodes) throws HostStateChangeDeniedException { ClusterApiImpl clusterApi = new ClusterApiImpl( applicationApi, serviceCluster, @@ -318,12 +381,14 @@ public class ClusterApiImplTest { assertEquals(Optional.of(hostName1), clusterApi.storageNodeInGroup().map(storageNode -> storageNode.hostName())); } - private ClusterApiImpl makeConfigClusterApi(int clusterSize, ServiceStatus first, ServiceStatus... rest) { + private ClusterApiImpl makeConfigClusterApi(int clusterSize, ServiceStatus first, ServiceStatus... rest) + throws HostStateChangeDeniedException { return makeConfigClusterApi(clusterSize, new ConfigServerApplication(), first, rest); } private ClusterApiImpl makeConfigClusterApi(int clusterSize, InfraApplication infraApplication, - ServiceStatus first, ServiceStatus... rest) { + ServiceStatus first, ServiceStatus... rest) + throws HostStateChangeDeniedException { var serviceStatusList = new ArrayList(); serviceStatusList.add(first); serviceStatusList.addAll(List.of(rest)); diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java index a100b3efc52..0c3da1656bc 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java @@ -20,6 +20,7 @@ import java.util.Optional; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; import static org.mockito.Mockito.when; @@ -36,6 +37,25 @@ public class HostedVespaClusterPolicyTest { public void setUp() { when(clusterApi.getApplication()).thenReturn(applicationApi); when(zone.system()).thenReturn(SystemName.main); + + NodeGroup nodeGroup = mock(NodeGroup.class); + when(clusterApi.getNodeGroup()).thenReturn(nodeGroup); + when(nodeGroup.toCommaSeparatedString()).thenReturn("node-group"); + } + + @Test + public void testUnknownServiceStatus() throws HostStateChangeDeniedException { + doThrow(new HostStateChangeDeniedException(clusterApi.getNodeGroup(), + HostedVespaPolicy.UNKNOWN_SERVICE_STATUS, + "foo")) + .when(clusterApi).noServicesOutsideGroupIsDown(); + + try { + policy.verifyGroupGoingDownIsFine(clusterApi); + fail(); + } catch (HostStateChangeDeniedException e) { + assertEquals(HostedVespaPolicy.UNKNOWN_SERVICE_STATUS, e.getConstraintName()); + } } @Test @@ -94,30 +114,30 @@ public class HostedVespaClusterPolicyTest { } @Test - public void verifyGroupGoingDownIsFine_noServicesOutsideGroupIsDownIsFine() { + public void verifyGroupGoingDownIsFine_noServicesOutsideGroupIsDownIsFine() throws HostStateChangeDeniedException { verifyGroupGoingDownIsFine(true, Optional.empty(), 13, true); } @Test - public void verifyGroupGoingDownIsFine_noServicesInGroupIsUp() { + public void verifyGroupGoingDownIsFine_noServicesInGroupIsUp() throws HostStateChangeDeniedException { var reasons = new SuspensionReasons().addReason(new HostName("host1"), "supension reason 1"); verifyGroupGoingDownIsFine(false, Optional.of(reasons), 13, true); } @Test - public void verifyGroupGoingDownIsFine_percentageIsFine() { + public void verifyGroupGoingDownIsFine_percentageIsFine() throws HostStateChangeDeniedException { verifyGroupGoingDownIsFine(false, Optional.empty(), 9, true); } @Test - public void verifyGroupGoingDownIsFine_fails() { + public void verifyGroupGoingDownIsFine_fails() throws HostStateChangeDeniedException { verifyGroupGoingDownIsFine(false, Optional.empty(), 13, false); } private void verifyGroupGoingDownIsFine(boolean noServicesOutsideGroupIsDown, Optional noServicesInGroupIsUp, int percentageOfServicesDownIfGroupIsAllowedToBeDown, - boolean expectSuccess) { + boolean expectSuccess) throws HostStateChangeDeniedException { when(clusterApi.noServicesOutsideGroupIsDown()).thenReturn(noServicesOutsideGroupIsDown); when(clusterApi.reasonsForNoServicesInGroupIsUp()).thenReturn(noServicesInGroupIsUp); when(clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()).thenReturn(20); @@ -129,10 +149,6 @@ public class HostedVespaClusterPolicyTest { when(clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()).thenReturn(percentageOfServicesDownIfGroupIsAllowedToBeDown); when(clusterApi.downDescription()).thenReturn(" Down description"); - NodeGroup nodeGroup = mock(NodeGroup.class); - when(clusterApi.getNodeGroup()).thenReturn(nodeGroup); - when(nodeGroup.toCommaSeparatedString()).thenReturn("node-group"); - try { SuspensionReasons reasons = policy.verifyGroupGoingDownIsFine(clusterApi); if (!expectSuccess) { -- cgit v1.2.3