diff options
author | Håkon Hallingstad <hakon@yahooinc.com> | 2021-09-20 14:45:16 +0200 |
---|---|---|
committer | Håkon Hallingstad <hakon@yahooinc.com> | 2021-09-20 14:45:16 +0200 |
commit | 070a2249baefffc4ffa116728c6094f84148302e (patch) | |
tree | 39f4d6a6e090ab42cb9dad143180e6a35de6927c /orchestrator | |
parent | 23a050f3ae7755670a1b13e9f26fe8b19fb81042 (diff) |
Remove group-permanent-suspension flag
Diffstat (limited to 'orchestrator')
3 files changed, 80 insertions, 151 deletions
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java index d183e863500..208e12690ff 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java @@ -4,9 +4,7 @@ package com.yahoo.vespa.orchestrator.policy; import com.yahoo.config.provision.Zone; import com.yahoo.vespa.applicationmodel.ClusterId; import com.yahoo.vespa.applicationmodel.ServiceType; -import com.yahoo.vespa.flags.BooleanFlag; import com.yahoo.vespa.flags.FlagSource; -import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.orchestrator.model.ClusterApi; import com.yahoo.vespa.orchestrator.model.VespaModelUtil; @@ -17,12 +15,9 @@ import static com.yahoo.vespa.orchestrator.policy.HostedVespaPolicy.ENOUGH_SERVI public class HostedVespaClusterPolicy implements ClusterPolicy { - private final BooleanFlag groupSuspensionInPermanentSuspendFlag; private final Zone zone; public HostedVespaClusterPolicy(FlagSource flagSource, Zone zone) { - // Note that the "group" in this flag refers to hierarchical groups of a content cluster. - this.groupSuspensionInPermanentSuspendFlag = Flags.GROUP_PERMANENT_SUSPENSION.bindTo(flagSource); this.zone = zone; } @@ -32,7 +27,7 @@ public class HostedVespaClusterPolicy implements ClusterPolicy { return SuspensionReasons.nothingNoteworthy(); } - int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi, true).asPercentage(); + int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi).asPercentage(); if (clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown() <= percentageOfServicesAllowedToBeDown) { return SuspensionReasons.nothingNoteworthy(); } @@ -63,9 +58,7 @@ public class HostedVespaClusterPolicy implements ClusterPolicy { return; } - boolean enableContentGroupSuspension = groupSuspensionInPermanentSuspendFlag.value(); - - int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi, enableContentGroupSuspension) + int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi) .asPercentage(); if (clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown() <= percentageOfServicesAllowedToBeDown) { return; @@ -81,116 +74,85 @@ public class HostedVespaClusterPolicy implements ClusterPolicy { } // Non-private for testing purposes - ConcurrentSuspensionLimitForCluster getConcurrentSuspensionLimit(ClusterApi clusterApi, boolean enableContentGroupSuspension) { - if (enableContentGroupSuspension) { - // Possible service clusters on a node as of 2021-01-22: - // - // CLUSTER ID SERVICE TYPE HEALTH ASSOCIATION - // 1 CCN-controllers container-clustercontrollers Slobrok 1, 3, or 6 in content cluster - // 2 CCN distributor Slobrok content cluster - // 3 CCN storagenode Slobrok content cluster - // 4 CCN searchnode Slobrok content cluster - // 5 CCN transactionlogserver not checked content cluster - // 6 JCCN container Slobrok jdisc container cluster - // 7 admin slobrok not checked 1-3 in jdisc container cluster - // 8 metrics metricsproxy-container Slobrok application - // 9 admin logd not checked application - // 10 admin config-sentinel not checked application - // 11 admin configproxy not checked application - // 12 admin logforwarder not checked application - // 13 controller controller state/v1 controllers - // 14 zone-config-servers configserver state/v1 config servers - // 15 controller-host hostadmin state/v1 controller hosts - // 16 configserver-host hostadmin state/v1 config server hosts - // 17 tenant-host hostadmin state/v1 tenant hosts - // 18 proxy-host hostadmin state/v1 proxy hosts - // - // CCN refers to the content cluster's name, as specified in services.xml. - // JCCN refers to the jdisc container cluster's name, as specified in services.xml. - // - // For instance a content node will have 2-5 and 8-12 and possibly 1, while a combined - // cluster node may have all 1-12. - // - // The services on a node can be categorized into these main types, ref association column above: - // A content - // B container - // C tenant host - // D config server - // E config server host - // F controller - // G controller host - // H proxy (same as B) - // I proxy host - - if (clusterApi.serviceType().equals(ServiceType.CLUSTER_CONTROLLER)) { - return ConcurrentSuspensionLimitForCluster.ONE_NODE; - } - - if (Set.of(ServiceType.STORAGE, ServiceType.SEARCH, ServiceType.DISTRIBUTOR, ServiceType.TRANSACTION_LOG_SERVER) - .contains(clusterApi.serviceType())) { - // Delegate to the cluster controller - return ConcurrentSuspensionLimitForCluster.ALL_NODES; - } - - if (clusterApi.serviceType().equals(ServiceType.CONTAINER)) { - return ConcurrentSuspensionLimitForCluster.TEN_PERCENT; - } - - if (VespaModelUtil.ADMIN_CLUSTER_ID.equals(clusterApi.clusterId())) { - if (ServiceType.SLOBROK.equals(clusterApi.serviceType())) { - return ConcurrentSuspensionLimitForCluster.ONE_NODE; - } - - return ConcurrentSuspensionLimitForCluster.ALL_NODES; - } else if (ServiceType.METRICS_PROXY.equals(clusterApi.serviceType())) { - return ConcurrentSuspensionLimitForCluster.ALL_NODES; - } - - if (Set.of(ServiceType.CONFIG_SERVER, ServiceType.CONTROLLER).contains(clusterApi.serviceType())) { - return ConcurrentSuspensionLimitForCluster.ONE_NODE; - } - - if (clusterApi.serviceType().equals(ServiceType.HOST_ADMIN)) { - if (Set.of(ClusterId.CONFIG_SERVER_HOST, ClusterId.CONTROLLER_HOST).contains(clusterApi.clusterId())) { - return ConcurrentSuspensionLimitForCluster.ONE_NODE; - } + ConcurrentSuspensionLimitForCluster getConcurrentSuspensionLimit(ClusterApi clusterApi) { + // Possible service clusters on a node as of 2021-01-22: + // + // CLUSTER ID SERVICE TYPE HEALTH ASSOCIATION + // 1 CCN-controllers container-clustercontrollers Slobrok 1, 3, or 6 in content cluster + // 2 CCN distributor Slobrok content cluster + // 3 CCN storagenode Slobrok content cluster + // 4 CCN searchnode Slobrok content cluster + // 5 CCN transactionlogserver not checked content cluster + // 6 JCCN container Slobrok jdisc container cluster + // 7 admin slobrok not checked 1-3 in jdisc container cluster + // 8 metrics metricsproxy-container Slobrok application + // 9 admin logd not checked application + // 10 admin config-sentinel not checked application + // 11 admin configproxy not checked application + // 12 admin logforwarder not checked application + // 13 controller controller state/v1 controllers + // 14 zone-config-servers configserver state/v1 config servers + // 15 controller-host hostadmin state/v1 controller hosts + // 16 configserver-host hostadmin state/v1 config server hosts + // 17 tenant-host hostadmin state/v1 tenant hosts + // 18 proxy-host hostadmin state/v1 proxy hosts + // + // CCN refers to the content cluster's name, as specified in services.xml. + // JCCN refers to the jdisc container cluster's name, as specified in services.xml. + // + // For instance a content node will have 2-5 and 8-12 and possibly 1, while a combined + // cluster node may have all 1-12. + // + // The services on a node can be categorized into these main types, ref association column above: + // A content + // B container + // C tenant host + // D config server + // E config server host + // F controller + // G controller host + // H proxy (same as B) + // I proxy host + + if (clusterApi.serviceType().equals(ServiceType.CLUSTER_CONTROLLER)) { + return ConcurrentSuspensionLimitForCluster.ONE_NODE; + } - return zone.system().isCd() - ? ConcurrentSuspensionLimitForCluster.FIFTY_PERCENT - : ConcurrentSuspensionLimitForCluster.TWENTY_PERCENT; - } + if (Set.of(ServiceType.STORAGE, ServiceType.SEARCH, ServiceType.DISTRIBUTOR, ServiceType.TRANSACTION_LOG_SERVER) + .contains(clusterApi.serviceType())) { + // Delegate to the cluster controller + return ConcurrentSuspensionLimitForCluster.ALL_NODES; + } - // The above should cover all cases, but if not we'll return a reasonable default: + if (clusterApi.serviceType().equals(ServiceType.CONTAINER)) { return ConcurrentSuspensionLimitForCluster.TEN_PERCENT; - } else { - // TODO: Remove this legacy branch - if (clusterApi.isStorageCluster()) { - return ConcurrentSuspensionLimitForCluster.ONE_NODE; - } + } - if (ServiceType.CLUSTER_CONTROLLER.equals(clusterApi.serviceType())) { + if (VespaModelUtil.ADMIN_CLUSTER_ID.equals(clusterApi.clusterId())) { + if (ServiceType.SLOBROK.equals(clusterApi.serviceType())) { return ConcurrentSuspensionLimitForCluster.ONE_NODE; } - if (ServiceType.METRICS_PROXY.equals(clusterApi.serviceType())) { - return ConcurrentSuspensionLimitForCluster.ALL_NODES; - } - - if (VespaModelUtil.ADMIN_CLUSTER_ID.equals(clusterApi.clusterId())) { - if (ServiceType.SLOBROK.equals(clusterApi.serviceType())) { - return ConcurrentSuspensionLimitForCluster.ONE_NODE; - } + return ConcurrentSuspensionLimitForCluster.ALL_NODES; + } else if (ServiceType.METRICS_PROXY.equals(clusterApi.serviceType())) { + return ConcurrentSuspensionLimitForCluster.ALL_NODES; + } - return ConcurrentSuspensionLimitForCluster.ALL_NODES; - } + if (Set.of(ServiceType.CONFIG_SERVER, ServiceType.CONTROLLER).contains(clusterApi.serviceType())) { + return ConcurrentSuspensionLimitForCluster.ONE_NODE; + } - if (clusterApi.getApplication().applicationId().equals(VespaModelUtil.TENANT_HOST_APPLICATION_ID)) { - return zone.system().isCd() - ? ConcurrentSuspensionLimitForCluster.FIFTY_PERCENT - : ConcurrentSuspensionLimitForCluster.TWENTY_PERCENT; + if (clusterApi.serviceType().equals(ServiceType.HOST_ADMIN)) { + if (Set.of(ClusterId.CONFIG_SERVER_HOST, ClusterId.CONTROLLER_HOST).contains(clusterApi.clusterId())) { + return ConcurrentSuspensionLimitForCluster.ONE_NODE; } - return ConcurrentSuspensionLimitForCluster.TEN_PERCENT; + return zone.system().isCd() + ? ConcurrentSuspensionLimitForCluster.FIFTY_PERCENT + : ConcurrentSuspensionLimitForCluster.TWENTY_PERCENT; } + + // The above should cover all cases, but if not we'll return a reasonable default: + return ConcurrentSuspensionLimitForCluster.TEN_PERCENT; } } diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java index 1e29f0ca5de..da8591c6631 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java @@ -10,7 +10,6 @@ import com.yahoo.vespa.applicationmodel.ServiceInstance; import com.yahoo.vespa.applicationmodel.ServiceStatus; import com.yahoo.vespa.applicationmodel.ServiceStatusInfo; import com.yahoo.vespa.applicationmodel.ServiceType; -import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.flags.InMemoryFlagSource; import com.yahoo.vespa.orchestrator.OrchestratorUtil; import com.yahoo.vespa.orchestrator.policy.ClusterParams; @@ -182,18 +181,6 @@ public class ClusterApiImplTest { fail(); } catch (HostStateChangeDeniedException e) { assertThat(e.getMessage(), - containsString("Changing the state of cfg1 would violate enough-services-up: " + - "Suspension of service with type 'configserver' not allowed: 33% are suspended already. " + - "Services down on resumed hosts: [1 missing config server].")); - } - - flagSource.withBooleanFlag(Flags.GROUP_PERMANENT_SUSPENSION.id(), true); - - try { - policy.verifyGroupGoingDownIsFine(clusterApi); - fail(); - } catch (HostStateChangeDeniedException e) { - assertThat(e.getMessage(), containsString("Suspension of service with type 'configserver' not allowed: 33% are suspended already. " + "Services down on resumed hosts: [1 missing config server].")); } @@ -214,18 +201,6 @@ public class ClusterApiImplTest { fail(); } catch (HostStateChangeDeniedException e) { assertThat(e.getMessage(), - containsString("Changing the state of cfg1 would violate enough-services-up: " + - "Suspension of service with type 'hostadmin' not allowed: 33% are suspended already. " + - "Services down on resumed hosts: [1 missing config server host].")); - } - - flagSource.withBooleanFlag(Flags.GROUP_PERMANENT_SUSPENSION.id(), true); - - try { - policy.verifyGroupGoingDownIsFine(clusterApi); - fail(); - } catch (HostStateChangeDeniedException e) { - assertThat(e.getMessage(), containsString("Suspension of service with type 'hostadmin' not allowed: 33% are suspended already. " + "Services down on resumed hosts: [1 missing config server host].")); } diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java index 0c3da1656bc..303dabebba8 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java @@ -63,7 +63,7 @@ public class HostedVespaClusterPolicyTest { when(clusterApi.clusterId()).thenReturn(VespaModelUtil.ADMIN_CLUSTER_ID); when(clusterApi.serviceType()).thenReturn(ServiceType.SLOBROK); assertEquals(ConcurrentSuspensionLimitForCluster.ONE_NODE, - policy.getConcurrentSuspensionLimit(clusterApi, false)); + policy.getConcurrentSuspensionLimit(clusterApi)); } @Test @@ -71,46 +71,38 @@ public class HostedVespaClusterPolicyTest { when(clusterApi.clusterId()).thenReturn(VespaModelUtil.ADMIN_CLUSTER_ID); when(clusterApi.serviceType()).thenReturn(new ServiceType("non-slobrok-service-type")); assertEquals(ConcurrentSuspensionLimitForCluster.ALL_NODES, - policy.getConcurrentSuspensionLimit(clusterApi, false)); + policy.getConcurrentSuspensionLimit(clusterApi)); } @Test public void testStorageSuspensionLimit() { when(clusterApi.serviceType()).thenReturn(ServiceType.STORAGE); when(clusterApi.clusterId()).thenReturn(new ClusterId("some-cluster-id")); - when(clusterApi.isStorageCluster()).thenReturn(true); assertEquals(ConcurrentSuspensionLimitForCluster.ALL_NODES, - policy.getConcurrentSuspensionLimit(clusterApi, true)); - } - - @Test - public void testStorageSuspensionLimit_legacy() { - when(clusterApi.clusterId()).thenReturn(new ClusterId("some-cluster-id")); - when(clusterApi.isStorageCluster()).thenReturn(true); - assertEquals(ConcurrentSuspensionLimitForCluster.ONE_NODE, - policy.getConcurrentSuspensionLimit(clusterApi, false)); + policy.getConcurrentSuspensionLimit(clusterApi)); } @Test public void testTenantHostSuspensionLimit() { when(applicationApi.applicationId()).thenReturn(VespaModelUtil.TENANT_HOST_APPLICATION_ID); - when(clusterApi.isStorageCluster()).thenReturn(false); + when(clusterApi.clusterId()).thenReturn(ClusterId.TENANT_HOST); + when(clusterApi.serviceType()).thenReturn(ServiceType.HOST_ADMIN); assertEquals(ConcurrentSuspensionLimitForCluster.TWENTY_PERCENT, - policy.getConcurrentSuspensionLimit(clusterApi, false)); + policy.getConcurrentSuspensionLimit(clusterApi)); when(zone.system()).thenReturn(SystemName.cd); assertEquals(ConcurrentSuspensionLimitForCluster.FIFTY_PERCENT, - policy.getConcurrentSuspensionLimit(clusterApi, false)); + policy.getConcurrentSuspensionLimit(clusterApi)); } @Test public void testDefaultSuspensionLimit() { when(applicationApi.applicationId()).thenReturn(ApplicationId.fromSerializedForm("a:b:c")); when(clusterApi.clusterId()).thenReturn(new ClusterId("some-cluster-id")); - when(clusterApi.isStorageCluster()).thenReturn(false); + when(clusterApi.serviceType()).thenReturn(new ServiceType("some-service-type")); assertEquals(ConcurrentSuspensionLimitForCluster.TEN_PERCENT, - policy.getConcurrentSuspensionLimit(clusterApi, false)); + policy.getConcurrentSuspensionLimit(clusterApi)); } @Test @@ -141,7 +133,7 @@ public class HostedVespaClusterPolicyTest { when(clusterApi.noServicesOutsideGroupIsDown()).thenReturn(noServicesOutsideGroupIsDown); when(clusterApi.reasonsForNoServicesInGroupIsUp()).thenReturn(noServicesInGroupIsUp); when(clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()).thenReturn(20); - doReturn(ConcurrentSuspensionLimitForCluster.TEN_PERCENT).when(policy).getConcurrentSuspensionLimit(clusterApi, false); + doReturn(ConcurrentSuspensionLimitForCluster.TEN_PERCENT).when(policy).getConcurrentSuspensionLimit(clusterApi); when(applicationApi.applicationId()).thenReturn(ApplicationId.fromSerializedForm("a:b:c")); when(clusterApi.serviceType()).thenReturn(new ServiceType("service-type")); |