about | summary | refs | log | tree | commit | diff | stats
path: root/orchestrator
diff options
context:
space:
mode:
author Håkon Hallingstad <hakon@yahooinc.com> 2021-09-20 14:45:16 +0200
committer Håkon Hallingstad <hakon@yahooinc.com> 2021-09-20 14:45:16 +0200
commit 070a2249baefffc4ffa116728c6094f84148302e (patch)
tree 39f4d6a6e090ab42cb9dad143180e6a35de6927c /orchestrator
parent 23a050f3ae7755670a1b13e9f26fe8b19fb81042 (diff)
Remove group-permanent-suspension flag
Diffstat (limited to 'orchestrator')
-rw-r--r-- orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java 178
-rw-r--r-- orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java 25
-rw-r--r-- orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java 28
3 files changed, 80 insertions, 151 deletions
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java
index d183e863500..208e12690ff 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java
@@ -4,9 +4,7 @@ package com.yahoo.vespa.orchestrator.policy;
import com.yahoo.config.provision.Zone;
import com.yahoo.vespa.applicationmodel.ClusterId;
import com.yahoo.vespa.applicationmodel.ServiceType;
-import com.yahoo.vespa.flags.BooleanFlag;
import com.yahoo.vespa.flags.FlagSource;
-import com.yahoo.vespa.flags.Flags;
import com.yahoo.vespa.orchestrator.model.ClusterApi;
import com.yahoo.vespa.orchestrator.model.VespaModelUtil;
@@ -17,12 +15,9 @@ import static com.yahoo.vespa.orchestrator.policy.HostedVespaPolicy.ENOUGH_SERVI
public class HostedVespaClusterPolicy implements ClusterPolicy {
- private final BooleanFlag groupSuspensionInPermanentSuspendFlag;
private final Zone zone;
public HostedVespaClusterPolicy(FlagSource flagSource, Zone zone) {
- // Note that the "group" in this flag refers to hierarchical groups of a content cluster.
- this.groupSuspensionInPermanentSuspendFlag = Flags.GROUP_PERMANENT_SUSPENSION.bindTo(flagSource);
this.zone = zone;
}
@@ -32,7 +27,7 @@ public class HostedVespaClusterPolicy implements ClusterPolicy {
return SuspensionReasons.nothingNoteworthy();
}
- int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi, true).asPercentage();
+ int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi).asPercentage();
if (clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown() <= percentageOfServicesAllowedToBeDown) {
return SuspensionReasons.nothingNoteworthy();
}
@@ -63,9 +58,7 @@ public class HostedVespaClusterPolicy implements ClusterPolicy {
return;
}
- boolean enableContentGroupSuspension = groupSuspensionInPermanentSuspendFlag.value();
-
- int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi, enableContentGroupSuspension)
+ int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi)
.asPercentage();
if (clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown() <= percentageOfServicesAllowedToBeDown) {
return;
@@ -81,116 +74,85 @@ public class HostedVespaClusterPolicy implements ClusterPolicy {
}
// Non-private for testing purposes
- ConcurrentSuspensionLimitForCluster getConcurrentSuspensionLimit(ClusterApi clusterApi, boolean enableContentGroupSuspension) {
- if (enableContentGroupSuspension) {
- // Possible service clusters on a node as of 2021-01-22:
- //
- // CLUSTER ID SERVICE TYPE HEALTH ASSOCIATION
- // 1 CCN-controllers container-clustercontrollers Slobrok 1, 3, or 6 in content cluster
- // 2 CCN distributor Slobrok content cluster
- // 3 CCN storagenode Slobrok content cluster
- // 4 CCN searchnode Slobrok content cluster
- // 5 CCN transactionlogserver not checked content cluster
- // 6 JCCN container Slobrok jdisc container cluster
- // 7 admin slobrok not checked 1-3 in jdisc container cluster
- // 8 metrics metricsproxy-container Slobrok application
- // 9 admin logd not checked application
- // 10 admin config-sentinel not checked application
- // 11 admin configproxy not checked application
- // 12 admin logforwarder not checked application
- // 13 controller controller state/v1 controllers
- // 14 zone-config-servers configserver state/v1 config servers
- // 15 controller-host hostadmin state/v1 controller hosts
- // 16 configserver-host hostadmin state/v1 config server hosts
- // 17 tenant-host hostadmin state/v1 tenant hosts
- // 18 proxy-host hostadmin state/v1 proxy hosts
- //
- // CCN refers to the content cluster's name, as specified in services.xml.
- // JCCN refers to the jdisc container cluster's name, as specified in services.xml.
- //
- // For instance a content node will have 2-5 and 8-12 and possibly 1, while a combined
- // cluster node may have all 1-12.
- //
- // The services on a node can be categorized into these main types, ref association column above:
- // A content
- // B container
- // C tenant host
- // D config server
- // E config server host
- // F controller
- // G controller host
- // H proxy (same as B)
- // I proxy host
-
- if (clusterApi.serviceType().equals(ServiceType.CLUSTER_CONTROLLER)) {
- return ConcurrentSuspensionLimitForCluster.ONE_NODE;
- }
-
- if (Set.of(ServiceType.STORAGE, ServiceType.SEARCH, ServiceType.DISTRIBUTOR, ServiceType.TRANSACTION_LOG_SERVER)
- .contains(clusterApi.serviceType())) {
- // Delegate to the cluster controller
- return ConcurrentSuspensionLimitForCluster.ALL_NODES;
- }
-
- if (clusterApi.serviceType().equals(ServiceType.CONTAINER)) {
- return ConcurrentSuspensionLimitForCluster.TEN_PERCENT;
- }
-
- if (VespaModelUtil.ADMIN_CLUSTER_ID.equals(clusterApi.clusterId())) {
- if (ServiceType.SLOBROK.equals(clusterApi.serviceType())) {
- return ConcurrentSuspensionLimitForCluster.ONE_NODE;
- }
-
- return ConcurrentSuspensionLimitForCluster.ALL_NODES;
- } else if (ServiceType.METRICS_PROXY.equals(clusterApi.serviceType())) {
- return ConcurrentSuspensionLimitForCluster.ALL_NODES;
- }
-
- if (Set.of(ServiceType.CONFIG_SERVER, ServiceType.CONTROLLER).contains(clusterApi.serviceType())) {
- return ConcurrentSuspensionLimitForCluster.ONE_NODE;
- }
-
- if (clusterApi.serviceType().equals(ServiceType.HOST_ADMIN)) {
- if (Set.of(ClusterId.CONFIG_SERVER_HOST, ClusterId.CONTROLLER_HOST).contains(clusterApi.clusterId())) {
- return ConcurrentSuspensionLimitForCluster.ONE_NODE;
- }
+ ConcurrentSuspensionLimitForCluster getConcurrentSuspensionLimit(ClusterApi clusterApi) {
+ // Possible service clusters on a node as of 2021-01-22:
+ //
+ // CLUSTER ID SERVICE TYPE HEALTH ASSOCIATION
+ // 1 CCN-controllers container-clustercontrollers Slobrok 1, 3, or 6 in content cluster
+ // 2 CCN distributor Slobrok content cluster
+ // 3 CCN storagenode Slobrok content cluster
+ // 4 CCN searchnode Slobrok content cluster
+ // 5 CCN transactionlogserver not checked content cluster
+ // 6 JCCN container Slobrok jdisc container cluster
+ // 7 admin slobrok not checked 1-3 in jdisc container cluster
+ // 8 metrics metricsproxy-container Slobrok application
+ // 9 admin logd not checked application
+ // 10 admin config-sentinel not checked application
+ // 11 admin configproxy not checked application
+ // 12 admin logforwarder not checked application
+ // 13 controller controller state/v1 controllers
+ // 14 zone-config-servers configserver state/v1 config servers
+ // 15 controller-host hostadmin state/v1 controller hosts
+ // 16 configserver-host hostadmin state/v1 config server hosts
+ // 17 tenant-host hostadmin state/v1 tenant hosts
+ // 18 proxy-host hostadmin state/v1 proxy hosts
+ //
+ // CCN refers to the content cluster's name, as specified in services.xml.
+ // JCCN refers to the jdisc container cluster's name, as specified in services.xml.
+ //
+ // For instance a content node will have 2-5 and 8-12 and possibly 1, while a combined
+ // cluster node may have all 1-12.
+ //
+ // The services on a node can be categorized into these main types, ref association column above:
+ // A content
+ // B container
+ // C tenant host
+ // D config server
+ // E config server host
+ // F controller
+ // G controller host
+ // H proxy (same as B)
+ // I proxy host
+
+ if (clusterApi.serviceType().equals(ServiceType.CLUSTER_CONTROLLER)) {
+ return ConcurrentSuspensionLimitForCluster.ONE_NODE;
+ }
- return zone.system().isCd()
- ? ConcurrentSuspensionLimitForCluster.FIFTY_PERCENT
- : ConcurrentSuspensionLimitForCluster.TWENTY_PERCENT;
- }
+ if (Set.of(ServiceType.STORAGE, ServiceType.SEARCH, ServiceType.DISTRIBUTOR, ServiceType.TRANSACTION_LOG_SERVER)
+ .contains(clusterApi.serviceType())) {
+ // Delegate to the cluster controller
+ return ConcurrentSuspensionLimitForCluster.ALL_NODES;
+ }
- // The above should cover all cases, but if not we'll return a reasonable default:
+ if (clusterApi.serviceType().equals(ServiceType.CONTAINER)) {
return ConcurrentSuspensionLimitForCluster.TEN_PERCENT;
- } else {
- // TODO: Remove this legacy branch
- if (clusterApi.isStorageCluster()) {
- return ConcurrentSuspensionLimitForCluster.ONE_NODE;
- }
+ }
- if (ServiceType.CLUSTER_CONTROLLER.equals(clusterApi.serviceType())) {
+ if (VespaModelUtil.ADMIN_CLUSTER_ID.equals(clusterApi.clusterId())) {
+ if (ServiceType.SLOBROK.equals(clusterApi.serviceType())) {
return ConcurrentSuspensionLimitForCluster.ONE_NODE;
}
- if (ServiceType.METRICS_PROXY.equals(clusterApi.serviceType())) {
- return ConcurrentSuspensionLimitForCluster.ALL_NODES;
- }
-
- if (VespaModelUtil.ADMIN_CLUSTER_ID.equals(clusterApi.clusterId())) {
- if (ServiceType.SLOBROK.equals(clusterApi.serviceType())) {
- return ConcurrentSuspensionLimitForCluster.ONE_NODE;
- }
+ return ConcurrentSuspensionLimitForCluster.ALL_NODES;
+ } else if (ServiceType.METRICS_PROXY.equals(clusterApi.serviceType())) {
+ return ConcurrentSuspensionLimitForCluster.ALL_NODES;
+ }
- return ConcurrentSuspensionLimitForCluster.ALL_NODES;
- }
+ if (Set.of(ServiceType.CONFIG_SERVER, ServiceType.CONTROLLER).contains(clusterApi.serviceType())) {
+ return ConcurrentSuspensionLimitForCluster.ONE_NODE;
+ }
- if (clusterApi.getApplication().applicationId().equals(VespaModelUtil.TENANT_HOST_APPLICATION_ID)) {
- return zone.system().isCd()
- ? ConcurrentSuspensionLimitForCluster.FIFTY_PERCENT
- : ConcurrentSuspensionLimitForCluster.TWENTY_PERCENT;
+ if (clusterApi.serviceType().equals(ServiceType.HOST_ADMIN)) {
+ if (Set.of(ClusterId.CONFIG_SERVER_HOST, ClusterId.CONTROLLER_HOST).contains(clusterApi.clusterId())) {
+ return ConcurrentSuspensionLimitForCluster.ONE_NODE;
}
- return ConcurrentSuspensionLimitForCluster.TEN_PERCENT;
+ return zone.system().isCd()
+ ? ConcurrentSuspensionLimitForCluster.FIFTY_PERCENT
+ : ConcurrentSuspensionLimitForCluster.TWENTY_PERCENT;
}
+
+ // The above should cover all cases, but if not we'll return a reasonable default:
+ return ConcurrentSuspensionLimitForCluster.TEN_PERCENT;
}
}
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java
index 1e29f0ca5de..da8591c6631 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java
@@ -10,7 +10,6 @@ import com.yahoo.vespa.applicationmodel.ServiceInstance;
import com.yahoo.vespa.applicationmodel.ServiceStatus;
import com.yahoo.vespa.applicationmodel.ServiceStatusInfo;
import com.yahoo.vespa.applicationmodel.ServiceType;
-import com.yahoo.vespa.flags.Flags;
import com.yahoo.vespa.flags.InMemoryFlagSource;
import com.yahoo.vespa.orchestrator.OrchestratorUtil;
import com.yahoo.vespa.orchestrator.policy.ClusterParams;
@@ -182,18 +181,6 @@ public class ClusterApiImplTest {
fail();
} catch (HostStateChangeDeniedException e) {
assertThat(e.getMessage(),
- containsString("Changing the state of cfg1 would violate enough-services-up: " +
- "Suspension of service with type 'configserver' not allowed: 33% are suspended already. " +
- "Services down on resumed hosts: [1 missing config server]."));
- }
-
- flagSource.withBooleanFlag(Flags.GROUP_PERMANENT_SUSPENSION.id(), true);
-
- try {
- policy.verifyGroupGoingDownIsFine(clusterApi);
- fail();
- } catch (HostStateChangeDeniedException e) {
- assertThat(e.getMessage(),
containsString("Suspension of service with type 'configserver' not allowed: 33% are suspended already. " +
"Services down on resumed hosts: [1 missing config server]."));
}
@@ -214,18 +201,6 @@ public class ClusterApiImplTest {
fail();
} catch (HostStateChangeDeniedException e) {
assertThat(e.getMessage(),
- containsString("Changing the state of cfg1 would violate enough-services-up: " +
- "Suspension of service with type 'hostadmin' not allowed: 33% are suspended already. " +
- "Services down on resumed hosts: [1 missing config server host]."));
- }
-
- flagSource.withBooleanFlag(Flags.GROUP_PERMANENT_SUSPENSION.id(), true);
-
- try {
- policy.verifyGroupGoingDownIsFine(clusterApi);
- fail();
- } catch (HostStateChangeDeniedException e) {
- assertThat(e.getMessage(),
containsString("Suspension of service with type 'hostadmin' not allowed: 33% are suspended already. " +
"Services down on resumed hosts: [1 missing config server host]."));
}
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java
index 0c3da1656bc..303dabebba8 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java
@@ -63,7 +63,7 @@ public class HostedVespaClusterPolicyTest {
when(clusterApi.clusterId()).thenReturn(VespaModelUtil.ADMIN_CLUSTER_ID);
when(clusterApi.serviceType()).thenReturn(ServiceType.SLOBROK);
assertEquals(ConcurrentSuspensionLimitForCluster.ONE_NODE,
- policy.getConcurrentSuspensionLimit(clusterApi, false));
+ policy.getConcurrentSuspensionLimit(clusterApi));
}
@Test
@@ -71,46 +71,38 @@ public class HostedVespaClusterPolicyTest {
when(clusterApi.clusterId()).thenReturn(VespaModelUtil.ADMIN_CLUSTER_ID);
when(clusterApi.serviceType()).thenReturn(new ServiceType("non-slobrok-service-type"));
assertEquals(ConcurrentSuspensionLimitForCluster.ALL_NODES,
- policy.getConcurrentSuspensionLimit(clusterApi, false));
+ policy.getConcurrentSuspensionLimit(clusterApi));
}
@Test
public void testStorageSuspensionLimit() {
when(clusterApi.serviceType()).thenReturn(ServiceType.STORAGE);
when(clusterApi.clusterId()).thenReturn(new ClusterId("some-cluster-id"));
- when(clusterApi.isStorageCluster()).thenReturn(true);
assertEquals(ConcurrentSuspensionLimitForCluster.ALL_NODES,
- policy.getConcurrentSuspensionLimit(clusterApi, true));
- }
-
- @Test
- public void testStorageSuspensionLimit_legacy() {
- when(clusterApi.clusterId()).thenReturn(new ClusterId("some-cluster-id"));
- when(clusterApi.isStorageCluster()).thenReturn(true);
- assertEquals(ConcurrentSuspensionLimitForCluster.ONE_NODE,
- policy.getConcurrentSuspensionLimit(clusterApi, false));
+ policy.getConcurrentSuspensionLimit(clusterApi));
}
@Test
public void testTenantHostSuspensionLimit() {
when(applicationApi.applicationId()).thenReturn(VespaModelUtil.TENANT_HOST_APPLICATION_ID);
- when(clusterApi.isStorageCluster()).thenReturn(false);
+ when(clusterApi.clusterId()).thenReturn(ClusterId.TENANT_HOST);
+ when(clusterApi.serviceType()).thenReturn(ServiceType.HOST_ADMIN);
assertEquals(ConcurrentSuspensionLimitForCluster.TWENTY_PERCENT,
- policy.getConcurrentSuspensionLimit(clusterApi, false));
+ policy.getConcurrentSuspensionLimit(clusterApi));
when(zone.system()).thenReturn(SystemName.cd);
assertEquals(ConcurrentSuspensionLimitForCluster.FIFTY_PERCENT,
- policy.getConcurrentSuspensionLimit(clusterApi, false));
+ policy.getConcurrentSuspensionLimit(clusterApi));
}
@Test
public void testDefaultSuspensionLimit() {
when(applicationApi.applicationId()).thenReturn(ApplicationId.fromSerializedForm("a:b:c"));
when(clusterApi.clusterId()).thenReturn(new ClusterId("some-cluster-id"));
- when(clusterApi.isStorageCluster()).thenReturn(false);
+ when(clusterApi.serviceType()).thenReturn(new ServiceType("some-service-type"));
assertEquals(ConcurrentSuspensionLimitForCluster.TEN_PERCENT,
- policy.getConcurrentSuspensionLimit(clusterApi, false));
+ policy.getConcurrentSuspensionLimit(clusterApi));
}
@Test
@@ -141,7 +133,7 @@ public class HostedVespaClusterPolicyTest {
when(clusterApi.noServicesOutsideGroupIsDown()).thenReturn(noServicesOutsideGroupIsDown);
when(clusterApi.reasonsForNoServicesInGroupIsUp()).thenReturn(noServicesInGroupIsUp);
when(clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()).thenReturn(20);
- doReturn(ConcurrentSuspensionLimitForCluster.TEN_PERCENT).when(policy).getConcurrentSuspensionLimit(clusterApi, false);
+ doReturn(ConcurrentSuspensionLimitForCluster.TEN_PERCENT).when(policy).getConcurrentSuspensionLimit(clusterApi);
when(applicationApi.applicationId()).thenReturn(ApplicationId.fromSerializedForm("a:b:c"));
when(clusterApi.serviceType()).thenReturn(new ServiceType("service-type"));