diff options
author | HÃ¥kon Hallingstad <hakon@verizonmedia.com> | 2021-01-25 17:10:29 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-01-25 17:10:29 +0100 |
commit | 7542e391e3468802e4633f4fee7379a1a36a7b87 (patch) | |
tree | 3766df00ffcf92461528357caed8d3d809ec50f9 | |
parent | 134a556ff6a42b72ebb970c1c417a9f55cf96c8f (diff) | |
parent | ae26f1b0c6b901a35a64fe7acfea5795cd6a3509 (diff) |
Merge pull request #16168 from vespa-engine/hakonhall/support-delegating-content-node-suspension-to-cluster-controller
Support delegating content node suspension to cluster controller
11 files changed, 212 insertions, 77 deletions
diff --git a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceType.java b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceType.java index f01d3c3b3fb..92bdd0f21a0 100644 --- a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceType.java +++ b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceType.java @@ -16,6 +16,12 @@ public class ServiceType { public static final ServiceType HOST_ADMIN = new ServiceType("hostadmin"); public static final ServiceType CONFIG_SERVER = new ServiceType("configserver"); public static final ServiceType CONTROLLER = new ServiceType("controller"); + public static final ServiceType TRANSACTION_LOG_SERVER = new ServiceType("transactionlogserver"); + public static final ServiceType CLUSTER_CONTROLLER = new ServiceType("container-clustercontroller"); + public static final ServiceType DISTRIBUTOR = new ServiceType("distributor"); + public static final ServiceType SEARCH = new ServiceType("searchnode"); + public static final ServiceType STORAGE = new ServiceType("storagenode"); + public static final ServiceType METRICS_PROXY = new ServiceType("metricsproxy-container"); private final String id; diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java index 7c2deeda69b..c75850ee95d 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java @@ -285,6 +285,13 @@ public class Flags { "Takes effect on next internal redeployment", APPLICATION_ID); + public static final UnboundBooleanFlag GROUP_SUSPENSION = defineFeatureFlag( + "group-suspension", false, + List.of("hakon"), "2021-01-22", "2021-03-22", + "Allow all content nodes in a hierarchical group to suspend at the same time", + "Takes effect on the next suspension request to the Orchestrator.", + APPLICATION_ID); + public static final UnboundBooleanFlag RECONFIGURABLE_ZOOKEEPER_SERVER_FOR_CLUSTER_CONTROLLER = defineFeatureFlag( "reconfigurable-zookeeper-server-for-cluster-controller", false, List.of("musum", "mpolden"), "2020-12-16", "2021-02-16", diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java index 630427f49a8..35a8e3578b4 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java @@ -71,7 +71,7 @@ public class OrchestratorImpl implements Orchestrator { ConfigserverConfig configServerConfig, FlagSource flagSource) { - this(new HostedVespaPolicy(new HostedVespaClusterPolicy(), + this(new HostedVespaPolicy(new HostedVespaClusterPolicy(flagSource), clusterControllerClientFactory, new ApplicationApiFactory(configServerConfig.zookeeperserver().size(), Clock.systemUTC())), clusterControllerClientFactory, diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/VespaModelUtil.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/VespaModelUtil.java index c53d35c16dc..3ee72769cf3 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/VespaModelUtil.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/VespaModelUtil.java @@ -37,19 +37,12 @@ public class VespaModelUtil { public static final ClusterId ADMIN_CLUSTER_ID = new ClusterId("admin"); - public static final ServiceType SLOBROK_SERVICE_TYPE = new ServiceType("slobrok"); - public static final ServiceType CLUSTER_CONTROLLER_SERVICE_TYPE = new ServiceType("container-clustercontroller"); - public static final ServiceType DISTRIBUTOR_SERVICE_TYPE = new ServiceType("distributor"); - public static final ServiceType SEARCHNODE_SERVICE_TYPE = new ServiceType("searchnode"); - public static final ServiceType STORAGENODE_SERVICE_TYPE = new ServiceType("storagenode"); - public static final ServiceType METRICS_PROXY_SERVICE_TYPE = new ServiceType("metricsproxy-container"); - private static final Comparator<ServiceInstance> CLUSTER_CONTROLLER_INDEX_COMPARATOR = Comparator.comparing(serviceInstance -> VespaModelUtil.getClusterControllerIndex(serviceInstance.configId())); // @return true iff the service cluster refers to a cluster controller service cluster. public static boolean isClusterController(ServiceCluster cluster) { - return CLUSTER_CONTROLLER_SERVICE_TYPE.equals(cluster.serviceType()); + return ServiceType.CLUSTER_CONTROLLER.equals(cluster.serviceType()); } /** @@ -59,19 +52,25 @@ public class VespaModelUtil { * @return true iff the service cluster consists of storage nodes (proton or vds). */ public static boolean isStorage(ServiceCluster cluster) { - return STORAGENODE_SERVICE_TYPE.equals(cluster.serviceType()); + return ServiceType.STORAGE.equals(cluster.serviceType()); } /** * @return true iff the service cluster is a content service cluster. */ public static boolean isContent(ServiceCluster cluster) { - return DISTRIBUTOR_SERVICE_TYPE.equals(cluster.serviceType()) || - SEARCHNODE_SERVICE_TYPE.equals(cluster.serviceType()) || - STORAGENODE_SERVICE_TYPE.equals(cluster.serviceType()); + return isContent(cluster.serviceType()); } /** + * @return true iff the service type refers to a content service cluster. + */ + public static boolean isContent(ServiceType serviceType) { + return ServiceType.DISTRIBUTOR.equals(serviceType) || + ServiceType.SEARCH.equals(serviceType) || + ServiceType.STORAGE.equals(serviceType); + } + /** * @return The set of all Cluster Controller service instances for the application. */ public static List<HostName> getClusterControllerInstancesInOrder(ApplicationInstance application, diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java index 4e11961cb1b..d3e7afba25f 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java @@ -1,22 +1,39 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.orchestrator.policy; +import com.yahoo.vespa.applicationmodel.ServiceType; +import com.yahoo.vespa.flags.BooleanFlag; +import com.yahoo.vespa.flags.FetchVector; +import com.yahoo.vespa.flags.FlagSource; +import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.orchestrator.model.ClusterApi; import com.yahoo.vespa.orchestrator.model.VespaModelUtil; import java.util.Optional; +import java.util.Set; import static com.yahoo.vespa.orchestrator.policy.HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT; public class HostedVespaClusterPolicy implements ClusterPolicy { + private final BooleanFlag groupSuspensionFlag; + + public HostedVespaClusterPolicy(FlagSource flagSource) { + // Note that the "group" in this flag refers to hierarchical groups of a content cluster. + this.groupSuspensionFlag = Flags.GROUP_SUSPENSION.bindTo(flagSource); + } + @Override public SuspensionReasons verifyGroupGoingDownIsFine(ClusterApi clusterApi) throws HostStateChangeDeniedException { + boolean enableContentGroupSuspension = groupSuspensionFlag + .with(FetchVector.Dimension.APPLICATION_ID, clusterApi.getApplication().applicationId().serializedForm()) + .value(); + if (clusterApi.noServicesOutsideGroupIsDown()) { return SuspensionReasons.nothingNoteworthy(); } - int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi).asPercentage(); + int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi, enableContentGroupSuspension).asPercentage(); if (clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown() <= percentageOfServicesAllowedToBeDown) { return SuspensionReasons.nothingNoteworthy(); } @@ -26,14 +43,16 @@ public class HostedVespaClusterPolicy implements ClusterPolicy { return suspensionReasons.get(); } - throw new HostStateChangeDeniedException( - clusterApi.getNodeGroup(), - ENOUGH_SERVICES_UP_CONSTRAINT, - "Suspension for service type " + clusterApi.serviceType() - + " would increase from " + clusterApi.percentageOfServicesDown() - + "% to " + clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown() - + "%, over the limit of " + percentageOfServicesAllowedToBeDown + "%." - + clusterApi.downDescription()); + String message = percentageOfServicesAllowedToBeDown <= 0 + ? "Suspension of service with type '" + clusterApi.serviceType() + "' not allowed: " + + clusterApi.percentageOfServicesDown() + "% are suspended already." + clusterApi.downDescription() + : "Suspension of service with type '" + clusterApi.serviceType() + + "' would increase from " + clusterApi.percentageOfServicesDown() + + "% to " + clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown() + + "%, over the limit of " + percentageOfServicesAllowedToBeDown + "%." + + clusterApi.downDescription(); + + throw new HostStateChangeDeniedException(clusterApi.getNodeGroup(), ENOUGH_SERVICES_UP_CONSTRAINT, message); } @Override @@ -47,7 +66,7 @@ public class HostedVespaClusterPolicy implements ClusterPolicy { return; } - int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi).asPercentage(); + int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi, false).asPercentage(); if (clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown() <= percentageOfServicesAllowedToBeDown) { return; } @@ -62,32 +81,110 @@ public class HostedVespaClusterPolicy implements ClusterPolicy { } // Non-private for testing purposes - ConcurrentSuspensionLimitForCluster getConcurrentSuspensionLimit(ClusterApi clusterApi) { - if (clusterApi.isStorageCluster()) { - return ConcurrentSuspensionLimitForCluster.ONE_NODE; - } + ConcurrentSuspensionLimitForCluster getConcurrentSuspensionLimit(ClusterApi clusterApi, boolean enableContentGroupSuspension) { + if (enableContentGroupSuspension) { + // Possible service clusters on a node as of 2021-01-22: + // + // CLUSTER ID SERVICE TYPE HEALTH ASSOCIATION + // 1 CCN-controllers container-clustercontrollers Slobrok 1, 3, or 6 in content cluster + // 2 CCN distributor Slobrok content cluster + // 3 CCN storagenode Slobrok content cluster + // 4 CCN searchnode Slobrok content cluster + // 5 CCN transactionlogserver not checked content cluster + // 6 JCCN container Slobrok jdisc container cluster + // 7 admin slobrok not checked 1-3 in jdisc container cluster + // 8 metrics metricsproxy-container Slobrok application + // 9 admin logd not checked application + // 10 admin config-sentinel not checked application + // 11 admin configproxy not checked application + // 12 admin logforwarder not checked application + // 13 controller controller state/v1 controllers + // 14 zone-config-servers configserver state/v1 config servers + // 15 controller-host hostadmin state/v1 controller hosts + // 16 configserver-host hostadmin state/v1 config server hosts + // 17 tenant-host hostadmin state/v1 tenant hosts + // 18 proxy-host hostadmin state/v1 proxy hosts + // + // CCN refers to the content cluster's name, as specified in services.xml. + // JCCN refers to the jdisc container cluster's name, as specified in services.xml. + // + // For instance a content node will have 2-5 and 8-12 and possibly 1, while a combined + // cluster node may have all 1-12. + // + // The services on a node can be categorized into these main types, ref association column above: + // A content + // B container + // C tenant host + // D config server + // E config server host + // F controller + // G controller host + // H proxy (same as B) + // I proxy host + + if (clusterApi.serviceType().equals(ServiceType.CLUSTER_CONTROLLER)) { + // All nodes have all state and we need to be able to remove the half that are retired on cluster migration + return ConcurrentSuspensionLimitForCluster.FIFTY_PERCENT; + } - if (VespaModelUtil.CLUSTER_CONTROLLER_SERVICE_TYPE.equals(clusterApi.serviceType())) { - // All nodes have all state and we need to be able to remove the half that are retired on cluster migration - return ConcurrentSuspensionLimitForCluster.FIFTY_PERCENT; - } + if (Set.of(ServiceType.STORAGE, ServiceType.SEARCH, ServiceType.DISTRIBUTOR, ServiceType.TRANSACTION_LOG_SERVER) + .contains(clusterApi.serviceType())) { + // Delegate to the cluster controller + return ConcurrentSuspensionLimitForCluster.ALL_NODES; + } - if (VespaModelUtil.METRICS_PROXY_SERVICE_TYPE.equals(clusterApi.serviceType())) { - return ConcurrentSuspensionLimitForCluster.ALL_NODES; - } + if (clusterApi.serviceType().equals(ServiceType.CONTAINER)) { + return ConcurrentSuspensionLimitForCluster.TEN_PERCENT; + } + + if (VespaModelUtil.ADMIN_CLUSTER_ID.equals(clusterApi.clusterId())) { + if (ServiceType.SLOBROK.equals(clusterApi.serviceType())) { + return ConcurrentSuspensionLimitForCluster.ONE_NODE; + } - if (VespaModelUtil.ADMIN_CLUSTER_ID.equals(clusterApi.clusterId())) { - if (VespaModelUtil.SLOBROK_SERVICE_TYPE.equals(clusterApi.serviceType())) { + return ConcurrentSuspensionLimitForCluster.ALL_NODES; + } else if (ServiceType.METRICS_PROXY.equals(clusterApi.serviceType())) { + return ConcurrentSuspensionLimitForCluster.ALL_NODES; + } + + if (Set.of(ServiceType.CONFIG_SERVER, ServiceType.CONTROLLER).contains(clusterApi.serviceType())) { return ConcurrentSuspensionLimitForCluster.ONE_NODE; } - return ConcurrentSuspensionLimitForCluster.ALL_NODES; - } + if (clusterApi.serviceType().equals(ServiceType.HOST_ADMIN)) { + return ConcurrentSuspensionLimitForCluster.TWENTY_PERCENT; + } - if (clusterApi.getApplication().applicationId().equals(VespaModelUtil.TENANT_HOST_APPLICATION_ID)) { - return ConcurrentSuspensionLimitForCluster.TWENTY_PERCENT; - } + // The above should cover all cases, but if not we'll return a reasonable default: + return ConcurrentSuspensionLimitForCluster.TEN_PERCENT; + } else { + // TODO: Remove this legacy branch + if (clusterApi.isStorageCluster()) { + return ConcurrentSuspensionLimitForCluster.ONE_NODE; + } + + if (ServiceType.CLUSTER_CONTROLLER.equals(clusterApi.serviceType())) { + // All nodes have all state and we need to be able to remove the half that are retired on cluster migration + return ConcurrentSuspensionLimitForCluster.FIFTY_PERCENT; + } - return ConcurrentSuspensionLimitForCluster.TEN_PERCENT; + if (ServiceType.METRICS_PROXY.equals(clusterApi.serviceType())) { + return ConcurrentSuspensionLimitForCluster.ALL_NODES; + } + + if (VespaModelUtil.ADMIN_CLUSTER_ID.equals(clusterApi.clusterId())) { + if (ServiceType.SLOBROK.equals(clusterApi.serviceType())) { + return ConcurrentSuspensionLimitForCluster.ONE_NODE; + } + + return ConcurrentSuspensionLimitForCluster.ALL_NODES; + } + + if (clusterApi.getApplication().applicationId().equals(VespaModelUtil.TENANT_HOST_APPLICATION_ID)) { + return ConcurrentSuspensionLimitForCluster.TWENTY_PERCENT; + } + + return ConcurrentSuspensionLimitForCluster.TEN_PERCENT; + } } } diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java index 3580b305c2a..22aa578cb55 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java @@ -101,7 +101,7 @@ public class OrchestratorImplTest { app2 = OrchestratorUtil.toApplicationId(iterator.next().reference()); clustercontroller = new ClusterControllerClientFactoryMock(); - orchestrator = new OrchestratorImpl(new HostedVespaPolicy(new HostedVespaClusterPolicy(), clustercontroller, applicationApiFactory), + orchestrator = new OrchestratorImpl(new HostedVespaPolicy(new HostedVespaClusterPolicy(flagSource), clustercontroller, applicationApiFactory), clustercontroller, statusService, new DummyServiceMonitor(), @@ -433,7 +433,7 @@ public class OrchestratorImplTest { ServiceMonitor serviceMonitor = () -> new ServiceModel(Map.of(reference, applicationInstance)); - orchestrator = new OrchestratorImpl(new HostedVespaPolicy(new HostedVespaClusterPolicy(), clusterControllerClientFactory, applicationApiFactory), + orchestrator = new OrchestratorImpl(new HostedVespaPolicy(new HostedVespaClusterPolicy(flagSource), clusterControllerClientFactory, applicationApiFactory), clusterControllerClientFactory, statusService, serviceMonitor, diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImplTest.java index 9a013689224..314c21f5aae 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImplTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImplTest.java @@ -95,7 +95,7 @@ public class ApplicationApiImplTest { modelUtils.createApplicationInstance(Arrays.asList( modelUtils.createServiceCluster( "cluster-3", - VespaModelUtil.STORAGENODE_SERVICE_TYPE, + ServiceType.STORAGE, Arrays.asList( modelUtils.createServiceInstance("config-id-30", hostName1, ServiceStatus.UP), modelUtils.createServiceInstance("config-id-31", hostName2, ServiceStatus.UP) @@ -103,7 +103,7 @@ public class ApplicationApiImplTest { ), modelUtils.createServiceCluster( "cluster-1", - VespaModelUtil.STORAGENODE_SERVICE_TYPE, + ServiceType.STORAGE, Arrays.asList( modelUtils.createServiceInstance("config-id-10", hostName3, ServiceStatus.DOWN), modelUtils.createServiceInstance("config-id-11", hostName4, ServiceStatus.UP) @@ -121,7 +121,7 @@ public class ApplicationApiImplTest { ), modelUtils.createServiceCluster( "cluster-2", - VespaModelUtil.STORAGENODE_SERVICE_TYPE, + ServiceType.STORAGE, Arrays.asList( modelUtils.createServiceInstance("config-id-20", hostName6, ServiceStatus.DOWN), modelUtils.createServiceInstance("config-id-21", hostName7, ServiceStatus.UP) @@ -176,7 +176,7 @@ public class ApplicationApiImplTest { modelUtils.createApplicationInstance(Arrays.asList( modelUtils.createServiceCluster( "cluster-1", - VespaModelUtil.STORAGENODE_SERVICE_TYPE, + ServiceType.STORAGE, Arrays.asList(modelUtils.createServiceInstance("config-id-1", hostName1, serviceStatus)) ) )); @@ -269,7 +269,7 @@ public class ApplicationApiImplTest { modelUtils.createApplicationInstance(Arrays.asList( modelUtils.createServiceCluster( "cluster-4", - VespaModelUtil.STORAGENODE_SERVICE_TYPE, + ServiceType.STORAGE, Arrays.asList( modelUtils.createServiceInstance("config-id-40", allowedToBeDownHost1, ServiceStatus.UP), modelUtils.createServiceInstance("config-id-41", noRemarksHost2, ServiceStatus.DOWN) @@ -285,7 +285,7 @@ public class ApplicationApiImplTest { ), modelUtils.createServiceCluster( "cluster-3", - VespaModelUtil.STORAGENODE_SERVICE_TYPE, + ServiceType.STORAGE, Arrays.asList( modelUtils.createServiceInstance("config-id-30", allowedToBeDownHost4, ServiceStatus.UP), modelUtils.createServiceInstance("config-id-31", noRemarksHost5, ServiceStatus.UP) @@ -293,7 +293,7 @@ public class ApplicationApiImplTest { ), modelUtils.createServiceCluster( "cluster-2", - VespaModelUtil.STORAGENODE_SERVICE_TYPE, + ServiceType.STORAGE, Arrays.asList( modelUtils.createServiceInstance("config-id-20", noRemarksHost6, ServiceStatus.UP), modelUtils.createServiceInstance("config-id-21", allowedToBeDownHost7, ServiceStatus.UP) diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java index 14d35902738..bc621bfc6aa 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java @@ -12,6 +12,8 @@ import com.yahoo.vespa.applicationmodel.ServiceStatus; import com.yahoo.vespa.applicationmodel.ServiceStatusInfo; import com.yahoo.vespa.applicationmodel.ServiceType; import com.yahoo.vespa.applicationmodel.TenantId; +import com.yahoo.vespa.flags.Flags; +import com.yahoo.vespa.flags.InMemoryFlagSource; import com.yahoo.vespa.orchestrator.OrchestratorUtil; import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException; import com.yahoo.vespa.orchestrator.policy.HostedVespaClusterPolicy; @@ -49,6 +51,7 @@ public class ClusterApiImplTest { private final ApplicationApi applicationApi = mock(ApplicationApi.class); private final ModelTestUtils modelUtils = new ModelTestUtils(); private final ManualClock clock = new ManualClock(Instant.ofEpochSecond(1600436659)); + private final InMemoryFlagSource flagSource = new InMemoryFlagSource(); @Test public void testServicesDownAndNotInGroup() { @@ -103,7 +106,7 @@ public class ClusterApiImplTest { public void testCfg1SuspensionFailsWithMissingCfg3() { ClusterApiImpl clusterApi = makeCfg1ClusterApi(ServiceStatus.UP, ServiceStatus.UP); - HostedVespaClusterPolicy policy = new HostedVespaClusterPolicy(); + HostedVespaClusterPolicy policy = new HostedVespaClusterPolicy(flagSource); try { policy.verifyGroupGoingDownIsFine(clusterApi); @@ -111,16 +114,27 @@ public class ClusterApiImplTest { } catch (HostStateChangeDeniedException e) { assertThat(e.getMessage(), containsString("Changing the state of cfg1 would violate enough-services-up: " + - "Suspension for service type configserver would increase from 33% to 66%, " + + "Suspension of service with type 'configserver' would increase from 33% to 66%, " + "over the limit of 10%. Services down on resumed hosts: [1 missing config server].")); } + + flagSource.withBooleanFlag(Flags.GROUP_SUSPENSION.id(), true); + + try { + policy.verifyGroupGoingDownIsFine(clusterApi); + fail(); + } catch (HostStateChangeDeniedException e) { + assertThat(e.getMessage(), + containsString("Suspension of service with type 'configserver' not allowed: 33% are suspended already. " + + "Services down on resumed hosts: [1 missing config server].")); + } } @Test public void testCfg1SuspendsIfDownWithMissingCfg3() throws HostStateChangeDeniedException { ClusterApiImpl clusterApi = makeCfg1ClusterApi(ServiceStatus.DOWN, ServiceStatus.UP); - HostedVespaClusterPolicy policy = new HostedVespaClusterPolicy(); + HostedVespaClusterPolicy policy = new HostedVespaClusterPolicy(flagSource); policy.verifyGroupGoingDownIsFine(clusterApi); } @@ -129,7 +143,7 @@ public class ClusterApiImplTest { public void testSingleConfigServerCanSuspend() { for (var status : EnumSet.of(ServiceStatus.UP, ServiceStatus.DOWN)) { var clusterApi = makeConfigClusterApi(1, status); - var policy = new HostedVespaClusterPolicy(); + var policy = new HostedVespaClusterPolicy(flagSource); try { policy.verifyGroupGoingDownIsFine(clusterApi); } catch (HostStateChangeDeniedException e) { @@ -227,7 +241,7 @@ public class ClusterApiImplTest { ServiceCluster serviceCluster = modelUtils.createServiceCluster( "cluster", - VespaModelUtil.STORAGENODE_SERVICE_TYPE, + ServiceType.STORAGE, Arrays.asList( modelUtils.createServiceInstance("storage-1", hostName1, ServiceStatus.UP), modelUtils.createServiceInstance("storage-2", hostName2, ServiceStatus.DOWN) diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java index 3f79aa3a098..0f71def5d2e 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java @@ -65,7 +65,7 @@ class ModelTestUtils { mock(Metric.class), new TestTimer(), new DummyAntiServiceMonitor()); - private final Orchestrator orchestrator = new OrchestratorImpl(new HostedVespaPolicy(new HostedVespaClusterPolicy(), clusterControllerClientFactory, applicationApiFactory()), + private final Orchestrator orchestrator = new OrchestratorImpl(new HostedVespaPolicy(new HostedVespaClusterPolicy(flagSource), clusterControllerClientFactory, applicationApiFactory()), clusterControllerClientFactory, statusService, serviceMonitor, diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/VespaModelUtilTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/VespaModelUtilTest.java index 537101e6c51..d4199da3a74 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/VespaModelUtilTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/VespaModelUtilTest.java @@ -49,7 +49,7 @@ public class VespaModelUtilTest { private static final ServiceCluster controllerCluster = new ServiceCluster( new ClusterId(CONTENT_CLUSTER_ID.s() + "-controller"), - VespaModelUtil.CLUSTER_CONTROLLER_SERVICE_TYPE, + ServiceType.CLUSTER_CONTROLLER, makeServiceInstanceSet(controller1, controller0)); // Distributor Service Cluster @@ -63,7 +63,7 @@ public class VespaModelUtilTest { private static final ServiceCluster distributorCluster = new ServiceCluster( CONTENT_CLUSTER_ID, - VespaModelUtil.DISTRIBUTOR_SERVICE_TYPE, + ServiceType.DISTRIBUTOR, makeServiceInstanceSet(distributor0)); // Storage Node Service Cluster @@ -77,7 +77,7 @@ public class VespaModelUtilTest { private static final ServiceCluster storageCluster = new ServiceCluster( CONTENT_CLUSTER_ID, - VespaModelUtil.STORAGENODE_SERVICE_TYPE, + ServiceType.STORAGE, makeServiceInstanceSet(storage0)); // Secondary Distributor Service Cluster @@ -91,7 +91,7 @@ public class VespaModelUtilTest { private static final ServiceCluster secondaryDistributorCluster = new ServiceCluster( SECONDARY_CONTENT_CLUSTER_ID, - VespaModelUtil.DISTRIBUTOR_SERVICE_TYPE, + ServiceType.DISTRIBUTOR, makeServiceInstanceSet(secondaryDistributor0)); // Secondary Storage Node Service Cluster @@ -105,7 +105,7 @@ public class VespaModelUtilTest { private static final ServiceCluster secondaryStorageCluster = new ServiceCluster( SECONDARY_CONTENT_CLUSTER_ID, - VespaModelUtil.STORAGENODE_SERVICE_TYPE, + ServiceType.STORAGE, makeServiceInstanceSet(secondaryStorage0)); // The Application Instance @@ -130,7 +130,7 @@ public class VespaModelUtilTest { @Test public void verifyControllerClusterIsRecognized() { - ServiceCluster cluster = createServiceCluster(VespaModelUtil.CLUSTER_CONTROLLER_SERVICE_TYPE); + ServiceCluster cluster = createServiceCluster(ServiceType.CLUSTER_CONTROLLER); assertTrue(VespaModelUtil.isClusterController(cluster)); } @@ -142,9 +142,9 @@ public class VespaModelUtilTest { @Test public void verifyStorageClusterIsRecognized() { - ServiceCluster cluster = createServiceCluster(VespaModelUtil.STORAGENODE_SERVICE_TYPE); + ServiceCluster cluster = createServiceCluster(ServiceType.STORAGE); assertTrue(VespaModelUtil.isStorage(cluster)); - cluster = createServiceCluster(VespaModelUtil.STORAGENODE_SERVICE_TYPE); + cluster = createServiceCluster(ServiceType.STORAGE); assertTrue(VespaModelUtil.isStorage(cluster)); } @@ -156,11 +156,11 @@ public class VespaModelUtilTest { @Test public void verifyContentClusterIsRecognized() { - ServiceCluster cluster = createServiceCluster(VespaModelUtil.DISTRIBUTOR_SERVICE_TYPE); + ServiceCluster cluster = createServiceCluster(ServiceType.DISTRIBUTOR); assertTrue(VespaModelUtil.isContent(cluster)); - cluster = createServiceCluster(VespaModelUtil.STORAGENODE_SERVICE_TYPE); + cluster = createServiceCluster(ServiceType.STORAGE); assertTrue(VespaModelUtil.isContent(cluster)); - cluster = createServiceCluster(VespaModelUtil.SEARCHNODE_SERVICE_TYPE); + cluster = createServiceCluster(ServiceType.SEARCH); assertTrue(VespaModelUtil.isContent(cluster)); } diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java index e5be87ba043..97c1c7ab5de 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java @@ -5,6 +5,7 @@ import com.yahoo.config.provision.ApplicationId; import com.yahoo.vespa.applicationmodel.ClusterId; import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.applicationmodel.ServiceType; +import com.yahoo.vespa.flags.InMemoryFlagSource; import com.yahoo.vespa.orchestrator.model.ApplicationApi; import com.yahoo.vespa.orchestrator.model.ClusterApi; import com.yahoo.vespa.orchestrator.model.NodeGroup; @@ -24,7 +25,8 @@ import static org.mockito.Mockito.when; public class HostedVespaClusterPolicyTest { private ApplicationApi applicationApi = mock(ApplicationApi.class); private ClusterApi clusterApi = mock(ClusterApi.class); - private HostedVespaClusterPolicy policy = spy(new HostedVespaClusterPolicy()); + private final InMemoryFlagSource flagSource = new InMemoryFlagSource(); + private HostedVespaClusterPolicy policy = spy(new HostedVespaClusterPolicy(flagSource)); @Before public void setUp() { @@ -34,9 +36,9 @@ public class HostedVespaClusterPolicyTest { @Test public void testSlobrokSuspensionLimit() { when(clusterApi.clusterId()).thenReturn(VespaModelUtil.ADMIN_CLUSTER_ID); - when(clusterApi.serviceType()).thenReturn(VespaModelUtil.SLOBROK_SERVICE_TYPE); + when(clusterApi.serviceType()).thenReturn(ServiceType.SLOBROK); assertEquals(ConcurrentSuspensionLimitForCluster.ONE_NODE, - policy.getConcurrentSuspensionLimit(clusterApi)); + policy.getConcurrentSuspensionLimit(clusterApi, false)); } @Test @@ -44,15 +46,24 @@ public class HostedVespaClusterPolicyTest { when(clusterApi.clusterId()).thenReturn(VespaModelUtil.ADMIN_CLUSTER_ID); when(clusterApi.serviceType()).thenReturn(new ServiceType("non-slobrok-service-type")); assertEquals(ConcurrentSuspensionLimitForCluster.ALL_NODES, - policy.getConcurrentSuspensionLimit(clusterApi)); + policy.getConcurrentSuspensionLimit(clusterApi, false)); } @Test public void testStorageSuspensionLimit() { + when(clusterApi.serviceType()).thenReturn(ServiceType.STORAGE); + when(clusterApi.clusterId()).thenReturn(new ClusterId("some-cluster-id")); + when(clusterApi.isStorageCluster()).thenReturn(true); + assertEquals(ConcurrentSuspensionLimitForCluster.ALL_NODES, + policy.getConcurrentSuspensionLimit(clusterApi, true)); + } + + @Test + public void testStorageSuspensionLimit_legacy() { when(clusterApi.clusterId()).thenReturn(new ClusterId("some-cluster-id")); when(clusterApi.isStorageCluster()).thenReturn(true); assertEquals(ConcurrentSuspensionLimitForCluster.ONE_NODE, - policy.getConcurrentSuspensionLimit(clusterApi)); + policy.getConcurrentSuspensionLimit(clusterApi, false)); } @Test @@ -60,7 +71,7 @@ public class HostedVespaClusterPolicyTest { when(applicationApi.applicationId()).thenReturn(VespaModelUtil.TENANT_HOST_APPLICATION_ID); when(clusterApi.isStorageCluster()).thenReturn(false); assertEquals(ConcurrentSuspensionLimitForCluster.TWENTY_PERCENT, - policy.getConcurrentSuspensionLimit(clusterApi)); + policy.getConcurrentSuspensionLimit(clusterApi, false)); } @Test @@ -69,7 +80,7 @@ public class HostedVespaClusterPolicyTest { when(clusterApi.clusterId()).thenReturn(new ClusterId("some-cluster-id")); when(clusterApi.isStorageCluster()).thenReturn(false); assertEquals(ConcurrentSuspensionLimitForCluster.TEN_PERCENT, - policy.getConcurrentSuspensionLimit(clusterApi)); + policy.getConcurrentSuspensionLimit(clusterApi, false)); } @Test @@ -100,8 +111,9 @@ public class HostedVespaClusterPolicyTest { when(clusterApi.noServicesOutsideGroupIsDown()).thenReturn(noServicesOutsideGroupIsDown); when(clusterApi.reasonsForNoServicesInGroupIsUp()).thenReturn(noServicesInGroupIsUp); when(clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()).thenReturn(20); - doReturn(ConcurrentSuspensionLimitForCluster.TEN_PERCENT).when(policy).getConcurrentSuspensionLimit(clusterApi); + doReturn(ConcurrentSuspensionLimitForCluster.TEN_PERCENT).when(policy).getConcurrentSuspensionLimit(clusterApi, false); + when(applicationApi.applicationId()).thenReturn(ApplicationId.fromSerializedForm("a:b:c")); when(clusterApi.serviceType()).thenReturn(new ServiceType("service-type")); when(clusterApi.percentageOfServicesDown()).thenReturn(5); when(clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()).thenReturn(percentageOfServicesDownIfGroupIsAllowedToBeDown); @@ -123,7 +135,7 @@ public class HostedVespaClusterPolicyTest { } catch (HostStateChangeDeniedException e) { if (!expectSuccess) { assertEquals("Changing the state of node-group would violate enough-services-up: " + - "Suspension for service type service-type would increase from 5% to 13%, " + + "Suspension of service with type 'service-type' would increase from 5% to 13%, " + "over the limit of 10%. Down description", e.getMessage()); assertEquals("enough-services-up", e.getConstraintName()); } |