summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHåkon Hallingstad <hakon@verizonmedia.com>2021-01-22 15:54:23 +0100
committerHåkon Hallingstad <hakon@verizonmedia.com>2021-01-22 15:54:23 +0100
commit27e316ebbcea4bbee612d40166e7a0f7a007acc7 (patch)
treeb50f9c9647adc9c27b41895e0b9212a58b5f0832
parent8612ccf19dbfdcaec328a1743142ded1ab85274f (diff)
Support delegating content node suspension to cluster controller
This PR introduces a new flag group-suspension, which if true, enables: - Instead of allowing at most one storagenode to suspend at any given time, it will now ignore storagenode, searchnode, and distributor service clusters, and rely on the cluster controller to allow or deny the request to suspend. This will increase the load on the cluster controllers. Combined with earlier changes to the cluster controller, this new flag effectively guard the feature of allowing all nodes within a hierarchical group to suspend concurrently. I also took the opportunity to tune related policies: - Allow at most one config server and controller to be down at any given time. This is actually a no-op, since it was effectivelly equal to the older policy of 10% down. - Allows 20% of all host-admins to be down, not just tenant host-admins. This is effectively equal to the old policy of 10% except that it may allow 2 proxy host-admins to go down at the same time. Should be fine.
-rw-r--r--application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceType.java6
-rw-r--r--flags/src/main/java/com/yahoo/vespa/flags/Flags.java7
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java2
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/VespaModelUtil.java23
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java155
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java4
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImplTest.java14
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java24
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java2
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/VespaModelUtilTest.java22
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java29
11 files changed, 211 insertions, 77 deletions
diff --git a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceType.java b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceType.java
index f01d3c3b3fb..92bdd0f21a0 100644
--- a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceType.java
+++ b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceType.java
@@ -16,6 +16,12 @@ public class ServiceType {
public static final ServiceType HOST_ADMIN = new ServiceType("hostadmin");
public static final ServiceType CONFIG_SERVER = new ServiceType("configserver");
public static final ServiceType CONTROLLER = new ServiceType("controller");
+ public static final ServiceType TRANSACTION_LOG_SERVER = new ServiceType("transactionlogserver");
+ public static final ServiceType CLUSTER_CONTROLLER = new ServiceType("container-clustercontroller");
+ public static final ServiceType DISTRIBUTOR = new ServiceType("distributor");
+ public static final ServiceType SEARCH = new ServiceType("searchnode");
+ public static final ServiceType STORAGE = new ServiceType("storagenode");
+ public static final ServiceType METRICS_PROXY = new ServiceType("metricsproxy-container");
private final String id;
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
index 6f7970e1357..df1a341eefa 100644
--- a/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
+++ b/flags/src/main/java/com/yahoo/vespa/flags/Flags.java
@@ -278,6 +278,13 @@ public class Flags {
"Takes effect on next internal redeployment",
APPLICATION_ID);
+ public static final UnboundBooleanFlag GROUP_SUSPENSION = defineFeatureFlag(
+ "group-suspension", false,
+ List.of("hakon"), "2021-01-22", "2021-03-22",
+ "Allow all content nodes in a hierarchical group to suspend at the same time",
+ "Takes effect on the next suspension request to the Orchestrator.",
+ APPLICATION_ID);
+
public static final UnboundBooleanFlag RECONFIGURABLE_ZOOKEEPER_SERVER_FOR_CLUSTER_CONTROLLER = defineFeatureFlag(
"reconfigurable-zookeeper-server-for-cluster-controller", false,
List.of("musum", "mpolden"), "2020-12-16", "2021-02-16",
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java
index 630427f49a8..35a8e3578b4 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java
@@ -71,7 +71,7 @@ public class OrchestratorImpl implements Orchestrator {
ConfigserverConfig configServerConfig,
FlagSource flagSource)
{
- this(new HostedVespaPolicy(new HostedVespaClusterPolicy(),
+ this(new HostedVespaPolicy(new HostedVespaClusterPolicy(flagSource),
clusterControllerClientFactory,
new ApplicationApiFactory(configServerConfig.zookeeperserver().size(), Clock.systemUTC())),
clusterControllerClientFactory,
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/VespaModelUtil.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/VespaModelUtil.java
index c53d35c16dc..3ee72769cf3 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/VespaModelUtil.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/VespaModelUtil.java
@@ -37,19 +37,12 @@ public class VespaModelUtil {
public static final ClusterId ADMIN_CLUSTER_ID = new ClusterId("admin");
- public static final ServiceType SLOBROK_SERVICE_TYPE = new ServiceType("slobrok");
- public static final ServiceType CLUSTER_CONTROLLER_SERVICE_TYPE = new ServiceType("container-clustercontroller");
- public static final ServiceType DISTRIBUTOR_SERVICE_TYPE = new ServiceType("distributor");
- public static final ServiceType SEARCHNODE_SERVICE_TYPE = new ServiceType("searchnode");
- public static final ServiceType STORAGENODE_SERVICE_TYPE = new ServiceType("storagenode");
- public static final ServiceType METRICS_PROXY_SERVICE_TYPE = new ServiceType("metricsproxy-container");
-
private static final Comparator<ServiceInstance> CLUSTER_CONTROLLER_INDEX_COMPARATOR =
Comparator.comparing(serviceInstance -> VespaModelUtil.getClusterControllerIndex(serviceInstance.configId()));
// @return true iff the service cluster refers to a cluster controller service cluster.
public static boolean isClusterController(ServiceCluster cluster) {
- return CLUSTER_CONTROLLER_SERVICE_TYPE.equals(cluster.serviceType());
+ return ServiceType.CLUSTER_CONTROLLER.equals(cluster.serviceType());
}
/**
@@ -59,19 +52,25 @@ public class VespaModelUtil {
* @return true iff the service cluster consists of storage nodes (proton or vds).
*/
public static boolean isStorage(ServiceCluster cluster) {
- return STORAGENODE_SERVICE_TYPE.equals(cluster.serviceType());
+ return ServiceType.STORAGE.equals(cluster.serviceType());
}
/**
* @return true iff the service cluster is a content service cluster.
*/
public static boolean isContent(ServiceCluster cluster) {
- return DISTRIBUTOR_SERVICE_TYPE.equals(cluster.serviceType()) ||
- SEARCHNODE_SERVICE_TYPE.equals(cluster.serviceType()) ||
- STORAGENODE_SERVICE_TYPE.equals(cluster.serviceType());
+ return isContent(cluster.serviceType());
}
/**
+ * @return true iff the service type refers to a content service cluster.
+ */
+ public static boolean isContent(ServiceType serviceType) {
+ return ServiceType.DISTRIBUTOR.equals(serviceType) ||
+ ServiceType.SEARCH.equals(serviceType) ||
+ ServiceType.STORAGE.equals(serviceType);
+ }
+ /**
* @return The set of all Cluster Controller service instances for the application.
*/
public static List<HostName> getClusterControllerInstancesInOrder(ApplicationInstance application,
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java
index 4e11961cb1b..d3e7afba25f 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java
@@ -1,22 +1,39 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.orchestrator.policy;
+import com.yahoo.vespa.applicationmodel.ServiceType;
+import com.yahoo.vespa.flags.BooleanFlag;
+import com.yahoo.vespa.flags.FetchVector;
+import com.yahoo.vespa.flags.FlagSource;
+import com.yahoo.vespa.flags.Flags;
import com.yahoo.vespa.orchestrator.model.ClusterApi;
import com.yahoo.vespa.orchestrator.model.VespaModelUtil;
import java.util.Optional;
+import java.util.Set;
import static com.yahoo.vespa.orchestrator.policy.HostedVespaPolicy.ENOUGH_SERVICES_UP_CONSTRAINT;
public class HostedVespaClusterPolicy implements ClusterPolicy {
+ private final BooleanFlag groupSuspensionFlag;
+
+ public HostedVespaClusterPolicy(FlagSource flagSource) {
+ // Note that the "group" in this flag refers to hierarchical groups of a content cluster.
+ this.groupSuspensionFlag = Flags.GROUP_SUSPENSION.bindTo(flagSource);
+ }
+
@Override
public SuspensionReasons verifyGroupGoingDownIsFine(ClusterApi clusterApi) throws HostStateChangeDeniedException {
+ boolean enableContentGroupSuspension = groupSuspensionFlag
+ .with(FetchVector.Dimension.APPLICATION_ID, clusterApi.getApplication().applicationId().serializedForm())
+ .value();
+
if (clusterApi.noServicesOutsideGroupIsDown()) {
return SuspensionReasons.nothingNoteworthy();
}
- int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi).asPercentage();
+ int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi, enableContentGroupSuspension).asPercentage();
if (clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown() <= percentageOfServicesAllowedToBeDown) {
return SuspensionReasons.nothingNoteworthy();
}
@@ -26,14 +43,16 @@ public class HostedVespaClusterPolicy implements ClusterPolicy {
return suspensionReasons.get();
}
- throw new HostStateChangeDeniedException(
- clusterApi.getNodeGroup(),
- ENOUGH_SERVICES_UP_CONSTRAINT,
- "Suspension for service type " + clusterApi.serviceType()
- + " would increase from " + clusterApi.percentageOfServicesDown()
- + "% to " + clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()
- + "%, over the limit of " + percentageOfServicesAllowedToBeDown + "%."
- + clusterApi.downDescription());
+ String message = percentageOfServicesAllowedToBeDown <= 0
+ ? "Suspension of service with type '" + clusterApi.serviceType() + "' not allowed: "
+ + clusterApi.percentageOfServicesDown() + "% are suspended already." + clusterApi.downDescription()
+ : "Suspension of service with type '" + clusterApi.serviceType()
+ + "' would increase from " + clusterApi.percentageOfServicesDown()
+ + "% to " + clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()
+ + "%, over the limit of " + percentageOfServicesAllowedToBeDown + "%."
+ + clusterApi.downDescription();
+
+ throw new HostStateChangeDeniedException(clusterApi.getNodeGroup(), ENOUGH_SERVICES_UP_CONSTRAINT, message);
}
@Override
@@ -47,7 +66,7 @@ public class HostedVespaClusterPolicy implements ClusterPolicy {
return;
}
- int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi).asPercentage();
+ int percentageOfServicesAllowedToBeDown = getConcurrentSuspensionLimit(clusterApi, false).asPercentage();
if (clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown() <= percentageOfServicesAllowedToBeDown) {
return;
}
@@ -62,32 +81,110 @@ public class HostedVespaClusterPolicy implements ClusterPolicy {
}
// Non-private for testing purposes
- ConcurrentSuspensionLimitForCluster getConcurrentSuspensionLimit(ClusterApi clusterApi) {
- if (clusterApi.isStorageCluster()) {
- return ConcurrentSuspensionLimitForCluster.ONE_NODE;
- }
+ ConcurrentSuspensionLimitForCluster getConcurrentSuspensionLimit(ClusterApi clusterApi, boolean enableContentGroupSuspension) {
+ if (enableContentGroupSuspension) {
+ // Possible service clusters on a node as of 2021-01-22:
+ //
+ // CLUSTER ID SERVICE TYPE HEALTH ASSOCIATION
+ // 1 CCN-controllers container-clustercontrollers Slobrok 1, 3, or 6 in content cluster
+ // 2 CCN distributor Slobrok content cluster
+ // 3 CCN storagenode Slobrok content cluster
+ // 4 CCN searchnode Slobrok content cluster
+ // 5 CCN transactionlogserver not checked content cluster
+ // 6 JCCN container Slobrok jdisc container cluster
+ // 7 admin slobrok not checked 1-3 in jdisc container cluster
+ // 8 metrics metricsproxy-container Slobrok application
+ // 9 admin logd not checked application
+ // 10 admin config-sentinel not checked application
+ // 11 admin configproxy not checked application
+ // 12 admin logforwarder not checked application
+ // 13 controller controller state/v1 controllers
+ // 14 zone-config-servers configserver state/v1 config servers
+ // 15 controller-host hostadmin state/v1 controller hosts
+ // 16 configserver-host hostadmin state/v1 config server hosts
+ // 17 tenant-host hostadmin state/v1 tenant hosts
+ // 18 proxy-host hostadmin state/v1 proxy hosts
+ //
+ // CCN refers to the content cluster's name, as specified in services.xml.
+ // JCCN refers to the jdisc container cluster's name, as specified in services.xml.
+ //
+ // For instance a content node will have 2-5 and 8-12 and possibly 1, while a combined
+ // cluster node may have all 1-12.
+ //
+ // The services on a node can be categorized into these main types, ref association column above:
+ // A content
+ // B container
+ // C tenant host
+ // D config server
+ // E config server host
+ // F controller
+ // G controller host
+ // H proxy (same as B)
+ // I proxy host
+
+ if (clusterApi.serviceType().equals(ServiceType.CLUSTER_CONTROLLER)) {
+ // All nodes have all state and we need to be able to remove the half that are retired on cluster migration
+ return ConcurrentSuspensionLimitForCluster.FIFTY_PERCENT;
+ }
- if (VespaModelUtil.CLUSTER_CONTROLLER_SERVICE_TYPE.equals(clusterApi.serviceType())) {
- // All nodes have all state and we need to be able to remove the half that are retired on cluster migration
- return ConcurrentSuspensionLimitForCluster.FIFTY_PERCENT;
- }
+ if (Set.of(ServiceType.STORAGE, ServiceType.SEARCH, ServiceType.DISTRIBUTOR, ServiceType.TRANSACTION_LOG_SERVER)
+ .contains(clusterApi.serviceType())) {
+ // Delegate to the cluster controller
+ return ConcurrentSuspensionLimitForCluster.ALL_NODES;
+ }
- if (VespaModelUtil.METRICS_PROXY_SERVICE_TYPE.equals(clusterApi.serviceType())) {
- return ConcurrentSuspensionLimitForCluster.ALL_NODES;
- }
+ if (clusterApi.serviceType().equals(ServiceType.CONTAINER)) {
+ return ConcurrentSuspensionLimitForCluster.TEN_PERCENT;
+ }
+
+ if (VespaModelUtil.ADMIN_CLUSTER_ID.equals(clusterApi.clusterId())) {
+ if (ServiceType.SLOBROK.equals(clusterApi.serviceType())) {
+ return ConcurrentSuspensionLimitForCluster.ONE_NODE;
+ }
- if (VespaModelUtil.ADMIN_CLUSTER_ID.equals(clusterApi.clusterId())) {
- if (VespaModelUtil.SLOBROK_SERVICE_TYPE.equals(clusterApi.serviceType())) {
+ return ConcurrentSuspensionLimitForCluster.ALL_NODES;
+ } else if (ServiceType.METRICS_PROXY.equals(clusterApi.serviceType())) {
+ return ConcurrentSuspensionLimitForCluster.ALL_NODES;
+ }
+
+ if (Set.of(ServiceType.CONFIG_SERVER, ServiceType.CONTROLLER).contains(clusterApi.serviceType())) {
return ConcurrentSuspensionLimitForCluster.ONE_NODE;
}
- return ConcurrentSuspensionLimitForCluster.ALL_NODES;
- }
+ if (clusterApi.serviceType().equals(ServiceType.HOST_ADMIN)) {
+ return ConcurrentSuspensionLimitForCluster.TWENTY_PERCENT;
+ }
- if (clusterApi.getApplication().applicationId().equals(VespaModelUtil.TENANT_HOST_APPLICATION_ID)) {
- return ConcurrentSuspensionLimitForCluster.TWENTY_PERCENT;
- }
+ // The above should cover all cases, but if not we'll return a reasonable default:
+ return ConcurrentSuspensionLimitForCluster.TEN_PERCENT;
+ } else {
+ // TODO: Remove this legacy branch
+ if (clusterApi.isStorageCluster()) {
+ return ConcurrentSuspensionLimitForCluster.ONE_NODE;
+ }
+
+ if (ServiceType.CLUSTER_CONTROLLER.equals(clusterApi.serviceType())) {
+ // All nodes have all state and we need to be able to remove the half that are retired on cluster migration
+ return ConcurrentSuspensionLimitForCluster.FIFTY_PERCENT;
+ }
- return ConcurrentSuspensionLimitForCluster.TEN_PERCENT;
+ if (ServiceType.METRICS_PROXY.equals(clusterApi.serviceType())) {
+ return ConcurrentSuspensionLimitForCluster.ALL_NODES;
+ }
+
+ if (VespaModelUtil.ADMIN_CLUSTER_ID.equals(clusterApi.clusterId())) {
+ if (ServiceType.SLOBROK.equals(clusterApi.serviceType())) {
+ return ConcurrentSuspensionLimitForCluster.ONE_NODE;
+ }
+
+ return ConcurrentSuspensionLimitForCluster.ALL_NODES;
+ }
+
+ if (clusterApi.getApplication().applicationId().equals(VespaModelUtil.TENANT_HOST_APPLICATION_ID)) {
+ return ConcurrentSuspensionLimitForCluster.TWENTY_PERCENT;
+ }
+
+ return ConcurrentSuspensionLimitForCluster.TEN_PERCENT;
+ }
}
}
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java
index 3580b305c2a..22aa578cb55 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java
@@ -101,7 +101,7 @@ public class OrchestratorImplTest {
app2 = OrchestratorUtil.toApplicationId(iterator.next().reference());
clustercontroller = new ClusterControllerClientFactoryMock();
- orchestrator = new OrchestratorImpl(new HostedVespaPolicy(new HostedVespaClusterPolicy(), clustercontroller, applicationApiFactory),
+ orchestrator = new OrchestratorImpl(new HostedVespaPolicy(new HostedVespaClusterPolicy(flagSource), clustercontroller, applicationApiFactory),
clustercontroller,
statusService,
new DummyServiceMonitor(),
@@ -433,7 +433,7 @@ public class OrchestratorImplTest {
ServiceMonitor serviceMonitor = () -> new ServiceModel(Map.of(reference, applicationInstance));
- orchestrator = new OrchestratorImpl(new HostedVespaPolicy(new HostedVespaClusterPolicy(), clusterControllerClientFactory, applicationApiFactory),
+ orchestrator = new OrchestratorImpl(new HostedVespaPolicy(new HostedVespaClusterPolicy(flagSource), clusterControllerClientFactory, applicationApiFactory),
clusterControllerClientFactory,
statusService,
serviceMonitor,
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImplTest.java
index 9a013689224..314c21f5aae 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImplTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImplTest.java
@@ -95,7 +95,7 @@ public class ApplicationApiImplTest {
modelUtils.createApplicationInstance(Arrays.asList(
modelUtils.createServiceCluster(
"cluster-3",
- VespaModelUtil.STORAGENODE_SERVICE_TYPE,
+ ServiceType.STORAGE,
Arrays.asList(
modelUtils.createServiceInstance("config-id-30", hostName1, ServiceStatus.UP),
modelUtils.createServiceInstance("config-id-31", hostName2, ServiceStatus.UP)
@@ -103,7 +103,7 @@ public class ApplicationApiImplTest {
),
modelUtils.createServiceCluster(
"cluster-1",
- VespaModelUtil.STORAGENODE_SERVICE_TYPE,
+ ServiceType.STORAGE,
Arrays.asList(
modelUtils.createServiceInstance("config-id-10", hostName3, ServiceStatus.DOWN),
modelUtils.createServiceInstance("config-id-11", hostName4, ServiceStatus.UP)
@@ -121,7 +121,7 @@ public class ApplicationApiImplTest {
),
modelUtils.createServiceCluster(
"cluster-2",
- VespaModelUtil.STORAGENODE_SERVICE_TYPE,
+ ServiceType.STORAGE,
Arrays.asList(
modelUtils.createServiceInstance("config-id-20", hostName6, ServiceStatus.DOWN),
modelUtils.createServiceInstance("config-id-21", hostName7, ServiceStatus.UP)
@@ -176,7 +176,7 @@ public class ApplicationApiImplTest {
modelUtils.createApplicationInstance(Arrays.asList(
modelUtils.createServiceCluster(
"cluster-1",
- VespaModelUtil.STORAGENODE_SERVICE_TYPE,
+ ServiceType.STORAGE,
Arrays.asList(modelUtils.createServiceInstance("config-id-1", hostName1, serviceStatus))
)
));
@@ -269,7 +269,7 @@ public class ApplicationApiImplTest {
modelUtils.createApplicationInstance(Arrays.asList(
modelUtils.createServiceCluster(
"cluster-4",
- VespaModelUtil.STORAGENODE_SERVICE_TYPE,
+ ServiceType.STORAGE,
Arrays.asList(
modelUtils.createServiceInstance("config-id-40", allowedToBeDownHost1, ServiceStatus.UP),
modelUtils.createServiceInstance("config-id-41", noRemarksHost2, ServiceStatus.DOWN)
@@ -285,7 +285,7 @@ public class ApplicationApiImplTest {
),
modelUtils.createServiceCluster(
"cluster-3",
- VespaModelUtil.STORAGENODE_SERVICE_TYPE,
+ ServiceType.STORAGE,
Arrays.asList(
modelUtils.createServiceInstance("config-id-30", allowedToBeDownHost4, ServiceStatus.UP),
modelUtils.createServiceInstance("config-id-31", noRemarksHost5, ServiceStatus.UP)
@@ -293,7 +293,7 @@ public class ApplicationApiImplTest {
),
modelUtils.createServiceCluster(
"cluster-2",
- VespaModelUtil.STORAGENODE_SERVICE_TYPE,
+ ServiceType.STORAGE,
Arrays.asList(
modelUtils.createServiceInstance("config-id-20", noRemarksHost6, ServiceStatus.UP),
modelUtils.createServiceInstance("config-id-21", allowedToBeDownHost7, ServiceStatus.UP)
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java
index 14d35902738..bc621bfc6aa 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java
@@ -12,6 +12,8 @@ import com.yahoo.vespa.applicationmodel.ServiceStatus;
import com.yahoo.vespa.applicationmodel.ServiceStatusInfo;
import com.yahoo.vespa.applicationmodel.ServiceType;
import com.yahoo.vespa.applicationmodel.TenantId;
+import com.yahoo.vespa.flags.Flags;
+import com.yahoo.vespa.flags.InMemoryFlagSource;
import com.yahoo.vespa.orchestrator.OrchestratorUtil;
import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException;
import com.yahoo.vespa.orchestrator.policy.HostedVespaClusterPolicy;
@@ -49,6 +51,7 @@ public class ClusterApiImplTest {
private final ApplicationApi applicationApi = mock(ApplicationApi.class);
private final ModelTestUtils modelUtils = new ModelTestUtils();
private final ManualClock clock = new ManualClock(Instant.ofEpochSecond(1600436659));
+ private final InMemoryFlagSource flagSource = new InMemoryFlagSource();
@Test
public void testServicesDownAndNotInGroup() {
@@ -103,7 +106,7 @@ public class ClusterApiImplTest {
public void testCfg1SuspensionFailsWithMissingCfg3() {
ClusterApiImpl clusterApi = makeCfg1ClusterApi(ServiceStatus.UP, ServiceStatus.UP);
- HostedVespaClusterPolicy policy = new HostedVespaClusterPolicy();
+ HostedVespaClusterPolicy policy = new HostedVespaClusterPolicy(flagSource);
try {
policy.verifyGroupGoingDownIsFine(clusterApi);
@@ -111,16 +114,27 @@ public class ClusterApiImplTest {
} catch (HostStateChangeDeniedException e) {
assertThat(e.getMessage(),
containsString("Changing the state of cfg1 would violate enough-services-up: " +
- "Suspension for service type configserver would increase from 33% to 66%, " +
+ "Suspension of service with type 'configserver' would increase from 33% to 66%, " +
"over the limit of 10%. Services down on resumed hosts: [1 missing config server]."));
}
+
+ flagSource.withBooleanFlag(Flags.GROUP_SUSPENSION.id(), true);
+
+ try {
+ policy.verifyGroupGoingDownIsFine(clusterApi);
+ fail();
+ } catch (HostStateChangeDeniedException e) {
+ assertThat(e.getMessage(),
+ containsString("Suspension of service with type 'configserver' not allowed: 33% are suspended already. " +
+ "Services down on resumed hosts: [1 missing config server]."));
+ }
}
@Test
public void testCfg1SuspendsIfDownWithMissingCfg3() throws HostStateChangeDeniedException {
ClusterApiImpl clusterApi = makeCfg1ClusterApi(ServiceStatus.DOWN, ServiceStatus.UP);
- HostedVespaClusterPolicy policy = new HostedVespaClusterPolicy();
+ HostedVespaClusterPolicy policy = new HostedVespaClusterPolicy(flagSource);
policy.verifyGroupGoingDownIsFine(clusterApi);
}
@@ -129,7 +143,7 @@ public class ClusterApiImplTest {
public void testSingleConfigServerCanSuspend() {
for (var status : EnumSet.of(ServiceStatus.UP, ServiceStatus.DOWN)) {
var clusterApi = makeConfigClusterApi(1, status);
- var policy = new HostedVespaClusterPolicy();
+ var policy = new HostedVespaClusterPolicy(flagSource);
try {
policy.verifyGroupGoingDownIsFine(clusterApi);
} catch (HostStateChangeDeniedException e) {
@@ -227,7 +241,7 @@ public class ClusterApiImplTest {
ServiceCluster serviceCluster = modelUtils.createServiceCluster(
"cluster",
- VespaModelUtil.STORAGENODE_SERVICE_TYPE,
+ ServiceType.STORAGE,
Arrays.asList(
modelUtils.createServiceInstance("storage-1", hostName1, ServiceStatus.UP),
modelUtils.createServiceInstance("storage-2", hostName2, ServiceStatus.DOWN)
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java
index 3f79aa3a098..0f71def5d2e 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ModelTestUtils.java
@@ -65,7 +65,7 @@ class ModelTestUtils {
mock(Metric.class),
new TestTimer(),
new DummyAntiServiceMonitor());
- private final Orchestrator orchestrator = new OrchestratorImpl(new HostedVespaPolicy(new HostedVespaClusterPolicy(), clusterControllerClientFactory, applicationApiFactory()),
+ private final Orchestrator orchestrator = new OrchestratorImpl(new HostedVespaPolicy(new HostedVespaClusterPolicy(flagSource), clusterControllerClientFactory, applicationApiFactory()),
clusterControllerClientFactory,
statusService,
serviceMonitor,
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/VespaModelUtilTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/VespaModelUtilTest.java
index 537101e6c51..d4199da3a74 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/VespaModelUtilTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/VespaModelUtilTest.java
@@ -49,7 +49,7 @@ public class VespaModelUtilTest {
private static final ServiceCluster controllerCluster =
new ServiceCluster(
new ClusterId(CONTENT_CLUSTER_ID.s() + "-controller"),
- VespaModelUtil.CLUSTER_CONTROLLER_SERVICE_TYPE,
+ ServiceType.CLUSTER_CONTROLLER,
makeServiceInstanceSet(controller1, controller0));
// Distributor Service Cluster
@@ -63,7 +63,7 @@ public class VespaModelUtilTest {
private static final ServiceCluster distributorCluster =
new ServiceCluster(
CONTENT_CLUSTER_ID,
- VespaModelUtil.DISTRIBUTOR_SERVICE_TYPE,
+ ServiceType.DISTRIBUTOR,
makeServiceInstanceSet(distributor0));
// Storage Node Service Cluster
@@ -77,7 +77,7 @@ public class VespaModelUtilTest {
private static final ServiceCluster storageCluster =
new ServiceCluster(
CONTENT_CLUSTER_ID,
- VespaModelUtil.STORAGENODE_SERVICE_TYPE,
+ ServiceType.STORAGE,
makeServiceInstanceSet(storage0));
// Secondary Distributor Service Cluster
@@ -91,7 +91,7 @@ public class VespaModelUtilTest {
private static final ServiceCluster secondaryDistributorCluster =
new ServiceCluster(
SECONDARY_CONTENT_CLUSTER_ID,
- VespaModelUtil.DISTRIBUTOR_SERVICE_TYPE,
+ ServiceType.DISTRIBUTOR,
makeServiceInstanceSet(secondaryDistributor0));
// Secondary Storage Node Service Cluster
@@ -105,7 +105,7 @@ public class VespaModelUtilTest {
private static final ServiceCluster secondaryStorageCluster =
new ServiceCluster(
SECONDARY_CONTENT_CLUSTER_ID,
- VespaModelUtil.STORAGENODE_SERVICE_TYPE,
+ ServiceType.STORAGE,
makeServiceInstanceSet(secondaryStorage0));
// The Application Instance
@@ -130,7 +130,7 @@ public class VespaModelUtilTest {
@Test
public void verifyControllerClusterIsRecognized() {
- ServiceCluster cluster = createServiceCluster(VespaModelUtil.CLUSTER_CONTROLLER_SERVICE_TYPE);
+ ServiceCluster cluster = createServiceCluster(ServiceType.CLUSTER_CONTROLLER);
assertTrue(VespaModelUtil.isClusterController(cluster));
}
@@ -142,9 +142,9 @@ public class VespaModelUtilTest {
@Test
public void verifyStorageClusterIsRecognized() {
- ServiceCluster cluster = createServiceCluster(VespaModelUtil.STORAGENODE_SERVICE_TYPE);
+ ServiceCluster cluster = createServiceCluster(ServiceType.STORAGE);
assertTrue(VespaModelUtil.isStorage(cluster));
- cluster = createServiceCluster(VespaModelUtil.STORAGENODE_SERVICE_TYPE);
+ cluster = createServiceCluster(ServiceType.STORAGE);
assertTrue(VespaModelUtil.isStorage(cluster));
}
@@ -156,11 +156,11 @@ public class VespaModelUtilTest {
@Test
public void verifyContentClusterIsRecognized() {
- ServiceCluster cluster = createServiceCluster(VespaModelUtil.DISTRIBUTOR_SERVICE_TYPE);
+ ServiceCluster cluster = createServiceCluster(ServiceType.DISTRIBUTOR);
assertTrue(VespaModelUtil.isContent(cluster));
- cluster = createServiceCluster(VespaModelUtil.STORAGENODE_SERVICE_TYPE);
+ cluster = createServiceCluster(ServiceType.STORAGE);
assertTrue(VespaModelUtil.isContent(cluster));
- cluster = createServiceCluster(VespaModelUtil.SEARCHNODE_SERVICE_TYPE);
+ cluster = createServiceCluster(ServiceType.SEARCH);
assertTrue(VespaModelUtil.isContent(cluster));
}
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java
index e5be87ba043..ea5f1784265 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java
@@ -5,6 +5,7 @@ import com.yahoo.config.provision.ApplicationId;
import com.yahoo.vespa.applicationmodel.ClusterId;
import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.applicationmodel.ServiceType;
+import com.yahoo.vespa.flags.InMemoryFlagSource;
import com.yahoo.vespa.orchestrator.model.ApplicationApi;
import com.yahoo.vespa.orchestrator.model.ClusterApi;
import com.yahoo.vespa.orchestrator.model.NodeGroup;
@@ -24,7 +25,8 @@ import static org.mockito.Mockito.when;
public class HostedVespaClusterPolicyTest {
private ApplicationApi applicationApi = mock(ApplicationApi.class);
private ClusterApi clusterApi = mock(ClusterApi.class);
- private HostedVespaClusterPolicy policy = spy(new HostedVespaClusterPolicy());
+ private final InMemoryFlagSource flagSource = new InMemoryFlagSource();
+ private HostedVespaClusterPolicy policy = spy(new HostedVespaClusterPolicy(flagSource));
@Before
public void setUp() {
@@ -34,9 +36,9 @@ public class HostedVespaClusterPolicyTest {
@Test
public void testSlobrokSuspensionLimit() {
when(clusterApi.clusterId()).thenReturn(VespaModelUtil.ADMIN_CLUSTER_ID);
- when(clusterApi.serviceType()).thenReturn(VespaModelUtil.SLOBROK_SERVICE_TYPE);
+ when(clusterApi.serviceType()).thenReturn(ServiceType.SLOBROK);
assertEquals(ConcurrentSuspensionLimitForCluster.ONE_NODE,
- policy.getConcurrentSuspensionLimit(clusterApi));
+ policy.getConcurrentSuspensionLimit(clusterApi, false));
}
@Test
@@ -44,7 +46,7 @@ public class HostedVespaClusterPolicyTest {
when(clusterApi.clusterId()).thenReturn(VespaModelUtil.ADMIN_CLUSTER_ID);
when(clusterApi.serviceType()).thenReturn(new ServiceType("non-slobrok-service-type"));
assertEquals(ConcurrentSuspensionLimitForCluster.ALL_NODES,
- policy.getConcurrentSuspensionLimit(clusterApi));
+ policy.getConcurrentSuspensionLimit(clusterApi, false));
}
@Test
@@ -52,7 +54,15 @@ public class HostedVespaClusterPolicyTest {
when(clusterApi.clusterId()).thenReturn(new ClusterId("some-cluster-id"));
when(clusterApi.isStorageCluster()).thenReturn(true);
assertEquals(ConcurrentSuspensionLimitForCluster.ONE_NODE,
- policy.getConcurrentSuspensionLimit(clusterApi));
+ policy.getConcurrentSuspensionLimit(clusterApi, false));
+ }
+
+ @Test
+ public void testStorageSuspensionLimit_legacy() {
+ when(clusterApi.clusterId()).thenReturn(new ClusterId("some-cluster-id"));
+ when(clusterApi.isStorageCluster()).thenReturn(true);
+ assertEquals(ConcurrentSuspensionLimitForCluster.ONE_NODE,
+ policy.getConcurrentSuspensionLimit(clusterApi, false));
}
@Test
@@ -60,7 +70,7 @@ public class HostedVespaClusterPolicyTest {
when(applicationApi.applicationId()).thenReturn(VespaModelUtil.TENANT_HOST_APPLICATION_ID);
when(clusterApi.isStorageCluster()).thenReturn(false);
assertEquals(ConcurrentSuspensionLimitForCluster.TWENTY_PERCENT,
- policy.getConcurrentSuspensionLimit(clusterApi));
+ policy.getConcurrentSuspensionLimit(clusterApi, false));
}
@Test
@@ -69,7 +79,7 @@ public class HostedVespaClusterPolicyTest {
when(clusterApi.clusterId()).thenReturn(new ClusterId("some-cluster-id"));
when(clusterApi.isStorageCluster()).thenReturn(false);
assertEquals(ConcurrentSuspensionLimitForCluster.TEN_PERCENT,
- policy.getConcurrentSuspensionLimit(clusterApi));
+ policy.getConcurrentSuspensionLimit(clusterApi, false));
}
@Test
@@ -100,8 +110,9 @@ public class HostedVespaClusterPolicyTest {
when(clusterApi.noServicesOutsideGroupIsDown()).thenReturn(noServicesOutsideGroupIsDown);
when(clusterApi.reasonsForNoServicesInGroupIsUp()).thenReturn(noServicesInGroupIsUp);
when(clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()).thenReturn(20);
- doReturn(ConcurrentSuspensionLimitForCluster.TEN_PERCENT).when(policy).getConcurrentSuspensionLimit(clusterApi);
+ doReturn(ConcurrentSuspensionLimitForCluster.TEN_PERCENT).when(policy).getConcurrentSuspensionLimit(clusterApi, false);
+ when(applicationApi.applicationId()).thenReturn(ApplicationId.fromSerializedForm("a:b:c"));
when(clusterApi.serviceType()).thenReturn(new ServiceType("service-type"));
when(clusterApi.percentageOfServicesDown()).thenReturn(5);
when(clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()).thenReturn(percentageOfServicesDownIfGroupIsAllowedToBeDown);
@@ -123,7 +134,7 @@ public class HostedVespaClusterPolicyTest {
} catch (HostStateChangeDeniedException e) {
if (!expectSuccess) {
assertEquals("Changing the state of node-group would violate enough-services-up: " +
- "Suspension for service type service-type would increase from 5% to 13%, " +
+ "Suspension of service with type 'service-type' would increase from 5% to 13%, " +
"over the limit of 10%. Down description", e.getMessage());
assertEquals("enough-services-up", e.getConstraintName());
}