aboutsummaryrefslogtreecommitdiffstats
path: root/orchestrator
diff options
context:
space:
mode:
authorHåkon Hallingstad <hakon@verizonmedia.com>2021-03-23 11:56:24 +0100
committerHåkon Hallingstad <hakon@verizonmedia.com>2021-03-23 11:56:24 +0100
commit44f64849fdd2a0dc62e84eb701bae1216d0b2215 (patch)
treeeb8700943c722c4e5bfc62714580025e01f49a5d /orchestrator
parent81871d3f99f2155b0d81b347b61ac685a7bbc13e (diff)
Require 3 config server (and controller) hosts
We already require 3 config server (and controller) nodes, but it is not sufficient to protect the hosts from being left with only 1 healthy host: Say the config server host application contains 2 nodes. An upgrade of host-admin on one of those nodes is allowed, since only the host is suspended and none of the 2 nodes are down. This is fixed by handling config server hosts similar to config servers: assume 3 nodes.
Diffstat (limited to 'orchestrator')
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java5
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java7
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java66
3 files changed, 68 insertions, 10 deletions
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java
index 7417318c572..57dcb5f3069 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java
@@ -81,9 +81,10 @@ class ClusterApiImpl implements ClusterApi {
servicesDownAndNotInGroup = servicesNotInGroup.stream().filter(this::serviceEffectivelyDown).collect(Collectors.toSet());
int serviceInstances = serviceCluster.serviceInstances().size();
- if (serviceCluster.isConfigServerLike() && serviceInstances < numberOfConfigServers) {
+ if ((serviceCluster.isConfigServerLike() || serviceCluster.isConfigServerHostLike()) &&
+ serviceInstances < numberOfConfigServers) {
missingServices = numberOfConfigServers - serviceInstances;
- descriptionOfMissingServices = missingServices + " missing config server" + (missingServices > 1 ? "s" : "");
+ descriptionOfMissingServices = missingServices + " missing " + serviceCluster.nodeDescription(missingServices > 1);
} else {
missingServices = 0;
descriptionOfMissingServices = "NA";
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java
index ff1c56f6b2f..47586425c1d 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.orchestrator.policy;
+import com.yahoo.vespa.applicationmodel.ClusterId;
import com.yahoo.vespa.applicationmodel.ServiceType;
import com.yahoo.vespa.flags.BooleanFlag;
import com.yahoo.vespa.flags.FetchVector;
@@ -149,7 +150,11 @@ public class HostedVespaClusterPolicy implements ClusterPolicy {
return ConcurrentSuspensionLimitForCluster.ONE_NODE;
}
- if (clusterApi.serviceType().equals(ServiceType.HOST_ADMIN)) {
+ if (clusterApi.serviceType() == ServiceType.HOST_ADMIN) {
+ if (Set.of(ClusterId.CONFIG_SERVER_HOST, ClusterId.CONTROLLER_HOST).contains(clusterApi.clusterId())) {
+ return ConcurrentSuspensionLimitForCluster.ONE_NODE;
+ }
+
return ConcurrentSuspensionLimitForCluster.TWENTY_PERCENT;
}
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java
index 412a08a8305..6d7c79176fb 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java
@@ -1,17 +1,15 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.orchestrator.model;
+import com.yahoo.config.provision.Zone;
import com.yahoo.test.ManualClock;
import com.yahoo.vespa.applicationmodel.ApplicationInstance;
-import com.yahoo.vespa.applicationmodel.ApplicationInstanceId;
-import com.yahoo.vespa.applicationmodel.ClusterId;
import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.applicationmodel.ServiceCluster;
import com.yahoo.vespa.applicationmodel.ServiceInstance;
import com.yahoo.vespa.applicationmodel.ServiceStatus;
import com.yahoo.vespa.applicationmodel.ServiceStatusInfo;
import com.yahoo.vespa.applicationmodel.ServiceType;
-import com.yahoo.vespa.applicationmodel.TenantId;
import com.yahoo.vespa.flags.Flags;
import com.yahoo.vespa.flags.InMemoryFlagSource;
import com.yahoo.vespa.orchestrator.OrchestratorUtil;
@@ -20,6 +18,9 @@ import com.yahoo.vespa.orchestrator.policy.HostedVespaClusterPolicy;
import com.yahoo.vespa.orchestrator.policy.SuspensionReasons;
import com.yahoo.vespa.orchestrator.status.HostInfos;
import com.yahoo.vespa.orchestrator.status.HostStatus;
+import com.yahoo.vespa.service.duper.ConfigServerApplication;
+import com.yahoo.vespa.service.duper.ConfigServerHostApplication;
+import com.yahoo.vespa.service.duper.InfraApplication;
import org.junit.Test;
import java.time.Duration;
@@ -131,6 +132,38 @@ public class ClusterApiImplTest {
}
@Test
+ public void testCfghost1SuspensionFailsWithMissingCfghost3() {
+ ClusterApiImpl clusterApi = makeConfigClusterApi(
+ ModelTestUtils.NUMBER_OF_CONFIG_SERVERS,
+ new ConfigServerHostApplication(),
+ ServiceStatus.UP,
+ ServiceStatus.UP);
+
+ HostedVespaClusterPolicy policy = new HostedVespaClusterPolicy(flagSource);
+
+ try {
+ policy.verifyGroupGoingDownIsFine(clusterApi);
+ fail();
+ } catch (HostStateChangeDeniedException e) {
+ assertThat(e.getMessage(),
+ containsString("Changing the state of cfg1 would violate enough-services-up: " +
+ "Suspension of service with type 'hostadmin' not allowed: 33% are suspended already. " +
+ "Services down on resumed hosts: [1 missing config server host]."));
+ }
+
+ flagSource.withBooleanFlag(Flags.GROUP_SUSPENSION.id(), true);
+
+ try {
+ policy.verifyGroupGoingDownIsFine(clusterApi);
+ fail();
+ } catch (HostStateChangeDeniedException e) {
+ assertThat(e.getMessage(),
+ containsString("Suspension of service with type 'hostadmin' not allowed: 33% are suspended already. " +
+ "Services down on resumed hosts: [1 missing config server host]."));
+ }
+ }
+
+ @Test
public void testCfg1SuspendsIfDownWithMissingCfg3() throws HostStateChangeDeniedException {
ClusterApiImpl clusterApi = makeCfg1ClusterApi(ServiceStatus.DOWN, ServiceStatus.UP);
@@ -140,6 +173,19 @@ public class ClusterApiImplTest {
}
@Test
+ public void testCfghost1SuspendsIfDownWithMissingCfghost3() throws HostStateChangeDeniedException {
+ ClusterApiImpl clusterApi = makeConfigClusterApi(
+ ModelTestUtils.NUMBER_OF_CONFIG_SERVERS,
+ new ConfigServerHostApplication(),
+ ServiceStatus.DOWN,
+ ServiceStatus.UP);
+
+ HostedVespaClusterPolicy policy = new HostedVespaClusterPolicy(flagSource);
+
+ policy.verifyGroupGoingDownIsFine(clusterApi);
+ }
+
+ @Test
public void testSingleConfigServerCanSuspend() {
for (var status : EnumSet.of(ServiceStatus.UP, ServiceStatus.DOWN)) {
var clusterApi = makeConfigClusterApi(1, status);
@@ -265,6 +311,11 @@ public class ClusterApiImplTest {
}
private ClusterApiImpl makeConfigClusterApi(int clusterSize, ServiceStatus first, ServiceStatus... rest) {
+ return makeConfigClusterApi(clusterSize, new ConfigServerApplication(), first, rest);
+ }
+
+ private ClusterApiImpl makeConfigClusterApi(int clusterSize, InfraApplication infraApplication,
+ ServiceStatus first, ServiceStatus... rest) {
var serviceStatusList = new ArrayList<ServiceStatus>();
serviceStatusList.add(first);
serviceStatusList.addAll(List.of(rest));
@@ -275,9 +326,10 @@ public class ClusterApiImplTest {
for (int i = 0; i < hostnames.size(); i++) {
instances.add(modelUtils.createServiceInstance("cs" + i + 1, hostnames.get(i), serviceStatusList.get(i)));
}
+
ServiceCluster serviceCluster = modelUtils.createServiceCluster(
- ClusterId.CONFIG_SERVER.s(),
- ServiceType.CONFIG_SERVER,
+ infraApplication.getClusterId().s(),
+ infraApplication.getServiceType(),
instances
);
for (var instance : instances) {
@@ -287,8 +339,8 @@ public class ClusterApiImplTest {
Set<ServiceCluster> serviceClusterSet = Set.of(serviceCluster);
ApplicationInstance application = new ApplicationInstance(
- TenantId.HOSTED_VESPA,
- ApplicationInstanceId.CONFIG_SERVER,
+ infraApplication.getTenantId(),
+ infraApplication.getApplicationInstanceId(Zone.defaultZone()),
serviceClusterSet);
serviceCluster.setApplicationInstance(application);