aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHåkon Hallingstad <hakon@verizonmedia.com>2021-03-23 11:56:24 +0100
committerHåkon Hallingstad <hakon@verizonmedia.com>2021-03-23 11:56:24 +0100
commit44f64849fdd2a0dc62e84eb701bae1216d0b2215 (patch)
treeeb8700943c722c4e5bfc62714580025e01f49a5d
parent81871d3f99f2155b0d81b347b61ac685a7bbc13e (diff)
Require 3 config server (and controller) hosts
We already require 3 config server (and controller) nodes, but it is not sufficient to protect the hosts from being left with only 1 healthy host: Say the config server host application contains 2 nodes. An upgrade of host-admin on one of those nodes is allowed, since only the host is suspended and none of the 2 nodes are down. This is fixed by handling config server hosts similar to config servers: assume 3 nodes.
-rw-r--r--application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceCluster.java14
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java5
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java7
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java66
-rw-r--r--service-monitor/src/main/java/com/yahoo/vespa/service/duper/ConfigServerApplication.java11
-rw-r--r--service-monitor/src/main/java/com/yahoo/vespa/service/duper/InfraApplication.java6
-rw-r--r--service-monitor/src/main/java/com/yahoo/vespa/service/model/ApplicationInstanceGenerator.java2
7 files changed, 98 insertions, 13 deletions
diff --git a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceCluster.java b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceCluster.java
index 43f161cfec9..417b5792586 100644
--- a/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceCluster.java
+++ b/application-model/src/main/java/com/yahoo/vespa/applicationmodel/ServiceCluster.java
@@ -81,12 +81,26 @@ public class ServiceCluster {
Objects.equals(serviceType, ServiceType.HOST_ADMIN);
}
+ public boolean isConfigServerHostLike() {
+ return isConfigServerHost() || isControllerHost();
+ }
+
public boolean isTenantHost() {
return isHostedVespaApplicationWithPredicate(ApplicationInstanceId::isTenantHost) &&
Objects.equals(clusterId, ClusterId.TENANT_HOST) &&
Objects.equals(serviceType, ServiceType.HOST_ADMIN);
}
+ public String nodeDescription(boolean plural) {
+ String pluralSuffix = plural ? "s" : "";
+ return isConfigServer() ? "config server" + pluralSuffix :
+ isConfigServerHost() ? "config server host" + pluralSuffix :
+ isController() ? "controller" + pluralSuffix :
+ isControllerHost() ? "controller host" + pluralSuffix :
+ isTenantHost() ? "tenant host" + pluralSuffix :
+ "node" + pluralSuffix + " of {" + serviceType + "," + clusterId + "}";
+ }
+
private boolean isHostedVespaApplicationWithId(ApplicationInstanceId id) {
return isHostedVespaTenant() &&
applicationInstance.map(app -> Objects.equals(app.applicationInstanceId(), id)).orElse(false);
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java
index 7417318c572..57dcb5f3069 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java
@@ -81,9 +81,10 @@ class ClusterApiImpl implements ClusterApi {
servicesDownAndNotInGroup = servicesNotInGroup.stream().filter(this::serviceEffectivelyDown).collect(Collectors.toSet());
int serviceInstances = serviceCluster.serviceInstances().size();
- if (serviceCluster.isConfigServerLike() && serviceInstances < numberOfConfigServers) {
+ if ((serviceCluster.isConfigServerLike() || serviceCluster.isConfigServerHostLike()) &&
+ serviceInstances < numberOfConfigServers) {
missingServices = numberOfConfigServers - serviceInstances;
- descriptionOfMissingServices = missingServices + " missing config server" + (missingServices > 1 ? "s" : "");
+ descriptionOfMissingServices = missingServices + " missing " + serviceCluster.nodeDescription(missingServices > 1);
} else {
missingServices = 0;
descriptionOfMissingServices = "NA";
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java
index ff1c56f6b2f..47586425c1d 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java
@@ -1,6 +1,7 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.orchestrator.policy;
+import com.yahoo.vespa.applicationmodel.ClusterId;
import com.yahoo.vespa.applicationmodel.ServiceType;
import com.yahoo.vespa.flags.BooleanFlag;
import com.yahoo.vespa.flags.FetchVector;
@@ -149,7 +150,11 @@ public class HostedVespaClusterPolicy implements ClusterPolicy {
return ConcurrentSuspensionLimitForCluster.ONE_NODE;
}
- if (clusterApi.serviceType().equals(ServiceType.HOST_ADMIN)) {
+ if (clusterApi.serviceType() == ServiceType.HOST_ADMIN) {
+ if (Set.of(ClusterId.CONFIG_SERVER_HOST, ClusterId.CONTROLLER_HOST).contains(clusterApi.clusterId())) {
+ return ConcurrentSuspensionLimitForCluster.ONE_NODE;
+ }
+
return ConcurrentSuspensionLimitForCluster.TWENTY_PERCENT;
}
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java
index 412a08a8305..6d7c79176fb 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java
@@ -1,17 +1,15 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.orchestrator.model;
+import com.yahoo.config.provision.Zone;
import com.yahoo.test.ManualClock;
import com.yahoo.vespa.applicationmodel.ApplicationInstance;
-import com.yahoo.vespa.applicationmodel.ApplicationInstanceId;
-import com.yahoo.vespa.applicationmodel.ClusterId;
import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.applicationmodel.ServiceCluster;
import com.yahoo.vespa.applicationmodel.ServiceInstance;
import com.yahoo.vespa.applicationmodel.ServiceStatus;
import com.yahoo.vespa.applicationmodel.ServiceStatusInfo;
import com.yahoo.vespa.applicationmodel.ServiceType;
-import com.yahoo.vespa.applicationmodel.TenantId;
import com.yahoo.vespa.flags.Flags;
import com.yahoo.vespa.flags.InMemoryFlagSource;
import com.yahoo.vespa.orchestrator.OrchestratorUtil;
@@ -20,6 +18,9 @@ import com.yahoo.vespa.orchestrator.policy.HostedVespaClusterPolicy;
import com.yahoo.vespa.orchestrator.policy.SuspensionReasons;
import com.yahoo.vespa.orchestrator.status.HostInfos;
import com.yahoo.vespa.orchestrator.status.HostStatus;
+import com.yahoo.vespa.service.duper.ConfigServerApplication;
+import com.yahoo.vespa.service.duper.ConfigServerHostApplication;
+import com.yahoo.vespa.service.duper.InfraApplication;
import org.junit.Test;
import java.time.Duration;
@@ -131,6 +132,38 @@ public class ClusterApiImplTest {
}
@Test
+ public void testCfghost1SuspensionFailsWithMissingCfghost3() {
+ ClusterApiImpl clusterApi = makeConfigClusterApi(
+ ModelTestUtils.NUMBER_OF_CONFIG_SERVERS,
+ new ConfigServerHostApplication(),
+ ServiceStatus.UP,
+ ServiceStatus.UP);
+
+ HostedVespaClusterPolicy policy = new HostedVespaClusterPolicy(flagSource);
+
+ try {
+ policy.verifyGroupGoingDownIsFine(clusterApi);
+ fail();
+ } catch (HostStateChangeDeniedException e) {
+ assertThat(e.getMessage(),
+ containsString("Changing the state of cfg1 would violate enough-services-up: " +
+ "Suspension of service with type 'hostadmin' not allowed: 33% are suspended already. " +
+ "Services down on resumed hosts: [1 missing config server host]."));
+ }
+
+ flagSource.withBooleanFlag(Flags.GROUP_SUSPENSION.id(), true);
+
+ try {
+ policy.verifyGroupGoingDownIsFine(clusterApi);
+ fail();
+ } catch (HostStateChangeDeniedException e) {
+ assertThat(e.getMessage(),
+ containsString("Suspension of service with type 'hostadmin' not allowed: 33% are suspended already. " +
+ "Services down on resumed hosts: [1 missing config server host]."));
+ }
+ }
+
+ @Test
public void testCfg1SuspendsIfDownWithMissingCfg3() throws HostStateChangeDeniedException {
ClusterApiImpl clusterApi = makeCfg1ClusterApi(ServiceStatus.DOWN, ServiceStatus.UP);
@@ -140,6 +173,19 @@ public class ClusterApiImplTest {
}
@Test
+ public void testCfghost1SuspendsIfDownWithMissingCfghost3() throws HostStateChangeDeniedException {
+ ClusterApiImpl clusterApi = makeConfigClusterApi(
+ ModelTestUtils.NUMBER_OF_CONFIG_SERVERS,
+ new ConfigServerHostApplication(),
+ ServiceStatus.DOWN,
+ ServiceStatus.UP);
+
+ HostedVespaClusterPolicy policy = new HostedVespaClusterPolicy(flagSource);
+
+ policy.verifyGroupGoingDownIsFine(clusterApi);
+ }
+
+ @Test
public void testSingleConfigServerCanSuspend() {
for (var status : EnumSet.of(ServiceStatus.UP, ServiceStatus.DOWN)) {
var clusterApi = makeConfigClusterApi(1, status);
@@ -265,6 +311,11 @@ public class ClusterApiImplTest {
}
private ClusterApiImpl makeConfigClusterApi(int clusterSize, ServiceStatus first, ServiceStatus... rest) {
+ return makeConfigClusterApi(clusterSize, new ConfigServerApplication(), first, rest);
+ }
+
+ private ClusterApiImpl makeConfigClusterApi(int clusterSize, InfraApplication infraApplication,
+ ServiceStatus first, ServiceStatus... rest) {
var serviceStatusList = new ArrayList<ServiceStatus>();
serviceStatusList.add(first);
serviceStatusList.addAll(List.of(rest));
@@ -275,9 +326,10 @@ public class ClusterApiImplTest {
for (int i = 0; i < hostnames.size(); i++) {
instances.add(modelUtils.createServiceInstance("cs" + i + 1, hostnames.get(i), serviceStatusList.get(i)));
}
+
ServiceCluster serviceCluster = modelUtils.createServiceCluster(
- ClusterId.CONFIG_SERVER.s(),
- ServiceType.CONFIG_SERVER,
+ infraApplication.getClusterId().s(),
+ infraApplication.getServiceType(),
instances
);
for (var instance : instances) {
@@ -287,8 +339,8 @@ public class ClusterApiImplTest {
Set<ServiceCluster> serviceClusterSet = Set.of(serviceCluster);
ApplicationInstance application = new ApplicationInstance(
- TenantId.HOSTED_VESPA,
- ApplicationInstanceId.CONFIG_SERVER,
+ infraApplication.getTenantId(),
+ infraApplication.getApplicationInstanceId(Zone.defaultZone()),
serviceClusterSet);
serviceCluster.setApplicationInstance(application);
diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/duper/ConfigServerApplication.java b/service-monitor/src/main/java/com/yahoo/vespa/service/duper/ConfigServerApplication.java
index f8eec00a340..cf9825b26cf 100644
--- a/service-monitor/src/main/java/com/yahoo/vespa/service/duper/ConfigServerApplication.java
+++ b/service-monitor/src/main/java/com/yahoo/vespa/service/duper/ConfigServerApplication.java
@@ -3,6 +3,8 @@ package com.yahoo.vespa.service.duper;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.NodeType;
+import com.yahoo.config.provision.Zone;
+import com.yahoo.vespa.applicationmodel.ApplicationInstanceId;
import com.yahoo.vespa.applicationmodel.ServiceType;
/**
@@ -16,4 +18,13 @@ public class ConfigServerApplication extends ConfigServerLikeApplication {
super("zone-config-servers", NodeType.config, ClusterSpec.Type.admin, ServiceType.CONFIG_SERVER);
}
+ /**
+ * A config server application has a particularly simple ApplicationInstanceId.
+ *
+ * @see InfraApplication#getApplicationInstanceId(Zone)
+ */
+ public ApplicationInstanceId getApplicationInstanceId() {
+ return new ApplicationInstanceId(getApplicationId().application().value());
+ }
+
}
diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/duper/InfraApplication.java b/service-monitor/src/main/java/com/yahoo/vespa/service/duper/InfraApplication.java
index da00ebc41e0..1bdf1ff67d9 100644
--- a/service-monitor/src/main/java/com/yahoo/vespa/service/duper/InfraApplication.java
+++ b/service-monitor/src/main/java/com/yahoo/vespa/service/duper/InfraApplication.java
@@ -12,12 +12,14 @@ import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.HostName;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.TenantName;
+import com.yahoo.config.provision.Zone;
import com.yahoo.vespa.applicationmodel.ApplicationInstanceId;
import com.yahoo.vespa.applicationmodel.ClusterId;
import com.yahoo.vespa.applicationmodel.ConfigId;
import com.yahoo.vespa.applicationmodel.ServiceType;
import com.yahoo.vespa.applicationmodel.TenantId;
import com.yahoo.vespa.service.health.StateV1HealthModel;
+import com.yahoo.vespa.service.model.ApplicationInstanceGenerator;
import com.yahoo.vespa.service.model.ModelGenerator;
import com.yahoo.vespa.service.monitor.InfraApplicationApi;
@@ -94,8 +96,8 @@ public abstract class InfraApplication implements InfraApplicationApi {
return serviceType;
}
- public ApplicationInstanceId getApplicationInstanceId() {
- return new ApplicationInstanceId(applicationId.application().value());
+ public ApplicationInstanceId getApplicationInstanceId(Zone zone) {
+ return ApplicationInstanceGenerator.toApplicationInstanceId(applicationId, zone);
}
public TenantId getTenantId() {
diff --git a/service-monitor/src/main/java/com/yahoo/vespa/service/model/ApplicationInstanceGenerator.java b/service-monitor/src/main/java/com/yahoo/vespa/service/model/ApplicationInstanceGenerator.java
index d0ecad5f27a..60e22639e8b 100644
--- a/service-monitor/src/main/java/com/yahoo/vespa/service/model/ApplicationInstanceGenerator.java
+++ b/service-monitor/src/main/java/com/yahoo/vespa/service/model/ApplicationInstanceGenerator.java
@@ -164,7 +164,7 @@ public class ApplicationInstanceGenerator {
return new ServiceInstance(configId, hostName, status);
}
- private static ApplicationInstanceId toApplicationInstanceId(ApplicationId applicationId, Zone zone) {
+ public static ApplicationInstanceId toApplicationInstanceId(ApplicationId applicationId, Zone zone) {
if (applicationId.equals(configServerApplicationId)) {
// Removing this historical discrepancy would break orchestration during rollout.
// An alternative may be to use a feature flag and flip it between releases,