aboutsummaryrefslogtreecommitdiffstats
path: root/orchestrator
diff options
context:
space:
mode:
authorHåkon Hallingstad <hakon@yahooinc.com>2021-09-14 13:33:21 +0200
committerHåkon Hallingstad <hakon@yahooinc.com>2021-09-14 13:33:21 +0200
commit19b4e2f9aef020f520adae5b6f3ff0175b3b94ad (patch)
tree7bb65ff937dfe4e32199914a85e765af362702d1 /orchestrator
parent6cb447d48d363e736effb6e57b82f0b9193ed077 (diff)
Deny suspension with special constraint on unknown status
Diffstat (limited to 'orchestrator')
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java5
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java21
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java5
-rw-r--r--orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java1
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java81
-rw-r--r--orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java34
6 files changed, 119 insertions, 28 deletions
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java
index 78373282df8..fd115702588 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApi.java
@@ -1,9 +1,9 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.orchestrator.model;
-import com.yahoo.config.provision.Zone;
import com.yahoo.vespa.applicationmodel.ClusterId;
import com.yahoo.vespa.applicationmodel.ServiceType;
+import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException;
import com.yahoo.vespa.orchestrator.policy.SuspensionReasons;
import java.util.Optional;
@@ -21,7 +21,7 @@ public interface ClusterApi {
/** Returns the reasons no services are up in the implied group, or empty if some services are up. */
Optional<SuspensionReasons> reasonsForNoServicesInGroupIsUp();
- boolean noServicesOutsideGroupIsDown();
+ boolean noServicesOutsideGroupIsDown() throws HostStateChangeDeniedException;
int percentageOfServicesDown();
int percentageOfServicesDownIfGroupIsAllowedToBeDown();
@@ -30,5 +30,4 @@ public interface ClusterApi {
Optional<StorageNode> upStorageNodeInGroup();
String downDescription();
-
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java
index b8538079194..00cb65a09b0 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ClusterApiImpl.java
@@ -9,6 +9,8 @@ import com.yahoo.vespa.applicationmodel.ServiceStatus;
import com.yahoo.vespa.applicationmodel.ServiceType;
import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory;
import com.yahoo.vespa.orchestrator.policy.ClusterParams;
+import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException;
+import com.yahoo.vespa.orchestrator.policy.HostedVespaPolicy;
import com.yahoo.vespa.orchestrator.policy.SuspensionReasons;
import com.yahoo.vespa.orchestrator.status.HostInfos;
import com.yahoo.vespa.orchestrator.status.HostStatus;
@@ -17,6 +19,7 @@ import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
import java.util.Collections;
+import java.util.Comparator;
import java.util.HashSet;
import java.util.Map;
import java.util.Optional;
@@ -126,10 +129,7 @@ class ClusterApiImpl implements ClusterApi {
continue;
}
- if (service.serviceStatus() == ServiceStatus.UNKNOWN) {
- reasons.mergeWith(SuspensionReasons.unknownStatus(service));
- continue;
- } else if (service.serviceStatus() == ServiceStatus.DOWN) {
+ if (service.serviceStatus() == ServiceStatus.DOWN) {
Optional<Instant> since = service.serviceStatusInfo().since();
if (since.isEmpty()) {
reasons.mergeWith(SuspensionReasons.isDown(service));
@@ -155,7 +155,18 @@ class ClusterApiImpl implements ClusterApi {
int missingServices() { return missingServices; }
@Override
- public boolean noServicesOutsideGroupIsDown() {
+ public boolean noServicesOutsideGroupIsDown() throws HostStateChangeDeniedException {
+ Optional<ServiceInstance> serviceWithUnknownStatus = servicesNotInGroup
+ .stream()
+ .filter(serviceInstance -> serviceInstance.serviceStatus() == ServiceStatus.UNKNOWN)
+ .min(Comparator.comparing(ServiceInstance::descriptiveName));
+ if (serviceWithUnknownStatus.isPresent()) {
+ throw new HostStateChangeDeniedException(
+ nodeGroup,
+ HostedVespaPolicy.UNKNOWN_SERVICE_STATUS,
+ "Service status of " + serviceWithUnknownStatus.get().descriptiveName() + " is not yet known");
+ }
+
return servicesDownAndNotInGroup.size() + missingServices == 0;
}
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java
index fd5e8c33c1b..d183e863500 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicy.java
@@ -56,9 +56,8 @@ public class HostedVespaClusterPolicy implements ClusterPolicy {
@Override
public void verifyGroupGoingDownPermanentlyIsFine(ClusterApi clusterApi) throws HostStateChangeDeniedException {
- // This policy is similar to verifyGroupGoingDownIsFine, except that services being down in the group
- // is no excuse to allow suspension (like it is for verifyGroupGoingDownIsFine), since if we grant
- // suspension in this case they will permanently be down/removed.
+ // This policy is similar to verifyGroupGoingDownIsFine, except that having no services up in the group will
+ // not allow the suspension: We are a bit more cautious when removing nodes.
if (clusterApi.noServicesOutsideGroupIsDown()) {
return;
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java
index d9fc2a989de..ad37100fa16 100644
--- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java
+++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java
@@ -22,6 +22,7 @@ public class HostedVespaPolicy implements Policy {
public static final String APPLICATION_SUSPENDED_CONSTRAINT = "application-suspended";
public static final String ENOUGH_SERVICES_UP_CONSTRAINT = "enough-services-up";
+ public static final String UNKNOWN_SERVICE_STATUS = "unknown-service-status";
public static final String SET_NODE_STATE_CONSTRAINT = "controller-set-node-state";
public static final String CLUSTER_CONTROLLER_AVAILABLE_CONSTRAINT = "controller-available";
public static final String DEADLINE_CONSTRAINT = "deadline";
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java
index 2946c82eab8..1e29f0ca5de 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java
@@ -16,6 +16,7 @@ import com.yahoo.vespa.orchestrator.OrchestratorUtil;
import com.yahoo.vespa.orchestrator.policy.ClusterParams;
import com.yahoo.vespa.orchestrator.policy.HostStateChangeDeniedException;
import com.yahoo.vespa.orchestrator.policy.HostedVespaClusterPolicy;
+import com.yahoo.vespa.orchestrator.policy.HostedVespaPolicy;
import com.yahoo.vespa.orchestrator.policy.SuspensionReasons;
import com.yahoo.vespa.orchestrator.status.HostInfos;
import com.yahoo.vespa.orchestrator.status.HostStatus;
@@ -103,13 +104,75 @@ public class ClusterApiImplTest {
assertEquals(80, clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown());
}
+ @Test
+ public void testUnknownServiceStatusInGroup() throws HostStateChangeDeniedException {
+ ClusterApi clusterApi = makeClusterApiWithUnknownStatus(ServiceStatus.UNKNOWN, ServiceStatus.UP, ServiceStatus.UP);
+ clusterApi.noServicesOutsideGroupIsDown();
+ }
+
+ @Test
+ public void testUnknownServiceStatusOutsideGroup() {
+ ClusterApi clusterApi = makeClusterApiWithUnknownStatus(ServiceStatus.UP, ServiceStatus.UNKNOWN, ServiceStatus.UP);
+
+ try {
+ clusterApi.noServicesOutsideGroupIsDown();
+ fail();
+ } catch (HostStateChangeDeniedException e) {
+ assertEquals(HostedVespaPolicy.UNKNOWN_SERVICE_STATUS, e.getConstraintName());
+ }
+ }
+
+ /** The first service status will be for the service IN the node group, the rest are OUTSIDE. */
+ private ClusterApi makeClusterApiWithUnknownStatus(ServiceStatus... statuses) {
+ var serviceStatusList = List.of(statuses);
+ var infraApplication = new ConfigServerApplication();
+
+ var hostnames = IntStream.rangeClosed(1, serviceStatusList.size())
+ .mapToObj(i -> new HostName("cfg" + i))
+ .collect(Collectors.toList());
+ var instances = new ArrayList<ServiceInstance>();
+ for (int i = 0; i < hostnames.size(); i++) {
+ instances.add(modelUtils.createServiceInstance("cs" + i + 1, hostnames.get(i), serviceStatusList.get(i)));
+ }
+
+ ServiceCluster serviceCluster = modelUtils.createServiceCluster(
+ infraApplication.getClusterId().s(),
+ infraApplication.getServiceType(),
+ instances
+ );
+ for (var instance : instances) {
+ instance.setServiceCluster(serviceCluster);
+ }
+
+ Set<ServiceCluster> serviceClusterSet = Set.of(serviceCluster);
+
+ ApplicationInstance application = new ApplicationInstance(
+ infraApplication.getTenantId(),
+ infraApplication.getApplicationInstanceId(Zone.defaultZone()),
+ serviceClusterSet);
+
+ serviceCluster.setApplicationInstance(application);
+
+ when(applicationApi.applicationId()).thenReturn(OrchestratorUtil.toApplicationId(application.reference()));
+
+ return new ClusterApiImpl(
+ applicationApi,
+ serviceCluster,
+ new NodeGroup(application, hostnames.get(0)),
+ modelUtils.getHostInfos(),
+ modelUtils.getClusterControllerClientFactory(),
+ new ClusterParams.Builder().setSize(serviceStatusList.size()).build(),
+ clock);
+ }
+
/** Make a ClusterApiImpl for the cfg1 config server, with cfg3 missing from the cluster (not provisioned). */
- private ClusterApiImpl makeCfg1ClusterApi(ServiceStatus cfg1ServiceStatus, ServiceStatus cfg2ServiceStatus) {
+ private ClusterApiImpl makeCfg1ClusterApi(ServiceStatus cfg1ServiceStatus, ServiceStatus cfg2ServiceStatus)
+ throws HostStateChangeDeniedException {
return makeConfigClusterApi(ModelTestUtils.NUMBER_OF_CONFIG_SERVERS, cfg1ServiceStatus, cfg2ServiceStatus);
}
@Test
- public void testCfg1SuspensionFailsWithMissingCfg3() {
+ public void testCfg1SuspensionFailsWithMissingCfg3() throws HostStateChangeDeniedException {
ClusterApiImpl clusterApi = makeCfg1ClusterApi(ServiceStatus.UP, ServiceStatus.UP);
HostedVespaClusterPolicy policy = new HostedVespaClusterPolicy(flagSource, zone);
@@ -137,7 +200,7 @@ public class ClusterApiImplTest {
}
@Test
- public void testCfghost1SuspensionFailsWithMissingCfghost3() {
+ public void testCfghost1SuspensionFailsWithMissingCfghost3() throws HostStateChangeDeniedException {
ClusterApiImpl clusterApi = makeConfigClusterApi(
ModelTestUtils.NUMBER_OF_CONFIG_SERVERS,
new ConfigServerHostApplication(),
@@ -191,7 +254,7 @@ public class ClusterApiImplTest {
}
@Test
- public void testSingleConfigServerCanSuspend() {
+ public void testSingleConfigServerCanSuspend() throws HostStateChangeDeniedException {
for (var status : EnumSet.of(ServiceStatus.UP, ServiceStatus.DOWN, ServiceStatus.UNKNOWN)) {
var clusterApi = makeConfigClusterApi(1, status);
var policy = new HostedVespaClusterPolicy(flagSource, zone);
@@ -204,7 +267,7 @@ public class ClusterApiImplTest {
}
@Test
- public void testNoServices() {
+ public void testNoServices() throws HostStateChangeDeniedException {
HostName hostName1 = new HostName("host1");
HostName hostName2 = new HostName("host2");
HostName hostName3 = new HostName("host3");
@@ -271,7 +334,7 @@ public class ClusterApiImplTest {
private void verifyNoServices(ServiceCluster serviceCluster,
Optional<SuspensionReasons> expectedNoServicesInGroupIsUp,
boolean expectedNoServicesOutsideGroupIsDown,
- HostName... groupNodes) {
+ HostName... groupNodes) throws HostStateChangeDeniedException {
ClusterApiImpl clusterApi = new ClusterApiImpl(
applicationApi,
serviceCluster,
@@ -318,12 +381,14 @@ public class ClusterApiImplTest {
assertEquals(Optional.of(hostName1), clusterApi.storageNodeInGroup().map(storageNode -> storageNode.hostName()));
}
- private ClusterApiImpl makeConfigClusterApi(int clusterSize, ServiceStatus first, ServiceStatus... rest) {
+ private ClusterApiImpl makeConfigClusterApi(int clusterSize, ServiceStatus first, ServiceStatus... rest)
+ throws HostStateChangeDeniedException {
return makeConfigClusterApi(clusterSize, new ConfigServerApplication(), first, rest);
}
private ClusterApiImpl makeConfigClusterApi(int clusterSize, InfraApplication infraApplication,
- ServiceStatus first, ServiceStatus... rest) {
+ ServiceStatus first, ServiceStatus... rest)
+ throws HostStateChangeDeniedException {
var serviceStatusList = new ArrayList<ServiceStatus>();
serviceStatusList.add(first);
serviceStatusList.addAll(List.of(rest));
diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java
index a100b3efc52..0c3da1656bc 100644
--- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java
+++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaClusterPolicyTest.java
@@ -20,6 +20,7 @@ import java.util.Optional;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.when;
@@ -36,6 +37,25 @@ public class HostedVespaClusterPolicyTest {
public void setUp() {
when(clusterApi.getApplication()).thenReturn(applicationApi);
when(zone.system()).thenReturn(SystemName.main);
+
+ NodeGroup nodeGroup = mock(NodeGroup.class);
+ when(clusterApi.getNodeGroup()).thenReturn(nodeGroup);
+ when(nodeGroup.toCommaSeparatedString()).thenReturn("node-group");
+ }
+
+ @Test
+ public void testUnknownServiceStatus() throws HostStateChangeDeniedException {
+ doThrow(new HostStateChangeDeniedException(clusterApi.getNodeGroup(),
+ HostedVespaPolicy.UNKNOWN_SERVICE_STATUS,
+ "foo"))
+ .when(clusterApi).noServicesOutsideGroupIsDown();
+
+ try {
+ policy.verifyGroupGoingDownIsFine(clusterApi);
+ fail();
+ } catch (HostStateChangeDeniedException e) {
+ assertEquals(HostedVespaPolicy.UNKNOWN_SERVICE_STATUS, e.getConstraintName());
+ }
}
@Test
@@ -94,30 +114,30 @@ public class HostedVespaClusterPolicyTest {
}
@Test
- public void verifyGroupGoingDownIsFine_noServicesOutsideGroupIsDownIsFine() {
+ public void verifyGroupGoingDownIsFine_noServicesOutsideGroupIsDownIsFine() throws HostStateChangeDeniedException {
verifyGroupGoingDownIsFine(true, Optional.empty(), 13, true);
}
@Test
- public void verifyGroupGoingDownIsFine_noServicesInGroupIsUp() {
+ public void verifyGroupGoingDownIsFine_noServicesInGroupIsUp() throws HostStateChangeDeniedException {
var reasons = new SuspensionReasons().addReason(new HostName("host1"), "supension reason 1");
verifyGroupGoingDownIsFine(false, Optional.of(reasons), 13, true);
}
@Test
- public void verifyGroupGoingDownIsFine_percentageIsFine() {
+ public void verifyGroupGoingDownIsFine_percentageIsFine() throws HostStateChangeDeniedException {
verifyGroupGoingDownIsFine(false, Optional.empty(), 9, true);
}
@Test
- public void verifyGroupGoingDownIsFine_fails() {
+ public void verifyGroupGoingDownIsFine_fails() throws HostStateChangeDeniedException {
verifyGroupGoingDownIsFine(false, Optional.empty(), 13, false);
}
private void verifyGroupGoingDownIsFine(boolean noServicesOutsideGroupIsDown,
Optional<SuspensionReasons> noServicesInGroupIsUp,
int percentageOfServicesDownIfGroupIsAllowedToBeDown,
- boolean expectSuccess) {
+ boolean expectSuccess) throws HostStateChangeDeniedException {
when(clusterApi.noServicesOutsideGroupIsDown()).thenReturn(noServicesOutsideGroupIsDown);
when(clusterApi.reasonsForNoServicesInGroupIsUp()).thenReturn(noServicesInGroupIsUp);
when(clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()).thenReturn(20);
@@ -129,10 +149,6 @@ public class HostedVespaClusterPolicyTest {
when(clusterApi.percentageOfServicesDownIfGroupIsAllowedToBeDown()).thenReturn(percentageOfServicesDownIfGroupIsAllowedToBeDown);
when(clusterApi.downDescription()).thenReturn(" Down description");
- NodeGroup nodeGroup = mock(NodeGroup.class);
- when(clusterApi.getNodeGroup()).thenReturn(nodeGroup);
- when(nodeGroup.toCommaSeparatedString()).thenReturn("node-group");
-
try {
SuspensionReasons reasons = policy.verifyGroupGoingDownIsFine(clusterApi);
if (!expectSuccess) {