From 64c5c6e623c653e546aeb41c70e81c09cc3eaa3f Mon Sep 17 00:00:00 2001 From: HÃ¥kon Hallingstad Date: Fri, 14 May 2021 14:41:40 +0200 Subject: Send setNodeState to CC even if storage node is down Note that this will be wrong if there is a single CC colocated with a content node, i.e. there's only one content node in the cluster, and the CC is down. In this case an operator must intervene to bring up the CC. --- .../vespa/orchestrator/model/ApplicationApi.java | 2 +- .../orchestrator/model/ApplicationApiImpl.java | 10 ++--- .../orchestrator/policy/HostedVespaPolicy.java | 6 +-- .../orchestrator/model/ApplicationApiImplTest.java | 44 +++++++++++----------- .../orchestrator/model/ClusterApiImplTest.java | 1 - .../orchestrator/policy/HostedVespaPolicyTest.java | 4 +- 6 files changed, 32 insertions(+), 35 deletions(-) (limited to 'orchestrator/src') diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApi.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApi.java index a6353e39610..a8734dd67e5 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApi.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApi.java @@ -36,6 +36,6 @@ public interface ApplicationApi { } List getStorageNodesInGroupInClusterOrder(); - List getUpStorageNodesInGroupInClusterOrder(); + List getNoRemarksStorageNodesInGroupInClusterOrder(); List getSuspendedStorageNodesInGroupInReverseClusterOrder(); } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java index 29307b36f4b..efbf9ff7981 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java @@ -10,12 +10,11 @@ import com.yahoo.vespa.orchestrator.OrchestratorContext; import com.yahoo.vespa.orchestrator.OrchestratorUtil; import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory; import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus; +import com.yahoo.vespa.orchestrator.status.ApplicationLock; import com.yahoo.vespa.orchestrator.status.HostInfos; import com.yahoo.vespa.orchestrator.status.HostStatus; -import com.yahoo.vespa.orchestrator.status.ApplicationLock; import java.time.Clock; -import java.util.Collection; import java.util.Comparator; import java.util.HashSet; import java.util.List; @@ -24,8 +23,6 @@ import java.util.Set; import java.util.function.Predicate; import java.util.stream.Collectors; -import static com.yahoo.vespa.orchestrator.OrchestratorUtil.getHostsUsedByApplicationInstance; - /** * @author hakonhall */ @@ -87,11 +84,12 @@ public class ApplicationApiImpl implements ApplicationApi { } @Override - public List getUpStorageNodesInGroupInClusterOrder() { + public List getNoRemarksStorageNodesInGroupInClusterOrder() { return clusterInOrder.stream() - .map(ClusterApi::upStorageNodeInGroup) + .map(ClusterApi::storageNodeInGroup) .filter(Optional::isPresent) .map(Optional::get) + .filter(x -> hostInfos.getOrNoRemarks(x.hostName()).status() == HostStatus.NO_REMARKS) .collect(Collectors.toList()); } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java index 8090a4e95c4..d9fc2a989de 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java @@ -48,9 +48,9 @@ public class HostedVespaPolicy implements Policy { suspensionReasons.mergeWith(clusterPolicy.verifyGroupGoingDownIsFine(cluster)); } - // Ask Cluster Controller to set UP storage nodes in maintenance. - // These storage nodes are guaranteed to be NO_REMARKS - for (StorageNode storageNode : application.getUpStorageNodesInGroupInClusterOrder()) { + // Ask Cluster Controller to set storage nodes in maintenance, unless the node is already allowed + // to be down (or permanently down) in case they are guaranteed to be in maintenance already. + for (StorageNode storageNode : application.getNoRemarksStorageNodesInGroupInClusterOrder()) { storageNode.setNodeState(context, ClusterControllerNodeState.MAINTENANCE); } diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImplTest.java index 314c21f5aae..c3e53b2f340 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImplTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImplTest.java @@ -129,31 +129,31 @@ public class ApplicationApiImplTest { ) )); - verifyUpStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName1), hostName1); - verifyUpStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName2), hostName2); - verifyUpStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName3)); // host3 is DOWN - verifyUpStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName4), hostName4); - verifyUpStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName5)); // not a storage cluster + verifyNoRemarksStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName1), hostName1); + verifyNoRemarksStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName2), hostName2); + verifyNoRemarksStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName3), hostName3); // host3 is DOWN + verifyNoRemarksStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName4), hostName4); + verifyNoRemarksStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName5)); // not a storage cluster - verifyUpStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName1, hostName3), hostName1); + verifyNoRemarksStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName1, hostName3), hostName3, hostName1); // For the node group (host1, host4), they both have an up storage node (service instance) // with clusters (cluster-3, cluster-1) respectively, and so the order of the hosts are reversed // (host4, host1) when sorted by the clusters. - verifyUpStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName1, hostName4), hostName4, hostName1); + verifyNoRemarksStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName1, hostName4), hostName4, hostName1); - verifyUpStorageNodesInOrder(modelUtils.createScopedApplicationApi( + verifyNoRemarksStorageNodesInOrder(modelUtils.createScopedApplicationApi( applicationInstance, hostName1, hostName4, hostName5), hostName4, hostName1); - verifyUpStorageNodesInOrder(modelUtils.createScopedApplicationApi( - applicationInstance, hostName1, hostName4, hostName5, hostName6), hostName4, hostName1); - verifyUpStorageNodesInOrder(modelUtils.createScopedApplicationApi( + verifyNoRemarksStorageNodesInOrder(modelUtils.createScopedApplicationApi( + applicationInstance, hostName1, hostName4, hostName5, hostName6), hostName4, hostName6, hostName1); + verifyNoRemarksStorageNodesInOrder(modelUtils.createScopedApplicationApi( applicationInstance, hostName1, hostName4, hostName5, hostName7), hostName4, hostName7, hostName1); } - private void verifyUpStorageNodesInOrder(ScopedApplicationApi scopedApi, - HostName... expectedHostNames) { + private void verifyNoRemarksStorageNodesInOrder(ScopedApplicationApi scopedApi, + HostName... expectedHostNames) { try (scopedApi) { - List upStorageNodes = scopedApi.applicationApi().getUpStorageNodesInGroupInClusterOrder().stream() + List upStorageNodes = scopedApi.applicationApi().getNoRemarksStorageNodesInGroupInClusterOrder().stream() .map(storageNode -> storageNode.hostName()) .collect(Collectors.toList()); assertEquals(Arrays.asList(expectedHostNames), upStorageNodes); @@ -162,15 +162,15 @@ public class ApplicationApiImplTest { @Test public void testUpConditionOfStorageNode() { - verifyUpConditionWith(HostStatus.NO_REMARKS, ServiceStatus.UP, true); - verifyUpConditionWith(HostStatus.NO_REMARKS, ServiceStatus.NOT_CHECKED, true); - verifyUpConditionWith(HostStatus.NO_REMARKS, ServiceStatus.DOWN, false); - verifyUpConditionWith(HostStatus.ALLOWED_TO_BE_DOWN, ServiceStatus.UP, false); - verifyUpConditionWith(HostStatus.ALLOWED_TO_BE_DOWN, ServiceStatus.NOT_CHECKED, false); - verifyUpConditionWith(HostStatus.ALLOWED_TO_BE_DOWN, ServiceStatus.DOWN, false); + verifyNoRemarksConditionWith(HostStatus.NO_REMARKS, ServiceStatus.UP, true); + verifyNoRemarksConditionWith(HostStatus.NO_REMARKS, ServiceStatus.NOT_CHECKED, true); + verifyNoRemarksConditionWith(HostStatus.NO_REMARKS, ServiceStatus.DOWN, true); + verifyNoRemarksConditionWith(HostStatus.ALLOWED_TO_BE_DOWN, ServiceStatus.UP, false); + verifyNoRemarksConditionWith(HostStatus.ALLOWED_TO_BE_DOWN, ServiceStatus.NOT_CHECKED, false); + verifyNoRemarksConditionWith(HostStatus.ALLOWED_TO_BE_DOWN, ServiceStatus.DOWN, false); } - private void verifyUpConditionWith(HostStatus hostStatus, ServiceStatus serviceStatus, boolean expectUp) { + private void verifyNoRemarksConditionWith(HostStatus hostStatus, ServiceStatus serviceStatus, boolean expectUp) { HostName hostName1 = new HostName("host1"); ApplicationInstance applicationInstance = modelUtils.createApplicationInstance(Arrays.asList( @@ -187,7 +187,7 @@ public class ApplicationApiImplTest { List upStorageNodes = expectUp ? Arrays.asList(hostName1) : new ArrayList<>(); List actualStorageNodes = scopedApi.applicationApi() - .getUpStorageNodesInGroupInClusterOrder() + .getNoRemarksStorageNodesInGroupInClusterOrder() .stream() .map(storageNode -> storageNode.hostName()) .collect(Collectors.toList()); diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java index d40ef0fb283..6bf46052933 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java @@ -309,7 +309,6 @@ public class ClusterApiImplTest { assertTrue(clusterApi.isStorageCluster()); assertEquals(Optional.of(hostName1), clusterApi.storageNodeInGroup().map(storageNode -> storageNode.hostName())); - assertEquals(Optional.of(hostName1), clusterApi.upStorageNodeInGroup().map(storageNode -> storageNode.hostName())); } private ClusterApiImpl makeConfigClusterApi(int clusterSize, ServiceStatus first, ServiceStatus... rest) { diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java index 6f34817930c..385f7b238af 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java @@ -68,7 +68,7 @@ public class HostedVespaPolicyTest { when(storageNode1.hostName()).thenReturn(hostName3); List upStorageNodes = Arrays.asList(storageNode1, storageNode3); - when(applicationApi.getUpStorageNodesInGroupInClusterOrder()).thenReturn(upStorageNodes); + when(applicationApi.getNoRemarksStorageNodesInGroupInClusterOrder()).thenReturn(upStorageNodes); // setHostState List noRemarksHostNames = Arrays.asList(hostName1, hostName2, hostName3); @@ -84,7 +84,7 @@ public class HostedVespaPolicyTest { order.verify(clusterPolicy).verifyGroupGoingDownIsFine(clusterApi2); order.verify(clusterPolicy).verifyGroupGoingDownIsFine(clusterApi3); - order.verify(applicationApi).getUpStorageNodesInGroupInClusterOrder(); + order.verify(applicationApi).getNoRemarksStorageNodesInGroupInClusterOrder(); order.verify(storageNode1).setNodeState(context, ClusterControllerNodeState.MAINTENANCE); order.verify(storageNode3).setNodeState(context, ClusterControllerNodeState.MAINTENANCE); -- cgit v1.2.3