diff options
author | Håkon Hallingstad <hakon@verizonmedia.com> | 2021-05-14 14:41:40 +0200 |
---|---|---|
committer | Håkon Hallingstad <hakon@verizonmedia.com> | 2021-05-14 14:41:40 +0200 |
commit | 64c5c6e623c653e546aeb41c70e81c09cc3eaa3f (patch) | |
tree | b81a09ea9f67b58785d18e7a6960cb1041768be3 /orchestrator | |
parent | e2aea77b7e859be5761c779b952a85148ef61f4a (diff) |
Send setNodeState to CC even if storage node is down
Note that this will be wrong if there is a single CC colocated with a content
node, i.e. there's only one content node in the cluster, and the CC is down.
In this case an operator must intervene to bring up the CC.
Diffstat (limited to 'orchestrator')
6 files changed, 32 insertions, 35 deletions
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApi.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApi.java index a6353e39610..a8734dd67e5 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApi.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApi.java @@ -36,6 +36,6 @@ public interface ApplicationApi { } List<StorageNode> getStorageNodesInGroupInClusterOrder(); - List<StorageNode> getUpStorageNodesInGroupInClusterOrder(); + List<StorageNode> getNoRemarksStorageNodesInGroupInClusterOrder(); List<StorageNode> getSuspendedStorageNodesInGroupInReverseClusterOrder(); } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java index 29307b36f4b..efbf9ff7981 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImpl.java @@ -10,12 +10,11 @@ import com.yahoo.vespa.orchestrator.OrchestratorContext; import com.yahoo.vespa.orchestrator.OrchestratorUtil; import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory; import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus; +import com.yahoo.vespa.orchestrator.status.ApplicationLock; import com.yahoo.vespa.orchestrator.status.HostInfos; import com.yahoo.vespa.orchestrator.status.HostStatus; -import com.yahoo.vespa.orchestrator.status.ApplicationLock; import java.time.Clock; -import java.util.Collection; import java.util.Comparator; import java.util.HashSet; import java.util.List; @@ -24,8 +23,6 @@ import java.util.Set; import java.util.function.Predicate; import java.util.stream.Collectors; -import static com.yahoo.vespa.orchestrator.OrchestratorUtil.getHostsUsedByApplicationInstance; - /** * @author hakonhall */ @@ -87,11 +84,12 @@ public class ApplicationApiImpl implements ApplicationApi { } @Override - public List<StorageNode> getUpStorageNodesInGroupInClusterOrder() { + public List<StorageNode> getNoRemarksStorageNodesInGroupInClusterOrder() { return clusterInOrder.stream() - .map(ClusterApi::upStorageNodeInGroup) + .map(ClusterApi::storageNodeInGroup) .filter(Optional::isPresent) .map(Optional::get) + .filter(x -> hostInfos.getOrNoRemarks(x.hostName()).status() == HostStatus.NO_REMARKS) .collect(Collectors.toList()); } diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java index 8090a4e95c4..d9fc2a989de 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicy.java @@ -48,9 +48,9 @@ public class HostedVespaPolicy implements Policy { suspensionReasons.mergeWith(clusterPolicy.verifyGroupGoingDownIsFine(cluster)); } - // Ask Cluster Controller to set UP storage nodes in maintenance. - // These storage nodes are guaranteed to be NO_REMARKS - for (StorageNode storageNode : application.getUpStorageNodesInGroupInClusterOrder()) { + // Ask Cluster Controller to set storage nodes in maintenance, unless the node is already allowed + // to be down (or permanently down) in case they are guaranteed to be in maintenance already. + for (StorageNode storageNode : application.getNoRemarksStorageNodesInGroupInClusterOrder()) { storageNode.setNodeState(context, ClusterControllerNodeState.MAINTENANCE); } diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImplTest.java index 314c21f5aae..c3e53b2f340 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImplTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ApplicationApiImplTest.java @@ -129,31 +129,31 @@ public class ApplicationApiImplTest { ) )); - verifyUpStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName1), hostName1); - verifyUpStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName2), hostName2); - verifyUpStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName3)); // host3 is DOWN - verifyUpStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName4), hostName4); - verifyUpStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName5)); // not a storage cluster + verifyNoRemarksStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName1), hostName1); + verifyNoRemarksStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName2), hostName2); + verifyNoRemarksStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName3), hostName3); // host3 is DOWN + verifyNoRemarksStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName4), hostName4); + verifyNoRemarksStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName5)); // not a storage cluster - verifyUpStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName1, hostName3), hostName1); + verifyNoRemarksStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName1, hostName3), hostName3, hostName1); // For the node group (host1, host4), they both have an up storage node (service instance) // with clusters (cluster-3, cluster-1) respectively, and so the order of the hosts are reversed // (host4, host1) when sorted by the clusters. - verifyUpStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName1, hostName4), hostName4, hostName1); + verifyNoRemarksStorageNodesInOrder(modelUtils.createScopedApplicationApi(applicationInstance, hostName1, hostName4), hostName4, hostName1); - verifyUpStorageNodesInOrder(modelUtils.createScopedApplicationApi( + verifyNoRemarksStorageNodesInOrder(modelUtils.createScopedApplicationApi( applicationInstance, hostName1, hostName4, hostName5), hostName4, hostName1); - verifyUpStorageNodesInOrder(modelUtils.createScopedApplicationApi( - applicationInstance, hostName1, hostName4, hostName5, hostName6), hostName4, hostName1); - verifyUpStorageNodesInOrder(modelUtils.createScopedApplicationApi( + verifyNoRemarksStorageNodesInOrder(modelUtils.createScopedApplicationApi( + applicationInstance, hostName1, hostName4, hostName5, hostName6), hostName4, hostName6, hostName1); + verifyNoRemarksStorageNodesInOrder(modelUtils.createScopedApplicationApi( applicationInstance, hostName1, hostName4, hostName5, hostName7), hostName4, hostName7, hostName1); } - private void verifyUpStorageNodesInOrder(ScopedApplicationApi scopedApi, - HostName... expectedHostNames) { + private void verifyNoRemarksStorageNodesInOrder(ScopedApplicationApi scopedApi, + HostName... expectedHostNames) { try (scopedApi) { - List<HostName> upStorageNodes = scopedApi.applicationApi().getUpStorageNodesInGroupInClusterOrder().stream() + List<HostName> upStorageNodes = scopedApi.applicationApi().getNoRemarksStorageNodesInGroupInClusterOrder().stream() .map(storageNode -> storageNode.hostName()) .collect(Collectors.toList()); assertEquals(Arrays.asList(expectedHostNames), upStorageNodes); @@ -162,15 +162,15 @@ public class ApplicationApiImplTest { @Test public void testUpConditionOfStorageNode() { - verifyUpConditionWith(HostStatus.NO_REMARKS, ServiceStatus.UP, true); - verifyUpConditionWith(HostStatus.NO_REMARKS, ServiceStatus.NOT_CHECKED, true); - verifyUpConditionWith(HostStatus.NO_REMARKS, ServiceStatus.DOWN, false); - verifyUpConditionWith(HostStatus.ALLOWED_TO_BE_DOWN, ServiceStatus.UP, false); - verifyUpConditionWith(HostStatus.ALLOWED_TO_BE_DOWN, ServiceStatus.NOT_CHECKED, false); - verifyUpConditionWith(HostStatus.ALLOWED_TO_BE_DOWN, ServiceStatus.DOWN, false); + verifyNoRemarksConditionWith(HostStatus.NO_REMARKS, ServiceStatus.UP, true); + verifyNoRemarksConditionWith(HostStatus.NO_REMARKS, ServiceStatus.NOT_CHECKED, true); + verifyNoRemarksConditionWith(HostStatus.NO_REMARKS, ServiceStatus.DOWN, true); + verifyNoRemarksConditionWith(HostStatus.ALLOWED_TO_BE_DOWN, ServiceStatus.UP, false); + verifyNoRemarksConditionWith(HostStatus.ALLOWED_TO_BE_DOWN, ServiceStatus.NOT_CHECKED, false); + verifyNoRemarksConditionWith(HostStatus.ALLOWED_TO_BE_DOWN, ServiceStatus.DOWN, false); } - private void verifyUpConditionWith(HostStatus hostStatus, ServiceStatus serviceStatus, boolean expectUp) { + private void verifyNoRemarksConditionWith(HostStatus hostStatus, ServiceStatus serviceStatus, boolean expectUp) { HostName hostName1 = new HostName("host1"); ApplicationInstance applicationInstance = modelUtils.createApplicationInstance(Arrays.asList( @@ -187,7 +187,7 @@ public class ApplicationApiImplTest { List<HostName> upStorageNodes = expectUp ? Arrays.asList(hostName1) : new ArrayList<>(); List<HostName> actualStorageNodes = scopedApi.applicationApi() - .getUpStorageNodesInGroupInClusterOrder() + .getNoRemarksStorageNodesInGroupInClusterOrder() .stream() .map(storageNode -> storageNode.hostName()) .collect(Collectors.toList()); diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java index d40ef0fb283..6bf46052933 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/ClusterApiImplTest.java @@ -309,7 +309,6 @@ public class ClusterApiImplTest { assertTrue(clusterApi.isStorageCluster()); assertEquals(Optional.of(hostName1), clusterApi.storageNodeInGroup().map(storageNode -> storageNode.hostName())); - assertEquals(Optional.of(hostName1), clusterApi.upStorageNodeInGroup().map(storageNode -> storageNode.hostName())); } private ClusterApiImpl makeConfigClusterApi(int clusterSize, ServiceStatus first, ServiceStatus... rest) { diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java index 6f34817930c..385f7b238af 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/policy/HostedVespaPolicyTest.java @@ -68,7 +68,7 @@ public class HostedVespaPolicyTest { when(storageNode1.hostName()).thenReturn(hostName3); List<StorageNode> upStorageNodes = Arrays.asList(storageNode1, storageNode3); - when(applicationApi.getUpStorageNodesInGroupInClusterOrder()).thenReturn(upStorageNodes); + when(applicationApi.getNoRemarksStorageNodesInGroupInClusterOrder()).thenReturn(upStorageNodes); // setHostState List<HostName> noRemarksHostNames = Arrays.asList(hostName1, hostName2, hostName3); @@ -84,7 +84,7 @@ public class HostedVespaPolicyTest { order.verify(clusterPolicy).verifyGroupGoingDownIsFine(clusterApi2); order.verify(clusterPolicy).verifyGroupGoingDownIsFine(clusterApi3); - order.verify(applicationApi).getUpStorageNodesInGroupInClusterOrder(); + order.verify(applicationApi).getNoRemarksStorageNodesInGroupInClusterOrder(); order.verify(storageNode1).setNodeState(context, ClusterControllerNodeState.MAINTENANCE); order.verify(storageNode3).setNodeState(context, ClusterControllerNodeState.MAINTENANCE); |