diff options
author | Jon Marius Venstad <venstad@gmail.com> | 2021-02-19 21:16:44 +0100 |
---|---|---|
committer | Jon Marius Venstad <venstad@gmail.com> | 2021-02-19 21:16:44 +0100 |
commit | ff6633bd0b6e04402505c79916dbc4498bb7d16d (patch) | |
tree | 73082e41d5c6901ab5e398494b48020284517712 /orchestrator | |
parent | bce7902f8cbadb805b38a5e5b39640ed092d3d71 (diff) |
Implement isQuiescent by probing for M for all content services
Diffstat (limited to 'orchestrator')
7 files changed, 146 insertions, 46 deletions
diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java index 35a8e3578b4..eba7df8c6ac 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/OrchestratorImpl.java @@ -34,6 +34,7 @@ import com.yahoo.vespa.orchestrator.status.HostInfos; import com.yahoo.vespa.orchestrator.status.HostStatus; import com.yahoo.vespa.orchestrator.status.StatusService; import com.yahoo.vespa.service.monitor.ServiceMonitor; +import com.yahoo.yolean.Exceptions; import java.io.IOException; import java.time.Clock; @@ -44,8 +45,12 @@ import java.util.Optional; import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.function.Function; +import java.util.logging.Level; import java.util.logging.Logger; -import java.util.stream.Collectors; + +import static com.yahoo.vespa.orchestrator.controller.ClusterControllerNodeState.MAINTENANCE; +import static java.util.stream.Collectors.toList; +import static java.util.stream.Collectors.toSet; /** * @author oyving @@ -113,7 +118,7 @@ public class OrchestratorImpl implements Orchestrator { .serviceClusters().stream() .flatMap(cluster -> cluster.serviceInstances().stream()) .filter(serviceInstance -> hostName.equals(serviceInstance.hostName())) - .collect(Collectors.toList()); + .collect(toList()); HostInfo hostInfo = statusService.getHostInfo(applicationInstance.reference(), hostName); @@ -215,7 +220,7 @@ public class OrchestratorImpl implements Orchestrator { * Suspend normal operations for a group of nodes in the same application. * * @param nodeGroup The group of nodes in an application. - * @throws HostStateChangeDeniedException if the request cannot be meet due to policy constraints. + * @throws HostStateChangeDeniedException if the request cannot be met due to policy constraints. */ void suspendGroup(OrchestratorContext context, NodeGroup nodeGroup) throws HostStateChangeDeniedException { ApplicationInstanceReference applicationReference = nodeGroup.getApplicationReference(); @@ -244,7 +249,7 @@ public class OrchestratorImpl implements Orchestrator { @Override public Set<ApplicationId> getAllSuspendedApplications() { Set<ApplicationInstanceReference> refSet = statusService.getAllSuspendedApplications(); - return refSet.stream().map(OrchestratorUtil::toApplicationId).collect(Collectors.toSet()); + return refSet.stream().map(OrchestratorUtil::toApplicationId).collect(toSet()); } @Override @@ -337,7 +342,7 @@ public class OrchestratorImpl implements Orchestrator { return nodeGroupMap.values().stream() .sorted(OrchestratorImpl::compareNodeGroupsForSuspend) - .collect(Collectors.toList()); + .collect(toList()); } private static int compareNodeGroupsForSuspend(NodeGroup leftNodeGroup, NodeGroup rightNodeGroup) { @@ -375,13 +380,50 @@ public class OrchestratorImpl implements Orchestrator { // If the clustercontroller throws an error the nodes will be marked as allowed to be down // and be set back up on next resume invocation. - setClusterStateInController(context.createSubcontextWithinLock(), application, ClusterControllerNodeState.MAINTENANCE); + setClusterStateInController(context.createSubcontextWithinLock(), application, MAINTENANCE); } lock.setApplicationInstanceStatus(status); } } + @Override + public boolean isQuiescent(ApplicationId id) { + try { + ApplicationInstance application = serviceMonitor.getApplication(OrchestratorUtil.toApplicationInstanceReference(id, serviceMonitor)) + .orElseThrow(ApplicationIdNotFoundException::new); + + List<ServiceCluster> contentClusters = application.serviceClusters().stream() + .filter(VespaModelUtil::isContent) + .collect(toList()); + + // For all content clusters, probe whether maintenance is OK. + OrchestratorContext context = OrchestratorContext.createContextForSingleAppOp(clock) + .createSubcontextForSingleAppOp(true); // probe + for (ServiceCluster cluster : contentClusters) { + List<HostName> clusterControllers = VespaModelUtil.getClusterControllerInstancesInOrder(application, cluster.clusterId()); + ClusterControllerClient client = clusterControllerClientFactory.createClient(clusterControllers, cluster.clusterId().s()); + for (ServiceInstance service : cluster.serviceInstances()) { + try { + ClusterControllerStateResponse response = client.setNodeState(context, + VespaModelUtil.getStorageNodeIndex(service.configId()), + MAINTENANCE); + if ( ! response.wasModified) + return false; + } + catch (Exception e) { + log.log(Level.INFO, "Failed probing for permission to set " + service + " in MAINTENANCE: " + Exceptions.toMessageString(e)); + return false; + } + } + } + return true; + } + catch (ApplicationIdNotFoundException ignored) { + return false; + } + } + private void setClusterStateInController(OrchestratorContext context, ApplicationInstance application, ClusterControllerNodeState state) @@ -390,7 +432,7 @@ public class OrchestratorImpl implements Orchestrator { Set<ClusterId> contentClusterIds = application.serviceClusters().stream() .filter(VespaModelUtil::isContent) .map(ServiceCluster::clusterId) - .collect(Collectors.toSet()); + .collect(toSet()); // For all content clusters set in maintenance for (ClusterId clusterId : contentClusterIds) { diff --git a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/VespaModelUtil.java b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/VespaModelUtil.java index f3bfb30aa5a..18d8a3c42cb 100644 --- a/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/VespaModelUtil.java +++ b/orchestrator/src/main/java/com/yahoo/vespa/orchestrator/model/VespaModelUtil.java @@ -74,19 +74,20 @@ public class VespaModelUtil { * @return The set of all Cluster Controller service instances for the application. */ public static List<HostName> getClusterControllerInstancesInOrder(ApplicationInstance application, - ClusterId contentClusterId) + ClusterId contentClusterId) { Set<ServiceCluster> controllerClusters = getClusterControllerServiceClusters(application); Collection<ServiceCluster> controllerClustersForContentCluster = filter(controllerClusters, contentClusterId); + // TODO jonmv: the exception will be the new norm here. Set<ServiceInstance> clusterControllerInstances; if (controllerClustersForContentCluster.size() == 1) { clusterControllerInstances = first(controllerClustersForContentCluster).serviceInstances(); } else if (controllerClusters.size() == 1) { ServiceCluster cluster = first(controllerClusters); - log.warning("No cluster controller cluster for content cluster " + contentClusterId - + ", using the only cluster controller cluster available: " + cluster.clusterId()); + log.info("No cluster controller cluster for content cluster " + contentClusterId + + ", using the only cluster controller cluster available: " + cluster.clusterId()); clusterControllerInstances = cluster.serviceInstances(); } else { diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/DummyServiceMonitor.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/DummyServiceMonitor.java index 501a09f78ff..62d5e744938 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/DummyServiceMonitor.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/DummyServiceMonitor.java @@ -45,11 +45,11 @@ public class DummyServiceMonitor implements ServiceMonitor, AntiServiceMonitor { apps.add(new ApplicationInstance( new TenantId("test-tenant-id"), new ApplicationInstanceId("application:prod:utopia-1:instance"), - TestUtil.makeServiceClusterSet( + Set.of( new ServiceCluster( new ClusterId("test-cluster-id-1"), new ServiceType("storagenode"), - TestUtil.makeServiceInstanceSet( + Set.of( new ServiceInstance( new ConfigId("storage/storage/1"), TEST1_HOST_NAME, @@ -61,7 +61,7 @@ public class DummyServiceMonitor implements ServiceMonitor, AntiServiceMonitor { new ServiceCluster( new ClusterId("clustercontroller"), new ServiceType("container-clustercontroller"), - TestUtil.makeServiceInstanceSet( + Set.of( new ServiceInstance( new ConfigId("clustercontroller-1"), new HostName("myclustercontroller.hostname.tld"), @@ -73,11 +73,11 @@ public class DummyServiceMonitor implements ServiceMonitor, AntiServiceMonitor { apps.add(new ApplicationInstance( new TenantId("mediasearch"), new ApplicationInstanceId("imagesearch:prod:utopia-1:default"), - TestUtil.makeServiceClusterSet( + Set.of( new ServiceCluster( new ClusterId("image"), new ServiceType("storagenode"), - TestUtil.makeServiceInstanceSet( + Set.of( new ServiceInstance( new ConfigId("storage/storage/3"), TEST3_HOST_NAME, @@ -89,7 +89,7 @@ public class DummyServiceMonitor implements ServiceMonitor, AntiServiceMonitor { new ServiceCluster( new ClusterId("clustercontroller"), new ServiceType("container-clustercontroller"), - TestUtil.makeServiceInstanceSet( + Set.of( new ServiceInstance( new ConfigId("clustercontroller-1"), new HostName("myclustercontroller2.hostname.tld"), @@ -101,11 +101,11 @@ public class DummyServiceMonitor implements ServiceMonitor, AntiServiceMonitor { apps.add(new ApplicationInstance( new TenantId("tenant-id-3"), new ApplicationInstanceId("application-instance-3:prod:utopia-1:default"), - TestUtil.makeServiceClusterSet( + Set.of( new ServiceCluster( new ClusterId("cluster-id-3"), new ServiceType("storagenode"), - TestUtil.makeServiceInstanceSet( + Set.of( new ServiceInstance( new ConfigId("storage/storage/1"), TEST6_HOST_NAME, @@ -117,7 +117,7 @@ public class DummyServiceMonitor implements ServiceMonitor, AntiServiceMonitor { new ServiceCluster( new ClusterId("clustercontroller"), new ServiceType("container-clustercontroller"), - TestUtil.makeServiceInstanceSet( + Set.of( new ServiceInstance( new ConfigId("clustercontroller-1"), new HostName("myclustercontroller3.hostname.tld"), diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java index 22aa578cb55..810bc87964c 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/OrchestratorImplTest.java @@ -18,8 +18,11 @@ import com.yahoo.vespa.applicationmodel.ServiceType; import com.yahoo.vespa.applicationmodel.TenantId; import com.yahoo.vespa.curator.mock.MockCurator; import com.yahoo.vespa.flags.InMemoryFlagSource; +import com.yahoo.vespa.orchestrator.controller.ClusterControllerClient; import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactory; import com.yahoo.vespa.orchestrator.controller.ClusterControllerClientFactoryMock; +import com.yahoo.vespa.orchestrator.controller.ClusterControllerNodeState; +import com.yahoo.vespa.orchestrator.controller.ClusterControllerStateResponse; import com.yahoo.vespa.orchestrator.model.ApplicationApiFactory; import com.yahoo.vespa.orchestrator.model.NodeGroup; import com.yahoo.vespa.orchestrator.policy.BatchHostStateChangeDeniedException; @@ -399,6 +402,77 @@ public class OrchestratorImplTest { } @Test + public void testIsQuiescent() throws Exception { + StatusService statusService = new ZkStatusService(new MockCurator(), + mock(Metric.class), + new TestTimer(), + new DummyAntiServiceMonitor()); + + HostName hostName = new HostName("my.host"); + HostName ccHost = new HostName("cc.host"); + TenantId tenantId = new TenantId("tenant"); + ApplicationInstanceId applicationInstanceId = new ApplicationInstanceId("app:env:region:instance"); + ApplicationInstanceReference reference = new ApplicationInstanceReference(tenantId, applicationInstanceId); + ApplicationId id = ApplicationId.from("tenant", "app", "instance"); + + ApplicationInstance applicationInstance = + new ApplicationInstance(tenantId, + applicationInstanceId, + Set.of(new ServiceCluster(new ClusterId("foo"), + ServiceType.STORAGE, + Set.of(new ServiceInstance(new ConfigId("foo/storage/1"), + hostName, + ServiceStatus.UP), + new ServiceInstance(new ConfigId("foo/storage/2"), + hostName, + ServiceStatus.UP))), + new ServiceCluster(new ClusterId("bar"), + ServiceType.SEARCH, + Set.of(new ServiceInstance(new ConfigId("bar/storage/0"), + hostName, + ServiceStatus.UP), + new ServiceInstance(new ConfigId("bar/storage/3"), + hostName, + ServiceStatus.UP))), + new ServiceCluster(new ClusterId("cluster-controllers"), + ServiceType.CLUSTER_CONTROLLER, + Set.of(new ServiceInstance(new ConfigId("what/standalone/cluster-controllers/0"), + ccHost, + ServiceStatus.UP))))); + + ServiceMonitor serviceMonitor = () -> new ServiceModel(Map.of(reference, applicationInstance)); + + ClusterControllerClientFactory clusterControllerClientFactory = mock(ClusterControllerClientFactory.class); + ClusterControllerClient fooClient = mock(ClusterControllerClient.class); + ClusterControllerClient barClient = mock(ClusterControllerClient.class); + when(clusterControllerClientFactory.createClient(List.of(ccHost), "foo")).thenReturn(fooClient); + when(clusterControllerClientFactory.createClient(List.of(ccHost), "bar")).thenReturn(barClient); + + orchestrator = new OrchestratorImpl(new HostedVespaPolicy(new HostedVespaClusterPolicy(flagSource), clusterControllerClientFactory, applicationApiFactory), + clusterControllerClientFactory, + statusService, + serviceMonitor, + 0, + new ManualClock(), + applicationApiFactory, + flagSource); + + ClusterControllerStateResponse accepted = new ClusterControllerStateResponse(true, "OK"); + ClusterControllerStateResponse denied = new ClusterControllerStateResponse(false, "NO"); + when(fooClient.setNodeState(any(), eq(1), eq(ClusterControllerNodeState.MAINTENANCE))).thenReturn(accepted); + when(fooClient.setNodeState(any(), eq(2), eq(ClusterControllerNodeState.MAINTENANCE))).thenReturn(accepted); + when(barClient.setNodeState(any(), eq(0), eq(ClusterControllerNodeState.MAINTENANCE))).thenReturn(accepted); + when(barClient.setNodeState(any(), eq(3), eq(ClusterControllerNodeState.MAINTENANCE))).thenReturn(accepted); + assertTrue(orchestrator.isQuiescent(id)); + + when(fooClient.setNodeState(any(), eq(2), eq(ClusterControllerNodeState.MAINTENANCE))).thenReturn(denied); + assertFalse(orchestrator.isQuiescent(id)); + + when(fooClient.setNodeState(any(), eq(2), eq(ClusterControllerNodeState.MAINTENANCE))).thenThrow(new RuntimeException()); + assertFalse(orchestrator.isQuiescent(id)); + } + + @Test public void testGetHost() throws Exception { ClusterControllerClientFactory clusterControllerClientFactory = new ClusterControllerClientFactoryMock(); StatusService statusService = new ZkStatusService( diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/TestUtil.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/TestUtil.java index 9b9dc206556..dd27f899423 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/TestUtil.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/TestUtil.java @@ -2,12 +2,6 @@ package com.yahoo.vespa.orchestrator; import com.yahoo.vespa.applicationmodel.ConfigId; -import com.yahoo.vespa.applicationmodel.ServiceCluster; -import com.yahoo.vespa.applicationmodel.ServiceInstance; - -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; /** * Utility methods for creating test setups. @@ -15,17 +9,6 @@ import java.util.Set; * @author bakksjo */ public class TestUtil { - @SafeVarargs - public static Set<ServiceInstance> makeServiceInstanceSet( - final ServiceInstance... serviceInstances) { - return new HashSet<>(Arrays.asList(serviceInstances)); - } - - @SafeVarargs - public static Set<ServiceCluster> makeServiceClusterSet( - final ServiceCluster... serviceClusters) { - return new HashSet<>(Arrays.asList(serviceClusters)); - } public static ConfigId storageNodeConfigId(String contentClusterName, int index) { return new ConfigId(contentClusterName + "/storage/" + index); @@ -34,4 +17,5 @@ public class TestUtil { public static ConfigId clusterControllerConfigId(String contentClusterName, int index) { return new ConfigId(contentClusterName + "/standalone/" + contentClusterName + "-controllers/" + index); } + } diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/VespaModelUtilTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/VespaModelUtilTest.java index d4199da3a74..c34dddfa823 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/VespaModelUtilTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/model/VespaModelUtilTest.java @@ -18,9 +18,8 @@ import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Optional; +import java.util.Set; -import static com.yahoo.vespa.orchestrator.TestUtil.makeServiceClusterSet; -import static com.yahoo.vespa.orchestrator.TestUtil.makeServiceInstanceSet; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -50,7 +49,7 @@ public class VespaModelUtilTest { new ServiceCluster( new ClusterId(CONTENT_CLUSTER_ID.s() + "-controller"), ServiceType.CLUSTER_CONTROLLER, - makeServiceInstanceSet(controller1, controller0)); + Set.of(controller1, controller0)); // Distributor Service Cluster @@ -64,7 +63,7 @@ public class VespaModelUtilTest { new ServiceCluster( CONTENT_CLUSTER_ID, ServiceType.DISTRIBUTOR, - makeServiceInstanceSet(distributor0)); + Set.of(distributor0)); // Storage Node Service Cluster @@ -78,7 +77,7 @@ public class VespaModelUtilTest { new ServiceCluster( CONTENT_CLUSTER_ID, ServiceType.STORAGE, - makeServiceInstanceSet(storage0)); + Set.of(storage0)); // Secondary Distributor Service Cluster @@ -92,7 +91,7 @@ public class VespaModelUtilTest { new ServiceCluster( SECONDARY_CONTENT_CLUSTER_ID, ServiceType.DISTRIBUTOR, - makeServiceInstanceSet(secondaryDistributor0)); + Set.of(secondaryDistributor0)); // Secondary Storage Node Service Cluster @@ -106,7 +105,7 @@ public class VespaModelUtilTest { new ServiceCluster( SECONDARY_CONTENT_CLUSTER_ID, ServiceType.STORAGE, - makeServiceInstanceSet(secondaryStorage0)); + Set.of(secondaryStorage0)); // The Application Instance @@ -114,7 +113,7 @@ public class VespaModelUtilTest { new ApplicationInstance( new TenantId("tenant-0"), new ApplicationInstanceId("application-0"), - makeServiceClusterSet( + Set.of( controllerCluster, distributorCluster, storageCluster, diff --git a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java index 46346e7de7f..7f4ef1a336c 100644 --- a/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java +++ b/orchestrator/src/test/java/com/yahoo/vespa/orchestrator/resources/HostResourceTest.java @@ -61,8 +61,8 @@ import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; -import static com.yahoo.vespa.orchestrator.TestUtil.makeServiceClusterSet; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -92,7 +92,7 @@ public class HostResourceTest { new ApplicationInstance( TENANT_ID, APPLICATION_INSTANCE_ID, - makeServiceClusterSet()))); + Set.of()))); } private final InMemoryFlagSource flagSource = new InMemoryFlagSource(); |