From feeff798cfdae712c99da9343cf9795cb22639dd Mon Sep 17 00:00:00 2001 From: HÃ¥kon Hallingstad Date: Tue, 16 Mar 2021 09:16:52 +0100 Subject: Add test of cfg reprovisioning with RetiredExpirer --- .../provision/maintenance/RetiredExpirer.java | 2 +- .../hosted/provision/testutils/MockDuperModel.java | 9 ++ .../provision/maintenance/RetiredExpirerTest.java | 120 ++++++++++++++++++++- .../provision/provisioning/ProvisioningTester.java | 2 +- 4 files changed, 129 insertions(+), 4 deletions(-) (limited to 'node-repository/src') diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java index c13c2f9731e..6941fa59b8c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java @@ -106,7 +106,7 @@ public class RetiredExpirer extends NodeRepositoryMaintainer { // and PERMANENTLY_DOWN for cfg2 is cleaned up. // // If the RetiredExpirer on cfg3 now runs before its InfrastructureProvisioner, then - // a. But the duper model still contains cfg2 + // a. The duper model still contains cfg2 // b. The service model still monitors cfg2 for health and it is UP // c. The Orchestrator has no host status (like PERMANENTLY_DOWN) for cfg2, // which is equivalent to NO_REMARKS diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java index e7ebf049e51..a57bcce7db4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockDuperModel.java @@ -26,6 +26,11 @@ public class MockDuperModel implements DuperModelInfraApi { public MockDuperModel() { } + public MockDuperModel support(InfraApplicationApi infraApplication) { + supportedInfraApps.put(infraApplication.getApplicationId(), infraApplication); + return this; + } + @Override public List getSupportedInfraApplications() { return new ArrayList<>(supportedInfraApps.values()); @@ -41,6 +46,10 @@ public class MockDuperModel implements DuperModelInfraApi { return activeApps.containsKey(applicationId); } + public List hostnamesOf(ApplicationId applicationId) { + return activeApps.getOrDefault(applicationId, List.of()); + } + @Override public void infraApplicationActivated(ApplicationId applicationId, List hostnames) { activeApps.put(applicationId, hostnames); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java index c1c6e5b6154..718facd477c 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirerTest.java @@ -1,39 +1,57 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.maintenance; -import com.yahoo.config.provision.ActivationContext; +import com.yahoo.component.Version; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ApplicationName; -import com.yahoo.config.provision.ApplicationTransaction; import com.yahoo.config.provision.Capacity; import com.yahoo.config.provision.ClusterResources; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.Deployer; import com.yahoo.config.provision.InstanceName; import com.yahoo.config.provision.NodeResources; +import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.TenantName; import com.yahoo.test.ManualClock; +import com.yahoo.vespa.applicationmodel.HostName; import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.node.Agent; +import com.yahoo.vespa.hosted.provision.node.IP; +import com.yahoo.vespa.hosted.provision.node.filter.NodeTypeFilter; +import com.yahoo.vespa.hosted.provision.provisioning.InfraDeployerImpl; import com.yahoo.vespa.hosted.provision.provisioning.NodeRepositoryProvisioner; import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester; import com.yahoo.vespa.hosted.provision.testutils.MockDeployer; +import com.yahoo.vespa.hosted.provision.testutils.MockDuperModel; +import com.yahoo.vespa.hosted.provision.testutils.MockNameResolver; import com.yahoo.vespa.orchestrator.OrchestrationException; import com.yahoo.vespa.orchestrator.Orchestrator; +import com.yahoo.vespa.service.duper.ConfigServerApplication; import org.junit.Before; import org.junit.Test; import java.time.Duration; +import java.time.Instant; import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertTrue; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; /** * @author bratseth @@ -152,6 +170,104 @@ public class RetiredExpirerTest { assertFalse(node.allocation().get().membership().retired()); } + @Test + public void config_server_reprovisioning() throws OrchestrationException { + NodeList configServers = tester.makeConfigServers(3, "default", Version.emptyVersion); + var cfg1 = new HostName("cfg1"); + assertEquals(Set.of(cfg1.s(), "cfg2", "cfg3"), configServers.stream().map(Node::hostname).collect(Collectors.toSet())); + + var configServerApplication = new ConfigServerApplication(); + var duperModel = new MockDuperModel().support(configServerApplication); + InfraDeployerImpl infraDeployer = new InfraDeployerImpl(tester.nodeRepository(), tester.provisioner(), duperModel); + + var deployer = mock(Deployer.class); + when(deployer.deployFromLocalActive(eq(configServerApplication.getApplicationId()))) + .thenAnswer(invocation -> infraDeployer.getDeployment(configServerApplication.getApplicationId())); + + // Set wantToRetire on all 3 config servers + List wantToRetireNodes = tester.nodeRepository().nodes() + .retire(NodeTypeFilter.from(NodeType.config, null), Agent.operator, Instant.now()); + assertEquals(3, wantToRetireNodes.size()); + + // Redeploy to retire all 3 config servers + infraDeployer.activateAllSupportedInfraApplications(true); + + // Only 1 config server is allowed to retire at any given point in time + List retiredNodes = tester.nodeRepository().nodes().list(() -> {}).stream() + .filter(node -> node.allocation().map(allocation -> allocation.membership().retired()).orElse(false)) + .collect(Collectors.toList()); + assertEquals(1, retiredNodes.size()); + Node retiredNode = retiredNodes.get(0); + String retiredNodeHostname = retiredNode.hostname(); + + // Allow retiredNodeHostname to be removed + doThrow(new OrchestrationException("denied")).when(orchestrator).acquirePermissionToRemove(any()); + doNothing().when(orchestrator).acquirePermissionToRemove(eq(new HostName(retiredNodeHostname))); + + // RetiredExpirer should remove cfg1 from application + RetiredExpirer retiredExpirer = createRetiredExpirer(deployer); + retiredExpirer.run(); + var activeConfigServerHostnames = new HashSet<>(Set.of("cfg1", "cfg2", "cfg3")); + assertTrue(activeConfigServerHostnames.contains(retiredNodeHostname)); + activeConfigServerHostnames.remove(retiredNodeHostname); + assertEquals(activeConfigServerHostnames, configServerHostnames(duperModel)); + assertEquals(1, tester.nodeRepository().nodes().list(Node.State.inactive).nodeType(NodeType.config).size()); + assertEquals(2, tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.config).size()); + + // no changes while 1 cfg is inactive + retiredExpirer.run(); + assertEquals(activeConfigServerHostnames, configServerHostnames(duperModel)); + assertEquals(1, tester.nodeRepository().nodes().list(Node.State.inactive).nodeType(NodeType.config).size()); + assertEquals(2, tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.config).size()); + + // The node will eventually expire from inactive, and be removed by DynamicProvisioningMaintainer + // (depending on its host), and these events should not affect the 2 active config servers. + nodeRepository.nodes().deallocate(retiredNode, Agent.InactiveExpirer, "expired"); + retiredNode = tester.nodeRepository().nodes().list(Node.State.parked).nodeType(NodeType.config).asList().get(0); + nodeRepository.nodes().removeRecursively(retiredNode, true); + infraDeployer.activateAllSupportedInfraApplications(true); + retiredExpirer.run(); + assertEquals(activeConfigServerHostnames, configServerHostnames(duperModel)); + assertEquals(2, tester.nodeRepository().nodes().list().nodeType(NodeType.config).size()); + assertEquals(2, tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.config).size()); + + // Provision and ready new config server + MockNameResolver nameResolver = (MockNameResolver)tester.nodeRepository().nameResolver(); + String ipv4 = "127.0.1.4"; + nameResolver.addRecord(retiredNodeHostname, ipv4); + Node node = Node.create(retiredNodeHostname, new IP.Config(Set.of(ipv4), Set.of()), retiredNodeHostname, + tester.asFlavor("default", NodeType.config), NodeType.config).build(); + var nodes = List.of(node); + nodes = nodeRepository.nodes().addNodes(nodes, Agent.system); + nodes = nodeRepository.nodes().deallocate(nodes, Agent.system, getClass().getSimpleName()); + nodeRepository.nodes().setReady(nodes, Agent.system, getClass().getSimpleName()); + + // no changes while replacement config server is ready + retiredExpirer.run(); + assertEquals(activeConfigServerHostnames, configServerHostnames(duperModel)); + assertEquals(1, tester.nodeRepository().nodes().list(Node.State.ready).nodeType(NodeType.config).size()); + assertEquals(2, tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.config).size()); + + // Activate replacement config server + infraDeployer.activateAllSupportedInfraApplications(true); + assertEquals(3, tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.config).size()); + + // Another config server should now have retired + retiredExpirer.run(); + assertEquals(3, tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.config).size()); + var retiredNodes2 = tester.nodeRepository().nodes().list(() -> {}).stream() + .filter(n -> n.allocation().map(allocation -> allocation.membership().retired()).orElse(false)) + .collect(Collectors.toList()); + assertEquals(1, retiredNodes2.size()); + assertNotEquals(retiredNodeHostname, retiredNodes2.get(0)); + } + + private Set configServerHostnames(MockDuperModel duperModel) { + return duperModel.hostnamesOf(new ConfigServerApplication().getApplicationId()).stream() + .map(com.yahoo.config.provision.HostName::value) + .collect(Collectors.toSet()); + } + private void activate(ApplicationId applicationId, ClusterSpec cluster, int nodes, int groups) { Capacity capacity = Capacity.from(new ClusterResources(nodes, groups, nodeResources)); tester.activate(applicationId, tester.prepare(applicationId, cluster, capacity)); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java index a30735df01c..3e71b7f5158 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTester.java @@ -490,7 +490,7 @@ public class ProvisioningTester { return nodeRepository.nodes().setReady(nodes, Agent.system, getClass().getSimpleName()); } - private Flavor asFlavor(String flavorString, NodeType type) { + public Flavor asFlavor(String flavorString, NodeType type) { Optional flavor = nodeFlavors.getFlavor(flavorString); if (flavor.isEmpty()) { // TODO: Remove the need for this by always adding hosts with a given capacity -- cgit v1.2.3