diff options
author | Valerij Fredriksen <valerijf@yahooinc.com> | 2022-10-24 11:55:28 +0200 |
---|---|---|
committer | Valerij Fredriksen <valerijf@yahooinc.com> | 2022-10-24 11:55:45 +0200 |
commit | 636a2c3824e4a12ec5d6050a2b308bfd17e6b0b8 (patch) | |
tree | 2b9268f80bf496619c910d2005c05c1815757472 /node-repository | |
parent | 904e79b6c0c52d8453698ddbad4f9c7a24c5c925 (diff) |
Write provisioned hosts to ZK early
Diffstat (limited to 'node-repository')
5 files changed, 72 insertions, 57 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java index 87b5719cc19..61589078b69 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java @@ -284,13 +284,13 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { try { Version osVersion = nodeRepository().osVersions().targetFor(NodeType.host).orElse(Version.emptyVersion); List<Integer> provisionIndices = nodeRepository().database().readProvisionIndices(count); - List<Node> hosts = hostProvisioner.provisionHosts(provisionIndices, NodeType.host, nodeResources, - ApplicationId.defaultId(), osVersion, HostSharing.shared, - Optional.empty(), Optional.empty()) - .stream() - .map(ProvisionedHost::generateHost) - .collect(Collectors.toList()); - nodeRepository().nodes().addNodes(hosts, Agent.DynamicProvisioningMaintainer); + List<Node> hosts = new ArrayList<>(); + hostProvisioner.provisionHosts(provisionIndices, NodeType.host, nodeResources, ApplicationId.defaultId(), + osVersion, HostSharing.shared, Optional.empty(), Optional.empty(), + provisionedHosts -> { + hosts.addAll(provisionedHosts.stream().map(ProvisionedHost::generateHost).toList()); + nodeRepository().nodes().addNodes(hosts, Agent.DynamicProvisioningMaintainer); + }); return hosts; } catch (NodeAllocationException | IllegalArgumentException | IllegalStateException e) { throw new NodeAllocationException("Failed to provision " + count + " " + nodeResources + ": " + e.getMessage(), diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java index 35f04683157..e1657ec3358 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java @@ -15,8 +15,10 @@ import com.yahoo.vespa.hosted.provision.NodesAndHosts; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner.HostSharing; +import java.util.ArrayList; import java.util.List; import java.util.Optional; +import java.util.function.Consumer; import java.util.function.Supplier; import java.util.logging.Level; import java.util.logging.Logger; @@ -102,33 +104,35 @@ public class GroupPreparer { NodeAllocation allocation = prepareAllocation(application, cluster, requestedNodes, surplusActiveNodes, indices::next, wantedGroups, allNodesAndHosts); NodeType hostType = allocation.nodeType().hostType(); - if (canProvisionDynamically(hostType)) { + if (canProvisionDynamically(hostType) && allocation.hostDeficit().isPresent()) { HostSharing sharing = hostSharing(requestedNodes, hostType); Version osVersion = nodeRepository.osVersions().targetFor(hostType).orElse(Version.emptyVersion); - List<ProvisionedHost> provisionedHosts = allocation.hostDeficit() - .map(deficit -> hostProvisioner.get().provisionHosts(allocation.provisionIndices(deficit.count()), - hostType, - deficit.resources(), - application, - osVersion, - sharing, - Optional.of(cluster.type()), - requestedNodes.cloudAccount())) - .orElseGet(List::of); - - // At this point we have started provisioning of the hosts, the first priority is to make sure that - // the returned hosts are added to the node-repo so that they are tracked by the provision maintainers - List<Node> hosts = provisionedHosts.stream() - .map(ProvisionedHost::generateHost) - .collect(Collectors.toList()); - nodeRepository.nodes().addNodes(hosts, Agent.application); - - // Offer the nodes on the newly provisioned hosts, this should be enough to cover the deficit - List<NodeCandidate> candidates = provisionedHosts.stream() - .map(host -> NodeCandidate.createNewExclusiveChild(host.generateNode(), - host.generateHost())) - .collect(Collectors.toList()); - allocation.offer(candidates); + NodeAllocation.HostDeficit deficit = allocation.hostDeficit().get(); + + List<Node> hosts = new ArrayList<>(); + Consumer<List<ProvisionedHost>> provisionedHostsConsumer = provisionedHosts -> { + hosts.addAll(provisionedHosts.stream().map(ProvisionedHost::generateHost).toList()); + nodeRepository.nodes().addNodes(hosts, Agent.application); + + // Offer the nodes on the newly provisioned hosts, this should be enough to cover the deficit + List<NodeCandidate> candidates = provisionedHosts.stream() + .map(host -> NodeCandidate.createNewExclusiveChild(host.generateNode(), + host.generateHost())) + .collect(Collectors.toList()); + allocation.offer(candidates); + }; + + try { + hostProvisioner.get().provisionHosts( + allocation.provisionIndices(deficit.count()), hostType, deficit.resources(), application, + osVersion, sharing, Optional.of(cluster.type()), requestedNodes.cloudAccount(), provisionedHostsConsumer); + } catch (NodeAllocationException e) { + // Mark the nodes that were written to ZK in the consumer for deprovisioning. While these hosts do + // not exist, we cannot remove them from ZK here because other nodes may already have been + // allocated on them, so let DynamicProvisioningMaintainer deal with it + hosts.forEach(host -> nodeRepository.nodes().deprovision(host.hostname(), Agent.system, nodeRepository.clock().instant())); + throw e; + } } if (! allocation.fulfilled() && requestedNodes.canFail()) diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java index 9b765adca89..1d95cdc09df 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java @@ -6,6 +6,7 @@ import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.CloudAccount; import com.yahoo.config.provision.ClusterSpec; import com.yahoo.config.provision.HostEvent; +import com.yahoo.config.provision.NodeAllocationException; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.vespa.hosted.provision.Node; @@ -13,6 +14,7 @@ import com.yahoo.vespa.hosted.provision.Node; import java.util.List; import java.util.Optional; import java.util.Set; +import java.util.function.Consumer; /** * A service which supports provisioning container hosts dynamically. @@ -45,16 +47,21 @@ public interface HostProvisioner { * @param sharing puts requirements on sharing or exclusivity of the host to be provisioned. * @param clusterType provision host exclusively for this cluster type * @param cloudAccount the cloud account to use - * @return list of {@link ProvisionedHost} describing the provisioned nodes + * @param provisionedHostConsumer consumer of {@link ProvisionedHost}s describing the provisioned nodes, + * the {@link Node} returned from {@link ProvisionedHost#generateHost()} must be + * written to ZK immediately in case the config server goes down while waiting + * for the provisioning to finish. + * @throws NodeAllocationException if the cloud provider cannot satisfy the request */ - List<ProvisionedHost> provisionHosts(List<Integer> provisionIndices, - NodeType hostType, - NodeResources resources, - ApplicationId applicationId, - Version osVersion, - HostSharing sharing, - Optional<ClusterSpec.Type> clusterType, - Optional<CloudAccount> cloudAccount); + void provisionHosts(List<Integer> provisionIndices, + NodeType hostType, + NodeResources resources, + ApplicationId applicationId, + Version osVersion, + HostSharing sharing, + Optional<ClusterSpec.Type> clusterType, + Optional<CloudAccount> cloudAccount, + Consumer<List<ProvisionedHost>> provisionedHostConsumer) throws NodeAllocationException; /** * Continue provisioning of given list of Nodes. diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java index 3ebaf764115..f9af176fc00 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java @@ -26,6 +26,7 @@ import java.util.HashSet; import java.util.List; import java.util.Optional; import java.util.Set; +import java.util.function.Consumer; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -60,10 +61,10 @@ public class MockHostProvisioner implements HostProvisioner { } @Override - public List<ProvisionedHost> provisionHosts(List<Integer> provisionIndices, NodeType hostType, NodeResources resources, - ApplicationId applicationId, Version osVersion, HostSharing sharing, - Optional<ClusterSpec.Type> clusterType, - Optional<CloudAccount> cloudAccount) { + public void provisionHosts(List<Integer> provisionIndices, NodeType hostType, NodeResources resources, + ApplicationId applicationId, Version osVersion, HostSharing sharing, + Optional<ClusterSpec.Type> clusterType, Optional<CloudAccount> cloudAccount, + Consumer<List<ProvisionedHost>> provisionedHostsConsumer) { Flavor hostFlavor = this.hostFlavor.orElseGet(() -> flavors.stream().filter(f -> compatible(f, resources)) .findFirst() .orElseThrow(() -> new NodeAllocationException("No host flavor matches " + resources, true))); @@ -82,7 +83,7 @@ public class MockHostProvisioner implements HostProvisioner { cloudAccount)); } provisionedHosts.addAll(hosts); - return hosts; + provisionedHostsConsumer.accept(hosts); } @Override diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTest.java index 436cf880f1c..3570e56e196 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTest.java @@ -30,6 +30,7 @@ import java.time.Instant; import java.util.List; import java.util.Optional; import java.util.Set; +import java.util.function.Consumer; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -41,6 +42,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.verify; @@ -72,8 +74,8 @@ public class DynamicProvisioningTest { mockHostProvisioner(hostProvisioner, "large", 3, null); // Provision shared hosts prepareAndActivate(application1, clusterSpec("mycluster"), 4, 1, resources); - verify(hostProvisioner).provisionHosts(List.of(100, 101, 102, 103), NodeType.host, resources, application1, - Version.emptyVersion, HostSharing.any, Optional.of(ClusterSpec.Type.content), Optional.empty()); + verify(hostProvisioner).provisionHosts(eq(List.of(100, 101, 102, 103)), eq(NodeType.host), eq(resources), eq(application1), + eq(Version.emptyVersion), eq(HostSharing.any), eq(Optional.of(ClusterSpec.Type.content)), eq(Optional.empty()), any()); // Total of 8 nodes should now be in node-repo, 4 active hosts and 4 active nodes assertEquals(8, tester.nodeRepository().nodes().list().size()); @@ -96,8 +98,8 @@ public class DynamicProvisioningTest { ApplicationId application3 = ProvisioningTester.applicationId(); mockHostProvisioner(hostProvisioner, "large", 3, application3); prepareAndActivate(application3, clusterSpec("mycluster", true), 4, 1, resources); - verify(hostProvisioner).provisionHosts(List.of(104, 105, 106, 107), NodeType.host, resources, application3, - Version.emptyVersion, HostSharing.exclusive, Optional.of(ClusterSpec.Type.content), Optional.empty()); + verify(hostProvisioner).provisionHosts(eq(List.of(104, 105, 106, 107)), eq(NodeType.host), eq(resources), eq(application3), + eq(Version.emptyVersion), eq(HostSharing.exclusive), eq(Optional.of(ClusterSpec.Type.content)), eq(Optional.empty()), any()); // Total of 20 nodes should now be in node-repo, 8 active hosts and 12 active nodes assertEquals(20, tester.nodeRepository().nodes().list().size()); @@ -450,14 +452,14 @@ public class DynamicProvisioningTest { return new ClusterResources(nodes, groups, new NodeResources(vcpu, memory, disk, 0.1, diskSpeed, storageType)); } - @SuppressWarnings("unchecked") private void mockHostProvisioner(HostProvisioner hostProvisioner, String hostFlavorName, int numIps, ApplicationId exclusiveTo) { doAnswer(invocation -> { Flavor hostFlavor = tester.nodeRepository().flavors().getFlavorOrThrow(hostFlavorName); - List<Integer> provisionIndexes = (List<Integer>) invocation.getArguments()[0]; - NodeResources nodeResources = (NodeResources) invocation.getArguments()[2]; + List<Integer> provisionIndexes = invocation.getArgument(0); + NodeResources nodeResources = invocation.getArgument(2); + Consumer<List<ProvisionedHost>> provisionedHostConsumer = invocation.getArgument(8); - return provisionIndexes.stream() + List<ProvisionedHost> provisionedHosts = provisionIndexes.stream() .map(hostIndex -> { String hostHostname = "host-" + hostIndex; String hostIp = "::" + hostIndex + ":0"; @@ -475,9 +477,10 @@ public class DynamicProvisioningTest { when(provisionedHost.generateHost()).thenReturn(parent); when(provisionedHost.generateNode()).thenReturn(child); return provisionedHost; - }) - .collect(Collectors.toList()); - }).when(hostProvisioner).provisionHosts(any(), any(), any(), any(), any(), any(), any(), any()); + }).toList(); + provisionedHostConsumer.accept(provisionedHosts); + return null; + }).when(hostProvisioner).provisionHosts(any(), any(), any(), any(), any(), any(), any(), any(), any()); } } |