summaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
authorValerij Fredriksen <valerijf@yahooinc.com>2022-10-24 11:55:28 +0200
committerValerij Fredriksen <valerijf@yahooinc.com>2022-10-24 11:55:45 +0200
commit636a2c3824e4a12ec5d6050a2b308bfd17e6b0b8 (patch)
tree2b9268f80bf496619c910d2005c05c1815757472 /node-repository
parent904e79b6c0c52d8453698ddbad4f9c7a24c5c925 (diff)
Write provisioned hosts to ZK early
Diffstat (limited to 'node-repository')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java14
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java54
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java25
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java11
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTest.java25
5 files changed, 72 insertions, 57 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
index 87b5719cc19..61589078b69 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
@@ -284,13 +284,13 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
try {
Version osVersion = nodeRepository().osVersions().targetFor(NodeType.host).orElse(Version.emptyVersion);
List<Integer> provisionIndices = nodeRepository().database().readProvisionIndices(count);
- List<Node> hosts = hostProvisioner.provisionHosts(provisionIndices, NodeType.host, nodeResources,
- ApplicationId.defaultId(), osVersion, HostSharing.shared,
- Optional.empty(), Optional.empty())
- .stream()
- .map(ProvisionedHost::generateHost)
- .collect(Collectors.toList());
- nodeRepository().nodes().addNodes(hosts, Agent.DynamicProvisioningMaintainer);
+ List<Node> hosts = new ArrayList<>();
+ hostProvisioner.provisionHosts(provisionIndices, NodeType.host, nodeResources, ApplicationId.defaultId(),
+ osVersion, HostSharing.shared, Optional.empty(), Optional.empty(),
+ provisionedHosts -> {
+ hosts.addAll(provisionedHosts.stream().map(ProvisionedHost::generateHost).toList());
+ nodeRepository().nodes().addNodes(hosts, Agent.DynamicProvisioningMaintainer);
+ });
return hosts;
} catch (NodeAllocationException | IllegalArgumentException | IllegalStateException e) {
throw new NodeAllocationException("Failed to provision " + count + " " + nodeResources + ": " + e.getMessage(),
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
index 35f04683157..e1657ec3358 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/GroupPreparer.java
@@ -15,8 +15,10 @@ import com.yahoo.vespa.hosted.provision.NodesAndHosts;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner.HostSharing;
+import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
+import java.util.function.Consumer;
import java.util.function.Supplier;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -102,33 +104,35 @@ public class GroupPreparer {
NodeAllocation allocation = prepareAllocation(application, cluster, requestedNodes, surplusActiveNodes,
indices::next, wantedGroups, allNodesAndHosts);
NodeType hostType = allocation.nodeType().hostType();
- if (canProvisionDynamically(hostType)) {
+ if (canProvisionDynamically(hostType) && allocation.hostDeficit().isPresent()) {
HostSharing sharing = hostSharing(requestedNodes, hostType);
Version osVersion = nodeRepository.osVersions().targetFor(hostType).orElse(Version.emptyVersion);
- List<ProvisionedHost> provisionedHosts = allocation.hostDeficit()
- .map(deficit -> hostProvisioner.get().provisionHosts(allocation.provisionIndices(deficit.count()),
- hostType,
- deficit.resources(),
- application,
- osVersion,
- sharing,
- Optional.of(cluster.type()),
- requestedNodes.cloudAccount()))
- .orElseGet(List::of);
-
- // At this point we have started provisioning of the hosts, the first priority is to make sure that
- // the returned hosts are added to the node-repo so that they are tracked by the provision maintainers
- List<Node> hosts = provisionedHosts.stream()
- .map(ProvisionedHost::generateHost)
- .collect(Collectors.toList());
- nodeRepository.nodes().addNodes(hosts, Agent.application);
-
- // Offer the nodes on the newly provisioned hosts, this should be enough to cover the deficit
- List<NodeCandidate> candidates = provisionedHosts.stream()
- .map(host -> NodeCandidate.createNewExclusiveChild(host.generateNode(),
- host.generateHost()))
- .collect(Collectors.toList());
- allocation.offer(candidates);
+ NodeAllocation.HostDeficit deficit = allocation.hostDeficit().get();
+
+ List<Node> hosts = new ArrayList<>();
+ Consumer<List<ProvisionedHost>> provisionedHostsConsumer = provisionedHosts -> {
+ hosts.addAll(provisionedHosts.stream().map(ProvisionedHost::generateHost).toList());
+ nodeRepository.nodes().addNodes(hosts, Agent.application);
+
+ // Offer the nodes on the newly provisioned hosts, this should be enough to cover the deficit
+ List<NodeCandidate> candidates = provisionedHosts.stream()
+ .map(host -> NodeCandidate.createNewExclusiveChild(host.generateNode(),
+ host.generateHost()))
+ .collect(Collectors.toList());
+ allocation.offer(candidates);
+ };
+
+ try {
+ hostProvisioner.get().provisionHosts(
+ allocation.provisionIndices(deficit.count()), hostType, deficit.resources(), application,
+ osVersion, sharing, Optional.of(cluster.type()), requestedNodes.cloudAccount(), provisionedHostsConsumer);
+ } catch (NodeAllocationException e) {
+ // Mark the nodes that were written to ZK in the consumer for deprovisioning. While these hosts do
+ // not exist, we cannot remove them from ZK here because other nodes may already have been
+ // allocated on them, so let DynamicProvisioningMaintainer deal with it
+ hosts.forEach(host -> nodeRepository.nodes().deprovision(host.hostname(), Agent.system, nodeRepository.clock().instant()));
+ throw e;
+ }
}
if (! allocation.fulfilled() && requestedNodes.canFail())
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java
index 9b765adca89..1d95cdc09df 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/HostProvisioner.java
@@ -6,6 +6,7 @@ import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.CloudAccount;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.HostEvent;
+import com.yahoo.config.provision.NodeAllocationException;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import com.yahoo.vespa.hosted.provision.Node;
@@ -13,6 +14,7 @@ import com.yahoo.vespa.hosted.provision.Node;
import java.util.List;
import java.util.Optional;
import java.util.Set;
+import java.util.function.Consumer;
/**
* A service which supports provisioning container hosts dynamically.
@@ -45,16 +47,21 @@ public interface HostProvisioner {
* @param sharing puts requirements on sharing or exclusivity of the host to be provisioned.
* @param clusterType provision host exclusively for this cluster type
* @param cloudAccount the cloud account to use
- * @return list of {@link ProvisionedHost} describing the provisioned nodes
+ * @param provisionedHostConsumer consumer of {@link ProvisionedHost}s describing the provisioned nodes,
+ * the {@link Node} returned from {@link ProvisionedHost#generateHost()} must be
+ * written to ZK immediately in case the config server goes down while waiting
+ * for the provisioning to finish.
+ * @throws NodeAllocationException if the cloud provider cannot satisfy the request
*/
- List<ProvisionedHost> provisionHosts(List<Integer> provisionIndices,
- NodeType hostType,
- NodeResources resources,
- ApplicationId applicationId,
- Version osVersion,
- HostSharing sharing,
- Optional<ClusterSpec.Type> clusterType,
- Optional<CloudAccount> cloudAccount);
+ void provisionHosts(List<Integer> provisionIndices,
+ NodeType hostType,
+ NodeResources resources,
+ ApplicationId applicationId,
+ Version osVersion,
+ HostSharing sharing,
+ Optional<ClusterSpec.Type> clusterType,
+ Optional<CloudAccount> cloudAccount,
+ Consumer<List<ProvisionedHost>> provisionedHostConsumer) throws NodeAllocationException;
/**
* Continue provisioning of given list of Nodes.
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java
index 3ebaf764115..f9af176fc00 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java
@@ -26,6 +26,7 @@ import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
+import java.util.function.Consumer;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
@@ -60,10 +61,10 @@ public class MockHostProvisioner implements HostProvisioner {
}
@Override
- public List<ProvisionedHost> provisionHosts(List<Integer> provisionIndices, NodeType hostType, NodeResources resources,
- ApplicationId applicationId, Version osVersion, HostSharing sharing,
- Optional<ClusterSpec.Type> clusterType,
- Optional<CloudAccount> cloudAccount) {
+ public void provisionHosts(List<Integer> provisionIndices, NodeType hostType, NodeResources resources,
+ ApplicationId applicationId, Version osVersion, HostSharing sharing,
+ Optional<ClusterSpec.Type> clusterType, Optional<CloudAccount> cloudAccount,
+ Consumer<List<ProvisionedHost>> provisionedHostsConsumer) {
Flavor hostFlavor = this.hostFlavor.orElseGet(() -> flavors.stream().filter(f -> compatible(f, resources))
.findFirst()
.orElseThrow(() -> new NodeAllocationException("No host flavor matches " + resources, true)));
@@ -82,7 +83,7 @@ public class MockHostProvisioner implements HostProvisioner {
cloudAccount));
}
provisionedHosts.addAll(hosts);
- return hosts;
+ provisionedHostsConsumer.accept(hosts);
}
@Override
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTest.java
index 436cf880f1c..3570e56e196 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/DynamicProvisioningTest.java
@@ -30,6 +30,7 @@ import java.time.Instant;
import java.util.List;
import java.util.Optional;
import java.util.Set;
+import java.util.function.Consumer;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
@@ -41,6 +42,7 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
@@ -72,8 +74,8 @@ public class DynamicProvisioningTest {
mockHostProvisioner(hostProvisioner, "large", 3, null); // Provision shared hosts
prepareAndActivate(application1, clusterSpec("mycluster"), 4, 1, resources);
- verify(hostProvisioner).provisionHosts(List.of(100, 101, 102, 103), NodeType.host, resources, application1,
- Version.emptyVersion, HostSharing.any, Optional.of(ClusterSpec.Type.content), Optional.empty());
+ verify(hostProvisioner).provisionHosts(eq(List.of(100, 101, 102, 103)), eq(NodeType.host), eq(resources), eq(application1),
+ eq(Version.emptyVersion), eq(HostSharing.any), eq(Optional.of(ClusterSpec.Type.content)), eq(Optional.empty()), any());
// Total of 8 nodes should now be in node-repo, 4 active hosts and 4 active nodes
assertEquals(8, tester.nodeRepository().nodes().list().size());
@@ -96,8 +98,8 @@ public class DynamicProvisioningTest {
ApplicationId application3 = ProvisioningTester.applicationId();
mockHostProvisioner(hostProvisioner, "large", 3, application3);
prepareAndActivate(application3, clusterSpec("mycluster", true), 4, 1, resources);
- verify(hostProvisioner).provisionHosts(List.of(104, 105, 106, 107), NodeType.host, resources, application3,
- Version.emptyVersion, HostSharing.exclusive, Optional.of(ClusterSpec.Type.content), Optional.empty());
+ verify(hostProvisioner).provisionHosts(eq(List.of(104, 105, 106, 107)), eq(NodeType.host), eq(resources), eq(application3),
+ eq(Version.emptyVersion), eq(HostSharing.exclusive), eq(Optional.of(ClusterSpec.Type.content)), eq(Optional.empty()), any());
// Total of 20 nodes should now be in node-repo, 8 active hosts and 12 active nodes
assertEquals(20, tester.nodeRepository().nodes().list().size());
@@ -450,14 +452,14 @@ public class DynamicProvisioningTest {
return new ClusterResources(nodes, groups, new NodeResources(vcpu, memory, disk, 0.1, diskSpeed, storageType));
}
- @SuppressWarnings("unchecked")
private void mockHostProvisioner(HostProvisioner hostProvisioner, String hostFlavorName, int numIps, ApplicationId exclusiveTo) {
doAnswer(invocation -> {
Flavor hostFlavor = tester.nodeRepository().flavors().getFlavorOrThrow(hostFlavorName);
- List<Integer> provisionIndexes = (List<Integer>) invocation.getArguments()[0];
- NodeResources nodeResources = (NodeResources) invocation.getArguments()[2];
+ List<Integer> provisionIndexes = invocation.getArgument(0);
+ NodeResources nodeResources = invocation.getArgument(2);
+ Consumer<List<ProvisionedHost>> provisionedHostConsumer = invocation.getArgument(8);
- return provisionIndexes.stream()
+ List<ProvisionedHost> provisionedHosts = provisionIndexes.stream()
.map(hostIndex -> {
String hostHostname = "host-" + hostIndex;
String hostIp = "::" + hostIndex + ":0";
@@ -475,9 +477,10 @@ public class DynamicProvisioningTest {
when(provisionedHost.generateHost()).thenReturn(parent);
when(provisionedHost.generateNode()).thenReturn(child);
return provisionedHost;
- })
- .collect(Collectors.toList());
- }).when(hostProvisioner).provisionHosts(any(), any(), any(), any(), any(), any(), any(), any());
+ }).toList();
+ provisionedHostConsumer.accept(provisionedHosts);
+ return null;
+ }).when(hostProvisioner).provisionHosts(any(), any(), any(), any(), any(), any(), any(), any(), any());
}
}