From 2a348d61213778f11c762de5f3570d8174f9f294 Mon Sep 17 00:00:00 2001 From: Valerij Fredriksen Date: Wed, 2 Nov 2022 14:24:49 +0100 Subject: Move resume provisioning to separate maintainer --- .../maintenance/DynamicProvisioningMaintainer.java | 50 ------------ .../maintenance/HostResumeProvisioner.java | 89 ++++++++++++++++++++++ .../maintenance/NodeRepositoryMaintenance.java | 3 + .../provision/testutils/MockHostProvisioner.java | 8 +- 4 files changed, 96 insertions(+), 54 deletions(-) create mode 100644 node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostResumeProvisioner.java (limited to 'node-repository/src/main') diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java index 6470e4fdb23..245bce1b9e8 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java @@ -26,17 +26,13 @@ import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.NodesAndHosts; import com.yahoo.vespa.hosted.provision.node.Agent; import com.yahoo.vespa.hosted.provision.node.History; -import com.yahoo.vespa.hosted.provision.node.IP; -import com.yahoo.vespa.hosted.provision.provisioning.FatalProvisioningException; import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner; import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner.HostSharing; import com.yahoo.vespa.hosted.provision.provisioning.NodeCandidate; import com.yahoo.vespa.hosted.provision.provisioning.NodePrioritizer; import com.yahoo.vespa.hosted.provision.provisioning.NodeSpec; import com.yahoo.vespa.hosted.provision.provisioning.ProvisionedHost; -import com.yahoo.yolean.Exceptions; -import 
javax.naming.NamingException; import java.time.Duration; import java.time.Instant; import java.util.ArrayList; @@ -45,7 +41,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.Set; import java.util.function.Function; import java.util.logging.Level; import java.util.logging.Logger; @@ -77,46 +72,10 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { @Override protected double maintain() { NodeList nodes = nodeRepository().nodes().list(); - resumeProvisioning(nodes); convergeToCapacity(nodes); return 1.0; } - /** Resume provisioning of already provisioned hosts and their children */ - private void resumeProvisioning(NodeList nodes) { - Map<String, Set<Node>> nodesByProvisionedParentHostname = - nodes.nodeType(NodeType.tenant, NodeType.config, NodeType.controller) - .asList() - .stream() - .filter(node -> node.parentHostname().isPresent()) - .collect(Collectors.groupingBy(node -> node.parentHostname().get(), Collectors.toSet())); - - nodes.state(Node.State.provisioned).nodeType(NodeType.host, NodeType.confighost, NodeType.controllerhost).forEach(host -> { - Set<Node> children = nodesByProvisionedParentHostname.getOrDefault(host.hostname(), Set.of()); - try { - try (var lock = nodeRepository().nodes().lockUnallocated()) { - List<Node> updatedNodes = hostProvisioner.provision(host, children); - verifyDns(updatedNodes); - nodeRepository().nodes().write(updatedNodes, lock); - } - } catch (IllegalArgumentException | IllegalStateException e) { - log.log(Level.INFO, "Could not provision " + host.hostname() + " with " + children.size() + " children, will retry in " + - interval() + ": " + Exceptions.toMessageString(e)); - } catch (FatalProvisioningException e) { - log.log(Level.SEVERE, "Failed to provision " + host.hostname() + " with " + children.size() + - " children, failing out the host recursively", e); - // Fail out as operator to force a quick redeployment - 
nodeRepository().nodes().failOrMarkRecursively( - host.hostname(), Agent.DynamicProvisioningMaintainer, "Failed by HostProvisioner due to provisioning failure"); - } catch (RuntimeException e) { - if (e.getCause() instanceof NamingException) - log.log(Level.INFO, "Could not provision " + host.hostname() + ", will retry in " + interval() + ": " + Exceptions.toMessageString(e)); - else - log.log(Level.WARNING, "Failed to provision " + host.hostname() + ", will retry in " + interval(), e); - } - }); - } - /** Converge zone to wanted capacity */ private void convergeToCapacity(NodeList nodes) { List<Node> excessHosts; @@ -337,13 +296,4 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { return new NodeResources(clusterCapacity.vcpu(), clusterCapacity.memoryGb(), clusterCapacity.diskGb(), clusterCapacity.bandwidthGbps()); } - - /** Verify DNS configuration of given nodes */ - private void verifyDns(List<Node> nodes) { - for (var node : nodes) { - for (var ipAddress : node.ipConfig().primary()) { - IP.verifyDns(node.hostname(), ipAddress, nodeRepository().nameResolver()); - } - } - } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostResumeProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostResumeProvisioner.java new file mode 100644 index 00000000000..5810e0a508f --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostResumeProvisioner.java @@ -0,0 +1,89 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+package com.yahoo.vespa.hosted.provision.maintenance; + +import com.yahoo.config.provision.NodeType; +import com.yahoo.jdisc.Metric; +import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeList; +import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.hosted.provision.node.Agent; +import com.yahoo.vespa.hosted.provision.node.IP; +import com.yahoo.vespa.hosted.provision.provisioning.FatalProvisioningException; +import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner; +import com.yahoo.yolean.Exceptions; + +import javax.naming.NamingException; +import java.time.Duration; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.stream.Collectors; + +/** + * @author freva + * @author mpolden + */ +public class HostResumeProvisioner extends NodeRepositoryMaintainer { + + private static final Logger log = Logger.getLogger(HostResumeProvisioner.class.getName()); + + private final HostProvisioner hostProvisioner; + + HostResumeProvisioner(NodeRepository nodeRepository, Duration interval, Metric metric, HostProvisioner hostProvisioner) { + super(nodeRepository, interval, metric); + this.hostProvisioner = hostProvisioner; + } + + @Override + protected double maintain() { + NodeList allNodes = nodeRepository().nodes().list(); + Map<String, Set<Node>> nodesByProvisionedParentHostname = + allNodes.nodeType(NodeType.tenant, NodeType.config, NodeType.controller) + .asList() + .stream() + .filter(node -> node.parentHostname().isPresent()) + .collect(Collectors.groupingBy(node -> node.parentHostname().get(), Collectors.toSet())); + + NodeList hosts = allNodes.state(Node.State.provisioned).nodeType(NodeType.host, NodeType.confighost, NodeType.controllerhost); + int failures = 0; + for (Node host : hosts) { + Set<Node> children = nodesByProvisionedParentHostname.getOrDefault(host.hostname(), Set.of()); + try { + try (var lock = 
nodeRepository().nodes().lockUnallocated()) { + List<Node> updatedNodes = hostProvisioner.provision(host, children); + verifyDns(updatedNodes); + nodeRepository().nodes().write(updatedNodes, lock); + } + } catch (IllegalArgumentException | IllegalStateException e) { + log.log(Level.INFO, "Could not provision " + host.hostname() + " with " + children.size() + " children, will retry in " + + interval() + ": " + Exceptions.toMessageString(e)); + } catch (FatalProvisioningException e) { + failures++; + log.log(Level.SEVERE, "Failed to provision " + host.hostname() + " with " + children.size() + + " children, failing out the host recursively", e); + // Fail out as operator to force a quick redeployment + nodeRepository().nodes().failOrMarkRecursively( + host.hostname(), Agent.DynamicProvisioningMaintainer, "Failed by HostProvisioner due to provisioning failure"); + } catch (RuntimeException e) { + if (e.getCause() instanceof NamingException) + log.log(Level.INFO, "Could not provision " + host.hostname() + ", will retry in " + interval() + ": " + Exceptions.toMessageString(e)); + else { + failures++; + log.log(Level.WARNING, "Failed to provision " + host.hostname() + ", will retry in " + interval(), e); + } + } + } + return asSuccessFactor(hosts.size(), failures); + } + + /** Verify DNS configuration of given nodes */ + private void verifyDns(List<Node> nodes) { + for (var node : nodes) { + for (var ipAddress : node.ipConfig().primary()) { + IP.verifyDns(node.hostname(), ipAddress, nodeRepository().nameResolver()); + } + } + } +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index 6175531fc65..acc5dd66f7a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ 
b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -71,6 +71,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { provisionServiceProvider.getHostProvisioner() .map(hostProvisioner -> List.of( new DynamicProvisioningMaintainer(nodeRepository, defaults.dynamicProvisionerInterval, hostProvisioner, flagSource, metric), + new HostResumeProvisioner(nodeRepository, defaults.hostResumeProvisionerInterval, metric, hostProvisioner), new HostRetirer(nodeRepository, defaults.hostRetirerInterval, metric, hostProvisioner), new DiskReplacer(nodeRepository, defaults.diskReplacerInterval, metric, hostProvisioner))) .ifPresent(maintainers::addAll); @@ -112,6 +113,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { private final Duration infrastructureProvisionInterval; private final Duration loadBalancerExpirerInterval; private final Duration dynamicProvisionerInterval; + private final Duration hostResumeProvisionerInterval; private final Duration diskReplacerInterval; private final Duration osUpgradeActivatorInterval; private final Duration rebalancerInterval; @@ -126,6 +128,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { DefaultTimes(Zone zone, Deployer deployer) { autoscalingInterval = Duration.ofMinutes(5); dynamicProvisionerInterval = Duration.ofMinutes(3); + hostResumeProvisionerInterval = Duration.ofMinutes(3); diskReplacerInterval = Duration.ofMinutes(3); failedExpirerInterval = Duration.ofMinutes(10); failGrace = Duration.ofMinutes(20); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java index 3aa841ecacf..76e16e9fbaf 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java +++ 
b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/MockHostProvisioner.java @@ -70,7 +70,7 @@ public class MockHostProvisioner implements HostProvisioner { .orElseThrow(() -> new NodeAllocationException("No host flavor matches " + resources, true))); List<ProvisionedHost> hosts = new ArrayList<>(); for (int index : provisionIndices) { - String hostHostname = hostType == NodeType.host ? "hostname" + index : hostType.name() + index; + String hostHostname = hostType == NodeType.host ? "host" + index : hostType.name() + index; hosts.add(new ProvisionedHost("id-of-" + hostType.name() + index, hostHostname, hostFlavor, @@ -173,11 +173,11 @@ public class MockHostProvisioner implements HostProvisioner { } private List<Address>
createAddressesForHost(NodeType hostType, Flavor flavor, int hostIndex) { - long numAddresses = Math.max(1, Math.round(flavor.resources().bandwidthGbps())); - return IntStream.range(0, (int) numAddresses) + long numAddresses = Math.max(2, Math.round(flavor.resources().bandwidthGbps())); + return IntStream.range(1, (int) numAddresses) .mapToObj(i -> { String hostname = hostType == NodeType.host - ? "nodename" + hostIndex + "_" + i + ? "host" + hostIndex + "-" + i : hostType.childNodeType().name() + i; return new Address(hostname); }) -- cgit v1.2.3