aboutsummaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
author Håkon Hallingstad <hakon@verizonmedia.com> 2020-11-30 20:20:22 +0100
committer Håkon Hallingstad <hakon@verizonmedia.com> 2020-11-30 20:20:22 +0100
commit e43bf39e6ec973297649f7e462b37e5aec8f155f (patch)
tree ab81b1a3aafc564d382dc51a6a809a2ccf43ee71 /node-repository
parent fa4302626526d3dcba9aa93825392ee24e2bf7e4 (diff)
Support lower bound on number of shared hosts
Adds a 'minCount' field to the shared host jackson flag, denoting the minimum number of "shared hosts" that must exist; if there are fewer, the deficit will be provisioned by DynamicProvisioningMaintainer. A "shared host" is one that would be considered for allocation if current tenant node allocations were removed: it must be a tenant host, cannot be an exclusiveTo host, etc. minCount requires the setting of (at least one) shared host.
Diffstat (limited to 'node-repository')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java 78
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java 45
2 files changed, 107 insertions, 16 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
index 22d1e0333fb..392bcc5aa08 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
@@ -15,13 +15,16 @@ import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.flags.BooleanFlag;
import com.yahoo.vespa.flags.FlagSource;
import com.yahoo.vespa.flags.Flags;
+import com.yahoo.vespa.flags.JacksonFlag;
import com.yahoo.vespa.flags.ListFlag;
import com.yahoo.vespa.flags.custom.ClusterCapacity;
+import com.yahoo.vespa.flags.custom.SharedHost;
import com.yahoo.vespa.hosted.provision.LockedNodeList;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Agent;
+import com.yahoo.vespa.hosted.provision.node.History;
import com.yahoo.vespa.hosted.provision.node.IP;
import com.yahoo.vespa.hosted.provision.provisioning.FatalProvisioningException;
import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner;
@@ -35,6 +38,7 @@ import com.yahoo.yolean.Exceptions;
import javax.naming.NameNotFoundException;
import java.time.Duration;
+import java.time.Instant;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
@@ -60,6 +64,7 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
private final HostProvisioner hostProvisioner;
private final ListFlag<ClusterCapacity> preprovisionCapacityFlag;
private final BooleanFlag compactPreprovisionCapacityFlag;
+ private final JacksonFlag<SharedHost> sharedHostFlag;
DynamicProvisioningMaintainer(NodeRepository nodeRepository,
Duration interval,
@@ -70,6 +75,7 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
this.hostProvisioner = hostProvisioner;
this.preprovisionCapacityFlag = Flags.PREPROVISION_CAPACITY.bindTo(flagSource);
this.compactPreprovisionCapacityFlag = Flags.COMPACT_PREPROVISION_CAPACITY.bindTo(flagSource);
+ this.sharedHostFlag = Flags.SHARED_HOST.bindTo(flagSource);
}
@Override
@@ -204,8 +210,36 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
}
private List<Node> findExcessHosts(NodeList nodeList) { // provisions any deficit first, then returns the hosts that may be removed as excess
- final List<Node> nodes = provisionUntilNoDeficit(nodeList);
+ final List<Node> nodes = new ArrayList<>(provisionUntilNoDeficit(nodeList)); // mutable copy: hosts provisioned below are appended to it
+
+ Map<String, Node> sharedHosts = new HashMap<>(findSharedHosts(nodeList)); // mutable copy, keyed by hostname
+
+ int minCount = sharedHostFlag.value().getMinCount();
+ int deficit = minCount - sharedHosts.size();
+ if (deficit > 0) { // fewer shared hosts than minCount: provision the difference
+ provisionHosts(deficit, NodeResources.unspecified())
+ .forEach(host -> {
+ sharedHosts.put(host.hostname(), host);
+ nodes.add(host);
+ });
+ }
+
+ return candidatesForRemoval(nodes).stream()
+ .sorted(Comparator.comparing(node -> node.history().events().stream() // order candidates by their earliest history event
+ .map(History.Event::at).min(Comparator.naturalOrder()).orElseGet(() -> Instant.MIN))) // candidates without any event sort first
+ .filter(node -> { // stateful filter: every accepted node is also dropped from sharedHosts
+ if (!sharedHosts.containsKey(node.hostname()) || sharedHosts.size() > minCount) { // not a shared host, or more than minCount shared hosts remain
+ sharedHosts.remove(node.hostname());
+ return true;
+ } else { // a shared host while at most minCount remain: keep it so the count does not drop below minCount
+ return false;
+ }
+ })
+ .collect(Collectors.toList());
+ }
+
+ private List<Node> candidatesForRemoval(List<Node> nodes) {
Map<String, Node> hostsByHostname = new HashMap<>(nodes.stream()
.filter(node -> node.type() == NodeType.host)
.filter(host -> host.state() != Node.State.parked || host.status().wantToDeprovision())
@@ -220,6 +254,14 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
return List.copyOf(hostsByHostname.values());
}
+ private Map<String, Node> findSharedHosts(NodeList nodeList) { // returns hostname -> node for every node that passes all three filters below
+ return nodeList.stream()
+ .filter(node -> NodeRepository.canAllocateTenantNodeTo(node, true))
+ .filter(node -> node.reservedTo().isEmpty()) // drop nodes that have a reservedTo value
+ .filter(node -> node.exclusiveTo().isEmpty()) // drop nodes that have an exclusiveTo value
+ .collect(Collectors.toMap(Node::hostname, Function.identity())); // two-argument toMap throws IllegalStateException on a duplicate hostname
+ }
+
/**
* @return the nodes in {@code nodeList} plus all hosts provisioned, plus all preprovision capacity
* nodes that were allocated.
@@ -246,21 +288,25 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
throw new IllegalStateException("Have provisioned " + numProvisions + " times but there's still deficit: aborting");
}
- try {
- Version osVersion = nodeRepository().osVersions().targetFor(NodeType.host).orElse(Version.emptyVersion);
- List<Integer> provisionIndexes = nodeRepository().database().getProvisionIndexes(deficit.get().count());
- List<Node> hosts = hostProvisioner.provisionHosts(provisionIndexes, toNodeResources(deficit.get()),
- ApplicationId.defaultId(), osVersion, HostSharing.shared)
- .stream()
- .map(ProvisionedHost::generateHost)
- .collect(Collectors.toList());
- nodeRepository().addNodes(hosts, Agent.DynamicProvisioningMaintainer);
- nodesPlusProvisioned.addAll(hosts);
- } catch (OutOfCapacityException | IllegalArgumentException | IllegalStateException e) {
- throw new OutOfCapacityException("Failed to pre-provision " + deficit.get() + ": " + e.getMessage());
- } catch (RuntimeException e) {
- throw new RuntimeException("Failed to pre-provision " + deficit.get() + ", will retry in " + interval(), e);
- }
+ nodesPlusProvisioned.addAll(provisionHosts(deficit.get().count(), toNodeResources(deficit.get())));
+ }
+ }
+
+ private List<Node> provisionHosts(int count, NodeResources nodeResources) { // provisions hosts via hostProvisioner, adds them to the node repository and returns them
+ try {
+ Version osVersion = nodeRepository().osVersions().targetFor(NodeType.host).orElse(Version.emptyVersion); // falls back to the empty version when no target is set for hosts
+ List<Integer> provisionIndexes = nodeRepository().database().getProvisionIndexes(count);
+ List<Node> hosts = hostProvisioner.provisionHosts(provisionIndexes, nodeResources,
+ ApplicationId.defaultId(), osVersion, HostSharing.shared)
+ .stream()
+ .map(ProvisionedHost::generateHost)
+ .collect(Collectors.toList());
+ nodeRepository().addNodes(hosts, Agent.DynamicProvisioningMaintainer); // recorded with this maintainer as the agent
+ return hosts;
+ } catch (OutOfCapacityException | IllegalArgumentException | IllegalStateException e) { // these three are all reported to the caller as OutOfCapacityException
+ throw new OutOfCapacityException("Failed to provision " + count + " " + nodeResources + ": " + e.getMessage()); // NOTE(review): only the message is kept, the original exception is not chained as cause - confirm whether a cause-accepting constructor exists
+ } catch (RuntimeException e) { // any other runtime failure is rethrown with the original exception as cause
+ throw new RuntimeException("Failed to provision " + count + " " + nodeResources + ", will retry in " + interval(), e);
}
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
index 292a6872bb0..c747d95d41f 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
@@ -18,6 +18,7 @@ import com.yahoo.config.provision.Zone;
import com.yahoo.vespa.flags.Flags;
import com.yahoo.vespa.flags.InMemoryFlagSource;
import com.yahoo.vespa.flags.custom.ClusterCapacity;
+import com.yahoo.vespa.flags.custom.SharedHost;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Address;
@@ -267,6 +268,50 @@ public class DynamicProvisioningMaintainerTest {
}
@Test
+ public void verify_min_count_of_shared_hosts() {
+ // What's going on here? We are trying to verify the impact of varying the minimum number of
+ // shared hosts (SharedHost.minCount()).
+ //
+ // addInitialNodes() adds 4 tenant hosts:
+ // host1 shared !removable # not removable because it has child nodes w/allocation
+ // host2 !shared removable # not counted as a shared host because it is failed
+ // host3 shared removable
+ // host4 shared !removable # not removable because it has child nodes w/allocation
+ //
+ // Hosts 1, 3, and 4 count as "shared hosts" with respect to the minCount lower boundary.
+ // Hosts 2 and 3 are removable, that is they will be deprovisioned as excess hosts unless
+ // prevented by minCount (which can only protect host3, since host2 is not a shared host).
+
+ // minCount=0: All (2) removable hosts are deprovisioned
+ assertWithMinCount(0, 0, 2);
+ // minCount=1: The same thing happens, because there are 2 shared hosts left
+ assertWithMinCount(1, 0, 2);
+ assertWithMinCount(2, 0, 2);
+ // minCount=3: since we require 3 shared hosts, host3 is not deprovisioned.
+ assertWithMinCount(3, 0, 1);
+ // 4 shared hosts require we provision 1 shared host
+ assertWithMinCount(4, 1, 1);
+ // 5 shared hosts require we provision 2 shared hosts
+ assertWithMinCount(5, 2, 1);
+ assertWithMinCount(6, 3, 1);
+ }
+
+ private void assertWithMinCount(int minCount, int provisionCount, int deprovisionCount) { // runs maintain() with the given SHARED_HOST minCount and checks the expected provision/deprovision counts
+ var tester = new DynamicProvisioningTester().addInitialNodes(); // a fresh tester per call, so counts from earlier calls do not carry over
+ tester.hostProvisioner.provisionSharedHost("host4");
+
+ tester.flagSource.withJacksonFlag(Flags.SHARED_HOST.id(), new SharedHost(null, minCount), SharedHost.class); // first constructor argument is left null; only minCount is set
+ tester.maintainer.maintain();
+ assertEquals(provisionCount, tester.hostProvisioner.provisionedHosts.size());
+ assertEquals(deprovisionCount, tester.hostProvisioner.deprovisionedHosts);
+
+ // Verify next maintain is a no-op
+ tester.maintainer.maintain();
+ assertEquals(provisionCount, tester.hostProvisioner.provisionedHosts.size()); // counts unchanged after the second run
+ assertEquals(deprovisionCount, tester.hostProvisioner.deprovisionedHosts);
+ }
+
+ @Test
public void does_not_remove_if_host_provisioner_failed() {
var tester = new DynamicProvisioningTester();
Node host2 = tester.addNode("host2", Optional.empty(), NodeType.host, Node.State.failed, DynamicProvisioningTester.tenantApp);