diff options
5 files changed, 174 insertions, 23 deletions
diff --git a/flags/src/main/java/com/yahoo/vespa/flags/custom/SharedHost.java b/flags/src/main/java/com/yahoo/vespa/flags/custom/SharedHost.java index e463159eb8f..ab9127ebfe4 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/custom/SharedHost.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/custom/SharedHost.java @@ -2,6 +2,7 @@ package com.yahoo.vespa.flags.custom; import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonGetter; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonInclude; @@ -19,29 +20,50 @@ import java.util.Objects; @JsonIgnoreProperties(ignoreUnknown = true) @JsonInclude(value = JsonInclude.Include.NON_NULL) public class SharedHost { + private final int DEFAULT_MIN_COUNT = 0; + private final List<HostResources> resources; + private final int minCount; public static SharedHost createDisabled() { - return new SharedHost(null); + return new SharedHost(null, null); } + /** + * @param resourcesOrNull the resources of the shared host (or several to support e.g. tiers or + * fast/slow disks separately) + * @param minCountOrNull the minimum number of shared hosts + */ @JsonCreator - public SharedHost(@JsonProperty("resources") List<HostResources> resources) { - this.resources = resources == null ? List.of() : List.copyOf(resources); + public SharedHost(@JsonProperty("resources") List<HostResources> resourcesOrNull, + @JsonProperty("min-count") Integer minCountOrNull) { + this.resources = resourcesOrNull == null ? List.of() : List.copyOf(resourcesOrNull); + this.minCount = requireNonNegative(minCountOrNull, DEFAULT_MIN_COUNT, "min-count"); } - @JsonProperty("resources") + @JsonGetter("resources") public List<HostResources> getResourcesOrNull() { return resources.isEmpty() ? null : resources; } + @JsonGetter("min-count") + public Integer getMinCountOrNull() { + return minCount == DEFAULT_MIN_COUNT ? null : minCount; + } + + @JsonIgnore + public boolean isEnabled() { + return resources.size() > 0; + } + @JsonIgnore public List<HostResources> getHostResources() { return resources; } - public boolean isEnabled() { - return resources.size() > 0; + @JsonIgnore + public int getMinCount() { + return minCount; } @Override @@ -61,4 +83,16 @@ public class SharedHost { public int hashCode() { return Objects.hash(resources); } + + private int requireNonNegative(Integer integerOrNull, int defaultValue, String fieldName) { + if (integerOrNull == null) { + return defaultValue; + } + + if (integerOrNull < 0) { + throw new IllegalArgumentException(fieldName + " cannot be negative"); + } + + return integerOrNull; + } } diff --git a/flags/src/test/java/com/yahoo/vespa/flags/FlagsTest.java b/flags/src/test/java/com/yahoo/vespa/flags/FlagsTest.java index 636eea2c33c..6b043ea5a89 100644 --- a/flags/src/test/java/com/yahoo/vespa/flags/FlagsTest.java +++ b/flags/src/test/java/com/yahoo/vespa/flags/FlagsTest.java @@ -114,7 +114,8 @@ public class FlagsTest { SharedHost sharedHost = new SharedHost(List.of(new HostResources( 4.0, 16.0, 50.0, 0.3, "fast", "local", - 10))); + 10)), + null); testGeneric(Flags.SHARED_HOST, sharedHost); } diff --git a/flags/src/test/java/com/yahoo/vespa/flags/custom/SharedHostTest.java b/flags/src/test/java/com/yahoo/vespa/flags/custom/SharedHostTest.java new file mode 100644 index 00000000000..f0a11f244a4 --- /dev/null +++ b/flags/src/test/java/com/yahoo/vespa/flags/custom/SharedHostTest.java @@ -0,0 +1,25 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.flags.custom; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.junit.Test; + +import java.io.IOException; +import java.util.List; + +import static org.junit.Assert.assertEquals; + +public class SharedHostTest { + @Test + public void serialization() throws IOException { + verifySerialization(new SharedHost(List.of(new HostResources(1.0, 2.0, 3.0, 4.0, "fast", "remote", 5)), 6)); + verifySerialization(new SharedHost(List.of(new HostResources(1.0, 2.0, 3.0, 4.0, "fast", "remote", 5)), null)); + } + + private void verifySerialization(SharedHost sharedHost) throws IOException { + ObjectMapper mapper = new ObjectMapper(); + String json = mapper.writeValueAsString(sharedHost); + SharedHost deserialized = mapper.readValue(json, SharedHost.class); + assertEquals(sharedHost, deserialized); + } +}
\ No newline at end of file diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java index 22d1e0333fb..392bcc5aa08 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java @@ -15,13 +15,16 @@ import com.yahoo.transaction.Mutex; import com.yahoo.vespa.flags.BooleanFlag; import com.yahoo.vespa.flags.FlagSource; import com.yahoo.vespa.flags.Flags; +import com.yahoo.vespa.flags.JacksonFlag; import com.yahoo.vespa.flags.ListFlag; import com.yahoo.vespa.flags.custom.ClusterCapacity; +import com.yahoo.vespa.flags.custom.SharedHost; import com.yahoo.vespa.hosted.provision.LockedNodeList; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Agent; +import com.yahoo.vespa.hosted.provision.node.History; import com.yahoo.vespa.hosted.provision.node.IP; import com.yahoo.vespa.hosted.provision.provisioning.FatalProvisioningException; import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner; @@ -35,6 +38,7 @@ import com.yahoo.yolean.Exceptions; import javax.naming.NameNotFoundException; import java.time.Duration; +import java.time.Instant; import java.util.ArrayList; import java.util.Comparator; import java.util.HashMap; @@ -60,6 +64,7 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { private final HostProvisioner hostProvisioner; private final ListFlag<ClusterCapacity> preprovisionCapacityFlag; private final BooleanFlag compactPreprovisionCapacityFlag; + private final JacksonFlag<SharedHost> sharedHostFlag; DynamicProvisioningMaintainer(NodeRepository nodeRepository, Duration interval, @@ -70,6 +75,7 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { this.hostProvisioner = hostProvisioner; this.preprovisionCapacityFlag = Flags.PREPROVISION_CAPACITY.bindTo(flagSource); this.compactPreprovisionCapacityFlag = Flags.COMPACT_PREPROVISION_CAPACITY.bindTo(flagSource); + this.sharedHostFlag = Flags.SHARED_HOST.bindTo(flagSource); } @Override @@ -204,8 +210,36 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { } private List<Node> findExcessHosts(NodeList nodeList) { - final List<Node> nodes = provisionUntilNoDeficit(nodeList); + final List<Node> nodes = new ArrayList<>(provisionUntilNoDeficit(nodeList)); + + Map<String, Node> sharedHosts = new HashMap<>(findSharedHosts(nodeList)); + + int minCount = sharedHostFlag.value().getMinCount(); + int deficit = minCount - sharedHosts.size(); + if (deficit > 0) { + provisionHosts(deficit, NodeResources.unspecified()) + .forEach(host -> { + sharedHosts.put(host.hostname(), host); + nodes.add(host); + }); + } + + return candidatesForRemoval(nodes).stream() + .sorted(Comparator.comparing(node -> node.history().events().stream() + .map(History.Event::at).min(Comparator.naturalOrder()).orElseGet(() -> Instant.MIN))) + .filter(node -> { + if (!sharedHosts.containsKey(node.hostname()) || sharedHosts.size() > minCount) { + sharedHosts.remove(node.hostname()); + return true; + } else { + return false; + } + }) + .collect(Collectors.toList()); + } + + private List<Node> candidatesForRemoval(List<Node> nodes) { Map<String, Node> hostsByHostname = new HashMap<>(nodes.stream() .filter(node -> node.type() == NodeType.host) .filter(host -> host.state() != Node.State.parked || host.status().wantToDeprovision()) @@ -220,6 +254,14 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { return List.copyOf(hostsByHostname.values()); } + private Map<String, Node> findSharedHosts(NodeList nodeList) { + return nodeList.stream() + .filter(node -> NodeRepository.canAllocateTenantNodeTo(node, true)) + .filter(node -> node.reservedTo().isEmpty()) + .filter(node -> node.exclusiveTo().isEmpty()) + .collect(Collectors.toMap(Node::hostname, Function.identity())); + } + /** * @return the nodes in {@code nodeList} plus all hosts provisioned, plus all preprovision capacity * nodes that were allocated. @@ -246,21 +288,25 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { throw new IllegalStateException("Have provisioned " + numProvisions + " times but there's still deficit: aborting"); } - try { - Version osVersion = nodeRepository().osVersions().targetFor(NodeType.host).orElse(Version.emptyVersion); - List<Integer> provisionIndexes = nodeRepository().database().getProvisionIndexes(deficit.get().count()); - List<Node> hosts = hostProvisioner.provisionHosts(provisionIndexes, toNodeResources(deficit.get()), - ApplicationId.defaultId(), osVersion, HostSharing.shared) - .stream() - .map(ProvisionedHost::generateHost) - .collect(Collectors.toList()); - nodeRepository().addNodes(hosts, Agent.DynamicProvisioningMaintainer); - nodesPlusProvisioned.addAll(hosts); - } catch (OutOfCapacityException | IllegalArgumentException | IllegalStateException e) { - throw new OutOfCapacityException("Failed to pre-provision " + deficit.get() + ": " + e.getMessage()); - } catch (RuntimeException e) { - throw new RuntimeException("Failed to pre-provision " + deficit.get() + ", will retry in " + interval(), e); - } + nodesPlusProvisioned.addAll(provisionHosts(deficit.get().count(), toNodeResources(deficit.get()))); + } + } + + private List<Node> provisionHosts(int count, NodeResources nodeResources) { + try { + Version osVersion = nodeRepository().osVersions().targetFor(NodeType.host).orElse(Version.emptyVersion); + List<Integer> provisionIndexes = nodeRepository().database().getProvisionIndexes(count); + List<Node> hosts = hostProvisioner.provisionHosts(provisionIndexes, nodeResources, + ApplicationId.defaultId(), osVersion, HostSharing.shared) + .stream() + .map(ProvisionedHost::generateHost) + .collect(Collectors.toList()); + nodeRepository().addNodes(hosts, Agent.DynamicProvisioningMaintainer); + return hosts; + } catch (OutOfCapacityException | IllegalArgumentException | IllegalStateException e) { + throw new OutOfCapacityException("Failed to provision " + count + " " + nodeResources + ": " + e.getMessage()); + } catch (RuntimeException e) { + throw new RuntimeException("Failed to provision " + count + " " + nodeResources + ", will retry in " + interval(), e); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java index 292a6872bb0..c747d95d41f 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java @@ -18,6 +18,7 @@ import com.yahoo.config.provision.Zone; import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.flags.InMemoryFlagSource; import com.yahoo.vespa.flags.custom.ClusterCapacity; +import com.yahoo.vespa.flags.custom.SharedHost; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Address; @@ -267,6 +268,50 @@ public class DynamicProvisioningMaintainerTest { } @Test + public void verify_min_count_of_shared_hosts() { + // What's going on here? We are trying to verify the impact of varying the minimum number of + // shared hosts (SharedHost.minCount()). + // + // addInitialNodes() adds 4 tenant hosts: + // host1 shared !removable # not removable because it got child nodes w/allocation + // host2 !shared removable # not counted as a shared host because it is failed + // host3 shared removable + // host4 shared !removable # not removable because it got child nodes w/allocation + // + // Hosts 1, 3, and 4 count as "shared hosts" with respect to the minCount lower boundary. + // Hosts 3 and 4 are removable, that is they will be deprovisioned as excess hosts unless + // prevented by minCount. + + // minCount=0: All (2) removable hosts are deprovisioned + assertWithMinCount(0, 0, 2); + // minCount=1: The same thing happens, because there are 2 shared hosts left + assertWithMinCount(1, 0, 2); + assertWithMinCount(2, 0, 2); + // minCount=3: since we require 3 shared hosts, host3 is not deprovisioned. + assertWithMinCount(3, 0, 1); + // 4 shared hosts require we provision 1 shared host + assertWithMinCount(4, 1, 1); + // 5 shared hosts require we provision 2 shared hosts + assertWithMinCount(5, 2, 1); + assertWithMinCount(6, 3, 1); + } + + private void assertWithMinCount(int minCount, int provisionCount, int deprovisionCount) { + var tester = new DynamicProvisioningTester().addInitialNodes(); + tester.hostProvisioner.provisionSharedHost("host4"); + + tester.flagSource.withJacksonFlag(Flags.SHARED_HOST.id(), new SharedHost(null, minCount), SharedHost.class); + tester.maintainer.maintain(); + assertEquals(provisionCount, tester.hostProvisioner.provisionedHosts.size()); + assertEquals(deprovisionCount, tester.hostProvisioner.deprovisionedHosts); + + // Verify next maintain is a no-op + tester.maintainer.maintain(); + assertEquals(provisionCount, tester.hostProvisioner.provisionedHosts.size()); + assertEquals(deprovisionCount, tester.hostProvisioner.deprovisionedHosts); + } + + @Test public void does_not_remove_if_host_provisioner_failed() { var tester = new DynamicProvisioningTester(); Node host2 = tester.addNode("host2", Optional.empty(), NodeType.host, Node.State.failed, DynamicProvisioningTester.tenantApp); |