diff options
author | Valerij Fredriksen <freva@users.noreply.github.com> | 2023-06-22 18:16:27 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-22 18:16:27 +0200 |
commit | fec5b4b5837826129e9e06bd68c1488dd93766df (patch) | |
tree | 9c34ea5d2cf6955ae16475d934bc2a0ea6a6e413 | |
parent | 71a4d6ec1ee9064db42c1a7405b3b96c11233956 (diff) | |
parent | fb3c8fb255f1caf4f9fb05dc65a54536a77b9c74 (diff) |
Merge pull request #27522 from vespa-engine/hmusum/node-repo-cleanup-1
Hmusum/node repo cleanup 1
5 files changed, 62 insertions, 51 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java index e9e3fd5179a..e8ba75113c6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainer.java @@ -85,16 +85,7 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { } private double markForRemoval(List<Node> provisionedSnapshot) { - // Group nodes by parent; no parent means it's a host. - Map<Optional<String>, List<Node>> nodesByParent = provisionedSnapshot.stream().collect(groupingBy(Node::parentHostname)); - - // Find all hosts that we once thought were empty (first clause), or whose children are now all removable (second clause). - List<Node> emptyHosts = nodesByParent.get(Optional.<String>empty()).stream() - .filter(host -> host.hostEmptyAt().isPresent() - || nodesByParent.getOrDefault(Optional.of(host.hostname()), List.of()) - .stream().allMatch(HostCapacityMaintainer::canDeprovision)) - .toList(); - + List<Node> emptyHosts = findEmptyOrRemovableHosts(provisionedSnapshot); if (emptyHosts.isEmpty()) return 1; int attempts = 0, success = 0; @@ -108,18 +99,16 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { // Re-read all nodes under lock and compute the candidates for removal. The actual nodes we want // to mark for removal is the intersection with typeEmptyHosts, which excludes the preprovisioned hosts. Map<Optional<String>, List<Node>> currentNodesByParent = nodeRepository().nodes().list().stream().collect(groupingBy(Node::parentHostname)); - List<Node> candidateHosts = new ArrayList<>(currentNodesByParent.get(Optional.<String>empty())); + List<Node> candidateHosts = new ArrayList<>(getHosts(currentNodesByParent)); candidateHosts.retainAll(typeEmptyHosts); for (Node host : candidateHosts) { attempts++; // Any hosts that are no longer empty should be marked as such, and excluded from removal. - if (currentNodesByParent.getOrDefault(Optional.of(host.hostname()), List.of()) - .stream().anyMatch(n -> ! canDeprovision(n))) { - if (host.hostEmptyAt().isPresent()) { - nodeRepository().nodes().write(host.withHostEmptyAt(null), lock); - } + if (currentNodesByParent.getOrDefault(Optional.of(host.hostname()), List.of()).stream().anyMatch(n -> ! canDeprovision(n)) + && host.hostEmptyAt().isPresent()) { + nodeRepository().nodes().write(host.withHostEmptyAt(null), lock); } // If the host is still empty, we can mark it as empty now, or mark it for removal if it has already expired. else { @@ -282,11 +271,38 @@ public class HostCapacityMaintainer extends NodeRepositoryMaintainer { nodeResources, nodeRepository().clock().instant())) .toList(); - } private static NodeResources toNodeResources(ClusterCapacity clusterCapacity) { - return new NodeResources(clusterCapacity.vcpu(), clusterCapacity.memoryGb(), clusterCapacity.diskGb(), - clusterCapacity.bandwidthGbps()); + return new NodeResources(clusterCapacity.vcpu(), + clusterCapacity.memoryGb(), + clusterCapacity.diskGb(), + clusterCapacity.bandwidthGbps(), + NodeResources.DiskSpeed.valueOf(clusterCapacity.diskSpeed()), + NodeResources.StorageType.valueOf(clusterCapacity.storageType()), + NodeResources.Architecture.valueOf(clusterCapacity.architecture())); } + + private static List<Node> findEmptyOrRemovableHosts(List<Node> provisionedSnapshot) { + // Group nodes by parent; no parent means it's a host. + var nodesByParent = provisionedSnapshot.stream().collect(groupingBy(Node::parentHostname)); + + // Find all hosts that we once thought were empty (first clause), or whose children are now all removable (second clause). + return getHosts(nodesByParent).stream() + .filter(host -> host.hostEmptyAt().isPresent() || allChildrenCanBeDeprovisioned(nodesByParent, host)) + .toList(); + } + + private static List<Node> getHosts(Map<Optional<String>, List<Node>> nodesByParent) { + return nodesByParent.get(Optional.<String>empty()); + } + + private static List<Node> getChildren(Map<Optional<String>, List<Node>> nodesByParent, Node host) { + return nodesByParent.getOrDefault(Optional.of(host.hostname()), List.of()); + } + + private static boolean allChildrenCanBeDeprovisioned(Map<Optional<String>, List<Node>> nodesByParent, Node host) { + return getChildren(nodesByParent, host).stream().allMatch(HostCapacityMaintainer::canDeprovision); + } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/FlavorConfigBuilder.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/FlavorConfigBuilder.java index 2bc5a0719d9..2e9cca21052 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/FlavorConfigBuilder.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/FlavorConfigBuilder.java @@ -5,12 +5,18 @@ import com.yahoo.config.provision.Flavor; import com.yahoo.config.provision.NodeFlavors; import com.yahoo.config.provisioning.FlavorsConfig; +import static com.yahoo.config.provision.Flavor.Type.BARE_METAL; +import static com.yahoo.config.provision.Flavor.Type.DOCKER_CONTAINER; import static com.yahoo.config.provision.NodeResources.Architecture; +import static com.yahoo.config.provision.NodeResources.Architecture.arm64; +import static com.yahoo.config.provision.NodeResources.Architecture.x86_64; /** * Simplifies creation of a node-repository config containing flavors. * This is needed because the config builder API is inconvenient. * + * Note: Flavors added will have fast disk and remote storage unless explicitly specified. + * * @author bratseth */ public class FlavorConfigBuilder { @@ -27,7 +33,7 @@ public class FlavorConfigBuilder { double disk, double bandwidth, Flavor.Type type) { - return addFlavor(flavorName, cpu, mem, disk, bandwidth, true, true, type, Architecture.x86_64, 0, 0); + return addFlavor(flavorName, cpu, mem, disk, bandwidth, true, true, type, x86_64, 0, 0); } public FlavorsConfig.Flavor.Builder addFlavor(String flavorName, @@ -69,31 +75,20 @@ public class FlavorConfigBuilder { /** Convenience method which creates a node flavors instance from a list of flavor names */ public static NodeFlavors createDummies(String... flavors) { - - FlavorConfigBuilder flavorConfigBuilder = new FlavorConfigBuilder(); + FlavorConfigBuilder builder = new FlavorConfigBuilder(); for (String flavorName : flavors) { - if (flavorName.equals("docker")) - flavorConfigBuilder.addFlavor(flavorName, 1., 30., 20., 1.5, Flavor.Type.DOCKER_CONTAINER); - else if (flavorName.equals("docker2")) - flavorConfigBuilder.addFlavor(flavorName, 2., 40., 40., 0.5, Flavor.Type.DOCKER_CONTAINER); - else if (flavorName.equals("host")) - flavorConfigBuilder.addFlavor(flavorName, 7., 100., 120., 5, Flavor.Type.BARE_METAL); - else if (flavorName.equals("host2")) - flavorConfigBuilder.addFlavor(flavorName, 16, 24, 100, 1, Flavor.Type.BARE_METAL); - else if (flavorName.equals("host3")) - flavorConfigBuilder.addFlavor(flavorName, 24, 64, 100, 10, Flavor.Type.BARE_METAL); - else if (flavorName.equals("host4")) - flavorConfigBuilder.addFlavor(flavorName, 48, 128, 1000, 10, Flavor.Type.BARE_METAL); - else if (flavorName.equals("devhost")) - flavorConfigBuilder.addFlavor(flavorName, 4., 80., 100, 10, Flavor.Type.BARE_METAL); - else if (flavorName.equals("arm64")) - flavorConfigBuilder.addFlavor(flavorName,2., 30., 20., 3, Flavor.Type.BARE_METAL, Architecture.arm64); - else if (flavorName.equals("gpu")) - flavorConfigBuilder.addFlavor(flavorName,4, 16, 125, 10, true, false, Flavor.Type.BARE_METAL, Architecture.x86_64, 1, 16); - else - flavorConfigBuilder.addFlavor(flavorName, 1., 30., 20., 3, Flavor.Type.BARE_METAL); + switch (flavorName) { + case "docker" -> builder.addFlavor(flavorName, 1., 30., 20., 1.5, DOCKER_CONTAINER); + case "host" -> builder.addFlavor(flavorName, 7., 100., 120., 5, BARE_METAL); + case "host2" -> builder.addFlavor(flavorName, 16, 24, 100, 1, BARE_METAL); + case "host3" -> builder.addFlavor(flavorName, 24, 64, 100, 10, BARE_METAL); + case "host4" -> builder.addFlavor(flavorName, 48, 128, 1000, 10, BARE_METAL); + case "arm64" -> builder.addFlavor(flavorName, 2., 30., 20., 3, BARE_METAL, arm64); + case "gpu" -> builder.addFlavor(flavorName, 4, 16, 125, 10, true, false, BARE_METAL, x86_64, 1, 16); + default -> builder.addFlavor(flavorName, 1., 30., 20., 3, BARE_METAL); + } } - return new NodeFlavors(flavorConfigBuilder.build()); + return new NodeFlavors(builder.build()); } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java index 66d4b67c7c2..1a97906697f 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/HostCapacityMaintainerTest.java @@ -90,7 +90,7 @@ public class HostCapacityMaintainerTest { @Test public void does_not_deprovision_when_preprovisioning_enabled() { var tester = new DynamicProvisioningTester().addInitialNodes(); - tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), List.of(new ClusterCapacity(1, 1.0, 3.0, 2.0, 1.0, "fast", "local", "x86_64")), ClusterCapacity.class); + tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), List.of(new ClusterCapacity(1, 1.0, 3.0, 2.0, 1.0, "fast", "remote", "x86_64")), ClusterCapacity.class); Optional<Node> failedHost = tester.nodeRepository.nodes().node("host2"); assertTrue(failedHost.isPresent()); @@ -103,8 +103,8 @@ public class HostCapacityMaintainerTest { public void provision_deficit_and_deprovision_excess() { var tester = new DynamicProvisioningTester().addInitialNodes(); tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), - List.of(new ClusterCapacity(2, 48.0, 128.0, 1000.0, 10.0, "fast", "local", "x86_64"), - new ClusterCapacity(1, 16.0, 24.0, 100.0, 1.0, "fast", "local", "x86_64")), + List.of(new ClusterCapacity(2, 48.0, 128.0, 1000.0, 10.0, "fast", "remote", "x86_64"), + new ClusterCapacity(1, 16.0, 24.0, 100.0, 1.0, "fast", "remote", "x86_64")), ClusterCapacity.class); assertEquals(0, tester.hostProvisioner.provisionedHosts().size()); @@ -141,7 +141,7 @@ public class HostCapacityMaintainerTest { var tester = new DynamicProvisioningTester().addInitialNodes(); // Makes provisioned hosts 48-128-1000-10 tester.hostProvisioner.setHostFlavor("host4"); - var clusterCapacity = new ClusterCapacity(2, 1.0, 30.0, 20.0, 3.0, "fast", "local", "x86_64"); + var clusterCapacity = new ClusterCapacity(2, 1.0, 30.0, 20.0, 3.0, "fast", "remote", "x86_64"); tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), List.of(clusterCapacity), ClusterCapacity.class); @@ -179,7 +179,7 @@ public class HostCapacityMaintainerTest { tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), List.of(clusterCapacity, - new ClusterCapacity(2, 24.0, 64.0, 100.0, 1.0, "fast", "local", "x86_64")), + new ClusterCapacity(2, 24.0, 64.0, 100.0, 1.0, "fast", "remote", "x86_64")), ClusterCapacity.class); tester.maintain(); @@ -194,7 +194,7 @@ public class HostCapacityMaintainerTest { // If the preprovision capacity is reduced, we should see shared hosts deprovisioned. tester.flagSource.withListFlag(PermanentFlags.PREPROVISION_CAPACITY.id(), - List.of(new ClusterCapacity(1, 1.0, 30.0, 20.0, 3.0, "fast", "local", "x86_64")), + List.of(new ClusterCapacity(1, 1.0, 30.0, 20.0, 3.0, "fast", "remote", "x86_64")), ClusterCapacity.class); tester.maintain(); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java index de2c060a0eb..7d90de1ccaf 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java @@ -182,7 +182,7 @@ public class MetricsReporterTest { @Test public void container_metrics() { - NodeFlavors nodeFlavors = FlavorConfigBuilder.createDummies("host", "docker", "docker2"); + NodeFlavors nodeFlavors = FlavorConfigBuilder.createDummies("host", "docker"); ProvisioningTester tester = new ProvisioningTester.Builder().flavors(nodeFlavors.getFlavors()).build(); NodeRepository nodeRepository = tester.nodeRepository(); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacityTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacityTest.java index ec8a8f637c8..35d6cdbf5d0 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacityTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/HostCapacityTest.java @@ -45,7 +45,7 @@ public class HostCapacityTest { doAnswer(invocation -> ((Flavor)invocation.getArguments()[0]).resources()).when(hostResourcesCalculator).advertisedResourcesOf(any()); // Create flavors - NodeFlavors nodeFlavors = FlavorConfigBuilder.createDummies("host", "docker", "docker2"); + NodeFlavors nodeFlavors = FlavorConfigBuilder.createDummies("host", "docker"); // Create hosts host1 = Node.create("host1", IP.Config.of(Set.of("::1"), createIps(2, 4), List.of()), "host1", nodeFlavors.getFlavorOrThrow("host"), NodeType.host).build(); |