diff options
Diffstat (limited to 'node-repository/src')
7 files changed, 99 insertions, 26 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepoStats.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepoStats.java index 085b89d1253..1460ce70686 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepoStats.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepoStats.java @@ -26,16 +26,23 @@ import java.util.Set; */ public class NodeRepoStats { + private final double totalCost; + private final double totalAllocatedCost; private final Load load; private final Load activeLoad; private final List<ApplicationStats> applicationStats; - private NodeRepoStats(Load load, Load activeLoad, List<ApplicationStats> applicationStats) { + private NodeRepoStats(double totalCost, double totalAllocatedCost, Load load, Load activeLoad, List<ApplicationStats> applicationStats) { + this.totalCost = totalCost; + this.totalAllocatedCost = totalAllocatedCost; this.load = load; this.activeLoad = activeLoad; this.applicationStats = List.copyOf(applicationStats); } + public double totalCost() { return totalCost; } + public double totalAllocatedCost() { return totalAllocatedCost; } + /** * Returns the current average work-extracting utilization in this node repo over all nodes. * Capacity not allocated to active nodes are taken to have 0 utilization as it provides no useful work. @@ -50,11 +57,15 @@ public class NodeRepoStats { public static NodeRepoStats computeOver(NodeRepository nodeRepository) { NodeList allNodes = nodeRepository.nodes().list(); - List<NodeTimeseries> allNodeTimeseries = nodeRepository.metricsDb().getNodeTimeseries(Duration.ofHours(1), Set.of()); + double totalCost = allNodes.hosts().stream().mapToDouble(host -> host.resources().cost()).sum(); + double totalAllocatedCost = allNodes.not().hosts().stream() + .filter(node -> node.allocation().isPresent()) + .mapToDouble(node -> node.resources().cost()).sum(); + List<NodeTimeseries> allNodeTimeseries = nodeRepository.metricsDb().getNodeTimeseries(Duration.ofHours(1), Set.of()); Pair<Load, Load> load = computeLoad(allNodes, allNodeTimeseries); List<ApplicationStats> applicationStats = computeApplicationStats(allNodes, allNodeTimeseries); - return new NodeRepoStats(load.getFirst(), load.getSecond(), applicationStats); + return new NodeRepoStats(totalCost, totalAllocatedCost, load.getFirst(), load.getSecond(), applicationStats); } private static Pair<Load, Load> computeLoad(NodeList allNodes, List<NodeTimeseries> allNodeTimeseries) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java index 368a8da0f90..a9e7ded66e6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java @@ -186,24 +186,38 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { .collect(Collectors.toList()); } - private List<Node> candidatesForRemoval(List<Node> nodes) { - Map<String, Node> hostsByHostname = new HashMap<>(nodes.stream() - .filter(node -> switch (node.type()) { - case host -> - // TODO: Mark empty tenant hosts as wanttoretire & wanttodeprovision elsewhere, then handle as confighost here - node.state() != Node.State.parked || node.status().wantToDeprovision(); - case confighost, controllerhost -> node.state() == Node.State.parked && node.status().wantToDeprovision(); - default -> false; - }) - .collect(Collectors.toMap(Node::hostname, Function.identity()))); + private static List<Node> candidatesForRemoval(List<Node> nodes) { + Map<String, Node> removableHostsByHostname = new HashMap<>(); + for (var node : nodes) { + if (canRemoveHost(node)) { + removableHostsByHostname.put(node.hostname(), node); + } + } + for (var node : nodes) { + if (node.parentHostname().isPresent() && !canRemoveNode(node)) { + removableHostsByHostname.remove(node.parentHostname().get()); + } + } + return List.copyOf(removableHostsByHostname.values()); + } - nodes.stream() - .filter(node -> node.allocation().isPresent()) - .flatMap(node -> node.parentHostname().stream()) - .distinct() - .forEach(hostsByHostname::remove); + private static boolean canRemoveHost(Node host) { + return switch (host.type()) { + // TODO: Mark empty tenant hosts as wanttoretire & wanttodeprovision elsewhere, then handle as confighost here + case host -> host.state() != Node.State.parked || host.status().wantToDeprovision(); + case confighost, controllerhost -> canDeprovision(host); + default -> false; + }; + } + + private static boolean canRemoveNode(Node node) { + if (node.type().isHost()) throw new IllegalArgumentException("Node " + node + " is not a child"); + return node.allocation().isEmpty() || canDeprovision(node); + } - return List.copyOf(hostsByHostname.values()); + private static boolean canDeprovision(Node node) { + return node.status().wantToDeprovision() && (node.state() == Node.State.parked || + node.state() == Node.State.failed); } private Map<String, Node> findSharedHosts(NodeList nodeList) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index 829823913a9..aa1abb18d8c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -131,7 +131,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { infrastructureProvisionInterval = Duration.ofMinutes(3); loadBalancerExpirerInterval = Duration.ofMinutes(5); metricsInterval = Duration.ofMinutes(1); - nodeFailerInterval = Duration.ofMinutes(15); + nodeFailerInterval = Duration.ofMinutes(9); nodeFailureStatusUpdateInterval = Duration.ofMinutes(2); nodeMetricsCollectionInterval = Duration.ofMinutes(1); expeditedChangeRedeployInterval = Duration.ofMinutes(3); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java index c5d8b2518e5..dcfdb32e374 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java @@ -453,6 +453,8 @@ public class NodesV2ApiHandler extends ThreadedHttpRequestHandler { Slime slime = new Slime(); Cursor root = slime.setObject(); + root.setDouble("total-cost", stats.totalCost()); + root.setDouble("total-allocated-cost", stats.totalAllocatedCost()); toSlime(stats.load(), root.setObject("load")); toSlime(stats.activeLoad(), root.setObject("activeLoad")); Cursor applicationsArray = root.setArray("applications"); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepoStatsTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepoStatsTest.java index 62c96af7629..788c56e08c6 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepoStatsTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepoStatsTest.java @@ -31,9 +31,13 @@ public class NodeRepoStatsTest { @Test public void testEmpty() { var tester = new NodeRepositoryTester(); - assertLoad(Load.zero(), tester.nodeRepository().computeStats().load()); - assertLoad(Load.zero(), tester.nodeRepository().computeStats().activeLoad()); - assertTrue(tester.nodeRepository().computeStats().applicationStats().isEmpty()); + var stats = tester.nodeRepository().computeStats(); + + assertEquals(0, stats.totalCost(), delta); + assertEquals(0, stats.totalAllocatedCost(), delta); + assertLoad(Load.zero(), stats.load()); + assertLoad(Load.zero(), stats.activeLoad()); + assertTrue(stats.applicationStats().isEmpty()); } @Test @@ -42,9 +46,13 @@ public class NodeRepoStatsTest { tester.addHost("host1", "default"); tester.addHost("host2", "default"); tester.addHost("host3", "small"); - assertLoad(Load.zero(), tester.nodeRepository().computeStats().load()); - assertLoad(Load.zero(), tester.nodeRepository().computeStats().activeLoad()); - assertTrue(tester.nodeRepository().computeStats().applicationStats().isEmpty()); + var stats = tester.nodeRepository().computeStats(); + + assertEquals(0.76, stats.totalCost(), delta); + assertEquals(0, stats.totalAllocatedCost(), delta); + assertLoad(Load.zero(), stats.load()); + assertLoad(Load.zero(), stats.activeLoad()); + assertTrue(stats.applicationStats().isEmpty()); } @Test @@ -97,6 +105,9 @@ public class NodeRepoStatsTest { var stats = tester.nodeRepository().computeStats(); + assertEquals(26, stats.totalCost(), delta); + assertEquals(8.319999999999999, stats.totalAllocatedCost(), delta); + assertLoad(new Load(0.6180,0.5562,0.4944), stats.load()); assertLoad(new Load(0.4682,0.4214,0.3745), stats.activeLoad()); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java index 905fdc57813..e5e361da379 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java @@ -570,6 +570,39 @@ public class DynamicProvisioningMaintainerTest { assertEquals(2, provisioningTester.activate(applicationId, prepared).size()); } + @Test + public void deprovision_parked_node_with_allocation() { + var tester = new DynamicProvisioningTester(); + tester.hostProvisioner.with(Behaviour.failProvisioning); + Node host4 = tester.addNode("host4", Optional.empty(), NodeType.host, Node.State.parked); + Node host41 = tester.addNode("host4-1", Optional.of("host4"), NodeType.tenant, Node.State.parked, DynamicProvisioningTester.tenantApp); + Node host42 = tester.addNode("host4-2", Optional.of("host4"), NodeType.tenant, Node.State.active, DynamicProvisioningTester.tenantApp); + Node host43 = tester.addNode("host4-3", Optional.of("host4"), NodeType.tenant, Node.State.failed, DynamicProvisioningTester.tenantApp); + + // Host and children are marked for deprovisioning + tester.nodeRepository.nodes().deprovision("host4", Agent.operator, Instant.now()); + for (var node : List.of(host4, host41, host42, host43)) { + assertTrue(tester.nodeRepository.nodes().node(node.hostname()).map(n -> n.status().wantToDeprovision()).get()); + } + + // Host and children remain parked because one child is still active + tester.maintainer.maintain(); + for (var node : List.of(host4, host41)) { + assertEquals(Node.State.parked, tester.nodeRepository.nodes().node(node.hostname()).get().state()); + } + assertEquals(Node.State.active, tester.nodeRepository.nodes().node(host42.hostname()).get().state()); + assertEquals(Node.State.failed, tester.nodeRepository.nodes().node(host43.hostname()).get().state()); + + // Last child is parked + tester.nodeRepository.nodes().park(host42.hostname(), true, Agent.system, getClass().getSimpleName()); + + // Host and children can now be removed + tester.maintainer.maintain(); + for (var node : List.of(host4, host41, host42, host43)) { + assertTrue(node.hostname() + " removed", tester.nodeRepository.nodes().node(node.hostname()).isEmpty()); + } + } + private void assertCfghost3IsActive(DynamicProvisioningTester tester) { assertEquals(5, tester.nodeRepository.nodes().list(Node.State.active).size()); assertEquals(3, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.confighost).size()); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/stats.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/stats.json index 8a46f8115be..017a45d2bbe 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/stats.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/stats.json @@ -1,4 +1,6 @@ { + "total-cost" : 8.591999999999999, + "total-allocated-cost": 5.356, "load": { "cpu": 0.0, "memory": 0.0, |