summary | refs | log | tree | commit | diff | stats
path: root/node-repository/src
diff options
context:
space:
mode:
Diffstat (limited to 'node-repository/src')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepoStats.java | 17
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java | 46
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java | 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java | 2
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepoStatsTest.java | 23
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java | 33
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/stats.json | 2
7 files changed, 99 insertions, 26 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepoStats.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepoStats.java
index 085b89d1253..1460ce70686 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepoStats.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepoStats.java
@@ -26,16 +26,23 @@ import java.util.Set;
*/
public class NodeRepoStats {
+ private final double totalCost;
+ private final double totalAllocatedCost;
private final Load load;
private final Load activeLoad;
private final List<ApplicationStats> applicationStats;
- private NodeRepoStats(Load load, Load activeLoad, List<ApplicationStats> applicationStats) {
+ private NodeRepoStats(double totalCost, double totalAllocatedCost, Load load, Load activeLoad, List<ApplicationStats> applicationStats) { // Private constructor; in this diff instances are created by computeOver(NodeRepository)
+ this.totalCost = totalCost;
+ this.totalAllocatedCost = totalAllocatedCost;
this.load = load;
this.activeLoad = activeLoad;
this.applicationStats = List.copyOf(applicationStats); // Unmodifiable copy, so later changes to the caller's list are not reflected here
}
+ public double totalCost() { return totalCost; } // Returns the total cost given at construction; computeOver sums resources().cost() over allNodes.hosts()
+ public double totalAllocatedCost() { return totalAllocatedCost; } // Returns the allocated cost given at construction; computeOver sums resources().cost() over non-host nodes that have an allocation
+
/**
* Returns the current average work-extracting utilization in this node repo over all nodes.
* Capacity not allocated to active nodes are taken to have 0 utilization as it provides no useful work.
@@ -50,11 +57,15 @@ public class NodeRepoStats {
public static NodeRepoStats computeOver(NodeRepository nodeRepository) { // Builds a stats snapshot from the repository's current node list and its metrics db
NodeList allNodes = nodeRepository.nodes().list();
- List<NodeTimeseries> allNodeTimeseries = nodeRepository.metricsDb().getNodeTimeseries(Duration.ofHours(1), Set.of());
+ double totalCost = allNodes.hosts().stream().mapToDouble(host -> host.resources().cost()).sum(); // Cost of every node selected by hosts(), whether or not anything is allocated on it
+ double totalAllocatedCost = allNodes.not().hosts().stream() // Complement of hosts(): the non-host nodes
+ .filter(node -> node.allocation().isPresent()) // Only nodes that currently have an allocation count as allocated cost
+ .mapToDouble(node -> node.resources().cost()).sum();
+ List<NodeTimeseries> allNodeTimeseries = nodeRepository.metricsDb().getNodeTimeseries(Duration.ofHours(1), Set.of()); // One-hour window; NOTE(review): the empty set presumably means "no hostname filter" - confirm against metricsDb
Pair<Load, Load> load = computeLoad(allNodes, allNodeTimeseries);
List<ApplicationStats> applicationStats = computeApplicationStats(allNodes, allNodeTimeseries);
- return new NodeRepoStats(load.getFirst(), load.getSecond(), applicationStats);
+ return new NodeRepoStats(totalCost, totalAllocatedCost, load.getFirst(), load.getSecond(), applicationStats); // Pair order: first becomes load, second becomes activeLoad
}
private static Pair<Load, Load> computeLoad(NodeList allNodes, List<NodeTimeseries> allNodeTimeseries) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
index 368a8da0f90..a9e7ded66e6 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java
@@ -186,24 +186,38 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer {
.collect(Collectors.toList());
}
- private List<Node> candidatesForRemoval(List<Node> nodes) {
- Map<String, Node> hostsByHostname = new HashMap<>(nodes.stream()
- .filter(node -> switch (node.type()) {
- case host ->
- // TODO: Mark empty tenant hosts as wanttoretire & wanttodeprovision elsewhere, then handle as confighost here
- node.state() != Node.State.parked || node.status().wantToDeprovision();
- case confighost, controllerhost -> node.state() == Node.State.parked && node.status().wantToDeprovision();
- default -> false;
- })
- .collect(Collectors.toMap(Node::hostname, Function.identity())));
+ private static List<Node> candidatesForRemoval(List<Node> nodes) { // Returns the hosts in nodes that pass canRemoveHost and have no child failing canRemoveNode
+ Map<String, Node> removableHostsByHostname = new HashMap<>();
+ for (var node : nodes) { // Pass 1: collect every node that is a removable host on its own merits
+ if (canRemoveHost(node)) {
+ removableHostsByHostname.put(node.hostname(), node);
+ }
+ }
+ for (var node : nodes) { // Pass 2: a single non-removable child disqualifies its parent host
+ if (node.parentHostname().isPresent() && !canRemoveNode(node)) {
+ removableHostsByHostname.remove(node.parentHostname().get()); // No-op when the parent was never a candidate
+ }
+ }
+ return List.copyOf(removableHostsByHostname.values()); // Unmodifiable snapshot; order follows HashMap iteration and is therefore unspecified
+ }
- nodes.stream()
- .filter(node -> node.allocation().isPresent())
- .flatMap(node -> node.parentHostname().stream())
- .distinct()
- .forEach(hostsByHostname::remove);
+ private static boolean canRemoveHost(Node host) { // Whether this node, judged by its own type/state/status only, is a removable host; children are checked separately by candidatesForRemoval
+ return switch (host.type()) {
+ // TODO: Mark empty tenant hosts as wanttoretire & wanttodeprovision elsewhere, then handle as confighost here
+ case host -> host.state() != Node.State.parked || host.status().wantToDeprovision(); // Removable unless it is parked without wantToDeprovision
+ case confighost, controllerhost -> canDeprovision(host); // Must be marked wantToDeprovision and be parked or failed
+ default -> false; // Every other node type is never a host-removal candidate
+ };
+ }
+
+ private static boolean canRemoveNode(Node node) { // Whether a child node allows its parent host to be removed; throws IllegalArgumentException when given a host
+ if (node.type().isHost()) throw new IllegalArgumentException("Node " + node + " is not a child"); // Guard: this check is only defined for children
+ return node.allocation().isEmpty() || canDeprovision(node); // Unallocated children never block removal; allocated ones must be deprovisionable
+ }
- return List.copyOf(hostsByHostname.values());
+ private static boolean canDeprovision(Node node) { // True only when the node is marked wantToDeprovision AND is in state parked or failed
+ return node.status().wantToDeprovision() && (node.state() == Node.State.parked ||
+ node.state() == Node.State.failed); // Failed is accepted alongside parked, unlike the removed code which required parked for confighost/controllerhost
}
private Map<String, Node> findSharedHosts(NodeList nodeList) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index 829823913a9..aa1abb18d8c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -131,7 +131,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
infrastructureProvisionInterval = Duration.ofMinutes(3);
loadBalancerExpirerInterval = Duration.ofMinutes(5);
metricsInterval = Duration.ofMinutes(1);
- nodeFailerInterval = Duration.ofMinutes(15);
+ nodeFailerInterval = Duration.ofMinutes(9);
nodeFailureStatusUpdateInterval = Duration.ofMinutes(2);
nodeMetricsCollectionInterval = Duration.ofMinutes(1);
expeditedChangeRedeployInterval = Duration.ofMinutes(3);
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java
index c5d8b2518e5..dcfdb32e374 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesV2ApiHandler.java
@@ -453,6 +453,8 @@ public class NodesV2ApiHandler extends ThreadedHttpRequestHandler {
Slime slime = new Slime();
Cursor root = slime.setObject();
+ root.setDouble("total-cost", stats.totalCost());
+ root.setDouble("total-allocated-cost", stats.totalAllocatedCost());
toSlime(stats.load(), root.setObject("load"));
toSlime(stats.activeLoad(), root.setObject("activeLoad"));
Cursor applicationsArray = root.setArray("applications");
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepoStatsTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepoStatsTest.java
index 62c96af7629..788c56e08c6 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepoStatsTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepoStatsTest.java
@@ -31,9 +31,13 @@ public class NodeRepoStatsTest {
@Test
public void testEmpty() { // Stats computed over a tester with no nodes added must be all-zero and list no applications
var tester = new NodeRepositoryTester();
- assertLoad(Load.zero(), tester.nodeRepository().computeStats().load());
- assertLoad(Load.zero(), tester.nodeRepository().computeStats().activeLoad());
- assertTrue(tester.nodeRepository().computeStats().applicationStats().isEmpty());
+ var stats = tester.nodeRepository().computeStats(); // Computed once so every assertion below checks the same snapshot
+
+ assertEquals(0, stats.totalCost(), delta); // No hosts were added, so there is nothing to sum
+ assertEquals(0, stats.totalAllocatedCost(), delta);
+ assertLoad(Load.zero(), stats.load());
+ assertLoad(Load.zero(), stats.activeLoad());
+ assertTrue(stats.applicationStats().isEmpty());
}
@Test
@@ -42,9 +46,13 @@ public class NodeRepoStatsTest {
tester.addHost("host1", "default");
tester.addHost("host2", "default");
tester.addHost("host3", "small");
- assertLoad(Load.zero(), tester.nodeRepository().computeStats().load());
- assertLoad(Load.zero(), tester.nodeRepository().computeStats().activeLoad());
- assertTrue(tester.nodeRepository().computeStats().applicationStats().isEmpty());
+ var stats = tester.nodeRepository().computeStats();
+
+ assertEquals(0.76, stats.totalCost(), delta);
+ assertEquals(0, stats.totalAllocatedCost(), delta);
+ assertLoad(Load.zero(), stats.load());
+ assertLoad(Load.zero(), stats.activeLoad());
+ assertTrue(stats.applicationStats().isEmpty());
}
@Test
@@ -97,6 +105,9 @@ public class NodeRepoStatsTest {
var stats = tester.nodeRepository().computeStats();
+ assertEquals(26, stats.totalCost(), delta);
+ assertEquals(8.319999999999999, stats.totalAllocatedCost(), delta);
+
assertLoad(new Load(0.6180,0.5562,0.4944), stats.load());
assertLoad(new Load(0.4682,0.4214,0.3745), stats.activeLoad());
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
index 905fdc57813..e5e361da379 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainerTest.java
@@ -570,6 +570,39 @@ public class DynamicProvisioningMaintainerTest {
assertEquals(2, provisioningTester.activate(applicationId, prepared).size());
}
+ @Test
+ public void deprovision_parked_node_with_allocation() { // A host marked for deprovisioning is removed only once none of its allocated children is still active
+ var tester = new DynamicProvisioningTester();
+ tester.hostProvisioner.with(Behaviour.failProvisioning);
+ Node host4 = tester.addNode("host4", Optional.empty(), NodeType.host, Node.State.parked);
+ Node host41 = tester.addNode("host4-1", Optional.of("host4"), NodeType.tenant, Node.State.parked, DynamicProvisioningTester.tenantApp);
+ Node host42 = tester.addNode("host4-2", Optional.of("host4"), NodeType.tenant, Node.State.active, DynamicProvisioningTester.tenantApp);
+ Node host43 = tester.addNode("host4-3", Optional.of("host4"), NodeType.tenant, Node.State.failed, DynamicProvisioningTester.tenantApp);
+
+ // Deprovisioning the host marks both the host and all of its children as wantToDeprovision
+ tester.nodeRepository.nodes().deprovision("host4", Agent.operator, Instant.now());
+ for (var node : List.of(host4, host41, host42, host43)) {
+ assertTrue(tester.nodeRepository.nodes().node(node.hostname()).map(n -> n.status().wantToDeprovision()).get());
+ }
+
+ // Nothing is removed while one child is still active: host4 and host4-1 stay parked, host4-2 stays active and host4-3 stays failed
+ tester.maintainer.maintain();
+ for (var node : List.of(host4, host41)) {
+ assertEquals(Node.State.parked, tester.nodeRepository.nodes().node(node.hostname()).get().state());
+ }
+ assertEquals(Node.State.active, tester.nodeRepository.nodes().node(host42.hostname()).get().state());
+ assertEquals(Node.State.failed, tester.nodeRepository.nodes().node(host43.hostname()).get().state());
+
+ // Park the last active child (host4-2)
+ tester.nodeRepository.nodes().park(host42.hostname(), true, Agent.system, getClass().getSimpleName());
+
+ // With every child parked or failed, the host and all children are removed from the node repository
+ tester.maintainer.maintain();
+ for (var node : List.of(host4, host41, host42, host43)) {
+ assertTrue(node.hostname() + " removed", tester.nodeRepository.nodes().node(node.hostname()).isEmpty());
+ }
+ }
+
private void assertCfghost3IsActive(DynamicProvisioningTester tester) {
assertEquals(5, tester.nodeRepository.nodes().list(Node.State.active).size());
assertEquals(3, tester.nodeRepository.nodes().list(Node.State.active).nodeType(NodeType.confighost).size());
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/stats.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/stats.json
index 8a46f8115be..017a45d2bbe 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/stats.json
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/stats.json
@@ -1,4 +1,6 @@
{
+ "total-cost" : 8.591999999999999,
+ "total-allocated-cost": 5.356,
"load": {
"cpu": 0.0,
"memory": 0.0,