diff options
author | Valerij Fredriksen <freva@users.noreply.github.com> | 2021-03-23 15:32:25 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-23 15:32:25 +0100 |
commit | 566fb64efee643d6f6c8a2043021c8e5cf682c00 (patch) | |
tree | d699091ece2dfbb12071a882a9804db9dd63dac0 /node-repository | |
parent | 87e55bc908488c6529e1b470767d4d7793d5b135 (diff) | |
parent | 230cba9cda6c4e9d0d9aa0c4f4622fb1779a5d0a (diff) |
Merge pull request #17124 from vespa-engine/bratseth/node-repo-stats
Compute node repo utilization
Diffstat (limited to 'node-repository')
10 files changed, 219 insertions, 6 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java index bba4e93616e..197193fafa9 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java @@ -33,6 +33,11 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> { super(nodes, negate, NodeList::new); } + /** Returns the node with the given hostname from this list, or empty if it is not present */ + public Optional<Node> node(String hostname) { + return matching(node -> node.hostname().equals(hostname)).first(); + } + /** Returns the subset of nodes which are retired */ public NodeList retired() { return matching(node -> node.allocation().isPresent() && node.allocation().get().membership().retired()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepoStats.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepoStats.java new file mode 100644 index 00000000000..e11a57f04df --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepoStats.java @@ -0,0 +1,73 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision; + +import com.yahoo.config.provision.NodeResources; +import com.yahoo.vespa.hosted.provision.autoscale.Load; +import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricSnapshot; + +import java.time.Duration; +import java.util.Optional; +import java.util.Set; + +/** + * Stats about the current state known to this node repo + * + * @author bratseth + */ +public class NodeRepoStats { + + private final Load load; + private final Load activeLoad; + + private NodeRepoStats(Load load, Load activeLoad) { + this.load = load; + this.activeLoad = activeLoad; + } + + /** + * Returns the current average work-extracting utilization in this node repo over all nodes. + * Capacity not allocated to active nodes are taken to have 0 utilization as it provides no useful work. + */ + public Load load() { return load; } + + /** Returns the current average utilization in this node repo over all active nodes. */ + public Load activeLoad() { return activeLoad; } + + public static NodeRepoStats computeOver(NodeRepository nodeRepository) { + NodeList allNodes = nodeRepository.nodes().list(); + + NodeResources totalActiveResources = new NodeResources(0, 0, 0, 0); + double cpu = 0, memory = 0, disk = 0; + for (var nodeTimeseries : nodeRepository.metricsDb().getNodeTimeseries(Duration.ofHours(1), Set.of())) { + Optional<Node> node = allNodes.node(nodeTimeseries.hostname()); + if (node.isEmpty() || node.get().state() != Node.State.active) continue; + + Optional<NodeMetricSnapshot> snapshot = nodeTimeseries.last(); + if (snapshot.isEmpty()) continue; + + cpu += snapshot.get().cpu() * node.get().resources().vcpu(); + memory += snapshot.get().memory() * node.get().resources().memoryGb(); + disk += snapshot.get().disk() * node.get().resources().diskGb(); + totalActiveResources = totalActiveResources.add(node.get().resources().justNumbers()); + } + + NodeResources totalHostResources = new NodeResources(0, 0, 0, 0); + for (var host : allNodes.hosts()) { + totalHostResources = totalHostResources.add(host.resources().justNumbers()); + } + + Load load = new Load(divide(cpu, totalHostResources.vcpu()), + divide(memory, totalHostResources.memoryGb()), + divide(disk, totalHostResources.diskGb())); + Load activeLoad = new Load(divide(cpu, totalActiveResources.vcpu()), + divide(memory, totalActiveResources.memoryGb()), + divide(disk, totalActiveResources.diskGb())); + return new NodeRepoStats(load, activeLoad); + } + + private static double divide(double a, double b) { + if (a == 0 && b == 0) return 0; + return a / b; + } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index 530afabcea9..4cf531b55de 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -171,6 +171,8 @@ public class NodeRepository extends AbstractComponent { public MetricsDb metricsDb() { return metricsDb; } + public NodeRepoStats computeStats() { return NodeRepoStats.computeOver(this); } + /** Returns the time keeper of this system */ public Clock clock() { return clock; } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java new file mode 100644 index 00000000000..1a13c1bb6d8 --- /dev/null +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/Load.java @@ -0,0 +1,38 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision.autoscale; + +/** + * The load of a node or system, measured as fractions of max (1.0) in three dimensions. + * + * @author bratseth + */ +public class Load { + + private final double cpu, memory, disk; + + public Load(double cpu, double memory, double disk) { + this.cpu = requireNormalized(cpu, "cpu"); + this.memory = requireNormalized(memory, "memory"); + this.disk = requireNormalized(disk, "disk"); + } + + public double cpu() { return cpu; } + public double memory() { return memory; } + public double disk() { return disk; } + + private double requireNormalized(double value, String name) { + if (Double.isNaN(value)) + throw new IllegalArgumentException(name + " must be a number between 0 and 1, but is NaN"); + if (value < 0 || value > 1) + throw new IllegalArgumentException(name + " must be between 0 and 1, but is " + value); + return value; + } + + @Override + public String toString() { + return "load: " + cpu + " cpu, " + memory + " memory, " + disk + " disk"; + } + + public static Load zero() { return new Load(0, 0, 0); } + +} diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java index 377374e8bc5..6b8ba3f7dc4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MemoryMetricsDb.java @@ -72,9 +72,12 @@ public class MemoryMetricsDb implements MetricsDb { public List<NodeTimeseries> getNodeTimeseries(Duration period, Set<String> hostnames) { Instant startTime = clock().instant().minus(period); synchronized (lock) { - return hostnames.stream() - .map(hostname -> nodeTimeseries.getOrDefault(hostname, new NodeTimeseries(hostname, List.of())).justAfter(startTime)) - .collect(Collectors.toList()); + if (hostnames.isEmpty()) + return nodeTimeseries.values().stream().map(ns -> ns.justAfter(startTime)).collect(Collectors.toList()); + else + return hostnames.stream() + .map(hostname -> nodeTimeseries.getOrDefault(hostname, new NodeTimeseries(hostname, List.of())).justAfter(startTime)) + .collect(Collectors.toList()); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java index 672aad25b66..593b73e008e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsDb.java @@ -35,6 +35,7 @@ public interface MetricsDb { * the snapshots recorded after the given time (or an empty snapshot if none). * * @param period the duration into the past to return data for + * @param hostnames the host names to return timeseries for, or empty to return for all hostnames */ List<NodeTimeseries> getNodeTimeseries(Duration period, Set<String> hostnames); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java index cedc2edfe63..f8100677b10 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/NodeTimeseries.java @@ -5,6 +5,7 @@ import java.time.Instant; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Optional; import java.util.function.Predicate; import java.util.stream.Collectors; @@ -31,6 +32,12 @@ public class NodeTimeseries { public NodeMetricSnapshot get(int index) { return snapshots.get(index); } + /** Returns the last (newest) snapshot in this, or empty if there are none. */ + public Optional<NodeMetricSnapshot> last() { + if (snapshots.isEmpty()) return Optional.empty(); + return Optional.of(snapshots.get(snapshots.size() - 1)); + } + public List<NodeMetricSnapshot> asList() { return snapshots; } public String hostname() { return hostname; } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDb.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDb.java index fe97df0e876..459a7919bbe 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDb.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/QuestMetricsDb.java @@ -355,7 +355,7 @@ public class QuestMetricsDb extends AbstractComponent implements MetricsDb { Record record = cursor.getRecord(); while (cursor.hasNext()) { String hostname = record.getStr(0).toString(); - if (hostnames.contains(hostname)) { + if (hostnames.isEmpty() || hostnames.contains(hostname)) { snapshots.put(hostname, new NodeMetricSnapshot(Instant.ofEpochMilli(record.getTimestamp(1) / 1000), record.getFloat(2), diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepoStatsTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepoStatsTest.java new file mode 100644 index 00000000000..5d136016a71 --- /dev/null +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepoStatsTest.java @@ -0,0 +1,72 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.vespa.hosted.provision; + +import com.yahoo.collections.Pair; +import com.yahoo.config.provision.NodeResources; +import com.yahoo.vespa.hosted.provision.autoscale.Load; +import com.yahoo.vespa.hosted.provision.autoscale.NodeMetricSnapshot; +import org.junit.Test; + +import java.time.Duration; +import java.util.List; + +import static org.junit.Assert.assertEquals; + +/** + * @author bratseth + */ +public class NodeRepoStatsTest { + + private static final double delta = 0.0001; + + @Test + public void testEmpty() { + var tester = new NodeRepositoryTester(); + assertLoad(Load.zero(), tester.nodeRepository().computeStats().load()); + assertLoad(Load.zero(), tester.nodeRepository().computeStats().activeLoad()); + } + + @Test + public void testHostButNoNodes() { + var tester = new NodeRepositoryTester(); + tester.addHost("host1", "default"); + tester.addHost("host2", "default"); + tester.addHost("host3", "small"); + assertLoad(Load.zero(), tester.nodeRepository().computeStats().load()); + assertLoad(Load.zero(), tester.nodeRepository().computeStats().activeLoad()); + } + + @Test + public void testNodesAndMetrics() { + var tester = new NodeRepositoryTester(); + tester.addHost("host1", "default"); + tester.addHost("host2", "default"); + tester.addHost("host3", "small"); + tester.addNode("node1", "host1", new NodeResources(0.2, 0.5, 4, 1)); + tester.addNode("node2", "host1", new NodeResources(0.3, 1.0, 8, 1)); + tester.addNode("node3", "host3", new NodeResources(0.3, 1.5, 12, 1)); + tester.setNodeState("node1", Node.State.active); + tester.setNodeState("node2", Node.State.active); + tester.setNodeState("node3", Node.State.active); + assertLoad(Load.zero(), tester.nodeRepository().computeStats().load()); + assertLoad(Load.zero(), tester.nodeRepository().computeStats().activeLoad()); + + var before = tester.clock().instant(); + tester.clock().advance(Duration.ofMinutes(5)); + var now = tester.clock().instant(); + tester.nodeRepository().metricsDb().addNodeMetrics( + List.of(new Pair<>("node1", new NodeMetricSnapshot(before, 0, 0, 0, 1, true, true, 1.0)), + new Pair<>("node1", new NodeMetricSnapshot(now, 0.5, 0.1, 0.8, 1, true, true, 1.0)), + new Pair<>("node2", new NodeMetricSnapshot(now, 0.1, 0.8, 0.1, 1, true, true, 1.0)), + new Pair<>("node3", new NodeMetricSnapshot(now, 1.0, 0.1, 0.2, 1, true, true, 1.0)))); + assertLoad(new Load(0.0860, 0.1000, 0.0256), tester.nodeRepository().computeStats().load()); + assertLoad(new Load(0.5375, 0.3333, 0.2667), tester.nodeRepository().computeStats().activeLoad()); + } + + private static void assertLoad(Load expected, Load actual) { + assertEquals("cpu", expected.cpu(), actual.cpu(), delta); + assertEquals("memory", expected.memory(), actual.memory(), delta); + assertEquals("disk", expected.disk(), actual.disk(), delta); + } + +} diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java index 2a375f49d78..c986a5df1d3 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTester.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.provision; import com.yahoo.config.provision.DockerImage; import com.yahoo.config.provision.Flavor; import com.yahoo.config.provision.NodeFlavors; +import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.config.provision.Zone; import com.yahoo.config.provisioning.FlavorsConfig; @@ -56,10 +57,18 @@ public class NodeRepositoryTester { return nodeRepository.nodes().list(inState).nodeType(type).asList(); } + public Node addHost(String id, String flavor) { + return addNode(id, id, null, nodeFlavors.getFlavorOrThrow(flavor), NodeType.host); + } + public Node addHost(String id, String hostname, String flavor, NodeType type) { return addNode(id, hostname, null, nodeFlavors.getFlavorOrThrow(flavor), type); } + public Node addNode(String id, String parentHostname, NodeResources resources) { + return addNode(id, id, parentHostname, new Flavor(resources), NodeType.tenant); + } + public Node addNode(String id, String hostname, String parentHostname, String flavor, NodeType type) { return addNode(id, hostname, parentHostname, nodeFlavors.getFlavorOrThrow(flavor), type); } @@ -71,13 +80,16 @@ public class NodeRepositoryTester { return nodeRepository.nodes().addNodes(List.of(node), Agent.system).get(0); } + public void setNodeState(String hostname, Node.State state) { + setNodeState(nodeRepository.nodes().node(hostname).orElseThrow(RuntimeException::new), state); + } + /** * Moves a node directly to the given state without doing any validation, useful * to create wanted test scenario without having to move every node through series * of valid state transitions */ - public void setNodeState(String hostname, Node.State state) { - Node node = nodeRepository.nodes().node(hostname).orElseThrow(RuntimeException::new); + public void setNodeState(Node node, Node.State state) { nodeRepository.database().writeTo(state, node, Agent.system, Optional.empty()); } |