aboutsummaryrefslogtreecommitdiffstats
path: root/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/autoscale/MetricsResponse.java
blob: d6661b895360ee29b3121fd5cd2f1e490fee3921 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.autoscale;

import com.yahoo.collections.Pair;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.slime.ArrayTraverser;
import com.yahoo.slime.Inspector;
import com.yahoo.slime.ObjectTraverser;
import com.yahoo.slime.Slime;
import com.yahoo.slime.SlimeUtils;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.applications.Application;

import java.time.Instant;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;

/**
 * A response containing metrics for a collection of nodes.
 *
 * Each entry pairs a hostname with the metric snapshot read for that host.
 * When created from a metrics/V2 response, only hosts which are present in the
 * supplied application node list contribute an entry.
 *
 * @author bratseth
 */
public class MetricsResponse {

    /** The (hostname, snapshot) pairs of this, in the order they were read */
    private final Collection<Pair<String, MetricSnapshot>> nodeMetrics;

    /**
     * Creates this from a metrics/V2 response
     *
     * @param response the JSON text of the response
     * @param applicationNodes the nodes to accept metrics for; entries for other hostnames are skipped
     * @param nodeRepository passed on to {@code Autoscaler.stable} when determining cluster stability
     */
    public MetricsResponse(String response, NodeList applicationNodes, NodeRepository nodeRepository) {
        this(SlimeUtils.jsonToSlime(response), applicationNodes, nodeRepository);
    }

    /**
     * Creates this from already extracted metrics.
     * The given collection is kept as-is (not copied) and is what {@link #metrics()} returns.
     */
    public MetricsResponse(Collection<Pair<String, MetricSnapshot>> metrics) {
        this.nodeMetrics = metrics;
    }

    /** Reads each element of the root "nodes" array of the given response */
    private MetricsResponse(Slime response, NodeList applicationNodes, NodeRepository nodeRepository) {
        nodeMetrics = new ArrayList<>();
        Inspector root = response.get();
        Inspector nodes = root.field("nodes");
        nodes.traverse((ArrayTraverser)(__, node) -> consumeNode(node, applicationNodes, nodeRepository));
    }

    /** Returns the (hostname, snapshot) pairs of this response */
    public Collection<Pair<String, MetricSnapshot>> metrics() { return nodeMetrics; }

    /** Reads the "hostname" and "node" fields of one element of the "nodes" array */
    private void consumeNode(Inspector node, NodeList applicationNodes, NodeRepository nodeRepository) {
        String hostname = node.field("hostname").asString();
        consumeNodeMetrics(hostname, node.field("node"), applicationNodes, nodeRepository);
        // Service level metrics are currently not consumed:
        // consumeServiceMetrics(hostname, node.field("services"));
    }

    /**
     * Adds a snapshot for the given hostname to this, unless the hostname does not
     * belong to any of the given application nodes.
     *
     * @param hostname the hostname the metrics are reported for
     * @param nodeData the "node" object of the response, holding "timestamp" and "metrics"
     */
    private void consumeNodeMetrics(String hostname, Inspector nodeData, NodeList applicationNodes, NodeRepository nodeRepository) {
        Optional<Node> node = applicationNodes.stream().filter(n -> n.hostname().equals(hostname)).findAny();
        if (node.isEmpty()) return; // Node is not part of this cluster any more
        long timestampSecond = nodeData.field("timestamp").asLong();
        Map<String, Double> values = consumeMetrics(nodeData.field("metrics"));
        // The timestamp is in seconds: Convert directly rather than multiplying to milliseconds by hand
        nodeMetrics.add(new Pair<>(hostname, new MetricSnapshot(Instant.ofEpochSecond(timestampSecond),
                                                                Metric.cpu.from(values),
                                                                Metric.memory.from(values),
                                                                Metric.disk.from(values),
                                                                (long)Metric.generation.from(values),
                                                                Metric.inService.from(values) > 0,
                                                                clusterIsStable(node.get(), applicationNodes, nodeRepository),
                                                                Metric.queryRate.from(values))));
    }

    /** Returns whether the cluster the given node is allocated to is stable according to {@code Autoscaler.stable} */
    private boolean clusterIsStable(Node node, NodeList applicationNodes, NodeRepository nodeRepository) {
        // NOTE(review): assumes every node in applicationNodes has an allocation - get() throws otherwise
        ClusterSpec cluster = node.allocation().get().membership().cluster();
        return Autoscaler.stable(applicationNodes.cluster(cluster.id()), nodeRepository);
    }

    /**
     * Reads the metrics of a service. Currently unused: The only call site is commented out,
     * and the values read here are not stored anywhere.
     */
    private void consumeServiceMetrics(String hostname, Inspector node) {
        String name = node.field("name").asString();
        long timestamp = node.field("timestamp").asLong();
        Map<String, Double> values = consumeMetrics(node.field("metrics"));
    }

    /**
     * Flattens a "metrics" array into a single name to value map.
     * If the same name occurs in multiple items, the last one read wins.
     */
    private Map<String, Double> consumeMetrics(Inspector metrics) {
        Map<String, Double> values = new HashMap<>();
        metrics.traverse((ArrayTraverser) (__, item) -> consumeMetricsItem(item, values));
        return values;
    }

    /** Adds every entry of the "values" object of the given metrics item to the given map */
    private void consumeMetricsItem(Inspector item, Map<String, Double> values) {
        item.field("values").traverse((ObjectTraverser)(name, value) -> values.put(name, value.asDouble()));
    }

    /** Returns a response containing no metrics */
    public static MetricsResponse empty() { return new MetricsResponse(List.of()); }

    /** The metrics this can read */
    private enum Metric {

        // Note: Values are kept as double all the way. Narrowing through float would
        // discard precision, which is harmful in particular for the generation (a long).

        cpu { // a node resource
            public String metricResponseName() { return "cpu.util"; }
            double convertValue(double metricValue) { return metricValue / 100; } // % to ratio
        },
        memory { // a node resource
            public String metricResponseName() { return "mem.util"; }
            double convertValue(double metricValue) { return metricValue / 100; } // % to ratio
        },
        disk { // a node resource
            public String metricResponseName() { return "disk.util"; }
            double convertValue(double metricValue) { return metricValue / 100; } // % to ratio
        },
        generation { // application config generation active on the node
            public String metricResponseName() { return "application_generation"; }
            double convertValue(double metricValue) { return metricValue; } // Really a long
            double defaultValue() { return -1.0; }
        },
        inService {
            public String metricResponseName() { return "in_service"; }
            double convertValue(double metricValue) { return metricValue; } // Really a boolean
            double defaultValue() { return 1.0; }
        },
        queryRate { // queries per second
            public String metricResponseName() { return "queries.rate"; }
            double convertValue(double metricValue) { return metricValue; }
            double defaultValue() { return 0.0; }
        };

        /** The name of this metric as emitted from its source */
        public abstract String metricResponseName();

        /** Convert from the emitted value of this metric to the value we want to use here */
        abstract double convertValue(double metricValue);

        /** The emitted value to assume (before conversion) when this metric is missing from the response */
        double defaultValue() { return 0.0; }

        /**
         * Returns the converted value of this metric from the given map of emitted values,
         * using the (converted) default value if this metric is not present in the map.
         */
        public double from(Map<String, Double> values) {
            return convertValue(values.getOrDefault(metricResponseName(), defaultValue()));
        }

    }

}