summary | refs | log | tree | commit | diff | stats
path: root/node-repository
diff options
context:
space:
mode:
author Leandro Alves <ldalves@gmail.com> 2017-10-24 14:16:51 +0200
committer GitHub <noreply@github.com> 2017-10-24 14:16:51 +0200
commit be6ac14ae41d548059715dea5278e8aeb2c9ca12 (patch)
tree bd12d268171606925007a98496d5f42347d6e75b /node-repository
parent 9b1870295c8a3a5236e75fda02c7126024886f07 (diff)
parent 5939dfe6a7988b6db574999280740e7ae2670688 (diff)
Merge pull request #3861 from vespa-engine/hakonhall/add-hardware-failure-and-divergence-metrics
Add hardware failure and divergence metrics
Diffstat (limited to 'node-repository')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java 11
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java 2
2 files changed, 13 insertions, 0 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
index 1e4579d3ccd..1601b2e3205 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
@@ -61,6 +61,7 @@ public class MetricsReporter extends Maintainer {
"hostname", node.hostname(),
"tenantName", applicationId.tenant().value(),
"applicationId", applicationId.serializedForm().replace(':', '.'),
+ "app", toApp(applicationId),
"clustertype", allocation.get().membership().cluster().type().name(),
"clusterid", allocation.get().membership().cluster().id().value());
@@ -101,6 +102,12 @@ public class MetricsReporter extends Maintainer {
metric.set("wantToRetire", node.status().wantToRetire() ? 1 : 0, context);
metric.set("wantToDeprovision", node.status().wantToDeprovision() ? 1 : 0, context);
+ metric.set("hardwareFailure",
+ node.status().hardwareFailureDescription().isPresent() ? 1 : 0,
+ context);
+ metric.set("hardwareDivergence",
+ node.status().hardwareDivergence().isPresent() ? 1 : 0,
+ context);
try {
HostStatus status = orchestrator.getNodeStatus(new HostName(node.hostname()));
@@ -113,6 +120,10 @@ public class MetricsReporter extends Maintainer {
// TODO: Also add metric on whether some services are down on node?
}
+ private static String toApp(ApplicationId applicationId) {
+ return applicationId.application().value() + "." + applicationId.instance().value();
+ }
+
/**
* A version 6.163.20 will be returned as a number 163.020. The major
* version can normally be inferred. As long as the micro version stays
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java
index b40e1f4923c..a3697e57482 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java
@@ -78,6 +78,8 @@ public class MetricsReporterTest {
expectedMetrics.put("wantToReboot", 0);
expectedMetrics.put("wantToRetire", 0);
expectedMetrics.put("wantToDeprovision", 0);
+ expectedMetrics.put("hardwareFailure", 0);
+ expectedMetrics.put("hardwareDivergence", 0);
expectedMetrics.put("allowedToBeDown", 0);
Orchestrator orchestrator = mock(Orchestrator.class);