From d0a91438b3ea4531db2702a1de53fe26d081d16e Mon Sep 17 00:00:00 2001 From: Håkon Hallingstad Date: Tue, 24 Oct 2017 12:52:17 +0200 Subject: Add hardware failure and divergence metrics --- .../yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java | 6 ++++++ .../vespa/hosted/provision/monitoring/MetricsReporterTest.java | 2 ++ 2 files changed, 8 insertions(+) (limited to 'node-repository/src') diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java index 4432ba89527..3cafbb9f20b 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java @@ -105,6 +105,12 @@ public class MetricsReporter extends Maintainer { metric.set("wantToRetire", node.status().wantToRetire() ? 1 : 0, context); metric.set("wantToDeprovision", node.status().wantToDeprovision() ? 1 : 0, context); + metric.set("hardwareFailure", + node.status().hardwareFailureDescription().isPresent() ? 1 : 0, + context); + metric.set("hardwareDivergence", + node.status().hardwareDivergence().isPresent() ? 1 : 0, + context); try { HostStatus status = orchestrator.getNodeStatus(new HostName(node.hostname())); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java index b40e1f4923c..a3697e57482 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java @@ -78,6 +78,8 @@ public class MetricsReporterTest { expectedMetrics.put("wantToReboot", 0); expectedMetrics.put("wantToRetire", 0); expectedMetrics.put("wantToDeprovision", 0); + expectedMetrics.put("hardwareFailure", 0); + expectedMetrics.put("hardwareDivergence", 0); expectedMetrics.put("allowedToBeDown", 0); Orchestrator orchestrator = mock(Orchestrator.class); -- cgit v1.2.3 From 5939dfe6a7988b6db574999280740e7ae2670688 Mon Sep 17 00:00:00 2001 From: Håkon Hallingstad Date: Tue, 24 Oct 2017 13:15:40 +0200 Subject: Add app tag --- .../yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'node-repository/src') diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java index 3cafbb9f20b..58d6f64de53 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java @@ -65,6 +65,7 @@ public class MetricsReporter extends Maintainer { "hostname", node.hostname(), "tenantName", applicationId.tenant().value(), "applicationId", applicationId.serializedForm().replace(':', '.'), + "app", toApp(applicationId), "clustertype", allocation.get().membership().cluster().type().name(), "clusterid", allocation.get().membership().cluster().id().value()); @@ -123,6 +124,10 @@ public class MetricsReporter extends Maintainer { // TODO: Also add metric on whether some services are down on node? } + private static String toApp(ApplicationId applicationId) { + return applicationId.application().value() + "." + applicationId.instance().value(); + } + /** * A version 6.163.20 will be returned as a number 163.020. The major * version can normally be inferred. As long as the micro version stays -- cgit v1.2.3