From b078158829a362709acd2d9d738a3307037b582e Mon Sep 17 00:00:00 2001 From: Håkon Hallingstad Date: Thu, 26 Oct 2017 23:42:02 +0200 Subject: Add service status metrics --- .../provision/maintenance/MetricsReporter.java | 36 ++++++++++++++++++++-- .../maintenance/NodeRepositoryMaintenance.java | 2 +- .../provision/testutils/ServiceMonitorStub.java | 5 +++ .../provision/monitoring/MetricsReporterTest.java | 21 +++++++++++-- 4 files changed, 58 insertions(+), 6 deletions(-) (limited to 'node-repository/src') diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java index 1601b2e3205..471fe132e51 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java @@ -7,6 +7,8 @@ import com.yahoo.config.provision.Flavor; import com.yahoo.config.provision.NodeType; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.applicationmodel.ServiceInstance; +import com.yahoo.vespa.applicationmodel.ServiceStatus; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Allocation; @@ -14,6 +16,7 @@ import com.yahoo.vespa.hosted.provision.provisioning.DockerHostCapacity; import com.yahoo.vespa.orchestrator.HostNameNotFoundException; import com.yahoo.vespa.orchestrator.Orchestrator; import com.yahoo.vespa.orchestrator.status.HostStatus; +import com.yahoo.vespa.service.monitor.ServiceMonitor; import java.time.Duration; import java.util.ArrayList; @@ -30,27 +33,34 @@ public class MetricsReporter extends Maintainer { private final Metric metric; private final Orchestrator orchestrator; + private final ServiceMonitor serviceMonitor; private final Map, 
Metric.Context> contextMap = new HashMap<>(); public MetricsReporter(NodeRepository nodeRepository, Metric metric, Orchestrator orchestrator, + ServiceMonitor serviceMonitor, Duration interval, JobControl jobControl) { super(nodeRepository, interval, jobControl); this.metric = metric; this.orchestrator = orchestrator; + this.serviceMonitor = serviceMonitor; } @Override public void maintain() { List nodes = nodeRepository().getNodes(); - nodes.forEach(this::updateNodeMetrics); + + Map> servicesByHost = + serviceMonitor.getServiceModelSnapshot().getServiceInstancesByHostName(); + + nodes.forEach(node -> updateNodeMetrics(node, servicesByHost)); updateStateMetrics(nodes); updateDockerMetrics(nodes); } - private void updateNodeMetrics(Node node) { + private void updateNodeMetrics(Node node, Map> servicesByHost) { Metric.Context context; Optional allocation = node.allocation(); @@ -117,7 +127,27 @@ public class MetricsReporter extends Maintainer { // Ignore } - // TODO: Also add metric on whether some services are down on node? 
+ long numberOfServices = 0; + HostName hostName = new HostName(node.hostname()); + List services = servicesByHost.get(hostName); + if (services != null) { + Map servicesCount = services.stream().collect( + Collectors.groupingBy(ServiceInstance::serviceStatus, Collectors.counting())); + + metric.set( + "numberOfServicesUp", + servicesCount.getOrDefault(ServiceStatus.UP, 0L), context); + + metric.set( + "numberOfServicesNotChecked", + servicesCount.getOrDefault(ServiceStatus.NOT_CHECKED, 0L), context); + + metric.set( + "numberOfServicesDown", + servicesCount.getOrDefault(ServiceStatus.DOWN, 0L), context); + } + + metric.set("numberOfServices", numberOfServices, context); } private static String toApp(ApplicationId applicationId) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index 7305b91f317..1fffde874fd 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -76,7 +76,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { dirtyExpirer = new DirtyExpirer(nodeRepository, clock, durationFromEnv("dirty_expiry").orElse(defaults.dirtyExpiry), jobControl); provisionedExpirer = new ProvisionedExpirer(nodeRepository, clock, durationFromEnv("provisioned_expiry").orElse(defaults.provisionedExpiry), jobControl); nodeRebooter = new NodeRebooter(nodeRepository, clock, durationFromEnv("reboot_interval").orElse(defaults.rebootInterval), jobControl); - metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, durationFromEnv("metrics_interval").orElse(defaults.metricsInterval), jobControl); + metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, serviceMonitor, 
durationFromEnv("metrics_interval").orElse(defaults.metricsInterval), jobControl); RetirementPolicy policy = new RetirementPolicyList(new RetireIPv4OnlyNodes(zone)); FlavorSpareChecker flavorSpareChecker = new FlavorSpareChecker( diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java index 56e5fcafbde..0c4f937637c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java @@ -16,6 +16,7 @@ import com.yahoo.vespa.applicationmodel.ServiceType; import com.yahoo.vespa.applicationmodel.TenantId; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.service.monitor.ServiceModel; import com.yahoo.vespa.service.monitor.ServiceMonitor; import java.util.Collections; @@ -89,4 +90,8 @@ public class ServiceMonitorStub implements ServiceMonitor { return status; } + @Override + public ServiceModel getServiceModelSnapshot() { + throw new IllegalStateException("getServicemodelSnapshot has not been implemented"); + } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java index a3697e57482..4404debff42 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java @@ -21,6 +21,8 @@ import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder; import com.yahoo.vespa.hosted.provision.testutils.MockNameResolver; import com.yahoo.vespa.orchestrator.Orchestrator; import 
com.yahoo.vespa.orchestrator.status.HostStatus; +import com.yahoo.vespa.service.monitor.ServiceModel; +import com.yahoo.vespa.service.monitor.ServiceMonitor; import org.junit.Test; import java.time.Clock; @@ -81,12 +83,23 @@ public class MetricsReporterTest { expectedMetrics.put("hardwareFailure", 0); expectedMetrics.put("hardwareDivergence", 0); expectedMetrics.put("allowedToBeDown", 0); + expectedMetrics.put("numberOfServices", 0L); Orchestrator orchestrator = mock(Orchestrator.class); + ServiceMonitor serviceMonitor = mock(ServiceMonitor.class); when(orchestrator.getNodeStatus(any())).thenReturn(HostStatus.NO_REMARKS); + ServiceModel serviceModel = mock(ServiceModel.class); + when(serviceMonitor.getServiceModelSnapshot()).thenReturn(serviceModel); + when(serviceModel.getServiceInstancesByHostName()).thenReturn(Collections.EMPTY_MAP); TestMetric metric = new TestMetric(); - MetricsReporter metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, Duration.ofMinutes(1), new JobControl(nodeRepository.database())); + MetricsReporter metricsReporter = new MetricsReporter( + nodeRepository, + metric, + orchestrator, + serviceMonitor, + Duration.ofMinutes(1), + new JobControl(nodeRepository.database())); metricsReporter.maintain(); assertEquals(expectedMetrics, metric.values); @@ -121,10 +134,14 @@ public class MetricsReporterTest { nodeRepository.addDockerNodes(Collections.singletonList(container2)); Orchestrator orchestrator = mock(Orchestrator.class); + ServiceMonitor serviceMonitor = mock(ServiceMonitor.class); when(orchestrator.getNodeStatus(any())).thenReturn(HostStatus.NO_REMARKS); + ServiceModel serviceModel = mock(ServiceModel.class); + when(serviceMonitor.getServiceModelSnapshot()).thenReturn(serviceModel); + when(serviceModel.getServiceInstancesByHostName()).thenReturn(Collections.EMPTY_MAP); TestMetric metric = new TestMetric(); - MetricsReporter metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, 
Duration.ofMinutes(1), new JobControl(nodeRepository.database())); + MetricsReporter metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, serviceMonitor, Duration.ofMinutes(1), new JobControl(nodeRepository.database())); metricsReporter.maintain(); assertEquals(0L, metric.values.get("hostedVespa.readyHosts")); /** Only tenants counts **/ -- cgit v1.2.3