diff options
author | Bjørn Christian Seime <bjorn.christian@seime.no> | 2017-10-27 13:04:55 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-10-27 13:04:55 +0200 |
commit | 8e0ed0a6ee22cd0d5d11f56f4ae11fdee848d19d (patch) | |
tree | 16dfb1df20021092d03b9c8bb057b5653c0acbd1 /node-repository | |
parent | 23db8ec8a0c161414ec87604e3cbe8ca1a0c607e (diff) | |
parent | e855b5734cae2d68cf2b68d6a27ca5f7f2ca1127 (diff) |
Merge pull request #3913 from vespa-engine/hakonhall/add-service-status-metrics
Add service status metrics
Diffstat (limited to 'node-repository')
4 files changed, 63 insertions, 6 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java index 1601b2e3205..2ed03e13f22 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java @@ -7,6 +7,8 @@ import com.yahoo.config.provision.Flavor; import com.yahoo.config.provision.NodeType; import com.yahoo.jdisc.Metric; import com.yahoo.vespa.applicationmodel.HostName; +import com.yahoo.vespa.applicationmodel.ServiceInstance; +import com.yahoo.vespa.applicationmodel.ServiceStatus; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Allocation; @@ -14,6 +16,7 @@ import com.yahoo.vespa.hosted.provision.provisioning.DockerHostCapacity; import com.yahoo.vespa.orchestrator.HostNameNotFoundException; import com.yahoo.vespa.orchestrator.Orchestrator; import com.yahoo.vespa.orchestrator.status.HostStatus; +import com.yahoo.vespa.service.monitor.ServiceMonitor; import java.time.Duration; import java.util.ArrayList; @@ -30,27 +33,34 @@ public class MetricsReporter extends Maintainer { private final Metric metric; private final Orchestrator orchestrator; + private final ServiceMonitor serviceMonitor; private final Map<Map<String, String>, Metric.Context> contextMap = new HashMap<>(); public MetricsReporter(NodeRepository nodeRepository, Metric metric, Orchestrator orchestrator, + ServiceMonitor serviceMonitor, Duration interval, JobControl jobControl) { super(nodeRepository, interval, jobControl); this.metric = metric; this.orchestrator = orchestrator; + this.serviceMonitor = serviceMonitor; } @Override public void maintain() { List<Node> nodes = nodeRepository().getNodes(); - nodes.forEach(this::updateNodeMetrics); + + Map<HostName, List<ServiceInstance>> servicesByHost = + serviceMonitor.getServiceModelSnapshot().getServiceInstancesByHostName(); + + nodes.forEach(node -> updateNodeMetrics(node, servicesByHost)); updateStateMetrics(nodes); updateDockerMetrics(nodes); } - private void updateNodeMetrics(Node node) { + private void updateNodeMetrics(Node node, Map<HostName, List<ServiceInstance>> servicesByHost) { Metric.Context context; Optional<Allocation> allocation = node.allocation(); @@ -117,7 +127,32 @@ public class MetricsReporter extends Maintainer { // Ignore } - // TODO: Also add metric on whether some services are down on node? + long numberOfServices; + HostName hostName = new HostName(node.hostname()); + List<ServiceInstance> services = servicesByHost.get(hostName); + if (services == null) { + numberOfServices = 0; + } else { + Map<ServiceStatus, Long> servicesCount = services.stream().collect( + Collectors.groupingBy(ServiceInstance::serviceStatus, Collectors.counting())); + + numberOfServices = servicesCount.values().stream().mapToLong(Long::longValue).sum(); + + metric.set( + "numberOfServicesUp", + servicesCount.getOrDefault(ServiceStatus.UP, 0L), context); + + metric.set( + "numberOfServicesNotChecked", + servicesCount.getOrDefault(ServiceStatus.NOT_CHECKED, 0L), context); + + long numberOfServicesDown = servicesCount.getOrDefault(ServiceStatus.DOWN, 0L); + metric.set("numberOfServicesDown", numberOfServicesDown, context); + + metric.set("someServicesDown", (numberOfServicesDown > 0 ? 1 : 0), context); + } + + metric.set("numberOfServices", numberOfServices, context); } private static String toApp(ApplicationId applicationId) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java index 7305b91f317..1fffde874fd 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java @@ -76,7 +76,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent { dirtyExpirer = new DirtyExpirer(nodeRepository, clock, durationFromEnv("dirty_expiry").orElse(defaults.dirtyExpiry), jobControl); provisionedExpirer = new ProvisionedExpirer(nodeRepository, clock, durationFromEnv("provisioned_expiry").orElse(defaults.provisionedExpiry), jobControl); nodeRebooter = new NodeRebooter(nodeRepository, clock, durationFromEnv("reboot_interval").orElse(defaults.rebootInterval), jobControl); - metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, durationFromEnv("metrics_interval").orElse(defaults.metricsInterval), jobControl); + metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, serviceMonitor, durationFromEnv("metrics_interval").orElse(defaults.metricsInterval), jobControl); RetirementPolicy policy = new RetirementPolicyList(new RetireIPv4OnlyNodes(zone)); FlavorSpareChecker flavorSpareChecker = new FlavorSpareChecker( diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java index 56e5fcafbde..0c4f937637c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java @@ -16,6 +16,7 @@ import com.yahoo.vespa.applicationmodel.ServiceType; import com.yahoo.vespa.applicationmodel.TenantId; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeRepository; +import com.yahoo.vespa.service.monitor.ServiceModel; import com.yahoo.vespa.service.monitor.ServiceMonitor; import java.util.Collections; @@ -89,4 +90,8 @@ public class ServiceMonitorStub implements ServiceMonitor { return status; } + @Override + public ServiceModel getServiceModelSnapshot() { + throw new IllegalStateException("getServicemodelSnapshot has not been implemented"); + } } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java index a3697e57482..6c0c344a72b 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java @@ -21,6 +21,8 @@ import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder; import com.yahoo.vespa.hosted.provision.testutils.MockNameResolver; import com.yahoo.vespa.orchestrator.Orchestrator; import com.yahoo.vespa.orchestrator.status.HostStatus; +import com.yahoo.vespa.service.monitor.ServiceModel; +import com.yahoo.vespa.service.monitor.ServiceMonitor; import org.junit.Test; import java.time.Clock; @@ -81,12 +83,23 @@ public class MetricsReporterTest { expectedMetrics.put("hardwareFailure", 0); expectedMetrics.put("hardwareDivergence", 0); expectedMetrics.put("allowedToBeDown", 0); + expectedMetrics.put("numberOfServices", 0L); Orchestrator orchestrator = mock(Orchestrator.class); + ServiceMonitor serviceMonitor = mock(ServiceMonitor.class); when(orchestrator.getNodeStatus(any())).thenReturn(HostStatus.NO_REMARKS); + ServiceModel serviceModel = mock(ServiceModel.class); + when(serviceMonitor.getServiceModelSnapshot()).thenReturn(serviceModel); + when(serviceModel.getServiceInstancesByHostName()).thenReturn(Collections.emptyMap()); TestMetric metric = new TestMetric(); - MetricsReporter metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, Duration.ofMinutes(1), new JobControl(nodeRepository.database())); + MetricsReporter metricsReporter = new MetricsReporter( + nodeRepository, + metric, + orchestrator, + serviceMonitor, + Duration.ofMinutes(1), + new JobControl(nodeRepository.database())); metricsReporter.maintain(); assertEquals(expectedMetrics, metric.values); @@ -121,10 +134,14 @@ public class MetricsReporterTest { nodeRepository.addDockerNodes(Collections.singletonList(container2)); Orchestrator orchestrator = mock(Orchestrator.class); + ServiceMonitor serviceMonitor = mock(ServiceMonitor.class); when(orchestrator.getNodeStatus(any())).thenReturn(HostStatus.NO_REMARKS); + ServiceModel serviceModel = mock(ServiceModel.class); + when(serviceMonitor.getServiceModelSnapshot()).thenReturn(serviceModel); + when(serviceModel.getServiceInstancesByHostName()).thenReturn(Collections.emptyMap()); TestMetric metric = new TestMetric(); - MetricsReporter metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, Duration.ofMinutes(1), new JobControl(nodeRepository.database())); + MetricsReporter metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, serviceMonitor, Duration.ofMinutes(1), new JobControl(nodeRepository.database())); metricsReporter.maintain(); assertEquals(0L, metric.values.get("hostedVespa.readyHosts")); /** Only tenants counts **/ |