summaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
author Håkon Hallingstad <hakon@oath.com> 2017-10-26 23:42:02 +0200
committer Håkon Hallingstad <hakon@oath.com> 2017-10-26 23:42:02 +0200
commit b078158829a362709acd2d9d738a3307037b582e (patch)
tree 4a467c12ae24331919e89c419d91d3333adde62d /node-repository
parent a37e4edd39a8807da12219d68874fcc67bd934f0 (diff)
Add service status metrics
Diffstat (limited to 'node-repository')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java 36
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java 5
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java 21
4 files changed, 58 insertions, 6 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
index 1601b2e3205..471fe132e51 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
@@ -7,6 +7,8 @@ import com.yahoo.config.provision.Flavor;
import com.yahoo.config.provision.NodeType;
import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.applicationmodel.ServiceInstance;
+import com.yahoo.vespa.applicationmodel.ServiceStatus;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Allocation;
@@ -14,6 +16,7 @@ import com.yahoo.vespa.hosted.provision.provisioning.DockerHostCapacity;
import com.yahoo.vespa.orchestrator.HostNameNotFoundException;
import com.yahoo.vespa.orchestrator.Orchestrator;
import com.yahoo.vespa.orchestrator.status.HostStatus;
+import com.yahoo.vespa.service.monitor.ServiceMonitor;
import java.time.Duration;
import java.util.ArrayList;
@@ -30,27 +33,34 @@ public class MetricsReporter extends Maintainer {
private final Metric metric;
private final Orchestrator orchestrator;
+ private final ServiceMonitor serviceMonitor;
private final Map<Map<String, String>, Metric.Context> contextMap = new HashMap<>();
public MetricsReporter(NodeRepository nodeRepository,
Metric metric,
Orchestrator orchestrator,
+ ServiceMonitor serviceMonitor,
Duration interval,
JobControl jobControl) {
super(nodeRepository, interval, jobControl);
this.metric = metric;
this.orchestrator = orchestrator;
+ this.serviceMonitor = serviceMonitor; // queried in maintain() for the per-host service instance snapshot
}
@Override
public void maintain() {
List<Node> nodes = nodeRepository().getNodes();
- nodes.forEach(this::updateNodeMetrics);
+ // Read the service model once per run so every node is reported against the same snapshot.
+ Map<HostName, List<ServiceInstance>> servicesByHost =
+ serviceMonitor.getServiceModelSnapshot().getServiceInstancesByHostName();
+
+ nodes.forEach(node -> updateNodeMetrics(node, servicesByHost));
updateStateMetrics(nodes);
updateDockerMetrics(nodes);
}
- private void updateNodeMetrics(Node node) {
+ private void updateNodeMetrics(Node node, Map<HostName, List<ServiceInstance>> servicesByHost) {
Metric.Context context;
Optional<Allocation> allocation = node.allocation();
@@ -117,7 +127,27 @@ public class MetricsReporter extends Maintainer {
// Ignore
}
- // TODO: Also add metric on whether some services are down on node?
+ // Total service instances on this host; 0 when the snapshot has no entry for it.
+ HostName hostName = new HostName(node.hostname());
+ List<ServiceInstance> services = servicesByHost.get(hostName);
+ long numberOfServices = services == null ? 0 : services.size();
+ if (services != null) {
+ // Per-status counts are only reported for hosts present in the snapshot.
+ Map<ServiceStatus, Long> servicesCount = services.stream().collect(
+ Collectors.groupingBy(ServiceInstance::serviceStatus, Collectors.counting()));
+ metric.set(
+ "numberOfServicesUp",
+ servicesCount.getOrDefault(ServiceStatus.UP, 0L), context);
+
+ metric.set(
+ "numberOfServicesNotChecked",
+ servicesCount.getOrDefault(ServiceStatus.NOT_CHECKED, 0L), context);
+
+ metric.set(
+ "numberOfServicesDown",
+ servicesCount.getOrDefault(ServiceStatus.DOWN, 0L), context);
+ }
+ metric.set("numberOfServices", numberOfServices, context);
}
private static String toApp(ApplicationId applicationId) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index 7305b91f317..1fffde874fd 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -76,7 +76,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
dirtyExpirer = new DirtyExpirer(nodeRepository, clock, durationFromEnv("dirty_expiry").orElse(defaults.dirtyExpiry), jobControl);
provisionedExpirer = new ProvisionedExpirer(nodeRepository, clock, durationFromEnv("provisioned_expiry").orElse(defaults.provisionedExpiry), jobControl);
nodeRebooter = new NodeRebooter(nodeRepository, clock, durationFromEnv("reboot_interval").orElse(defaults.rebootInterval), jobControl);
- metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, durationFromEnv("metrics_interval").orElse(defaults.metricsInterval), jobControl);
+ metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, serviceMonitor, durationFromEnv("metrics_interval").orElse(defaults.metricsInterval), jobControl);
RetirementPolicy policy = new RetirementPolicyList(new RetireIPv4OnlyNodes(zone));
FlavorSpareChecker flavorSpareChecker = new FlavorSpareChecker(
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java
index 56e5fcafbde..0c4f937637c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java
@@ -16,6 +16,7 @@ import com.yahoo.vespa.applicationmodel.ServiceType;
import com.yahoo.vespa.applicationmodel.TenantId;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.service.monitor.ServiceModel;
import com.yahoo.vespa.service.monitor.ServiceMonitor;
import java.util.Collections;
@@ -89,4 +90,8 @@ public class ServiceMonitorStub implements ServiceMonitor {
return status;
}
+ @Override
+ public ServiceModel getServiceModelSnapshot() {
+ throw new IllegalStateException("getServiceModelSnapshot has not been implemented"); // stub: always fails, no snapshot is built here
+ }
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java
index a3697e57482..4404debff42 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java
@@ -21,6 +21,8 @@ import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder;
import com.yahoo.vespa.hosted.provision.testutils.MockNameResolver;
import com.yahoo.vespa.orchestrator.Orchestrator;
import com.yahoo.vespa.orchestrator.status.HostStatus;
+import com.yahoo.vespa.service.monitor.ServiceModel;
+import com.yahoo.vespa.service.monitor.ServiceMonitor;
import org.junit.Test;
import java.time.Clock;
@@ -81,12 +83,23 @@ public class MetricsReporterTest {
expectedMetrics.put("hardwareFailure", 0);
expectedMetrics.put("hardwareDivergence", 0);
expectedMetrics.put("allowedToBeDown", 0);
+ expectedMetrics.put("numberOfServices", 0L);
Orchestrator orchestrator = mock(Orchestrator.class);
+ ServiceMonitor serviceMonitor = mock(ServiceMonitor.class);
when(orchestrator.getNodeStatus(any())).thenReturn(HostStatus.NO_REMARKS);
+ ServiceModel serviceModel = mock(ServiceModel.class);
+ when(serviceMonitor.getServiceModelSnapshot()).thenReturn(serviceModel);
+ when(serviceModel.getServiceInstancesByHostName()).thenReturn(Collections.emptyMap()); // typed empty map: the stub reports no services for any host
TestMetric metric = new TestMetric();
- MetricsReporter metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, Duration.ofMinutes(1), new JobControl(nodeRepository.database()));
+ MetricsReporter metricsReporter = new MetricsReporter(
+ nodeRepository,
+ metric,
+ orchestrator,
+ serviceMonitor,
+ Duration.ofMinutes(1),
+ new JobControl(nodeRepository.database()));
metricsReporter.maintain();
assertEquals(expectedMetrics, metric.values);
@@ -121,10 +134,14 @@ public class MetricsReporterTest {
nodeRepository.addDockerNodes(Collections.singletonList(container2));
Orchestrator orchestrator = mock(Orchestrator.class);
+ ServiceMonitor serviceMonitor = mock(ServiceMonitor.class);
when(orchestrator.getNodeStatus(any())).thenReturn(HostStatus.NO_REMARKS);
+ ServiceModel serviceModel = mock(ServiceModel.class);
+ when(serviceMonitor.getServiceModelSnapshot()).thenReturn(serviceModel);
+ when(serviceModel.getServiceInstancesByHostName()).thenReturn(Collections.emptyMap()); // typed empty map: the stub reports no services for any host
TestMetric metric = new TestMetric();
- MetricsReporter metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, Duration.ofMinutes(1), new JobControl(nodeRepository.database()));
+ MetricsReporter metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, serviceMonitor, Duration.ofMinutes(1), new JobControl(nodeRepository.database()));
metricsReporter.maintain();
assertEquals(0L, metric.values.get("hostedVespa.readyHosts")); /** Only tenants counts **/