summary | refs | log | tree | commit | diff | stats
path: root/node-repository
diff options
context:
space:
mode:
author: Bjørn Christian Seime <bjorn.christian@seime.no> 2017-10-27 13:04:55 +0200
committer: GitHub <noreply@github.com> 2017-10-27 13:04:55 +0200
commit: 8e0ed0a6ee22cd0d5d11f56f4ae11fdee848d19d (patch)
tree: 16dfb1df20021092d03b9c8bb057b5653c0acbd1 /node-repository
parent: 23db8ec8a0c161414ec87604e3cbe8ca1a0c607e (diff)
parent: e855b5734cae2d68cf2b68d6a27ca5f7f2ca1127 (diff)
Merge pull request #3913 from vespa-engine/hakonhall/add-service-status-metrics
Add service status metrics
Diffstat (limited to 'node-repository')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java 41
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java 2
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java 5
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java 21
4 files changed, 63 insertions, 6 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
index 1601b2e3205..2ed03e13f22 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
@@ -7,6 +7,8 @@ import com.yahoo.config.provision.Flavor;
import com.yahoo.config.provision.NodeType;
import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.applicationmodel.HostName;
+import com.yahoo.vespa.applicationmodel.ServiceInstance;
+import com.yahoo.vespa.applicationmodel.ServiceStatus;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Allocation;
@@ -14,6 +16,7 @@ import com.yahoo.vespa.hosted.provision.provisioning.DockerHostCapacity;
import com.yahoo.vespa.orchestrator.HostNameNotFoundException;
import com.yahoo.vespa.orchestrator.Orchestrator;
import com.yahoo.vespa.orchestrator.status.HostStatus;
+import com.yahoo.vespa.service.monitor.ServiceMonitor;
import java.time.Duration;
import java.util.ArrayList;
@@ -30,27 +33,34 @@ public class MetricsReporter extends Maintainer {
private final Metric metric;
private final Orchestrator orchestrator;
+ private final ServiceMonitor serviceMonitor;
private final Map<Map<String, String>, Metric.Context> contextMap = new HashMap<>();
public MetricsReporter(NodeRepository nodeRepository,
Metric metric,
Orchestrator orchestrator,
+ ServiceMonitor serviceMonitor,
Duration interval,
JobControl jobControl) {
super(nodeRepository, interval, jobControl);
this.metric = metric;
this.orchestrator = orchestrator;
+ this.serviceMonitor = serviceMonitor;
}
@Override
public void maintain() {
List<Node> nodes = nodeRepository().getNodes();
- nodes.forEach(this::updateNodeMetrics);
+
+ Map<HostName, List<ServiceInstance>> servicesByHost =
+ serviceMonitor.getServiceModelSnapshot().getServiceInstancesByHostName();
+
+ nodes.forEach(node -> updateNodeMetrics(node, servicesByHost));
updateStateMetrics(nodes);
updateDockerMetrics(nodes);
}
- private void updateNodeMetrics(Node node) {
+ private void updateNodeMetrics(Node node, Map<HostName, List<ServiceInstance>> servicesByHost) {
Metric.Context context;
Optional<Allocation> allocation = node.allocation();
@@ -117,7 +127,32 @@ public class MetricsReporter extends Maintainer {
// Ignore
}
- // TODO: Also add metric on whether some services are down on node?
+ long numberOfServices;
+ HostName hostName = new HostName(node.hostname());
+ List<ServiceInstance> services = servicesByHost.get(hostName);
+ if (services == null) {
+ numberOfServices = 0;
+ } else {
+ Map<ServiceStatus, Long> servicesCount = services.stream().collect(
+ Collectors.groupingBy(ServiceInstance::serviceStatus, Collectors.counting()));
+
+ numberOfServices = servicesCount.values().stream().mapToLong(Long::longValue).sum();
+
+ metric.set(
+ "numberOfServicesUp",
+ servicesCount.getOrDefault(ServiceStatus.UP, 0L), context);
+
+ metric.set(
+ "numberOfServicesNotChecked",
+ servicesCount.getOrDefault(ServiceStatus.NOT_CHECKED, 0L), context);
+
+ long numberOfServicesDown = servicesCount.getOrDefault(ServiceStatus.DOWN, 0L);
+ metric.set("numberOfServicesDown", numberOfServicesDown, context);
+
+ metric.set("someServicesDown", (numberOfServicesDown > 0 ? 1 : 0), context);
+ }
+
+ metric.set("numberOfServices", numberOfServices, context);
}
private static String toApp(ApplicationId applicationId) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index 7305b91f317..1fffde874fd 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -76,7 +76,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
dirtyExpirer = new DirtyExpirer(nodeRepository, clock, durationFromEnv("dirty_expiry").orElse(defaults.dirtyExpiry), jobControl);
provisionedExpirer = new ProvisionedExpirer(nodeRepository, clock, durationFromEnv("provisioned_expiry").orElse(defaults.provisionedExpiry), jobControl);
nodeRebooter = new NodeRebooter(nodeRepository, clock, durationFromEnv("reboot_interval").orElse(defaults.rebootInterval), jobControl);
- metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, durationFromEnv("metrics_interval").orElse(defaults.metricsInterval), jobControl);
+ metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, serviceMonitor, durationFromEnv("metrics_interval").orElse(defaults.metricsInterval), jobControl);
RetirementPolicy policy = new RetirementPolicyList(new RetireIPv4OnlyNodes(zone));
FlavorSpareChecker flavorSpareChecker = new FlavorSpareChecker(
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java
index 56e5fcafbde..0c4f937637c 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java
@@ -16,6 +16,7 @@ import com.yahoo.vespa.applicationmodel.ServiceType;
import com.yahoo.vespa.applicationmodel.TenantId;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.service.monitor.ServiceModel;
import com.yahoo.vespa.service.monitor.ServiceMonitor;
import java.util.Collections;
@@ -89,4 +90,8 @@ public class ServiceMonitorStub implements ServiceMonitor {
return status;
}
+ @Override
+ public ServiceModel getServiceModelSnapshot() {
+ throw new IllegalStateException("getServicemodelSnapshot has not been implemented");
+ }
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java
index a3697e57482..6c0c344a72b 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java
@@ -21,6 +21,8 @@ import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder;
import com.yahoo.vespa.hosted.provision.testutils.MockNameResolver;
import com.yahoo.vespa.orchestrator.Orchestrator;
import com.yahoo.vespa.orchestrator.status.HostStatus;
+import com.yahoo.vespa.service.monitor.ServiceModel;
+import com.yahoo.vespa.service.monitor.ServiceMonitor;
import org.junit.Test;
import java.time.Clock;
@@ -81,12 +83,23 @@ public class MetricsReporterTest {
expectedMetrics.put("hardwareFailure", 0);
expectedMetrics.put("hardwareDivergence", 0);
expectedMetrics.put("allowedToBeDown", 0);
+ expectedMetrics.put("numberOfServices", 0L);
Orchestrator orchestrator = mock(Orchestrator.class);
+ ServiceMonitor serviceMonitor = mock(ServiceMonitor.class);
when(orchestrator.getNodeStatus(any())).thenReturn(HostStatus.NO_REMARKS);
+ ServiceModel serviceModel = mock(ServiceModel.class);
+ when(serviceMonitor.getServiceModelSnapshot()).thenReturn(serviceModel);
+ when(serviceModel.getServiceInstancesByHostName()).thenReturn(Collections.emptyMap());
TestMetric metric = new TestMetric();
- MetricsReporter metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, Duration.ofMinutes(1), new JobControl(nodeRepository.database()));
+ MetricsReporter metricsReporter = new MetricsReporter(
+ nodeRepository,
+ metric,
+ orchestrator,
+ serviceMonitor,
+ Duration.ofMinutes(1),
+ new JobControl(nodeRepository.database()));
metricsReporter.maintain();
assertEquals(expectedMetrics, metric.values);
@@ -121,10 +134,14 @@ public class MetricsReporterTest {
nodeRepository.addDockerNodes(Collections.singletonList(container2));
Orchestrator orchestrator = mock(Orchestrator.class);
+ ServiceMonitor serviceMonitor = mock(ServiceMonitor.class);
when(orchestrator.getNodeStatus(any())).thenReturn(HostStatus.NO_REMARKS);
+ ServiceModel serviceModel = mock(ServiceModel.class);
+ when(serviceMonitor.getServiceModelSnapshot()).thenReturn(serviceModel);
+ when(serviceModel.getServiceInstancesByHostName()).thenReturn(Collections.emptyMap());
TestMetric metric = new TestMetric();
- MetricsReporter metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, Duration.ofMinutes(1), new JobControl(nodeRepository.database()));
+ MetricsReporter metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, serviceMonitor, Duration.ofMinutes(1), new JobControl(nodeRepository.database()));
metricsReporter.maintain();
assertEquals(0L, metric.values.get("hostedVespa.readyHosts")); /** Only tenants counts **/