diff options
author | Håkon Hallingstad <hakon@verizonmedia.com> | 2020-01-29 10:09:05 +0100 |
---|---|---|
committer | Håkon Hallingstad <hakon@verizonmedia.com> | 2020-01-29 10:09:05 +0100 |
commit | a2a316c1086dffad1686063dd181f8616443766c (patch) | |
tree | 991233c598a52165bcfacf67f0a6877e8641580a /node-repository | |
parent | 0f351480627ee6c1ab034b5791e4d67350c79015 (diff) |
New metric: seconds node has been suspended
Diffstat (limited to 'node-repository')
3 files changed, 30 insertions, 11 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
index d8e077a96a6..970972a5fe6 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
@@ -16,9 +16,9 @@ import com.yahoo.vespa.hosted.provision.node.Allocation;
 import com.yahoo.vespa.hosted.provision.node.History;
 import com.yahoo.vespa.orchestrator.Orchestrator;
 import com.yahoo.vespa.orchestrator.status.HostInfo;
-import com.yahoo.vespa.orchestrator.status.HostStatus;
 import com.yahoo.vespa.service.monitor.ServiceMonitor;
 
+import java.time.Clock;
 import java.time.Duration;
 import java.util.HashMap;
 import java.util.List;
@@ -40,18 +40,21 @@ public class MetricsReporter extends Maintainer {
     private final ServiceMonitor serviceMonitor;
     private final Map<Map<String, String>, Metric.Context> contextMap = new HashMap<>();
     private final Supplier<Integer> pendingRedeploymentsSupplier;
+    private final Clock clock;
 
     MetricsReporter(NodeRepository nodeRepository,
                     Metric metric,
                     Orchestrator orchestrator,
                     ServiceMonitor serviceMonitor,
                     Supplier<Integer> pendingRedeploymentsSupplier,
-                    Duration interval) {
+                    Duration interval,
+                    Clock clock) {
         super(nodeRepository, interval);
         this.metric = metric;
         this.orchestrator = orchestrator.getNodeStatuses();
         this.serviceMonitor = serviceMonitor;
         this.pendingRedeploymentsSupplier = pendingRedeploymentsSupplier;
+        this.clock = clock;
     }
 
     @Override
@@ -127,8 +130,16 @@ public class MetricsReporter extends Maintainer {
         metric.set("failReport", NodeFailer.reasonsToFailParentHost(node).isEmpty() ? 0 : 1, context);
 
         orchestrator.apply(new HostName(node.hostname()))
-                .map(info -> info.status().isSuspended() ? 1 : 0)
-                .ifPresent(allowedToBeDown -> metric.set("allowedToBeDown", allowedToBeDown, context));
+                .ifPresent(info -> {
+                    int suspended = info.status().isSuspended() ? 1 : 0;
+                    metric.set("suspended", suspended, context);
+                    metric.set("allowedToBeDown", suspended, context); // remove summer 2020.
+
+                    info.suspendedSince().ifPresent(suspendedSince -> {
+                        Duration duration = Duration.between(suspendedSince, clock.instant());
+                        metric.set("suspendedSeconds", duration.getSeconds(), context);
+                    });
+                });
 
         long numberOfServices;
         HostName hostName = new HostName(node.hostname());
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index 10aff833584..6ca8b78b48d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -76,7 +76,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
         dirtyExpirer = new DirtyExpirer(nodeRepository, clock, defaults.dirtyExpiry);
         provisionedExpirer = new ProvisionedExpirer(nodeRepository, clock, defaults.provisionedExpiry);
         nodeRebooter = new NodeRebooter(nodeRepository, clock, flagSource);
-        metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, serviceMonitor, periodicApplicationMaintainer::pendingDeployments, defaults.metricsInterval);
+        metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, serviceMonitor, periodicApplicationMaintainer::pendingDeployments, defaults.metricsInterval, clock);
         infrastructureProvisioner = new InfrastructureProvisioner(nodeRepository, infraDeployer, defaults.infrastructureProvisionInterval);
         loadBalancerExpirer = provisionServiceProvider.getLoadBalancerService().map(lbService ->
                 new LoadBalancerExpirer(nodeRepository, defaults.loadBalancerExpirerInterval, lbService));
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
index 3ca884540f1..287842e56f0 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
@@ -10,6 +10,7 @@ import com.yahoo.config.provision.NodeResources;
 import com.yahoo.config.provision.NodeType;
 import com.yahoo.config.provision.Zone;
 import com.yahoo.jdisc.Metric;
+import com.yahoo.test.ManualClock;
 import com.yahoo.vespa.curator.Curator;
 import com.yahoo.vespa.curator.mock.MockCurator;
 import com.yahoo.vespa.hosted.provision.LockedNodeList;
@@ -30,6 +31,7 @@ import org.junit.Test;
 
 import java.time.Clock;
 import java.time.Duration;
+import java.time.Instant;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -83,12 +85,17 @@ public class MetricsReporterTest {
         expectedMetrics.put("wantToRetire", 0);
         expectedMetrics.put("wantToDeprovision", 0);
         expectedMetrics.put("failReport", 0);
-        expectedMetrics.put("allowedToBeDown", 0);
+        expectedMetrics.put("allowedToBeDown", 1);
+        expectedMetrics.put("suspended", 1);
+        expectedMetrics.put("suspendedSeconds", 123L);
         expectedMetrics.put("numberOfServices", 0L);
 
+        ManualClock clock = new ManualClock(Instant.ofEpochSecond(124));
         Orchestrator orchestrator = mock(Orchestrator.class);
         ServiceMonitor serviceMonitor = mock(ServiceMonitor.class);
-        when(orchestrator.getNodeStatuses()).thenReturn(hostName -> Optional.of(HostInfo.createNoRemarks()));
+        when(orchestrator.getNodeStatuses()).thenReturn(hostName ->
+                Optional.of(HostInfo.createSuspended(HostStatus.ALLOWED_TO_BE_DOWN, Instant.ofEpochSecond(1)))
+        );
         ServiceModel serviceModel = mock(ServiceModel.class);
         when(serviceMonitor.getServiceModelSnapshot()).thenReturn(serviceModel);
         when(serviceModel.getServiceInstancesByHostName()).thenReturn(Map.of());
@@ -100,8 +107,8 @@ public class MetricsReporterTest {
                 orchestrator,
                 serviceMonitor,
                 () -> 42,
-                Duration.ofMinutes(1)
-        );
+                Duration.ofMinutes(1),
+                clock);
         metricsReporter.maintain();
 
         assertEquals(expectedMetrics, metric.values);
@@ -143,14 +150,15 @@ public class MetricsReporterTest {
         when(serviceModel.getServiceInstancesByHostName()).thenReturn(Map.of());
 
         TestMetric metric = new TestMetric();
+        ManualClock clock = new ManualClock();
         MetricsReporter metricsReporter = new MetricsReporter(
                 nodeRepository,
                 metric,
                 orchestrator,
                 serviceMonitor,
                 () -> 42,
-                Duration.ofMinutes(1)
-        );
+                Duration.ofMinutes(1),
+                clock);
         metricsReporter.maintain();
 
         assertEquals(0, metric.values.get("hostedVespa.readyHosts")); // Only tenants counts