summaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
authorHåkon Hallingstad <hakon@verizonmedia.com>2020-01-29 10:09:05 +0100
committerHåkon Hallingstad <hakon@verizonmedia.com>2020-01-29 10:09:05 +0100
commita2a316c1086dffad1686063dd181f8616443766c (patch)
tree991233c598a52165bcfacf67f0a6877e8641580a /node-repository
parent0f351480627ee6c1ab034b5791e4d67350c79015 (diff)
New metric: seconds node has been suspended
Diffstat (limited to 'node-repository')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java19
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java2
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java20
3 files changed, 30 insertions, 11 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
index d8e077a96a6..970972a5fe6 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
@@ -16,9 +16,9 @@ import com.yahoo.vespa.hosted.provision.node.Allocation;
import com.yahoo.vespa.hosted.provision.node.History;
import com.yahoo.vespa.orchestrator.Orchestrator;
import com.yahoo.vespa.orchestrator.status.HostInfo;
-import com.yahoo.vespa.orchestrator.status.HostStatus;
import com.yahoo.vespa.service.monitor.ServiceMonitor;
+import java.time.Clock;
import java.time.Duration;
import java.util.HashMap;
import java.util.List;
@@ -40,18 +40,21 @@ public class MetricsReporter extends Maintainer {
private final ServiceMonitor serviceMonitor;
private final Map<Map<String, String>, Metric.Context> contextMap = new HashMap<>();
private final Supplier<Integer> pendingRedeploymentsSupplier;
+ private final Clock clock;
MetricsReporter(NodeRepository nodeRepository,
Metric metric,
Orchestrator orchestrator,
ServiceMonitor serviceMonitor,
Supplier<Integer> pendingRedeploymentsSupplier,
- Duration interval) {
+ Duration interval,
+ Clock clock) {
super(nodeRepository, interval);
this.metric = metric;
this.orchestrator = orchestrator.getNodeStatuses();
this.serviceMonitor = serviceMonitor;
this.pendingRedeploymentsSupplier = pendingRedeploymentsSupplier;
+ this.clock = clock;
}
@Override
@@ -127,8 +130,16 @@ public class MetricsReporter extends Maintainer {
metric.set("failReport", NodeFailer.reasonsToFailParentHost(node).isEmpty() ? 0 : 1, context);
orchestrator.apply(new HostName(node.hostname()))
- .map(info -> info.status().isSuspended() ? 1 : 0)
- .ifPresent(allowedToBeDown -> metric.set("allowedToBeDown", allowedToBeDown, context));
+ .ifPresent(info -> {
+ int suspended = info.status().isSuspended() ? 1 : 0;
+ metric.set("suspended", suspended, context);
+ metric.set("allowedToBeDown", suspended, context); // remove summer 2020.
+
+ info.suspendedSince().ifPresent(suspendedSince -> {
+ Duration duration = Duration.between(suspendedSince, clock.instant());
+ metric.set("suspendedSeconds", duration.getSeconds(), context);
+ });
+ });
long numberOfServices;
HostName hostName = new HostName(node.hostname());
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
index 10aff833584..6ca8b78b48d 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRepositoryMaintenance.java
@@ -76,7 +76,7 @@ public class NodeRepositoryMaintenance extends AbstractComponent {
dirtyExpirer = new DirtyExpirer(nodeRepository, clock, defaults.dirtyExpiry);
provisionedExpirer = new ProvisionedExpirer(nodeRepository, clock, defaults.provisionedExpiry);
nodeRebooter = new NodeRebooter(nodeRepository, clock, flagSource);
- metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, serviceMonitor, periodicApplicationMaintainer::pendingDeployments, defaults.metricsInterval);
+ metricsReporter = new MetricsReporter(nodeRepository, metric, orchestrator, serviceMonitor, periodicApplicationMaintainer::pendingDeployments, defaults.metricsInterval, null);
infrastructureProvisioner = new InfrastructureProvisioner(nodeRepository, infraDeployer, defaults.infrastructureProvisionInterval);
loadBalancerExpirer = provisionServiceProvider.getLoadBalancerService().map(lbService ->
new LoadBalancerExpirer(nodeRepository, defaults.loadBalancerExpirerInterval, lbService));
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
index 3ca884540f1..287842e56f0 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
@@ -10,6 +10,7 @@ import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.Zone;
import com.yahoo.jdisc.Metric;
+import com.yahoo.test.ManualClock;
import com.yahoo.vespa.curator.Curator;
import com.yahoo.vespa.curator.mock.MockCurator;
import com.yahoo.vespa.hosted.provision.LockedNodeList;
@@ -30,6 +31,7 @@ import org.junit.Test;
import java.time.Clock;
import java.time.Duration;
+import java.time.Instant;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -83,12 +85,17 @@ public class MetricsReporterTest {
expectedMetrics.put("wantToRetire", 0);
expectedMetrics.put("wantToDeprovision", 0);
expectedMetrics.put("failReport", 0);
- expectedMetrics.put("allowedToBeDown", 0);
+ expectedMetrics.put("allowedToBeDown", 1);
+ expectedMetrics.put("suspended", 1);
+ expectedMetrics.put("suspendedSeconds", 123L);
expectedMetrics.put("numberOfServices", 0L);
+ ManualClock clock = new ManualClock(Instant.ofEpochSecond(124));
Orchestrator orchestrator = mock(Orchestrator.class);
ServiceMonitor serviceMonitor = mock(ServiceMonitor.class);
- when(orchestrator.getNodeStatuses()).thenReturn(hostName -> Optional.of(HostInfo.createNoRemarks()));
+ when(orchestrator.getNodeStatuses()).thenReturn(hostName ->
+ Optional.of(HostInfo.createSuspended(HostStatus.ALLOWED_TO_BE_DOWN, Instant.ofEpochSecond(1)))
+ );
ServiceModel serviceModel = mock(ServiceModel.class);
when(serviceMonitor.getServiceModelSnapshot()).thenReturn(serviceModel);
when(serviceModel.getServiceInstancesByHostName()).thenReturn(Map.of());
@@ -100,8 +107,8 @@ public class MetricsReporterTest {
orchestrator,
serviceMonitor,
() -> 42,
- Duration.ofMinutes(1)
- );
+ Duration.ofMinutes(1),
+ clock);
metricsReporter.maintain();
assertEquals(expectedMetrics, metric.values);
@@ -143,14 +150,15 @@ public class MetricsReporterTest {
when(serviceModel.getServiceInstancesByHostName()).thenReturn(Map.of());
TestMetric metric = new TestMetric();
+ ManualClock clock = new ManualClock();
MetricsReporter metricsReporter = new MetricsReporter(
nodeRepository,
metric,
orchestrator,
serviceMonitor,
() -> 42,
- Duration.ofMinutes(1)
- );
+ Duration.ofMinutes(1),
+ clock);
metricsReporter.maintain();
assertEquals(0, metric.values.get("hostedVespa.readyHosts")); // Only tenants counts