summary | refs | log | tree | commit | diff | stats
path: root/node-repository/src
diff options
context:
space:
mode:
Diffstat (limited to 'node-repository/src')
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java 17
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java 12
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/package-info.java 8
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java 6
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java 32
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java 2
6 files changed, 50 insertions, 27 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
index 3ac5b496f56..1601b2e3205 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
@@ -51,10 +51,6 @@ public class MetricsReporter extends Maintainer {
}
private void updateNodeMetrics(Node node) {
- // Dimensions automatically added: host, vespaVersion, zone, role, and colo.
- // 'vespaVersion' is the vespaVersion for the config server and not related
- // to the node we're making metric for now.
-
Metric.Context context;
Optional<Allocation> allocation = node.allocation();
@@ -65,12 +61,13 @@ public class MetricsReporter extends Maintainer {
"hostname", node.hostname(),
"tenantName", applicationId.tenant().value(),
"applicationId", applicationId.serializedForm().replace(':', '.'),
+ "app", toApp(applicationId),
"clustertype", allocation.get().membership().cluster().type().name(),
"clusterid", allocation.get().membership().cluster().id().value());
long wantedRestartGeneration = allocation.get().restartGeneration().wanted();
metric.set("wantedRestartGeneration", wantedRestartGeneration, context);
- long currentRestartGeneration = allocation.get().restartGeneration().wanted();
+ long currentRestartGeneration = allocation.get().restartGeneration().current();
metric.set("currentRestartGeneration", currentRestartGeneration, context);
boolean wantToRestart = currentRestartGeneration < wantedRestartGeneration;
metric.set("wantToRestart", wantToRestart ? 1 : 0, context);
@@ -105,6 +102,12 @@ public class MetricsReporter extends Maintainer {
metric.set("wantToRetire", node.status().wantToRetire() ? 1 : 0, context);
metric.set("wantToDeprovision", node.status().wantToDeprovision() ? 1 : 0, context);
+ metric.set("hardwareFailure",
+ node.status().hardwareFailureDescription().isPresent() ? 1 : 0,
+ context);
+ metric.set("hardwareDivergence",
+ node.status().hardwareDivergence().isPresent() ? 1 : 0,
+ context);
try {
HostStatus status = orchestrator.getNodeStatus(new HostName(node.hostname()));
@@ -117,6 +120,10 @@ public class MetricsReporter extends Maintainer {
// TODO: Also add metric on whether some services are down on node?
}
+ private static String toApp(ApplicationId applicationId) {
+ return applicationId.application().value() + "." + applicationId.instance().value();
+ }
+
/**
* A version 6.163.20 will be returned as a number 163.020. The major
* version can normally be inferred. As long as the micro version stays
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
index d90b558a6eb..1c81d97ddea 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
@@ -10,6 +10,7 @@ import com.yahoo.transaction.Mutex;
import com.yahoo.vespa.applicationmodel.ApplicationInstance;
import com.yahoo.vespa.applicationmodel.ServiceCluster;
import com.yahoo.vespa.applicationmodel.ServiceInstance;
+import com.yahoo.vespa.applicationmodel.ServiceStatus;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Agent;
@@ -18,7 +19,6 @@ import com.yahoo.vespa.orchestrator.ApplicationIdNotFoundException;
import com.yahoo.vespa.orchestrator.Orchestrator;
import com.yahoo.vespa.orchestrator.status.ApplicationInstanceStatus;
import com.yahoo.vespa.service.monitor.ServiceMonitor;
-import com.yahoo.vespa.service.monitor.ServiceMonitorStatus;
import java.time.Clock;
import java.time.Duration;
@@ -187,15 +187,15 @@ public class NodeFailer extends Maintainer {
*/
private List<Node> determineActiveNodeDownStatus() {
List<Node> downNodes = new ArrayList<>();
- for (ApplicationInstance<ServiceMonitorStatus> application : serviceMonitor.queryStatusOfAllApplicationInstances().values()) {
- for (ServiceCluster<ServiceMonitorStatus> cluster : application.serviceClusters()) {
- for (ServiceInstance<ServiceMonitorStatus> service : cluster.serviceInstances()) {
+ for (ApplicationInstance application : serviceMonitor.getAllApplicationInstances().values()) {
+ for (ServiceCluster cluster : application.serviceClusters()) {
+ for (ServiceInstance service : cluster.serviceInstances()) {
Optional<Node> node = nodeRepository().getNode(service.hostName().s(), Node.State.active);
if ( ! node.isPresent()) continue; // we also get status from infrastructure nodes, which are not in the repo. TODO: remove when proxy nodes are in node repo everywhere
- if (service.serviceStatus().equals(ServiceMonitorStatus.DOWN))
+ if (service.serviceStatus().equals(ServiceStatus.DOWN))
downNodes.add(recordAsDown(node.get()));
- else if (service.serviceStatus().equals(ServiceMonitorStatus.UP))
+ else if (service.serviceStatus().equals(ServiceStatus.UP))
clearDownRecord(node.get());
// else: we don't know current status; don't take any action until we have positive information
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/package-info.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/package-info.java
new file mode 100644
index 00000000000..5d0a3cc6093
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/package-info.java
@@ -0,0 +1,8 @@
+// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+/**
+ * @author bjorncs
+ */
+@ExportPackage
+package com.yahoo.vespa.hosted.provision.node;
+
+import com.yahoo.osgi.annotation.ExportPackage;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java
index 19882f0a508..46d72974718 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/OrchestratorMock.java
@@ -7,6 +7,7 @@ import com.yahoo.vespa.orchestrator.ApplicationIdNotFoundException;
import com.yahoo.vespa.orchestrator.ApplicationStateChangeDeniedException;
import com.yahoo.vespa.orchestrator.BatchHostNameNotFoundException;
import com.yahoo.vespa.orchestrator.BatchInternalErrorException;
+import com.yahoo.vespa.orchestrator.Host;
import com.yahoo.vespa.orchestrator.HostNameNotFoundException;
import com.yahoo.vespa.orchestrator.OrchestrationException;
import com.yahoo.vespa.orchestrator.Orchestrator;
@@ -29,6 +30,11 @@ public class OrchestratorMock implements Orchestrator {
Set<ApplicationId> suspendedApplications = new HashSet<>();
@Override
+ public Host getHost(HostName hostName) throws HostNameNotFoundException {
+ return null;
+ }
+
+ @Override
public HostStatus getNodeStatus(HostName hostName) throws HostNameNotFoundException {
return null;
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java
index e38802234bf..56e5fcafbde 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/testutils/ServiceMonitorStub.java
@@ -11,12 +11,12 @@ import com.yahoo.vespa.applicationmodel.ConfigId;
import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.applicationmodel.ServiceCluster;
import com.yahoo.vespa.applicationmodel.ServiceInstance;
+import com.yahoo.vespa.applicationmodel.ServiceStatus;
import com.yahoo.vespa.applicationmodel.ServiceType;
import com.yahoo.vespa.applicationmodel.TenantId;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.service.monitor.ServiceMonitor;
-import com.yahoo.vespa.service.monitor.ServiceMonitorStatus;
import java.util.Collections;
import java.util.HashMap;
@@ -60,31 +60,31 @@ public class ServiceMonitorStub implements ServiceMonitor {
this.statusIsKnown = statusIsKnown;
}
- private ServiceMonitorStatus getHostStatus(String hostname) {
- if (!statusIsKnown) return ServiceMonitorStatus.NOT_CHECKED;
- if (downHosts.contains(hostname)) return ServiceMonitorStatus.DOWN;
- return ServiceMonitorStatus.UP;
+ private ServiceStatus getHostStatus(String hostname) {
+ if (!statusIsKnown) return ServiceStatus.NOT_CHECKED;
+ if (downHosts.contains(hostname)) return ServiceStatus.DOWN;
+ return ServiceStatus.UP;
}
@Override
- public Map<ApplicationInstanceReference, ApplicationInstance<ServiceMonitorStatus>> queryStatusOfAllApplicationInstances() {
+ public Map<ApplicationInstanceReference, ApplicationInstance> getAllApplicationInstances() {
// Convert apps information to the response payload to return
- Map<ApplicationInstanceReference, ApplicationInstance<ServiceMonitorStatus>> status = new HashMap<>();
+ Map<ApplicationInstanceReference, ApplicationInstance> status = new HashMap<>();
for (Map.Entry<ApplicationId, MockDeployer.ApplicationContext> app : apps.entrySet()) {
- Set<ServiceInstance<ServiceMonitorStatus>> serviceInstances = new HashSet<>();
+ Set<ServiceInstance> serviceInstances = new HashSet<>();
for (Node node : nodeRepository.getNodes(app.getValue().id(), Node.State.active)) {
- serviceInstances.add(new ServiceInstance<>(new ConfigId("configid"),
- new HostName(node.hostname()),
- getHostStatus(node.hostname())));
+ serviceInstances.add(new ServiceInstance(new ConfigId("configid"),
+ new HostName(node.hostname()),
+ getHostStatus(node.hostname())));
}
- Set<ServiceCluster<ServiceMonitorStatus>> serviceClusters = new HashSet<>();
- serviceClusters.add(new ServiceCluster<>(new ClusterId(app.getValue().clusterContexts().get(0).cluster().id().value()),
- new ServiceType("serviceType"),
- serviceInstances));
+ Set<ServiceCluster> serviceClusters = new HashSet<>();
+ serviceClusters.add(new ServiceCluster(new ClusterId(app.getValue().clusterContexts().get(0).cluster().id().value()),
+ new ServiceType("serviceType"),
+ serviceInstances));
TenantId tenantId = new TenantId(app.getKey().tenant().value());
ApplicationInstanceId applicationInstanceId = new ApplicationInstanceId(app.getKey().application().value());
status.put(new ApplicationInstanceReference(tenantId, applicationInstanceId),
- new ApplicationInstance<>(tenantId, applicationInstanceId, serviceClusters));
+ new ApplicationInstance(tenantId, applicationInstanceId, serviceClusters));
}
return status;
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java
index b40e1f4923c..a3697e57482 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/monitoring/MetricsReporterTest.java
@@ -78,6 +78,8 @@ public class MetricsReporterTest {
expectedMetrics.put("wantToReboot", 0);
expectedMetrics.put("wantToRetire", 0);
expectedMetrics.put("wantToDeprovision", 0);
+ expectedMetrics.put("hardwareFailure", 0);
+ expectedMetrics.put("hardwareDivergence", 0);
expectedMetrics.put("allowedToBeDown", 0);
Orchestrator orchestrator = mock(Orchestrator.class);