Diffstat (limited to 'node-repository')
4 files changed, 87 insertions, 29 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
index 3b846351b36..15913fec5ed 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java
@@ -21,11 +21,13 @@ import com.yahoo.vespa.hosted.provision.NodeList;
 import com.yahoo.vespa.hosted.provision.NodeRepository;
 import com.yahoo.vespa.hosted.provision.node.Allocation;
 import com.yahoo.vespa.hosted.provision.node.ClusterId;
+import com.yahoo.vespa.hosted.provision.node.History;
 import com.yahoo.vespa.hosted.provision.persistence.CacheStats;
 import com.yahoo.vespa.service.monitor.ServiceModel;
 import com.yahoo.vespa.service.monitor.ServiceMonitor;
 
 import java.time.Duration;
+import java.time.Instant;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -64,7 +66,7 @@ public class MetricsReporter extends NodeRepositoryMaintainer {
     @Override
     public double maintain() {
         // Sort by hostname to get deterministic metric reporting order (and hopefully avoid changes
-        // to metric reporting time so we get double reporting or no reporting within a minute)
+        // to metric reporting time, so we get double reporting or no reporting within a minute)
         NodeList nodes = nodeRepository().nodes().list().sortedBy(Comparator.comparing(Node::hostname));
         ServiceModel serviceModel = serviceMonitor.getServiceModelSnapshot();
 
@@ -79,6 +81,7 @@ public class MetricsReporter extends NodeRepositoryMaintainer {
         updateRepairTicketMetrics(nodes);
         updateAllocationMetrics(nodes);
         updateClusterMetrics(nodes);
+        updateEmptyExclusiveHosts(nodes);
         return 1.0;
     }
 
@@ -386,6 +389,19 @@ public class MetricsReporter extends NodeRepositoryMaintainer {
                 .forEach((status, number) -> metric.set(ConfigServerMetrics.HOSTED_VESPA_BREAKFIXED_HOSTS.baseName(), number, getContext(Map.of("status", status))));
     }
 
+    private void updateEmptyExclusiveHosts(NodeList nodes) {
+        Instant now = nodeRepository().clock().instant();
+        Duration minActivePeriod = Duration.ofMinutes(10);
+        int emptyHosts = nodes.parents().state(State.active)
+                              .matching(node -> (node.type() != NodeType.host && node.type().isHost()) ||
+                                                node.exclusiveToApplicationId().isPresent())
+                              .matching(host -> host.history().hasEventBefore(History.Event.Type.activated,
+                                                                              now.minus(minActivePeriod)))
+                              .matching(host -> nodes.childrenOf(host).state(State.active).isEmpty())
+                              .size();
+        metric.set(ConfigServerMetrics.NODES_EMPTY_EXCLUSIVE.baseName(), emptyHosts, null);
+    }
+
     static Map<String, String> dimensions(ApplicationId application, ClusterSpec.Id cluster) {
         Map<String, String> dimensions = new HashMap<>(dimensions(application));
         dimensions.put("clusterid", cluster.value());
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
index a16290361fb..585a7f341b5 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java
@@ -195,39 +195,48 @@ public class NodeFailer extends NodeRepositoryMaintainer {
     /**
      * Called when a node should be moved to the failed state: Do that if it seems safe,
      * which is when the node repo has available capacity to replace the node (and all its tenant nodes if host).
-     * Otherwise not replacing the node ensures (by Orchestrator check) that no further action will be taken.
+     * Otherwise, not replacing the node ensures (by Orchestrator check) that no further action will be taken.
      */
     private void failActive(FailingNode failing) {
         Optional<Deployment> deployment =
                 deployer.deployFromLocalActive(failing.node().allocation().get().owner(), Duration.ofMinutes(5));
         if (deployment.isEmpty()) return;
 
+        boolean redeploy = false;
         // If the active node that we are trying to fail is of type host, we need to successfully fail all
         // the children nodes running on it before we fail the host. Failing a child node in a dynamically
         // provisioned zone may require provisioning new hosts that require the host application lock to be held,
         // so we must release ours before failing the children.
-        List<FailingNode> activeChildrenToFail = new ArrayList<>();
-        boolean redeploy = false;
-        try (NodeMutex lock = nodeRepository().nodes().lockAndGetRequired(failing.node())) { // TODO: recursive lock for right order, only for hosts though
-            // Now that we have gotten the node object under the proper lock, sanity-check it still makes sense to fail
-            if (!Objects.equals(failing.node().allocation().map(Allocation::owner), lock.node().allocation().map(Allocation::owner)))
-                return;
-            if (lock.node().state() == Node.State.failed)
-                return;
-            if (!Objects.equals(failing.node().state(), lock.node().state()))
-                return;
-            failing = new FailingNode(lock.node(), failing.reason);
-
-            String reasonForChildFailure = "Failing due to parent host " + failing.node().hostname() + " failure: " + failing.reason();
-            for (Node failingTenantNode : nodeRepository().nodes().list().childrenOf(failing.node())) {
-                if (failingTenantNode.state() == Node.State.active) {
-                    activeChildrenToFail.add(new FailingNode(failingTenantNode, reasonForChildFailure));
-                } else if (failingTenantNode.state() != Node.State.failed) {
-                    nodeRepository().nodes().fail(failingTenantNode.hostname(), Agent.NodeFailer, reasonForChildFailure);
+        if (failing.node.type().isHost()) {
+            List<FailingNode> activeChildrenToFail = new ArrayList<>();
+            try (var lock = nodeRepository().nodes().lockAndGetRecursively(failing.node.hostname(), Optional.empty())) {
+                failing = shouldFail(lock.parent().node(), failing);
+                if (failing == null) return;
+
+                String reasonForChildFailure = "Failing due to parent host " + failing.node().hostname() + " failure: " + failing.reason();
+                for (var failingTenantNode : lock.children()) {
+                    if (failingTenantNode.node().state() == Node.State.active) {
+                        activeChildrenToFail.add(new FailingNode(failingTenantNode.node(), reasonForChildFailure));
+                    } else if (failingTenantNode.node().state() != Node.State.failed) {
+                        nodeRepository().nodes().fail(failingTenantNode.node().hostname(), Agent.NodeFailer, reasonForChildFailure);
+                    }
+                }
+
+                if (activeChildrenToFail.isEmpty()) {
+                    log.log(Level.INFO, "Failing out " + failing.node + ": " + failing.reason);
+                    markWantToFail(failing.node(), true, lock.parent());
+                    redeploy = true;
                 }
             }
+            // In a dynamically provisioned zone the failing of the first child may require a new host to be provisioned,
+            // so failActive() may take a long time to complete, but the remaining children should be fast.
+            activeChildrenToFail.forEach(this::failActive);
+        }
+        else {
+            try (var lock = nodeRepository().nodes().lockAndGetRequired(failing.node)) {
+                failing = shouldFail(lock.node(), failing);
+                if (failing == null) return;
 
-            if (activeChildrenToFail.isEmpty()) {
                 log.log(Level.INFO, "Failing out " + failing.node + ": " + failing.reason);
                 markWantToFail(failing.node(), true, lock);
                 redeploy = true;
@@ -237,13 +246,19 @@ public class NodeFailer extends NodeRepositoryMaintainer {
         // Redeploy to replace failing node
         if (redeploy) {
             redeploy(deployment.get(), failing);
-            return;
         }
+    }
 
-        // In a dynamically provisioned zone the failing of the first child may require a new host to be provisioned,
-        // so failActive() may take a long time to complete, but the remaining children should be fast.
-        activeChildrenToFail.forEach(this::failActive);
-
+    // Returns an updated FailingNode if we should still fail the node, otherwise null
+    private static FailingNode shouldFail(Node fresh, FailingNode stale) {
+        // Now that we have gotten the node object under the proper lock, sanity-check it still makes sense to fail
+        if (!Objects.equals(stale.node.allocation().map(Allocation::owner), fresh.allocation().map(Allocation::owner)))
+            return null;
+        if (fresh.state() == Node.State.failed)
+            return null;
+        if (!Objects.equals(stale.node.state(), fresh.state()))
+            return null;
+        return new FailingNode(fresh, stale.reason);
     }
 
     private void redeploy(Deployment deployment, FailingNode failing) {
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
index bf046c09899..1ae9b00d794 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java
@@ -72,7 +72,7 @@ public class RetiredExpirer extends NodeRepositoryMaintainer {
         }
         boolean redeploy = false;
         List<String> nodesToDeactivate = new ArrayList<>();
-        try (var lock = nodeRepository().applications().lock(application)) { // TODO: take recusrive lock for right order
+        try (var lock = nodeRepository().applications().lock(application)) {
             NodeList activeNodes = nodeRepository().nodes().list(Node.State.active);
             Map<Removal, NodeList> nodesByRemovalReason = activeNodes.owner(application)
                                                                      .retired()
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
index 2637666e643..3091f82143d 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporterTest.java
@@ -26,7 +26,6 @@ import com.yahoo.vespa.hosted.provision.autoscale.Autoscaling;
 import com.yahoo.vespa.hosted.provision.autoscale.Load;
 import com.yahoo.vespa.hosted.provision.node.Agent;
 import com.yahoo.vespa.hosted.provision.node.Allocation;
-import com.yahoo.vespa.hosted.provision.node.ClusterId;
 import com.yahoo.vespa.hosted.provision.node.Generation;
 import com.yahoo.vespa.hosted.provision.node.IP;
 import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder;
@@ -140,6 +139,7 @@
         expectedMetrics.put("cache.curator.hitRate", 3D/5D);
         expectedMetrics.put("cache.curator.evictionCount", 0L);
         expectedMetrics.put("cache.curator.size", 2L);
+        expectedMetrics.put("nodes.emptyExclusive", 0);
 
         nodeRepository.nodes().list();
         tester.clock().setInstant(Instant.ofEpochSecond(124));
@@ -278,7 +278,6 @@
         assertEquals(4, getMetric("nodes.active", metric, dimensions));
         assertEquals(0, getMetric("nodes.nonActive", metric, dimensions));
 
-        Map<String, String> clusterDimensions = Map.of("applicationId", applicationId.toFullString(), "clusterid", clusterSpec.id().value());
         assertEquals(1.392, getMetric("cluster.cost", metric, clusterDimensions));
@@ -341,6 +340,34 @@
         assertEquals(1D, getMetric("nodes.exclusiveSwitchFraction", metric, MetricsReporter.dimensions(app, spec2.id())).doubleValue(), Double.MIN_VALUE);
     }
 
+    @Test
+    public void empty_exclusive_hosts() {
+        ProvisioningTester tester = new ProvisioningTester.Builder().build();
+        ApplicationId app = ApplicationId.from("t1", "a1", "default");
+        TestMetric metric = new TestMetric();
+        MetricsReporter metricsReporter = metricsReporter(metric, tester);
+        NodeResources resources = new NodeResources(8, 32, 100, 10);
+        List<Node> hosts = tester.makeReadyNodes(4, resources, NodeType.host, 5);
+        tester.activateTenantHosts();
+        tester.patchNodes(hosts, (host) -> host.withExclusiveToApplicationId(app));
+
+        // Hosts are not considered empty until enough time passes
+        metricsReporter.maintain();
+        assertEquals(0, metric.values.get("nodes.emptyExclusive").intValue());
+        tester.clock().advance(Duration.ofMinutes(10));
+        metricsReporter.maintain();
+        assertEquals(hosts.size(), metric.values.get("nodes.emptyExclusive").intValue());
+
+        // Deploy application
+        ClusterSpec spec = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("c1")).vespaVersion("1").build();
+        Capacity capacity = Capacity.from(new ClusterResources(4, 1, resources));
+        tester.deploy(app, spec, capacity);
+
+        // Host are now in use
+        metricsReporter.maintain();
+        assertEquals(0, metric.values.get("nodes.emptyExclusive").intValue());
+    }
+
     private Number getMetric(String name, TestMetric metric, Map<String, String> dimensions) {
         List<TestMetric.TestContext> metrics = metric.context.get(name).stream()
                                                              .filter(ctx -> ctx.properties.entrySet().containsAll(dimensions.entrySet()))
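
Note: the reporter publishes the count via ConfigServerMetrics.NODES_EMPTY_EXCLUSIVE.baseName(), and the test expects the key "nodes.emptyExclusive", but the metric constant itself is declared outside node-repository and therefore does not appear in this diff (the diffstat is limited to node-repository). The following is a minimal, self-contained sketch of what that entry presumably provides; the class name and constructor here are illustrative assumptions, and only the baseName() accessor and the "nodes.emptyExclusive" key are taken from the diff.

    import java.util.Objects;

    // Illustrative sketch only: the real constant lives in Vespa's ConfigServerMetrics
    // enum (in the metrics module), which this diff does not touch.
    enum ConfigServerMetricsSketch {

        // Key matches the value asserted in MetricsReporterTest.empty_exclusive_hosts()
        NODES_EMPTY_EXCLUSIVE("nodes.emptyExclusive");

        private final String baseName;

        ConfigServerMetricsSketch(String baseName) {
            this.baseName = Objects.requireNonNull(baseName);
        }

        // MetricsReporter.updateEmptyExclusiveHosts() reports the count under this name
        String baseName() {
            return baseName;
        }
    }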