From d74a7a32ec7eed3c65b27a723f9a60f773d53e8e Mon Sep 17 00:00:00 2001
From: Valerij Fredriksen
Date: Fri, 11 Sep 2020 11:51:08 +0200
Subject: Retire nodes on overcommitted hosts

---
 .../hosted/provision/maintenance/Rebalancer.java   |  2 +-
 .../maintenance/SpareCapacityMaintainer.java       | 52 ++++++++++++++++++---
 .../maintenance/SpareCapacityMaintainerTest.java   | 53 ++++++++++++++--------
 3 files changed, 82 insertions(+), 25 deletions(-)

diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
index 9b9c7df5d0d..b1d3551c1b6 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
@@ -63,7 +63,7 @@ public class Rebalancer extends NodeRepositoryMaintainer {
         metric.set("hostedVespa.docker.skew", totalSkew/hostCount, null);
     }
 
-    private boolean zoneIsStable(NodeList allNodes) {
+    static boolean zoneIsStable(NodeList allNodes) {
         NodeList active = allNodes.state(Node.State.active);
         if (active.stream().anyMatch(node -> node.allocation().get().membership().retired())) return false;
         if (active.stream().anyMatch(node -> node.status().wantToRetire())) return false;
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
index dd2f3dfd6c0..0861473a7c0 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainer.java
@@ -1,6 +1,8 @@
 // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
 package com.yahoo.vespa.hosted.provision.maintenance;
 
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.ClusterSpec;
 import com.yahoo.config.provision.Deployer;
 import com.yahoo.config.provision.NodeResources;
 import com.yahoo.jdisc.Metric;
@@ -10,6 +12,7 @@ import com.yahoo.vespa.hosted.provision.NodeRepository;
 import com.yahoo.vespa.hosted.provision.maintenance.MaintenanceDeployment.Move;
 import com.yahoo.vespa.hosted.provision.node.Agent;
 import com.yahoo.vespa.hosted.provision.provisioning.HostCapacity;
+import com.yahoo.vespa.hosted.provision.provisioning.NodeResourceComparator;
 
 import java.time.Duration;
 import java.util.ArrayList;
@@ -67,15 +70,12 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
         boolean success = true;
         if ( ! nodeRepository().zone().getCloud().allowHostSharing()) return success;
 
-        CapacityChecker capacityChecker = new CapacityChecker(nodeRepository().list());
+        NodeList allNodes = nodeRepository().list();
+        CapacityChecker capacityChecker = new CapacityChecker(allNodes);
 
         List<Node> overcommittedHosts = capacityChecker.findOvercommittedHosts();
-        if (overcommittedHosts.size() != 0) {
-            log.log(Level.WARNING, String.format("%d nodes are overcommitted! [ %s ]",
-                                                 overcommittedHosts.size(),
-                                                 overcommittedHosts.stream().map(Node::hostname).collect(Collectors.joining(", "))));
-        }
         metric.set("overcommittedHosts", overcommittedHosts.size(), null);
+        retireOvercommittedHosts(allNodes, overcommittedHosts);
 
         Optional<HostFailurePath> failurePath = capacityChecker.worstCaseHostLossLeadingToFailure();
         if (failurePath.isPresent()) {
@@ -133,6 +133,46 @@ public class SpareCapacityMaintainer extends NodeRepositoryMaintainer {
         return shortestMitigation;
     }
 
+    private int retireOvercommittedComparator(Node n1, Node n2) {
+        ClusterSpec.Type t1 = n1.allocation().get().membership().cluster().type();
+        ClusterSpec.Type t2 = n2.allocation().get().membership().cluster().type();
+
+        // Prefer container nodes for faster retirement
+        if (t1 == ClusterSpec.Type.container && t2 != ClusterSpec.Type.container) return -1;
+        if (t1 != ClusterSpec.Type.container && t2 == ClusterSpec.Type.container) return 1;
+
+        return NodeResourceComparator.memoryDiskCpuOrder().compare(n1.resources(), n2.resources());
+    }
+
+    private void retireOvercommittedHosts(NodeList allNodes, List<Node> overcommittedHosts) {
+        if (overcommittedHosts.isEmpty()) return;
+        log.log(Level.WARNING, String.format("%d hosts are overcommitted: %s",
+                                             overcommittedHosts.size(),
+                                             overcommittedHosts.stream().map(Node::hostname).collect(Collectors.joining(", "))));
+
+        if (!Rebalancer.zoneIsStable(allNodes)) return;
+
+        // Find an active node on an overcommitted host and retire it
+        Optional<Node> nodeToRetire = overcommittedHosts.stream().flatMap(parent -> allNodes.childrenOf(parent).stream())
+                .filter(node -> node.state() == Node.State.active)
+                .min(this::retireOvercommittedComparator);
+        if (nodeToRetire.isEmpty()) return;
+
+        ApplicationId application = nodeToRetire.get().allocation().get().owner();
+        try (MaintenanceDeployment deployment = new MaintenanceDeployment(application, deployer, metric, nodeRepository())) {
+            if ( ! deployment.isValid()) return; // this will be done at another config server
+
+            Optional<Node> nodeWithWantToRetire = nodeRepository().getNode(nodeToRetire.get().hostname())
+                    .map(node -> node.withWantToRetire(true, Agent.SpareCapacityMaintainer, nodeRepository().clock().instant()));
+            if (nodeWithWantToRetire.isEmpty()) return;
+
+            nodeRepository().write(nodeWithWantToRetire.get(), deployment.applicationLock().get());
+            log.log(Level.INFO, String.format("Redeploying %s to relocate %s from overcommitted host",
+                                              application, nodeToRetire.get().hostname()));
+            deployment.activate();
+        }
+    }
+
     private static class CapacitySolver {
 
         private final HostCapacity hostCapacity;
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java
index 6cd206cd5b9..5766a5bed01 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SpareCapacityMaintainerTest.java
@@ -200,6 +200,22 @@ public class SpareCapacityMaintainerTest {
         assertEquals(0, tester.metric.values.get("spareHostCapacity"));
     }
 
+    @Test
+    public void retireFromOvercommittedHosts() {
+        var tester = new SpareCapacityMaintainerTester(5);
+
+        tester.addHosts(7, new NodeResources(10, 100, 1000, 1));
+
+        tester.addNodes(0, 5, new NodeResources( 7, 70, 700, 0.7), 0);
+        tester.addNodes(1, 4, new NodeResources( 2, 20, 200, 0.2), 0);
+        tester.addNodes(2, 2, new NodeResources( 1.1, 10, 100, 0.1), 1);
+
+        tester.maintainer.maintain();
+        assertEquals(2, tester.metric.values.get("overcommittedHosts"));
+        assertEquals(1, tester.deployer.redeployments);
+        assertEquals(List.of(new NodeResources( 1.1, 10, 100, 0.1)), tester.nodeRepository.list().retired().mapToList(Node::resources));
+    }
+
     /** Microbenchmark */
     @Test
     @Ignore
@@ -267,41 +283,42 @@ public class SpareCapacityMaintainerTest {
                 hosts.add(host);
                 hostIndex++;
             }
-            hosts = nodeRepository.addNodes(hosts, Agent.system);
-            hosts = nodeRepository.setReady(hosts, Agent.system, "Test");
-            var transaction = new NestedTransaction();
-            nodeRepository.activate(hosts, transaction);
-            transaction.commit();
+
+            ApplicationId application = ApplicationId.from("vespa", "tenant-host", "default");
+            ClusterSpec clusterSpec = ClusterSpec.specification(ClusterSpec.Type.content, ClusterSpec.Id.from("tenant-host"))
+                                                 .group(ClusterSpec.Group.from(0))
+                                                 .vespaVersion("7")
+                                                 .build();
+            allocate(application, clusterSpec, hosts);
         }
 
         private void addNodes(int id, int count, NodeResources resources, int hostOffset) {
            List<Node> nodes = new ArrayList<>();
-            ApplicationId application = ApplicationId.from("tenant" + id, "application" + id, "default");
            for (int i = 0; i < count; i++) {
-                ClusterMembership membership = ClusterMembership.from(ClusterSpec.specification(ClusterSpec.Type.content, ClusterSpec.Id.from("cluster" + id))
-                                                                                 .group(ClusterSpec.Group.from(0))
-                                                                                 .vespaVersion("7")
-                                                                                 .build(),
-                                                                      i);
                 Node node = nodeRepository.createNode("node" + nodeIndex,
                                                       "node" + nodeIndex + ".yahoo.com",
                                                       ipConfig(hostIndex + nodeIndex, false),
                                                       Optional.of("host" + (hostOffset + i) + ".yahoo.com"),
                                                       new Flavor(resources),
                                                       Optional.empty(),
                                                       NodeType.tenant);
-                node = node.allocate(application, membership, node.resources(), Instant.now());
                 nodes.add(node);
                 nodeIndex++;
             }
+
+            ApplicationId application = ApplicationId.from("tenant" + id, "application" + id, "default");
+            ClusterSpec clusterSpec = ClusterSpec.specification(ClusterSpec.Type.content, ClusterSpec.Id.from("cluster" + id))
+                                                 .group(ClusterSpec.Group.from(0))
+                                                 .vespaVersion("7")
+                                                 .build();
+            allocate(application, clusterSpec, nodes);
+        }
+
+        private void allocate(ApplicationId application, ClusterSpec clusterSpec, List<Node> nodes) {
             nodes = nodeRepository.addNodes(nodes, Agent.system);
-            for (int i = 0; i < count; i++) {
+            for (int i = 0; i < nodes.size(); i++) {
                 Node node = nodes.get(i);
-                ClusterMembership membership = ClusterMembership.from(ClusterSpec.specification(ClusterSpec.Type.content, ClusterSpec.Id.from("cluster" + id))
-                                                                                 .group(ClusterSpec.Group.from(0))
-                                                                                 .vespaVersion("7")
-                                                                                 .build(),
-                                                                      i);
+                ClusterMembership membership = ClusterMembership.from(clusterSpec, i);
                 node = node.allocate(application, membership, node.resources(), Instant.now());
                 nodes.set(i, node);
             }
-- 
cgit v1.2.3