aboutsummaryrefslogtreecommitdiffstats
path: root/node-repository
diff options
context:
space:
mode:
authorHarald Musum <musum@verizonmedia.com>2020-05-18 12:07:23 +0200
committerHarald Musum <musum@verizonmedia.com>2020-05-18 12:07:23 +0200
commit842397db112010171b0be7e0d14498f32c341eeb (patch)
tree61c0965a4f9003546cc1a236635b6a39c7f11a6d /node-repository
parent33cc8d8a84b90139098aa21703c82bd012a58643 (diff)
Skip rebalancing of app if it has been recently deployed.
Wait some time after a deployment before rebalancing node resources.
Diffstat (limited to 'node-repository')
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java14
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java86
2 files changed, 74 insertions, 26 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
index 7ffb541be2a..c7356f33d6b 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
@@ -13,7 +13,6 @@ import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.provisioning.DockerHostCapacity;
import com.yahoo.vespa.hosted.provision.provisioning.HostProvisioner;
-import com.yahoo.vespa.hosted.provision.provisioning.HostResourcesCalculator;
import java.time.Clock;
import java.time.Duration;
@@ -23,6 +22,7 @@ import java.util.Optional;
* @author bratseth
*/
public class Rebalancer extends NodeRepositoryMaintainer {
+ static final Duration waitTimeAfterPreviousDeployment = Duration.ofMinutes(30);
private final Deployer deployer;
private final Optional<HostProvisioner> hostProvisioner;
@@ -87,7 +87,9 @@ public class Rebalancer extends NodeRepositoryMaintainer {
Move bestMove = Move.none;
for (Node node : allNodes.nodeType(NodeType.tenant).state(Node.State.active)) {
if (node.parentHostname().isEmpty()) continue;
- if (node.allocation().get().owner().instance().isTester()) continue;
+ ApplicationId applicationId = node.allocation().get().owner();
+ if (applicationId.instance().isTester()) continue;
+ if (deployedRecently(applicationId)) continue;
for (Node toHost : allNodes.filter(nodeRepository()::canAllocateTenantNodeTo)) {
if (toHost.hostname().equals(node.parentHostname().get())) continue;
if ( ! capacity.freeCapacityOf(toHost).satisfies(node.flavor().resources())) continue;
@@ -169,6 +171,14 @@ public class Rebalancer extends NodeRepositoryMaintainer {
return skewBefore - skewAfter;
}
+ protected boolean deployedRecently(ApplicationId application) {
+ return deployer.lastDeployTime(application)
+ .map(lastDeployTime -> lastDeployTime.isAfter(clock.instant().minus(waitTimeAfterPreviousDeployment)))
+ // We only know the last deploy time for applications that were deployed on this config server;
+ // the rest will be deployed on another config server
+ .orElse(true);
+ }
+
private static class Move {
static final Move none = new Move(null, null, 0);
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java
index a61496e8aa2..9e384dc162b 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/RebalancerTest.java
@@ -7,7 +7,6 @@ import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.Flavor;
-import com.yahoo.config.provision.HostSpec;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.RegionName;
@@ -19,14 +18,15 @@ import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder;
import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
import com.yahoo.vespa.hosted.provision.testutils.MockDeployer;
+import org.junit.Before;
import org.junit.Test;
import java.time.Duration;
-import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
+import static com.yahoo.vespa.hosted.provision.maintenance.Rebalancer.waitTimeAfterPreviousDeployment;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
@@ -35,36 +35,45 @@ import static org.junit.Assert.assertTrue;
* @author bratseth
*/
public class RebalancerTest {
-
- @Test
- public void testRebalancing() {
+ ApplicationId cpuApp;
+ ApplicationId memApp;
+ NodeResources cpuResources;
+ NodeResources memResources;
+ TestMetric metric = new TestMetric();
+ ProvisioningTester tester;
+ MockDeployer deployer;
+ private Rebalancer rebalancer;
+
+ @Before
+ public void setup() {
// --- Setup
- ApplicationId cpuApp = makeApplicationId("t1", "a1");
- ApplicationId memApp = makeApplicationId("t2", "a2");
- NodeResources cpuResources = new NodeResources(8, 4, 10, 0.1);
- NodeResources memResources = new NodeResources(4, 9, 10, 0.1);
+ cpuApp = makeApplicationId("t1", "a1");
+ memApp = makeApplicationId("t2", "a2");
+ cpuResources = new NodeResources(8, 4, 10, 0.1);
+ memResources = new NodeResources(4, 9, 10, 0.1);
- ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.perf, RegionName.from("us-east"))).flavorsConfig(flavorsConfig()).build();
- TestMetric metric = new TestMetric();
+ tester = new ProvisioningTester.Builder().zone(new Zone(Environment.perf, RegionName.from("us-east"))).flavorsConfig(flavorsConfig()).build();
Map<ApplicationId, MockDeployer.ApplicationContext> apps = Map.of(
cpuApp, new MockDeployer.ApplicationContext(cpuApp, clusterSpec("c"), Capacity.from(new ClusterResources(1, 1, cpuResources))),
memApp, new MockDeployer.ApplicationContext(memApp, clusterSpec("c"), Capacity.from(new ClusterResources(1, 1, memResources))));
- MockDeployer deployer = new MockDeployer(tester.provisioner(), tester.clock(), apps);
-
- Rebalancer rebalancer = new Rebalancer(deployer,
- tester.nodeRepository(),
- Optional.empty(),
- metric,
- tester.clock(),
- Duration.ofMinutes(1));
+ deployer = new MockDeployer(tester.provisioner(), tester.clock(), apps);
+ rebalancer = new Rebalancer(deployer,
+ tester.nodeRepository(),
+ Optional.empty(),
+ metric,
+ tester.clock(),
+ Duration.ofMinutes(1));
tester.makeReadyNodes(3, "flt", NodeType.host, 8);
tester.deployZoneApp();
+ }
+ @Test
+ public void testRebalancing() {
// --- Deploying a cpu heavy application - causing 1 of these nodes to be skewed
- deployApp(cpuApp, clusterSpec("c"), cpuResources, tester, 1);
+ deployApp(cpuApp);
Node cpuSkewedNode = tester.nodeRepository().getNodes(cpuApp).get(0);
rebalancer.maintain();
@@ -85,7 +94,7 @@ public class RebalancerTest {
0.00244, metric.values.get("hostedVespa.docker.skew").doubleValue(), 0.00001);
// --- Deploying a mem heavy application - allocated to the best option and causing increased skew
- deployApp(memApp, clusterSpec("c"), memResources, tester, 1);
+ deployApp(memApp);
assertEquals("Assigned to a flat node as that causes least skew", "flt",
tester.nodeRepository().list().parentOf(tester.nodeRepository().getNodes(memApp).get(0)).get().flavor().name());
rebalancer.maintain();
@@ -126,6 +135,33 @@ public class RebalancerTest {
0.00587, metric.values.get("hostedVespa.docker.skew").doubleValue(), 0.00001);
}
+
+ @Test
+ public void testNoRebalancingIfRecentlyDeployed() {
+ // --- Deploying a cpu heavy application - causing 1 of these nodes to be skewed
+ deployApp(cpuApp);
+ Node cpuSkewedNode = tester.nodeRepository().getNodes(cpuApp).get(0);
+ rebalancer.maintain();
+ assertFalse("No better place to move the skewed node, so no action is taken",
+ tester.nodeRepository().getNode(cpuSkewedNode.hostname()).get().status().wantToRetire());
+
+ // --- Making a more suitable node configuration available causes rebalancing
+ Node newCpuHost = tester.makeReadyNodes(1, "cpu", NodeType.host, 8).get(0);
+ tester.deployZoneApp();
+
+ deployApp(cpuApp, false /* skip advancing clock after deployment */);
+ rebalancer.maintain();
+ assertFalse("No action, since app was recently deployed",
+ tester.nodeRepository().getNode(cpuSkewedNode.hostname()).get().allocation().get().membership().retired());
+
+ tester.clock().advance(waitTimeAfterPreviousDeployment);
+ rebalancer.maintain();
+ assertTrue("Rebalancer retired the node we wanted to move away from",
+ tester.nodeRepository().getNode(cpuSkewedNode.hostname()).get().allocation().get().membership().retired());
+ assertTrue("... and added a node on the new host instead",
+ tester.nodeRepository().getNodes(cpuApp, Node.State.active).stream().anyMatch(node -> node.hasParent(newCpuHost.hostname())));
+ }
+
private ClusterSpec clusterSpec(String clusterId) {
return ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from(clusterId)).vespaVersion("6.42").build();
}
@@ -134,9 +170,11 @@ public class RebalancerTest {
return ApplicationId.from(tenant, appName, "default");
}
- private void deployApp(ApplicationId id, ClusterSpec spec, NodeResources flavor, ProvisioningTester tester, int nodeCount) {
- List<HostSpec> hostSpec = tester.prepare(id, spec, nodeCount, 1, flavor);
- tester.activate(id, new HashSet<>(hostSpec));
+ private void deployApp(ApplicationId id) { deployApp(id, true); }
+
+ private void deployApp(ApplicationId id, boolean advanceClock) {
+ deployer.deployFromLocalActive(id).get().activate();
+ if (advanceClock) tester.clock().advance(waitTimeAfterPreviousDeployment);
}
private FlavorsConfig flavorsConfig() {