summaryrefslogtreecommitdiffstats
path: root/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/SwitchRebalancerTest.java
blob: 8675b55a27a039c1951cfa6416fb0ea58eb7c338 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;

import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.Capacity;
import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.RegionName;
import com.yahoo.config.provision.Zone;
import com.yahoo.transaction.NestedTransaction;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.provisioning.ProvisioningTester;
import com.yahoo.vespa.hosted.provision.testutils.MockDeployer;
import com.yahoo.vespa.hosted.provision.testutils.MockDeployer.ApplicationContext;
import com.yahoo.vespa.hosted.provision.testutils.MockDeployer.ClusterContext;
import org.junit.Test;

import java.time.Duration;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static org.junit.Assert.assertEquals;

/**
 * @author mpolden
 */
public class SwitchRebalancerTest {

    private static final ApplicationId app = ApplicationId.from("t1", "a1", "i1");

    @Test
    public void rebalance() {
        ClusterSpec.Id cluster1 = ClusterSpec.Id.from("c1");
        ClusterSpec.Id cluster2 = ClusterSpec.Id.from("c2");
        ProvisioningTester tester = new ProvisioningTester.Builder().zone(new Zone(Environment.prod, RegionName.from("us-east"))).build();
        MockDeployer deployer = deployer(tester, cluster1, cluster2);
        SwitchRebalancer rebalancer = new SwitchRebalancer(tester.nodeRepository(), Duration.ofDays(1), new TestMetric(), deployer);

        // Provision initial hosts on same switch
        NodeResources hostResources = new NodeResources(48, 128, 500, 10);
        List<Node> hosts0 = tester.makeReadyNodes(3, hostResources, NodeType.host, 5);
        tester.activateTenantHosts();
        String switch0 = "switch0";
        tester.patchNodes(hosts0, (host) -> host.withSwitchHostname(switch0));

        // Deploy application
        deployer.deployFromLocalActive(app).get().activate();
        tester.assertSwitches(Set.of(switch0), app, cluster1);
        tester.assertSwitches(Set.of(switch0), app, cluster2);

        // Rebalancing does nothing as there are no better moves to perform
        tester.clock().advance(SwitchRebalancer.waitTimeAfterPreviousDeployment);
        assertNoMoves(rebalancer, tester);

        // Provision hosts on distinct switches
        List<Node> hosts1 = tester.makeReadyNodes(3, hostResources, NodeType.host, 5);
        tester.activateTenantHosts();
        for (int i = 0; i < hosts1.size(); i++) {
            String switchHostname = "switch" + (i + 1);
            tester.patchNode(hosts1.get(i), (host) -> host.withSwitchHostname(switchHostname));
        }

        // Application is redeployed
        deployer.deployFromLocalActive(app).get().activate();

        // Rebalancer does nothing as not enough time has passed since previous deployment
        assertNoMoves(rebalancer, tester);

        // Rebalancer retires one node from non-exclusive switch in each cluster, and allocates a new one
        for (var cluster : List.of(cluster1, cluster2)) {
            tester.clock().advance(SwitchRebalancer.waitTimeAfterPreviousDeployment);
            rebalancer.maintain();
            NodeList allNodes = tester.nodeRepository().list();
            NodeList clusterNodes = allNodes.owner(app).cluster(cluster).state(Node.State.active);
            assertEquals("Node is retired in " + cluster, 1, clusterNodes.retired().size());
            assertEquals("Cluster " + cluster + " allocates nodes on distinct switches", 2,
                         tester.switchesOf(clusterNodes, allNodes).size());

            // Retired node becomes inactive and makes zone stable
            try (var lock = tester.provisioner().lock(app)) {
                NestedTransaction removeTransaction = new NestedTransaction();
                tester.nodeRepository().deactivate(clusterNodes.retired().asList(), removeTransaction, lock);
                removeTransaction.commit();
            }
        }

        // Next run does nothing
        tester.clock().advance(SwitchRebalancer.waitTimeAfterPreviousDeployment);
        assertNoMoves(rebalancer, tester);
    }

    private void assertNoMoves(SwitchRebalancer rebalancer, ProvisioningTester tester) {
        NodeList nodes0 = tester.nodeRepository().list(Node.State.active).owner(app);
        rebalancer.maintain();
        NodeList nodes1 = tester.nodeRepository().list(Node.State.active).owner(app);
        assertEquals("Node allocation is unchanged", nodes0.asList(), nodes1.asList());
        assertEquals("No nodes are retired", List.of(), nodes1.retired().asList());
    }

    private static MockDeployer deployer(ProvisioningTester tester, ClusterSpec.Id cluster1, ClusterSpec.Id cluster2) {
        NodeResources resources = new NodeResources(2, 4, 50, 1);
        Capacity capacity = Capacity.from(new ClusterResources(2, 1, resources));
        ClusterSpec spec1 = ClusterSpec.request(ClusterSpec.Type.container, cluster1).vespaVersion("1").build();
        ClusterSpec spec2 = ClusterSpec.request(ClusterSpec.Type.content, cluster2).vespaVersion("1").build();
        List<ClusterContext> clusterContexts = List.of(new ClusterContext(app, spec1, capacity),
                                                       new ClusterContext(app, spec2, capacity));
        ApplicationContext context = new ApplicationContext(app, clusterContexts);
        return new MockDeployer(tester.provisioner(), tester.clock(), Map.of(app, context));
    }

}