aboutsummaryrefslogtreecommitdiffstats
path: root/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/PeriodicApplicationMaintainerTest.java
blob: 360e8de8d11cf10bce80e05cef78f9ac9b561190 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;

import com.yahoo.component.Version;
import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ApplicationName;
import com.yahoo.config.provision.Capacity;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Deployer;
import com.yahoo.config.provision.DockerImage;
import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.HostSpec;
import com.yahoo.config.provision.InstanceName;
import com.yahoo.config.provision.NodeFlavors;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.RegionName;
import com.yahoo.config.provision.TenantName;
import com.yahoo.config.provision.Zone;
import com.yahoo.test.ManualClock;
import com.yahoo.transaction.NestedTransaction;
import com.yahoo.vespa.curator.Curator;
import com.yahoo.vespa.curator.mock.MockCurator;
import com.yahoo.vespa.curator.transaction.CuratorTransaction;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.provisioning.FlavorConfigBuilder;
import com.yahoo.vespa.hosted.provision.provisioning.NodeRepositoryProvisioner;
import com.yahoo.vespa.hosted.provision.testutils.MockDeployer;
import com.yahoo.vespa.hosted.provision.testutils.MockNameResolver;
import org.junit.Before;
import org.junit.Test;

import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;

import static org.junit.Assert.assertEquals;

/**
 * @author bratseth
 */
public class PeriodicApplicationMaintainerTest {

    private static final NodeFlavors nodeFlavors = FlavorConfigBuilder.createDummies("default");

    private NodeRepository nodeRepository;
    private Fixture fixture;

    /**
     * Sets up a fresh node repository for a prod zone in region "us-east", backed by a mock curator
     * and a manual clock, together with the fixture operating on that repository.
     */
    @Before
    public void before() {
        Curator mockCurator = new MockCurator();
        Zone prodZone = new Zone(Environment.prod, RegionName.from("us-east"));
        DockerImage image = new DockerImage("docker-registry.domain.tld:8080/dist/vespa");

        nodeRepository = new NodeRepository(nodeFlavors, mockCurator, new ManualClock(), prodZone,
                                            new MockNameResolver().mockAnyLookup(), image, true);
        fixture = new Fixture(prodZone, nodeRepository, nodeFlavors, mockCurator);
    }

    /**
     * Fails and parks nodes of the two fixture applications and checks that a maintenance run
     * brings both applications back to their wanted node counts. The failed and parked nodes are then
     * reactivated, and a second maintenance run (after the redeploy inhibition period has passed)
     * is expected to retire or deactivate the now superfluous nodes.
     */
    @Test
    public void test_application_maintenance() {
        createReadyNodes(15, nodeRepository, nodeFlavors);
        createHostNodes(2, nodeRepository, nodeFlavors);

        // Deploy both applications
        fixture.activate();

        // Take nodes out of service: one failed in app1, one failed and one parked in app2.
        // Each hostname is looked up immediately before the state change, as the node listing
        // is re-read from the repository every time.
        String failingInApp1 = nodeRepository.getNodes(fixture.app1).get(3).hostname();
        nodeRepository.fail(failingInApp1, Agent.system, "Failing to unit test");
        String failingInApp2 = nodeRepository.getNodes(fixture.app2).get(0).hostname();
        nodeRepository.fail(failingInApp2, Agent.system, "Failing to unit test");
        String parkingInApp2 = nodeRepository.getNodes(fixture.app2).get(4).hostname();
        nodeRepository.park(parkingInApp2, Agent.system, "Parking to unit test");

        int failedInApp1 = 1;
        int failedOrParkedInApp2 = 2;
        assertEquals(fixture.wantedNodesApp1 - failedInApp1,
                     nodeRepository.getNodes(fixture.app1, Node.State.active).size());
        assertEquals(fixture.wantedNodesApp2 - failedOrParkedInApp2,
                     nodeRepository.getNodes(fixture.app2, Node.State.active).size());
        assertEquals(failedInApp1 + failedOrParkedInApp2,
                     nodeRepository.getNodes(NodeType.tenant, Node.State.failed, Node.State.parked).size());
        assertEquals(3, nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());
        assertEquals(2, nodeRepository.getNodes(NodeType.host, Node.State.ready).size());

        // A maintenance deployment should allocate replacements, using up the ready tenant nodes
        fixture.runApplicationMaintainer();
        assertEquals(fixture.wantedNodesApp1, nodeRepository.getNodes(fixture.app1, Node.State.active).size());
        assertEquals(fixture.wantedNodesApp2, nodeRepository.getNodes(fixture.app2, Node.State.active).size());
        assertEquals(0, nodeRepository.getNodes(NodeType.tenant, Node.State.ready).size());

        // Bring the failed and parked nodes back. The failed list shrinks after each reactivation,
        // so index 0 refers to a different node on the second lookup.
        String reason = getClass().getSimpleName();
        String firstFailed = nodeRepository.getNodes(NodeType.tenant, Node.State.failed).get(0).hostname();
        nodeRepository.reactivate(firstFailed, Agent.system, reason);
        String secondFailed = nodeRepository.getNodes(NodeType.tenant, Node.State.failed).get(0).hostname();
        nodeRepository.reactivate(secondFailed, Agent.system, reason);
        String parked = nodeRepository.getNodes(NodeType.tenant, Node.State.parked).get(0).hostname();
        nodeRepository.reactivate(parked, Agent.system, reason);

        int reactivatedInApp1 = 1;
        int reactivatedInApp2 = 2;
        assertEquals(0, nodeRepository.getNodes(NodeType.tenant, Node.State.failed).size());
        assertEquals(fixture.wantedNodesApp1 + reactivatedInApp1,
                     nodeRepository.getNodes(fixture.app1, Node.State.active).size());
        assertEquals(fixture.wantedNodesApp2 + reactivatedInApp2,
                     nodeRepository.getNodes(fixture.app2, Node.State.active).size());
        assertEquals("The reactivated nodes are now active but not part of the application",
                     0, fixture.getNodes(Node.State.active).retired().size());

        // A new maintenance deployment should take the reactivated nodes into account.
        // The clock is moved past the 30 minute maintenance interval first, as redeploys are inhibited until then.
        ManualClock clock = (ManualClock) nodeRepository.clock();
        clock.advance(Duration.ofMinutes(35));
        fixture.runApplicationMaintainer();
        assertEquals("Superflous content nodes are retired",
                     reactivatedInApp2, fixture.getNodes(Node.State.active).retired().size());
        assertEquals("Superflous container nodes are deactivated (this makes little point for container nodes)",
                     reactivatedInApp1, fixture.getNodes(Node.State.inactive).size());
    }

    /**
     * Checks that an application which is removed while a maintenance run is in progress
     * does not get its nodes activated again by that run.
     */
    @Test
    public void deleted_application_is_not_reactivated() {
        createReadyNodes(15, nodeRepository, nodeFlavors);
        createHostNodes(2, nodeRepository, nodeFlavors);

        // Deploy both applications
        fixture.activate();

        // Snapshot the active nodes before the removal. Feeding this stale list to the maintainer
        // simulates an application being deleted in the middle of a maintenance run.
        List<Node> activeBeforeRemoval = nodeRepository.getNodes(Node.State.active);

        // Remove app2 behind the maintainer's back
        fixture.remove(fixture.app2);
        int inactiveAfterRemoval = nodeRepository.getNodes(fixture.app2, Node.State.inactive).size();
        assertEquals(fixture.wantedNodesApp2, inactiveAfterRemoval);

        // The nodes of app2 must still be inactive once maintenance has run on the stale snapshot
        fixture.runApplicationMaintainer(Optional.of(activeBeforeRemoval));
        int inactiveAfterMaintenance = nodeRepository.getNodes(fixture.app2, Node.State.inactive).size();
        assertEquals("Inactive nodes were incorrectly activated after maintenance",
                     fixture.wantedNodesApp2, inactiveAfterMaintenance);
    }

    /**
     * Checks that a maintenance run shortly after a deployment does not redeploy the applications,
     * while a run after the maintenance interval (30 minutes in the fixture) has passed does.
     */
    @Test
    public void application_deploy_inhibits_redeploy_for_a_while() {
        ManualClock clock = (ManualClock) nodeRepository.clock();
        createReadyNodes(15, nodeRepository, nodeFlavors);
        createHostNodes(2, nodeRepository, nodeFlavors);

        // Deploy both applications and let the maintainer run once
        fixture.activate();
        fixture.runApplicationMaintainer();
        Instant firstDeployTime = clock.instant();
        assertEquals(firstDeployTime, fixture.deployer.lastDeployTime(fixture.app1).get());
        assertEquals(firstDeployTime, fixture.deployer.lastDeployTime(fixture.app2).get());

        // Too soon: only 5 minutes have passed, so nothing is redeployed
        clock.advance(Duration.ofMinutes(5));
        fixture.runApplicationMaintainer();
        assertEquals(firstDeployTime, fixture.deployer.lastDeployTime(fixture.app1).get());
        assertEquals(firstDeployTime, fixture.deployer.lastDeployTime(fixture.app2).get());

        // 35 minutes after the first deployment: both applications are redeployed
        clock.advance(Duration.ofMinutes(30));
        fixture.runApplicationMaintainer();
        Instant redeployTime = clock.instant();
        assertEquals(redeployTime, fixture.deployer.lastDeployTime(fixture.app1).get());
        assertEquals(redeployTime, fixture.deployer.lastDeployTime(fixture.app2).get());
    }

    /**
     * Adds {@code count} nodes of type {@link NodeType#tenant} to the given repository and moves them to ready.
     *
     * @param count          the number of nodes to create
     * @param nodeRepository the repository to add the nodes to
     * @param nodeFlavors    the flavors to look up the "default" flavor in
     */
    private void createReadyNodes(int count, NodeRepository nodeRepository, NodeFlavors nodeFlavors) {
        addReadyNodes(count, "node", "host", NodeType.tenant, nodeRepository, nodeFlavors);
    }

    /**
     * Adds {@code count} nodes of type {@link NodeType#host} to the given repository and moves them to ready.
     *
     * @param count          the number of nodes to create
     * @param nodeRepository the repository to add the nodes to
     * @param nodeFlavors    the flavors to look up the "default" flavor in
     */
    private void createHostNodes(int count, NodeRepository nodeRepository, NodeFlavors nodeFlavors) {
        addReadyNodes(count, "hostNode", "realHost", NodeType.host, nodeRepository, nodeFlavors);
    }

    /**
     * Shared implementation of the node creation helpers: creates {@code count} nodes of the "default"
     * flavor, adds them to the repository, and takes them through dirty to ready.
     *
     * @param count          the number of nodes to create
     * @param idPrefix       prefix of the first argument to {@code createNode}; the loop index is appended
     *                       (presumably the node id - confirm against NodeRepository.createNode)
     * @param hostnamePrefix prefix of the second argument to {@code createNode}; the loop index is appended
     *                       (presumably the hostname - confirm against NodeRepository.createNode)
     * @param type           the type of the created nodes
     * @param nodeRepository the repository to add the nodes to
     * @param nodeFlavors    the flavors to look up the "default" flavor in
     */
    private void addReadyNodes(int count, String idPrefix, String hostnamePrefix, NodeType type,
                               NodeRepository nodeRepository, NodeFlavors nodeFlavors) {
        List<Node> nodes = new ArrayList<>(count);
        for (int i = 0; i < count; i++) {
            nodes.add(nodeRepository.createNode(idPrefix + i, hostnamePrefix + i, Optional.empty(),
                                                nodeFlavors.getFlavorOrThrow("default"), type));
        }
        // Each step returns the nodes in their new state, which is what the next transition is given
        nodes = nodeRepository.addNodes(nodes);
        nodes = nodeRepository.setDirty(nodes, Agent.system, getClass().getSimpleName());
        nodeRepository.setReady(nodes, Agent.system, getClass().getSimpleName());
    }

    private class Fixture {

        final NodeRepository nodeRepository;
        final NodeRepositoryProvisioner provisioner;
        final Curator curator;
        final Deployer deployer;

        final ApplicationId app1 = ApplicationId.from(TenantName.from("foo1"), ApplicationName.from("bar"), InstanceName.from("fuz"));
        final ApplicationId app2 = ApplicationId.from(TenantName.from("foo2"), ApplicationName.from("bar"), InstanceName.from("fuz"));
        final ClusterSpec clusterApp1 = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("test"), Version.fromString("6.42"), false);
        final ClusterSpec clusterApp2 = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("test"), Version.fromString("6.42"), false);
        final int wantedNodesApp1 = 5;
        final int wantedNodesApp2 = 7;

        Fixture(Zone zone, NodeRepository nodeRepository, NodeFlavors flavors, Curator curator) {
            this.nodeRepository = nodeRepository;
            this.curator = curator;
            this.provisioner =  new NodeRepositoryProvisioner(nodeRepository, flavors, zone);

            Map<ApplicationId, MockDeployer.ApplicationContext> apps = new HashMap<>();
            apps.put(app1, new MockDeployer.ApplicationContext(app1, clusterApp1,
                                                               Capacity.fromNodeCount(wantedNodesApp1, Optional.of("default"), false, true), 1));
            apps.put(app2, new MockDeployer.ApplicationContext(app2, clusterApp2,
                                                               Capacity.fromNodeCount(wantedNodesApp2, Optional.of("default"), false, true), 1));
            this.deployer = new MockDeployer(provisioner, nodeRepository.clock(), apps);
        }

        void activate() {
            activate(app1, clusterApp1, wantedNodesApp1, provisioner);
            activate(app2, clusterApp2, wantedNodesApp2, provisioner);
            assertEquals(wantedNodesApp1, nodeRepository.getNodes(app1, Node.State.active).size());
            assertEquals(wantedNodesApp2, nodeRepository.getNodes(app2, Node.State.active).size());
        }

        private void activate(ApplicationId applicationId, ClusterSpec cluster, int nodeCount, NodeRepositoryProvisioner provisioner) {
            List<HostSpec> hosts = provisioner.prepare(applicationId, cluster, Capacity.fromNodeCount(nodeCount), 1, null);
            NestedTransaction transaction = new NestedTransaction().add(new CuratorTransaction(curator));
            provisioner.activate(transaction, applicationId, hosts);
            transaction.commit();
        }

        void remove(ApplicationId application) {
            NestedTransaction transaction = new NestedTransaction().add(new CuratorTransaction(curator));
            provisioner.remove(transaction, application);
            transaction.commit();
        }

        void runApplicationMaintainer() {
            runApplicationMaintainer(Optional.empty());
        }

        void runApplicationMaintainer(Optional<List<Node>> overriddenNodesNeedingMaintenance) {
            new TestablePeriodicApplicationMaintainer(deployer, nodeRepository, Duration.ofMinutes(30), overriddenNodesNeedingMaintenance).run();
        }

        NodeList getNodes(Node.State ... states) {
            return new NodeList(nodeRepository.getNodes(NodeType.tenant, states));
        }

    }
    
    /**
     * A periodic application maintainer for tests: deployment goes straight to {@code deployWithLock},
     * throttling does nothing, and the set of nodes needing maintenance can be replaced by a fixed list.
     */
    public static class TestablePeriodicApplicationMaintainer extends PeriodicApplicationMaintainer {

        /** When present, returned from {@link #nodesNeedingMaintenance()} instead of the superclass result */
        private final Optional<List<Node>> overriddenNodesNeedingMaintenance;

        TestablePeriodicApplicationMaintainer(Deployer deployer, NodeRepository nodeRepository, Duration interval,
                                              Optional<List<Node>> overriddenNodesNeedingMaintenance) {
            super(deployer, nodeRepository, interval, new JobControl(nodeRepository.database()));
            this.overriddenNodesNeedingMaintenance = overriddenNodesNeedingMaintenance;
        }

        /** Deploys the application by calling {@code deployWithLock} directly */
        @Override
        protected void deploy(ApplicationId application) {
            deployWithLock(application);
        }

        // NOTE(review): presumably overrides a throttle hook in the superclass so tests never wait;
        // the superclass is not visible here - confirm, and add @Override if it does.
        protected void throttle(int applicationCount) { }

        /** Returns the overriding node list if one was given, otherwise whatever the superclass finds */
        @Override
        protected List<Node> nodesNeedingMaintenance() {
            return overriddenNodesNeedingMaintenance.orElseGet(super::nodesNeedingMaintenance);
        }

    }

}