summaryrefslogtreecommitdiffstats
path: root/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/CapacityCheckerTest.java
blob: 9d79af804cebfd6ba6f89bccdc3b08b098537d86 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
// Copyright 2020 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;

import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.nio.file.Paths;
import java.util.List;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

/**
 * Exercises the capacity checker reachable through {@link CapacityCheckerTester}: detection of
 * overcommitted hosts and computation of the worst-case host loss leading to an allocation failure,
 * including which reason(s) the resulting allocation failures are attributed to.
 *
 * NOTE(review): the positional arguments of {@code tester.createNodes(...)} are not visible from this
 * file. Judging by the assertion messages they look like (childrenPerHost, numDistinctChildren,
 * numHosts, hostExcessCapacity, hostExcessIps, numEmptyHosts, emptyHostExcessCapacity,
 * emptyHostExcessIps) - confirm against CapacityCheckerTester before relying on this.
 *
 * @author mgimle
 */
public class CapacityCheckerTest {

    /** Message used when a failure path unexpectedly carries no tenant in its failure reason. */
    private static final String TENANT_EXPECTED = "Expected failureReason.tenant to be present.";

    private CapacityCheckerTester tester;

    /** Gives every test its own, freshly constructed tester. */
    @Before
    public void setup() {
        tester = new CapacityCheckerTester();
    }

    /**
     * Restores the node repository from a JSON dump on disk and verifies that a failure path is found
     * and that every host it names is a host known to the repository.
     *
     * @throws IOException declared because restoring the repository from the dump file may throw it
     */
    @Test
    public void testWithRealData() throws IOException {
        String path = "./src/test/resources/zookeeper_dump.json";

        tester.cleanRepository();
        tester.restoreNodeRepositoryFromJsonFile(Paths.get(path));
        var failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure();
        assertTrue(failurePath.isPresent());
        assertTrue(tester.nodeRepository.getNodes(NodeType.host).containsAll(failurePath.get().hostsCausingFailure));
    }

    /**
     * Verifies that the number of overcommitted hosts reported equals the number of hosts in the repository.
     */
    @Test
    public void testOvercommittedHosts() {
        // The negative vcpu value presumably leaves every host with less capacity than its children use.
        tester.createNodes(7, 4,
                10, new NodeResources(-1, 10, 100, 1), 10,
                0, new NodeResources(1, 10, 100, 1), 10);
        int overcommittedHosts = tester.capacityChecker.findOvercommittedHosts().size();
        assertEquals(tester.nodeRepository.getNodes(NodeType.host).size(), overcommittedHosts);
    }

    /**
     * Covers three edge cases: no failure path at all, a failure path whose failure reason names neither
     * tenant nor host, and a failure path where every allocation failure has multiple reasons.
     */
    @Test
    public void testEdgeCaseFailurePaths() {
        tester.createNodes(1, 1,
                0, new NodeResources(1, 10, 100, 1), 10,
                0, new NodeResources(1, 10, 100, 1), 10);
        var failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure();
        assertFalse("Computing worst case host loss with no hosts should return an empty optional.", failurePath.isPresent());

        // Odd edge case that should never be able to occur in prod
        tester.createNodes(1, 10,
                10, new NodeResources(10, 1000, 10000, 1), 100,
                1, new NodeResources(10, 1000, 10000, 1), 100);
        failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure();
        assertTrue(failurePath.isPresent());
        assertTrue("Computing worst case host loss if all hosts have to be removed should result in an non-empty failureReason with empty nodes.",
                failurePath.get().failureReason.tenant.isEmpty() && failurePath.get().failureReason.host.isEmpty());
        assertEquals(tester.nodeRepository.getNodes(NodeType.host).size(), failurePath.get().hostsCausingFailure.size());

        tester.createNodes(3, 30,
                10, new NodeResources(0, 0, 10000, 1), 1000,
                0, new NodeResources(0, 0, 0, 0), 0);
        failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure();
        assertTrue(failurePath.isPresent());
        assertTrue(TENANT_EXPECTED, failurePath.get().failureReason.tenant.isPresent());
        var failureReasons = failurePath.get().failureReason.allocationFailures;
        assertEquals("When there are multiple lacking resources, all failures are multipleReasonFailures",
                failureReasons.size(), failureReasons.multipleReasonFailures().size());
        assertEquals(0, failureReasons.singularReasonFailures().size());
    }

    /**
     * Verifies that every allocation failure is a single-reason failure caused by insufficient available IPs.
     */
    @Test
    public void testIpFailurePaths() {
        tester.createNodes(1, 10,
                10, new NodeResources(10, 1000, 10000, 1), 1,
                10, new NodeResources(10, 1000, 10000, 1), 1);
        var failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure();
        assertTrue(failurePath.isPresent());
        assertTrue(TENANT_EXPECTED, failurePath.get().failureReason.tenant.isPresent());
        var failureReasons = failurePath.get().failureReason.allocationFailures;
        // Expected value first: the total number of failures; actual: those attributed to IP shortage.
        assertEquals("All failures should be due to hosts having a lack of available ip addresses.",
                failureReasons.size(), failureReasons.singularReasonFailures().insufficientAvailableIps());
    }

    /**
     * Verifies, one resource at a time, that allocation failures are attributed to the single lacking
     * resource: vcpu, memory, disk and finally incompatible disk speed.
     */
    @Test
    public void testNodeResourceFailurePaths() {
        // Vcpu is the only resource dimension set to 0 on the second group of hosts.
        tester.createNodes(1, 10,
                10, new NodeResources(1, 100, 1000, 1), 100,
                10, new NodeResources(0, 100, 1000, 1), 100);
        var failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure();
        assertTrue(failurePath.isPresent());
        assertTrue(TENANT_EXPECTED, failurePath.get().failureReason.tenant.isPresent());
        var failureReasons = failurePath.get().failureReason.allocationFailures;
        assertEquals("All failures should be due to hosts lacking cpu cores.",
                failureReasons.size(), failureReasons.singularReasonFailures().insufficientVcpu());

        // Memory is the only resource dimension set to 0 on the second group of hosts.
        tester.createNodes(1, 10,
                10, new NodeResources(10, 1, 1000, 1), 100,
                10, new NodeResources(10, 0, 1000, 1), 100);
        failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure();
        assertTrue(failurePath.isPresent());
        assertTrue(TENANT_EXPECTED, failurePath.get().failureReason.tenant.isPresent());
        failureReasons = failurePath.get().failureReason.allocationFailures;
        assertEquals("All failures should be due to hosts lacking memory.",
                failureReasons.size(), failureReasons.singularReasonFailures().insufficientMemoryGb());

        // Disk is the only resource dimension set to 0 on the second group of hosts.
        tester.createNodes(1, 10,
                10, new NodeResources(10, 100, 10, 1), 100,
                10, new NodeResources(10, 100, 0, 1), 100);
        failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure();
        assertTrue(failurePath.isPresent());
        assertTrue(TENANT_EXPECTED, failurePath.get().failureReason.tenant.isPresent());
        failureReasons = failurePath.get().failureReason.allocationFailures;
        assertEquals("All failures should be due to hosts lacking disk space.",
                failureReasons.size(), failureReasons.singularReasonFailures().insufficientDiskGb());

        // The second group of hosts has ample resources but is declared with slow disks.
        int emptyHostsWithSlowDisk = 10;
        tester.createNodes(1, 10, List.of(new NodeResources(1, 10, 100, 1)),
                10, new NodeResources(0, 0, 0, 0), 100,
                10, new NodeResources(10, 1000, 10000, 1, NodeResources.DiskSpeed.slow), 100);
        failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure();
        assertTrue(failurePath.isPresent());
        assertTrue(TENANT_EXPECTED, failurePath.get().failureReason.tenant.isPresent());
        failureReasons = failurePath.get().failureReason.allocationFailures;
        assertEquals("All empty hosts should be invalid due to having incompatible disk speed.",
                emptyHostsWithSlowDisk, failureReasons.singularReasonFailures().incompatibleDiskSpeed());
    }

    /**
     * Verifies attribution of allocation failures to parent host policy violations: first a setup where
     * all failures are such violations, then one where only some (but at least one) are.
     */
    @Test
    public void testParentHostPolicyIntegrityFailurePaths() {
        tester.createNodes(1, 1,
                10, new NodeResources(1, 100, 1000, 1), 100,
                10, new NodeResources(10, 1000, 10000, 1), 100);
        var failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure();
        assertTrue(failurePath.isPresent());
        assertTrue(TENANT_EXPECTED, failurePath.get().failureReason.tenant.isPresent());
        var failureReasons = failurePath.get().failureReason.allocationFailures;
        assertEquals("With only one type of tenant, all failures should be due to violation of the parent host policy.",
                failureReasons.size(), failureReasons.singularReasonFailures().violatesParentHostPolicy());

        tester.createNodes(1, 2,
                10, new NodeResources(10, 100, 1000, 1), 1,
                0, new NodeResources(0, 0, 0, 0), 0);
        failurePath = tester.capacityChecker.worstCaseHostLossLeadingToFailure();
        assertTrue(failurePath.isPresent());
        assertTrue(TENANT_EXPECTED, failurePath.get().failureReason.tenant.isPresent());
        failureReasons = failurePath.get().failureReason.allocationFailures;
        assertNotEquals("Fewer distinct children than hosts should result in some parent host policy violations.",
                failureReasons.size(), failureReasons.singularReasonFailures().violatesParentHostPolicy());
        assertNotEquals(0, failureReasons.singularReasonFailures().violatesParentHostPolicy());
    }
}