aboutsummaryrefslogtreecommitdiffstats
path: root/clustercontroller-core/src/main/java/com/yahoo/vespa/clustercontroller/core/GroupAvailabilityCalculator.java
blob: eab242b78f22dc78d537be225d345f839f5be2c6 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.clustercontroller.core;

import com.yahoo.vdslib.distribution.ConfiguredNode;
import com.yahoo.vdslib.distribution.Distribution;
import com.yahoo.vdslib.distribution.Group;
import com.yahoo.vdslib.distribution.GroupVisitor;
import com.yahoo.vdslib.state.ClusterState;
import com.yahoo.vdslib.state.Node;
import com.yahoo.vdslib.state.NodeState;
import com.yahoo.vdslib.state.NodeType;
import com.yahoo.vdslib.state.State;

import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import java.util.stream.Stream;

/**
 * Determines which storage nodes should be implicitly taken down because the
 * leaf group they belong to has fewer available nodes than the configured
 * minimum ratio allows.
 *
 * Instances are created through {@link #builder()}.
 */
class GroupAvailabilityCalculator {
    private final Distribution distribution;
    private final double minNodeRatioPerGroup;

    private GroupAvailabilityCalculator(Distribution distribution, double minNodeRatioPerGroup) {
        this.distribution = distribution;
        this.minNodeRatioPerGroup = minNodeRatioPerGroup;
    }

    /**
     * Fluent builder for {@link GroupAvailabilityCalculator}. Unless overridden,
     * the distribution is unset (null) and the minimum node ratio per group is 1.0.
     */
    public static class Builder {
        private Distribution distribution;
        private double minNodeRatioPerGroup = 1.0;

        /** Sets the distribution whose groups will be inspected. */
        Builder withDistribution(Distribution distribution) {
            this.distribution = distribution;
            return this;
        }

        /** Sets the availability ratio below which a leaf group is taken down. */
        Builder withMinNodeRatioPerGroup(double minRatio) {
            this.minNodeRatioPerGroup = minRatio;
            return this;
        }

        /** Creates a calculator from the values collected so far. */
        GroupAvailabilityCalculator build() {
            return new GroupAvailabilityCalculator(distribution, minNodeRatioPerGroup);
        }
    }

    /** Returns a new builder with default settings. */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Group visitor that collects the indices of nodes residing in leaf groups
     * whose availability is below the enclosing calculator's minimum ratio.
     */
    private class InsufficientAvailabilityGroupVisitor implements GroupVisitor {
        private final Set<Integer> implicitlyDown = new HashSet<>();
        private final ClusterState clusterState;

        public InsufficientAvailabilityGroupVisitor(ClusterState clusterState) {
            this.clusterState = clusterState;
        }

        /**
         * Tells whether the storage node with the given index currently is in one
         * of the states named by the single-character codes in {@code states}.
         */
        private boolean storageNodeStateIsOneOf(final int index, final String states) {
            final Node storageNode = new Node(NodeType.STORAGE, index);
            final NodeState reported = clusterState.getNodeState(storageNode);
            final State current = reported.getState();
            return current.oneOf(states);
        }

        /** Counts the nodes in the group that are regarded as available. */
        private long countAvailableNodes(Group g) {
            // Nodes that are (u)p, (i)nitializing or in (m)aintenance all count as
            // available when deciding whether to take a whole group down, even
            // though that is only partially true for maintenance mode.
            long available = 0;
            for (ConfiguredNode candidate : g.getNodes()) {
                if (storageNodeStateIsOneOf(candidate.index(), "uim")) {
                    available++;
                }
            }
            return available;
        }

        /** Returns the fraction of the group's configured nodes that are available. */
        private double computeGroupAvailability(Group g) {
            // TODO also look at distributors
            final long available = countAvailableNodes(g);
            // The model is expected to reject deployments with empty groups, so the
            // divisor should never be zero here.
            final int configured = g.getNodes().size();
            return (double) available / configured;
        }

        /** Records every node in the group that is eligible for implicit takedown. */
        private void markAllAvailableGroupNodeIndicesAsDown(Group group) {
            // Only (u)p and (i)nitializing nodes are candidates. Nodes in
            // (m)aintenance are usually there for a good reason (e.g. orchestration
            // or a manual reboot), and (r)etired nodes may still hold data that the
            // rest of the cluster needs, so neither is implicitly set down.
            for (ConfiguredNode candidate : group.getNodes()) {
                if (storageNodeStateIsOneOf(candidate.index(), "ui")) {
                    implicitlyDown.add(candidate.index());
                }
            }
        }

        /**
         * Inspects a single group; leaf groups whose availability is below the
         * minimum ratio get their candidate nodes recorded as implicitly down.
         *
         * @return always {@code true} (presumably signals the traversal to
         *         continue; see GroupVisitor)
         */
        @Override
        public boolean visitGroup(Group group) {
            final boolean belowMinimum = group.isLeafGroup()
                    && computeGroupAvailability(group) < minNodeRatioPerGroup;
            if (belowMinimum) {
                markAllAvailableGroupNodeIndicesAsDown(group);
            }
            return true;
        }

        /** Returns the node indices collected so far. */
        Set<Integer> implicitlyDownNodeIndices() {
            return implicitlyDown;
        }
    }

    /**
     * Computes the indices of storage nodes that should be taken down because
     * their leaf group has insufficient availability in the given cluster state.
     *
     * @param state cluster state used to look up the current state of each node
     * @return indices of nodes to take down; empty when no distribution is set or
     *         when the root group is itself a leaf group
     */
    public Set<Integer> nodesThatShouldBeDown(ClusterState state) {
        // FIXME: for tests that don't set distribution properly!
        if (distribution == null) {
            return Collections.emptySet();
        }
        // A root group that is also a leaf means a flat cluster; implicit group
        // takedown only applies to hierarchic cluster setups.
        final boolean flatCluster = distribution.getRootGroup().isLeafGroup();
        if (flatCluster) {
            return new HashSet<>();
        }
        final InsufficientAvailabilityGroupVisitor collector =
                new InsufficientAvailabilityGroupVisitor(state);
        distribution.visitGroups(collector);
        return collector.implicitlyDownNodeIndices();
    }

}