summaryrefslogtreecommitdiffstats
path: root/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Rebalancer.java
blob: b1d3551c1b6643d7da324308453bc063a159f8ff (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.maintenance;

import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.Deployer;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import com.yahoo.jdisc.Metric;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.NodeRepository;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.provisioning.HostCapacity;

import java.time.Clock;
import java.time.Duration;

/**
 * @author bratseth
 */
public class Rebalancer extends NodeRepositoryMaintainer {

    static final Duration waitTimeAfterPreviousDeployment = Duration.ofMinutes(10);

    private final Deployer deployer;
    private final Metric metric;
    private final Clock clock;

    public Rebalancer(Deployer deployer,
                      NodeRepository nodeRepository,
                      Metric metric,
                      Clock clock,
                      Duration interval) {
        super(nodeRepository, interval, metric);
        this.deployer = deployer;
        this.metric = metric;
        this.clock = clock;
    }

    @Override
    protected boolean maintain() {
        boolean success = true;
        if ( ! nodeRepository().zone().getCloud().allowHostSharing()) return success; // Rebalancing not necessary
        if (nodeRepository().zone().environment().isTest()) return success; // Short lived deployments; no need to rebalance

        // Work with an unlocked snapshot as this can take a long time and full consistency is not needed
        NodeList allNodes = nodeRepository().list();
        updateSkewMetric(allNodes);
        if ( ! zoneIsStable(allNodes)) return success;
        findBestMove(allNodes).execute(true, Agent.Rebalancer, deployer, metric, nodeRepository());
        return success;
   }

    /** We do this here rather than in MetricsReporter because it is expensive and frequent updates are unnecessary */
    private void updateSkewMetric(NodeList allNodes) {
        HostCapacity capacity = new HostCapacity(allNodes, nodeRepository().resourcesCalculator());
        double totalSkew = 0;
        int hostCount = 0;
        for (Node host : allNodes.nodeType((NodeType.host)).state(Node.State.active)) {
            hostCount++;
            totalSkew += Node.skew(host.flavor().resources(), capacity.freeCapacityOf(host));
        }
        metric.set("hostedVespa.docker.skew", totalSkew/hostCount, null);
    }

    static boolean zoneIsStable(NodeList allNodes) {
        NodeList active = allNodes.state(Node.State.active);
        if (active.stream().anyMatch(node -> node.allocation().get().membership().retired())) return false;
        if (active.stream().anyMatch(node -> node.status().wantToRetire())) return false;
        return true;
    }

    /**
     * Find the best move to reduce allocation skew and returns it.
     * Returns Move.none if no moves can be made to reduce skew.
     */
    private Move findBestMove(NodeList allNodes) {
        HostCapacity capacity = new HostCapacity(allNodes, nodeRepository().resourcesCalculator());
        Move bestMove = Move.empty();
        for (Node node : allNodes.nodeType(NodeType.tenant).state(Node.State.active)) {
            if (node.parentHostname().isEmpty()) continue;
            ApplicationId applicationId = node.allocation().get().owner();
            if (applicationId.instance().isTester()) continue;
            if (deployedRecently(applicationId)) continue;
            for (Node toHost : allNodes.matching(nodeRepository()::canAllocateTenantNodeTo)) {
                if (toHost.hostname().equals(node.parentHostname().get())) continue;
                if ( ! capacity.freeCapacityOf(toHost).satisfies(node.resources())) continue;

                double skewReductionAtFromHost = skewReductionByRemoving(node, allNodes.parentOf(node).get(), capacity);
                double skewReductionAtToHost = skewReductionByAdding(node, toHost, capacity);
                double netSkewReduction = skewReductionAtFromHost + skewReductionAtToHost;
                if (netSkewReduction > bestMove.netSkewReduction)
                    bestMove = new Move(node, nodeRepository().getNode(node.parentHostname().get()).get(), toHost, netSkewReduction);
            }
        }
        return bestMove;
    }

    private double skewReductionByRemoving(Node node, Node fromHost, HostCapacity capacity) {
        NodeResources freeHostCapacity = capacity.freeCapacityOf(fromHost);
        double skewBefore = Node.skew(fromHost.flavor().resources(), freeHostCapacity);
        double skewAfter = Node.skew(fromHost.flavor().resources(), freeHostCapacity.add(node.flavor().resources().justNumbers()));
        return skewBefore - skewAfter;
    }

    private double skewReductionByAdding(Node node, Node toHost, HostCapacity capacity) {
        NodeResources freeHostCapacity = capacity.freeCapacityOf(toHost);
        double skewBefore = Node.skew(toHost.flavor().resources(), freeHostCapacity);
        double skewAfter = Node.skew(toHost.flavor().resources(), freeHostCapacity.subtract(node.resources().justNumbers()));
        return skewBefore - skewAfter;
    }

    protected boolean deployedRecently(ApplicationId application) {
        return deployer.lastDeployTime(application)
                .map(lastDeployTime -> lastDeployTime.isAfter(clock.instant().minus(waitTimeAfterPreviousDeployment)))
                // We only know last deploy time for applications that were deployed on this config server,
                // the rest will be deployed on another config server
                .orElse(true);
    }

    private static class Move extends MaintenanceDeployment.Move {

        final double netSkewReduction;

        Move(Node node, Node fromHost, Node toHost, double netSkewReduction) {
            super(node, fromHost, toHost);
            this.netSkewReduction = netSkewReduction;
        }

        @Override
        public String toString() {
            if (isEmpty()) return "move none";
            return super.toString() + " [skew reduction "  + netSkewReduction + "]";
        }

        public static Move empty() {
            return new Move(null, null, null, 0);
        }

    }

}