author    toby <smorgrav@yahoo-inc.com>  2017-07-20 11:17:34 +0200
committer toby <smorgrav@yahoo-inc.com>  2017-08-14 11:27:08 +0200
commit    571cece4fd144723513946898333419d5f825a91 (patch)
tree      dc4dfe338ccec0dcdaa5e6d81c9d3b0912efb78c /node-repository
parent    be257d41f011bbe6d8bf9838ac4cf01f9dae7b00 (diff)
Add NodePrioritizer files
Diffstat (limited to 'node-repository')
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java  263
-rw-r--r--  node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePriority.java      93
2 files changed, 356 insertions(+), 0 deletions(-)
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java
new file mode 100644
index 00000000000..331fe6f403d
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java
@@ -0,0 +1,263 @@
+package com.yahoo.vespa.hosted.provision.provisioning;
+
+import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.ClusterSpec;
+import com.yahoo.config.provision.Flavor;
+import com.yahoo.config.provision.NodeFlavors;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
+
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Builds a priority queue of the nodes that should be offered to the allocation.
+ *
+ * Builds a list of NodePriority objects and sorts them according to the
+ * NodePriority::compare method.
+ *
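+ * <p>A minimal usage sketch (identifiers like {@code allocation} and {@code flavors}
+ * are assumed surrounding context, not part of this patch):
+ * <pre>{@code
+ * NodePrioritizer prioritizer = new NodePrioritizer(allNodes, application, cluster,
+ *                                                   requestedNodes, flavors, maxRetires, spares);
+ * prioritizer.initNodes(surplusNodes, dynamicAllocationEnabled);
+ * prioritizer.offer(allocation);
+ * }</pre>
+ *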
+ * @author smorgrav
+ */
+public class NodePrioritizer {
+
+ private final Map<Node, NodePriority> nodes = new HashMap<>();
+ private final List<Node> allNodes;
+ private final DockerHostCapacity capacity;
+ private final NodeSpec requestedNodes;
+ private final ApplicationId appId;
+ private final int maxRetires;
+ private final ClusterSpec clusterSpec;
+
+ private final List<Node> spareHosts;
+ private final List<Node> headroomViolatedHosts;
+ private final long failedNodesInCluster;
+
+    private int nofViolations = 0;
+
+ NodePrioritizer(List<Node> allNodes, ApplicationId appId, ClusterSpec clusterSpec, NodeSpec nodeSpec, NodeFlavors nodeFlavors, int maxRetires, int spares) {
+ this.allNodes = Collections.unmodifiableList(allNodes);
+ this.requestedNodes = nodeSpec;
+ this.maxRetires = maxRetires;
+ this.clusterSpec = clusterSpec;
+ this.appId = appId;
+
+ // Add spare and headroom allocations
+ spareHosts = DockerCapacityConstraints.findSpareHosts(allNodes, spares);
+ List<Node> nodesWithHeadroomAndSpares =
+ DockerCapacityConstraints.addHeadroomAndSpareNodes(allNodes, nodeFlavors, spares);
+
+ this.capacity = new DockerHostCapacity(nodesWithHeadroomAndSpares);
+
+ failedNodesInCluster = allNodes.stream()
+ .filter(node -> node.state().equals(Node.State.failed))
+ .filter(node -> node.allocation().isPresent())
+ .filter(node -> node.allocation().get().owner().equals(appId))
+ .filter(node -> node.allocation().get().membership().cluster().id().equals(clusterSpec.id()))
+ .count();
+
+        // TODO Find hosts where we have headroom violations
+        headroomViolatedHosts = new ArrayList<>();
+    }
+
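+    /**
+     * Populates the priority map with the application's own nodes, surplus nodes,
+     * ready nodes and, when dynamic allocation is enabled, new candidate Docker nodes.
+     */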
+ void initNodes(List<Node> surplusNodes, boolean dynamicAllocationEnabled) {
+ addApplicationNodes();
+ addSurplusNodes(surplusNodes);
+ addReadyNodes();
+ if (dynamicAllocationEnabled && getDockerFlavor() != null) {
+ addNewDockerNodes();
+ }
+ }
+
+ private void addSurplusNodes(List<Node> surplusNodes) {
+        // TODO Change group index if this is a content cluster
+ for (Node node : surplusNodes) {
+ nodes.put(node, toNodePriority(node, true, false));
+ }
+ }
+
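+    /**
+     * For each Docker host that has free capacity and no existing node from this cluster,
+     * creates a candidate ("fake") Docker node so that empty hosts can compete in the
+     * priority queue alongside existing nodes. The candidates are not yet persisted in
+     * the node repository.
+     */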
+ private void addNewDockerNodes() {
+        DockerHostCapacity capacityWithoutHeadroom = new DockerHostCapacity(allNodes);
+
+ for (Node node : allNodes) {
+ if (node.type() == NodeType.host) {
+ boolean conflictingCluster = false;
+ NodeList list = new NodeList(allNodes);
+ NodeList childrenWithSameApp = list.childNodes(node).owner(appId);
+ for (Node child : childrenWithSameApp.asList()) {
+ // Look for nodes from the same cluster
+ if (child.allocation().get().membership().cluster().id().equals(clusterSpec.id())) {
+ conflictingCluster = true;
+ break;
+ }
+ }
+
+                if (!conflictingCluster && capacityWithoutHeadroom.hasCapacity(node, getDockerFlavor())) {
+ Set<String> ipAddresses = DockerHostCapacity.findFreeIps(node, allNodes);
+ if (ipAddresses.isEmpty()) continue;
+ String ipAddress = ipAddresses.stream().findFirst().get();
+ String hostname = lookupHostname(ipAddress);
+ if (hostname == null) continue;
+ Node newNode = Node.createDockerNode("fake-" + hostname, Collections.singleton(ipAddress),
+ Collections.emptySet(), hostname, Optional.of(node.hostname()), getDockerFlavor(), NodeType.tenant);
+ nodes.put(newNode, toNodePriority(newNode, false, true));
+ }
+ }
+ }
+ }
+
+ /**
+     * Resolves the hostname from an IP address.
+     *
+     * @return the hostname, or null if the lookup fails
+ */
+ private static String lookupHostname(String ipAddress) {
+ try {
+ return InetAddress.getByName(ipAddress).getHostName();
+ } catch (UnknownHostException e) {
+ e.printStackTrace();
+ }
+ return null;
+ }
+
+ private void addReadyNodes() {
+ allNodes.stream()
+ .filter(node -> node.type().equals(requestedNodes.type()))
+ .filter(node -> node.state().equals(Node.State.ready))
+ .map(node -> toNodePriority(node, false, false))
+ .forEach(nodePriority -> nodes.put(nodePriority.node, nodePriority));
+ }
+
+    private void addApplicationNodes() {
+        List<Node.State> legalStates = Arrays.asList(Node.State.active, Node.State.inactive, Node.State.reserved);
+ allNodes.stream()
+ .filter(node -> node.type().equals(requestedNodes.type()))
+ .filter(node -> legalStates.contains(node.state()))
+ .filter(node -> node.allocation().isPresent())
+ .filter(node -> node.allocation().get().owner().equals(appId))
+ .map(node -> toNodePriority(node, false, false))
+ .forEach(nodePriority -> nodes.put(nodePriority.node, nodePriority));
+ }
+
+ List<Node> filterNewNodes(Set<Node> acceptedNodes) {
+ List<Node> newNodes = new ArrayList<>();
+ for (Node node : acceptedNodes) {
+ if (nodes.get(node).isNewNode) {
+ newNodes.add(node);
+ }
+ }
+ return newNodes;
+ }
+
+ List<Node> filterSurplusNodes(Set<Node> acceptedNodes) {
+ List<Node> surplusNodes = new ArrayList<>();
+ for (Node node : acceptedNodes) {
+ if (nodes.get(node).isSurplusNode) {
+ surplusNodes.add(node);
+ }
+ }
+ return surplusNodes;
+ }
+
+ List<Node> filterInactiveAndReadyNodes(Set<Node> acceptedNodes) {
+ List<Node> inactiveAndReady = new ArrayList<>();
+ for (Node node : acceptedNodes) {
+ if (node.state().equals(Node.State.inactive) || node.state().equals(Node.State.ready)) {
+ inactiveAndReady.add(node);
+ }
+ }
+ return inactiveAndReady;
+ }
+
+ /**
+     * Converts a node to a node priority. This includes finding and calculating the
+     * parameters used by the priority sorting procedure.
+ */
+ private NodePriority toNodePriority(Node node, boolean isSurplusNode, boolean isNewNode) {
+ NodePriority pri = new NodePriority();
+ pri.node = node;
+ pri.isSurplusNode = isSurplusNode;
+ pri.isNewNode = isNewNode;
+ pri.preferredOnFlavor = requestedNodes.specifiesNonStockFlavor() && node.flavor().equals(getDockerFlavor());
+ pri.parent = findParentNode(node);
+
+ if (pri.parent.isPresent()) {
+ Node parent = pri.parent.get();
+ pri.freeParentCapacity = capacity.freeCapacityOf(parent, true);
+
+            /*
+             * To be conservative we restrict how many nodes can be retired for each cluster
+             * per allocation iteration. TODO Also account for previously retired nodes
+             * (thus removing the per-iteration restriction)?
+             */
+ if (nofViolations <= maxRetires) {
+ // Spare violation
+ if (spareHosts.contains(parent)) {
+ pri.violatesSpares = true;
+ nofViolations++;
+ }
+
+ // Headroom violation
+ if (headroomViolatedHosts.contains(parent)) {
+ pri.violatesHeadroom = true;
+ nofViolations++;
+ }
+ }
+ }
+ return pri;
+ }
+
+    void offer(NodeAllocation allocation) {
+        List<NodePriority> prioritizedNodes = nodes.values().stream()
+                .sorted(NodePriority::compare)
+                .collect(Collectors.toList());
+
+ for (NodePriority nodePriority : prioritizedNodes) {
+
+ // The replacement heuristic assumes that new nodes are offered after already existing nodes
+ boolean isReplacement = isReplacement(allocation.getAcceptedNodes().size());
+
+            // Only offer a new node that violates the spare constraint if this is a replacement
+ if (!nodePriority.violatesSpares || isReplacement || !nodePriority.isNewNode) {
+ allocation.offer(Collections.singletonList(nodePriority.node), nodePriority.isSurplusNode);
+ }
+ }
+ }
+
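+    /**
+     * A new node counts as a replacement when the cluster has failed nodes and the
+     * accepted nodes plus the failed nodes cover the requested count, e.g.
+     * wanted=5, accepted=3, failed=2 gives 5 <= 3 + 2 and hence a replacement.
+     */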
+ private boolean isReplacement(int nodesAccepted) {
+ if (failedNodesInCluster == 0) return false;
+
+ int wantedCount = 0;
+ if (requestedNodes instanceof NodeSpec.CountNodeSpec) {
+ NodeSpec.CountNodeSpec countSpec = (NodeSpec.CountNodeSpec) requestedNodes;
+ wantedCount = countSpec.getCount();
+ }
+
+ return (wantedCount <= nodesAccepted + failedNodesInCluster);
+ }
+
+ private Flavor getDockerFlavor() {
+ if (requestedNodes instanceof NodeSpec.CountNodeSpec) {
+ NodeSpec.CountNodeSpec countSpec = (NodeSpec.CountNodeSpec) requestedNodes;
+ return countSpec.getFlavor();
+ }
+ return null;
+ }
+
+ private Optional<Node> findParentNode(Node node) {
+ if (!node.parentHostname().isPresent()) return Optional.empty();
+ return allNodes.stream()
+                .filter(n -> n.hostname().equals(node.parentHostname().get()))
+ .findAny();
+ }
+}
\ No newline at end of file
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePriority.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePriority.java
new file mode 100644
index 00000000000..b41082bd34d
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePriority.java
@@ -0,0 +1,93 @@
+package com.yahoo.vespa.hosted.provision.provisioning;
+
+import com.yahoo.vespa.hosted.provision.Node;
+
+import java.util.Optional;
+
+/**
+ * Encapsulates all the information necessary to prioritize a node for allocation.
+ *
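+ * <p>A sketch of how priorities are ordered (the {@code priorities} collection is
+ * assumed surrounding context):
+ * <pre>{@code
+ * List<NodePriority> queue = new ArrayList<>(priorities);
+ * queue.sort(NodePriority::compare); // highest priority first
+ * }</pre>
+ *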
+ * @author smorgrav
+ */
+public class NodePriority {
+
+ Node node;
+
+ /** The free capacity excluding headroom, including retired allocations */
+ ResourceCapacity freeParentCapacity = new ResourceCapacity();
+
+ /** The parent host (docker or hypervisor) */
+ Optional<Node> parent = Optional.empty();
+
+    /** True if the node is allocated to a host that should be spare (without allocations) */
+ boolean violatesSpares;
+
+ /** True if the node is allocated on slots that should be headroom */
+ boolean violatesHeadroom;
+
+ /** True if this is a node that has been retired earlier in the allocation process */
+ boolean isSurplusNode;
+
+    /** True if this node does not exist in the node repository yet */
+ boolean isNewNode;
+
+ /** True if exact flavor is specified by the allocation request and this node has this flavor */
+ boolean preferredOnFlavor;
+
+ /**
+     * Compares two node priorities.
+     *
+     * The sort order is ascending, so a higher-priority node sorts first.
+     *
+     * @return a negative value if the first node has higher priority than the second
+ */
+ static int compare(NodePriority n1, NodePriority n2) {
+ // First always pick nodes without violation above nodes with violations
+ if (!n1.violatesSpares && n2.violatesSpares) return -1;
+ if (!n2.violatesSpares && n1.violatesSpares) return 1;
+ if (!n1.violatesHeadroom && n2.violatesHeadroom) return -1;
+ if (!n2.violatesHeadroom && n1.violatesHeadroom) return 1;
+
+ // Choose active nodes
+ if (n1.node.state().equals(Node.State.active) && !n2.node.state().equals(Node.State.active)) return -1;
+ if (n2.node.state().equals(Node.State.active) && !n1.node.state().equals(Node.State.active)) return 1;
+
+ // Choose surplus nodes
+ if (n1.isSurplusNode && !n2.isSurplusNode) return -1;
+ if (n2.isSurplusNode && !n1.isSurplusNode) return 1;
+
+ // Choose inactive nodes
+ if (n1.node.state().equals(Node.State.inactive) && !n2.node.state().equals(Node.State.inactive)) return -1;
+ if (n2.node.state().equals(Node.State.inactive) && !n1.node.state().equals(Node.State.inactive)) return 1;
+
+ // Choose reserved nodes
+ if (n1.node.state().equals(Node.State.reserved) && !n2.node.state().equals(Node.State.reserved)) return -1;
+ if (n2.node.state().equals(Node.State.reserved) && !n1.node.state().equals(Node.State.reserved)) return 1;
+
+        // The node states must be equal at this point
+        if (!n1.node.state().equals(n2.node.state())) {
+            throw new IllegalStateException(
+                    String.format("Nodes for allocation are not in the expected state. Got %s and %s.",
+                            n1.node.state(), n2.node.state()));
+        }
+
+ // Choose exact flavor
+ if (n1.preferredOnFlavor && !n2.preferredOnFlavor) return -1;
+ if (n2.preferredOnFlavor && !n1.preferredOnFlavor) return 1;
+
+        // Choose a docker node over a non-docker node (differentiates docker nodes replacing non-docker flavors)
+ if (n1.parent.isPresent() && !n2.parent.isPresent()) return -1;
+ if (n2.parent.isPresent() && !n1.parent.isPresent()) return 1;
+
+        // Choose the node whose parent host has the least free capacity
+ int freeCapacity = n1.freeParentCapacity.compare(n2.freeParentCapacity);
+ if (freeCapacity != 0) return freeCapacity;
+
+ // Choose cheapest node
+        if (n1.node.flavor().cost() < n2.node.flavor().cost()) return -1;
+        if (n2.node.flavor().cost() < n1.node.flavor().cost()) return 1;
+
+ // All else equal choose hostname lexically
+ return n1.node.hostname().compareTo(n2.node.hostname());
+ }
+}