summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authormgimle <michael@gimle.io>2019-07-15 11:49:42 +0200
committermgimle <michael@gimle.io>2019-07-15 11:58:25 +0200
commit9a98cc68d4a1f66e65c936cae9bd89b15972b661 (patch)
tree0869478ba7e2ea661c8bd26552d00eaf71a788c1
parent2a51bf3566624e069ac14e3f072e23b31907d78f (diff)
Added endpoint for explaining the conclusions of the host capacity checker.
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/HostCapacityResponse.java161
-rw-r--r--node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesApiHandler.java1
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java25
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/capacity-hostremoval-impossible.json20
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/capacity-hostremoval-possible.json20
-rw-r--r--node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/capacity-zone.json46
6 files changed, 273 insertions, 0 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/HostCapacityResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/HostCapacityResponse.java
new file mode 100644
index 00000000000..be8de0d4475
--- /dev/null
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/HostCapacityResponse.java
@@ -0,0 +1,161 @@
+package com.yahoo.vespa.hosted.provision.restapi.v2;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.yahoo.container.jdisc.HttpRequest;
+import com.yahoo.container.jdisc.HttpResponse;
+import com.yahoo.slime.Cursor;
+import com.yahoo.slime.JsonFormat;
+import com.yahoo.slime.Slime;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.maintenance.CapacityChecker;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Optional;
+
+public class HostCapacityResponse extends HttpResponse {
+ private final StringBuilder text;
+ private final Slime slime;
+ private final CapacityChecker capacityChecker;
+ private final boolean json;
+
+ public HostCapacityResponse(NodeRepository nodeRepository, HttpRequest request) {
+ super(200);
+ capacityChecker = new CapacityChecker(nodeRepository);
+
+ json = request.getBooleanProperty("json");
+ String hostsJson = request.getProperty("hosts");
+
+ text = new StringBuilder();
+ slime = new Slime();
+ Cursor root = slime.setObject();
+
+ if (hostsJson != null) {
+ ObjectMapper om = new ObjectMapper();
+ String[] hostsArray;
+ try {
+ hostsArray = om.readValue(hostsJson, String[].class);
+ } catch (Exception e) {
+ throw new IllegalArgumentException(e.getMessage());
+ }
+ List<String> hostNames = Arrays.asList(hostsArray);
+ List<Node> hosts;
+ try {
+ hosts = capacityChecker.nodesFromHostnames(hostNames);
+ } catch (IllegalArgumentException e) {
+ throw new NotFoundException(e.getMessage());
+ }
+ var failure = capacityChecker.findHostRemovalFailure(hosts);
+ if (failure.isPresent() && failure.get().failureReason.failureReasons.size() == 0) {
+ root.setBool("removalPossible", false);
+ error(root, "Removing all hosts is trivially impossible.");
+ } else {
+ if (json) hostLossPossibleToSlime(root, failure, hosts);
+ else hostLossPossibleToText(failure, hosts);
+ }
+ } else {
+ var failurePath = capacityChecker.worstCaseHostLossLeadingToFailure();
+ if (failurePath.isPresent()) {
+ if (json) zoneFailurePathToSlime(root, failurePath.get());
+ else zoneFailurePathToText(failurePath.get());
+ } else {
+ error(root, "Node repository contained no hosts.");
+ }
+ }
+ }
+
+ private void error(Cursor root, String errorMessage) {
+ if (json) root.setString("error", errorMessage);
+ else text.append(errorMessage);
+ }
+
+ private void hostLossPossibleToText(Optional<CapacityChecker.HostFailurePath> failure, List<Node> hostsToRemove) {
+ text.append(String.format("Attempting to remove %d hosts: ", hostsToRemove.size()));
+ CapacityChecker.AllocationHistory history = capacityChecker.allocationHistory;
+ if (failure.isEmpty()) {
+ text.append("OK\n\n");
+ text.append(history);
+ if (history.oldParents().size() != hostsToRemove.size()) {
+ long emptyHostCount = hostsToRemove.size() - history.oldParents().size();
+ text.append(String.format("\nTrivially removed %d empty host%s.", emptyHostCount, emptyHostCount > 1 ? "s" : ""));
+ }
+ } else {
+ text.append("FAILURE\n\n");
+ text.append(history).append("\n");
+ text.append(failure.get().failureReason).append("\n\n");
+ }
+ }
+
+ private void zoneFailurePathToText(CapacityChecker.HostFailurePath failurePath) {
+ text.append(String.format("Found %d hosts. Failure upon trying to remove %d hosts:\n\n",
+ capacityChecker.getHosts().size(),
+ failurePath.hostsCausingFailure.size()));
+ text.append(capacityChecker.allocationHistory).append("\n");
+ text.append(failurePath.failureReason);
+ }
+
+ private void hostLossPossibleToSlime(Cursor root, Optional<CapacityChecker.HostFailurePath> failure, List<Node> hostsToRemove) {
+ var hosts = root.setArray("hostsToRemove");
+ hostsToRemove.forEach(h -> hosts.addString(h.hostname()));
+ CapacityChecker.AllocationHistory history = capacityChecker.allocationHistory;
+ if (failure.isEmpty()) {
+ root.setBool("removalPossible", true);
+ } else {
+ root.setBool("removalPossible", false);
+ }
+ var arr = root.setArray("history");
+ for (var entry : history.historyEntries) {
+ var object = arr.addObject();
+ object.setString("tenant", entry.tenant.hostname());
+ if (entry.newParent != null) {
+ object.setString("newParent", entry.newParent.hostname());
+ }
+ object.setLong("eligibleParents", entry.eligibleParents);
+ }
+ }
+
+ public void zoneFailurePathToSlime(Cursor object, CapacityChecker.HostFailurePath failurePath) {
+ object.setLong("totalHosts", capacityChecker.getHosts().size());
+ object.setLong("couldLoseHosts", failurePath.hostsCausingFailure.size());
+ failurePath.failureReason.host.ifPresent(host ->
+ object.setString("failedTenantParent", host.hostname())
+ );
+ failurePath.failureReason.tenant.ifPresent(tenant -> {
+ object.setString("failedTenant", tenant.hostname());
+ object.setString("failedTenantResources", tenant.flavor().resources().toString());
+ tenant.allocation().ifPresent(allocation ->
+ object.setString("failedTenantAllocation", allocation.toString())
+ );
+ var explanation = object.setObject("hostCandidateRejectionReasons");
+ allocationFailureReasonListToSlime(explanation.setObject("singularReasonFailures"),
+ failurePath.failureReason.failureReasons.singularReasonFailures());
+ allocationFailureReasonListToSlime(explanation.setObject("totalFailures"),
+ failurePath.failureReason.failureReasons);
+ });
+ var details = object.setObject("details");
+ hostLossPossibleToSlime(details, Optional.of(failurePath), failurePath.hostsCausingFailure);
+ }
+
+ private void allocationFailureReasonListToSlime(Cursor root, CapacityChecker.AllocationFailureReasonList allocationFailureReasonList) {
+ root.setLong("insufficientVcpu", allocationFailureReasonList.insufficientVcpu());
+ root.setLong("insufficientMemoryGb", allocationFailureReasonList.insufficientMemoryGb());
+ root.setLong("insufficientDiskGb", allocationFailureReasonList.insufficientDiskGb());
+ root.setLong("incompatibleDiskSpeed", allocationFailureReasonList.incompatibleDiskSpeed());
+ root.setLong("insufficientAvailableIps", allocationFailureReasonList.insufficientAvailableIps());
+ root.setLong("violatesParentHostPolicy", allocationFailureReasonList.violatesParentHostPolicy());
+ }
+
+ @Override
+ public void render(OutputStream stream) throws IOException {
+ if (json) new JsonFormat(true).encode(stream, slime);
+ else stream.write(text.toString().getBytes());
+ }
+
+ @Override
+ public String getContentType() {
+ return json ? "application/json" : "text/plain";
+ }
+}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesApiHandler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesApiHandler.java
index 22318f1ddb4..e036124e489 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesApiHandler.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesApiHandler.java
@@ -102,6 +102,7 @@ public class NodesApiHandler extends LoggingRequestHandler {
if (path.equals( "/nodes/v2/command/")) return ResourcesResponse.fromStrings(request.getUri(), "restart", "reboot");
if (path.equals( "/nodes/v2/maintenance/")) return new JobsResponse(nodeRepository.jobControl());
if (path.equals( "/nodes/v2/upgrade/")) return new UpgradeResponse(nodeRepository.infrastructureVersions(), nodeRepository.osVersions(), nodeRepository.dockerImages());
+ if (path.startsWith("/nodes/v2/capacity/")) return new HostCapacityResponse(nodeRepository, request);
throw new NotFoundException("Nothing at path '" + path + "'");
}
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java
index bfb24d30284..35fa5adaeff 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
+import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
@@ -819,6 +820,30 @@ public class RestApiTest {
"{\"message\":\"Cancelled outstanding requests for firmware checks\"}");
}
+ @Test
+ public void test_capacity() throws Exception {
+ assertFile(new Request("http://localhost:8080/nodes/v2/capacity/?json=true"), "capacity-zone.json");
+
+ List<String> hostsToRemove = List.of(
+ "%22dockerhost1.yahoo.com%22",
+ "%22dockerhost2.yahoo.com%22",
+ "%22dockerhost3.yahoo.com%22",
+ "%22dockerhost4.yahoo.com%22"
+ );
+ String requestUriTemplate =
+ "http://localhost:8080/nodes/v2/capacity/?json=true&hosts=[%s]"
+ .replaceAll("\\[", "%%5B")
+ .replaceAll("]", "%%5D");
+
+ assertFile(new Request(String.format(requestUriTemplate,
+ String.join(",", hostsToRemove.subList(0, 3)))),
+ "capacity-hostremoval-possible.json");
+ assertFile(new Request(String.format(requestUriTemplate,
+ String.join(",", hostsToRemove))),
+ "capacity-hostremoval-impossible.json");
+ }
+
+
/** Tests the rendering of each node separately to make it easier to find errors */
@Test
public void test_single_node_rendering() throws Exception {
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/capacity-hostremoval-impossible.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/capacity-hostremoval-impossible.json
new file mode 100644
index 00000000000..f3c73e61c91
--- /dev/null
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/capacity-hostremoval-impossible.json
@@ -0,0 +1,20 @@
+{
+ "hostsToRemove": [
+ "dockerhost1.yahoo.com",
+ "dockerhost2.yahoo.com",
+ "dockerhost3.yahoo.com",
+ "dockerhost4.yahoo.com"
+ ],
+ "removalPossible": false,
+ "history": [
+ {
+ "tenant": "host4.yahoo.com",
+ "newParent": "dockerhost5.yahoo.com",
+ "eligibleParents": 1
+ },
+ {
+ "tenant": "test-node-pool-101-2",
+ "eligibleParents": 0
+ }
+ ]
+} \ No newline at end of file
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/capacity-hostremoval-possible.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/capacity-hostremoval-possible.json
new file mode 100644
index 00000000000..b896fd9d63a
--- /dev/null
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/capacity-hostremoval-possible.json
@@ -0,0 +1,20 @@
+{
+ "hostsToRemove": [
+ "dockerhost1.yahoo.com",
+ "dockerhost2.yahoo.com",
+ "dockerhost3.yahoo.com"
+ ],
+ "removalPossible": true,
+ "history": [
+ {
+ "tenant": "host4.yahoo.com",
+ "newParent": "dockerhost4.yahoo.com",
+ "eligibleParents": 2
+ },
+ {
+ "tenant": "test-node-pool-101-2",
+ "newParent": "dockerhost5.yahoo.com",
+ "eligibleParents": 1
+ }
+ ]
+} \ No newline at end of file
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/capacity-zone.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/capacity-zone.json
new file mode 100644
index 00000000000..9895948e69d
--- /dev/null
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/responses/capacity-zone.json
@@ -0,0 +1,46 @@
+{
+ "totalHosts": 5,
+ "couldLoseHosts": 4,
+ "failedTenantParent": "dockerhost1.yahoo.com",
+ "failedTenant": "host4.yahoo.com",
+ "failedTenantResources": "[vcpu: 1.0, memory: 1.0 Gb, disk 100.0 Gb]",
+ "failedTenantAllocation": "allocated to tenant3.application3.instance3 as 'content/id3/0/0'",
+ "hostCandidateRejectionReasons": {
+ "singularReasonFailures": {
+ "insufficientVcpu": 0,
+ "insufficientMemoryGb": 0,
+ "insufficientDiskGb": 0,
+ "incompatibleDiskSpeed": 0,
+ "insufficientAvailableIps": 0,
+ "violatesParentHostPolicy": 1
+ },
+ "totalFailures": {
+ "insufficientVcpu": 0,
+ "insufficientMemoryGb": 0,
+ "insufficientDiskGb": 0,
+ "incompatibleDiskSpeed": 0,
+ "insufficientAvailableIps": 0,
+ "violatesParentHostPolicy": 1
+ }
+ },
+ "details": {
+ "hostsToRemove": [
+ "dockerhost2.yahoo.com",
+ "dockerhost1.yahoo.com",
+ "dockerhost4.yahoo.com",
+ "dockerhost3.yahoo.com"
+ ],
+ "removalPossible": false,
+ "history": [
+ {
+ "tenant": "test-node-pool-101-2",
+ "newParent": "dockerhost5.yahoo.com",
+ "eligibleParents": 1
+ },
+ {
+ "tenant": "host4.yahoo.com",
+ "eligibleParents": 0
+ }
+ ]
+ }
+} \ No newline at end of file