diff options
author | Håkon Hallingstad <hakon@yahoo-inc.com> | 2017-07-11 12:23:14 +0200 |
---|---|---|
committer | Håkon Hallingstad <hakon@yahoo-inc.com> | 2017-07-11 12:23:14 +0200 |
commit | 52ff920f1509f85993bd67164fa9e409c0cd81bd (patch) | |
tree | 83042a065c7d0a774e2afedc082a76aaaa808ddd /node-repository/src | |
parent | e522f9accfee1a5b616d284dffb5d035ffc0a5d3 (diff) |
Propagate hardware failure to children
Diffstat (limited to 'node-repository/src')
5 files changed, 107 insertions, 24 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java index fde5669bfd5..96eae71d403 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java @@ -511,8 +511,7 @@ public class NodeRepository extends AbstractComponent { * * @return the written node for convenience */ - public Node write(Node node) { return db.writeTo(node.state(), node, - Agent.system, Optional.empty()); } + public Node write(Node node) { return db.writeTo(node.state(), node, Agent.system, Optional.empty()); } /** * Writes these nodes after they have changed some internal state but NOT changed their state field. @@ -520,17 +519,7 @@ public class NodeRepository extends AbstractComponent { * * @return the written nodes for convenience */ - public List<Node> write(List<Node> nodes) { - if (nodes.isEmpty()) return Collections.emptyList(); - - // decide current state and make sure all nodes have it (alternatively we could create a transaction here) - Node.State state = nodes.get(0).state(); - for (Node node : nodes) { - if ( node.state() != state) - throw new IllegalArgumentException("Multiple states: " + node.state() + " and " + state); - } - return db.writeTo(state, nodes, Agent.system, Optional.empty()); - } + public List<Node> write(List<Node> nodes) { return db.writeTo(nodes, Agent.system, Optional.empty()); } /** * Performs an operation requiring locking on all nodes matching some filter. diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java index 2ad6a18b792..cd63599fed6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/CuratorDatabaseClient.java @@ -26,10 +26,12 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.stream.Collectors; /** * Client which reads and writes nodes to a curator database. @@ -119,6 +121,29 @@ public class CuratorDatabaseClient { } /** + * Writes the given nodes and returns a copy of the incoming nodes in their persisted state. + * + * @param nodes the list of nodes to write + * @param agent the agent causing this change + * @return the nodes in their persisted state + */ + public List<Node> writeTo(List<Node> nodes, Agent agent, Optional<String> reason) { + if (nodes.isEmpty()) return Collections.emptyList(); + + List<Node> writtenNodes = new ArrayList<>(nodes.size()); + + try (NestedTransaction nestedTransaction = new NestedTransaction()) { + Map<Node.State, List<Node>> nodesByState = nodes.stream().collect(Collectors.groupingBy(Node::state)); + for (Map.Entry<Node.State, List<Node>> entry : nodesByState.entrySet()) { + writtenNodes.addAll(writeTo(entry.getKey(), entry.getValue(), agent, reason, nestedTransaction)); + nestedTransaction.commit(); + } + } + + return writtenNodes; + } + + /** * Writes the given nodes to the given state (whether or not they are already in this state or another), * and returns a copy of the incoming nodes in their persisted state. * diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodePatcher.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodePatcher.java index 2ea7dbfb0d1..9e7b178c2bc 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodePatcher.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodePatcher.java @@ -3,21 +3,25 @@ package com.yahoo.vespa.hosted.provision.restapi.v2; import com.yahoo.component.Version; import com.yahoo.config.provision.DockerImage; +import com.yahoo.config.provision.NodeFlavors; +import com.yahoo.config.provision.NodeType; import com.yahoo.io.IOUtils; import com.yahoo.slime.Inspector; import com.yahoo.slime.Type; import com.yahoo.vespa.config.SlimeUtils; import com.yahoo.vespa.hosted.provision.Node; +import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Allocation; -import com.yahoo.config.provision.NodeFlavors; import com.yahoo.vespa.hosted.provision.node.Status; import java.io.IOException; import java.io.InputStream; -import java.time.Clock; +import java.util.ArrayList; +import java.util.List; import java.util.Optional; import java.util.Set; import java.util.TreeSet; +import java.util.stream.Collectors; /** * A class which can take a partial JSON node/v2 node JSON structure and apply it to a node object. @@ -27,18 +31,19 @@ import java.util.TreeSet; */ public class NodePatcher { + public static final String HARDWARE_FAILURE_TYPE = "hardwareFailureType"; private final NodeFlavors nodeFlavors; private final Inspector inspector; - private final Clock clock; + private final NodeRepository nodeRepository; private Node node; - public NodePatcher(NodeFlavors nodeFlavors, InputStream json, Node node, Clock clock) { + public NodePatcher(NodeFlavors nodeFlavors, InputStream json, Node node, NodeRepository nodeRepository) { try { this.nodeFlavors = nodeFlavors; inspector = SlimeUtils.jsonToSlime(IOUtils.readBytes(json, 1000 * 1000)).get(); this.node = node; - this.clock = clock; + this.nodeRepository = nodeRepository; } catch (IOException e) { throw new RuntimeException("Error reading request body", e); @@ -46,9 +51,11 @@ public class NodePatcher { } /** - * Apply the json to the node and return the resulting node + * Apply the json to the node and return all nodes affected by the patch. + * More than 1 node may be affected if e.g. the node is a Docker host, which may have + * children that must be updated in a consistent manner. */ - public Node apply() { + public List<Node> apply() { inspector.traverse((String name, Inspector value) -> { try { node = applyField(name, value); @@ -57,7 +64,29 @@ public class NodePatcher { throw new IllegalArgumentException("Could not set field '" + name + "'", e); } } ); - return node; + + + List<Node> nodes = new ArrayList<>(); + if (node.type() == NodeType.host) { + nodes.addAll(modifiedDockerChildNodes()); + } + nodes.add(node); + + return nodes; + } + + private List<Node> modifiedDockerChildNodes() { + List<Node> children = nodeRepository.getChildNodes(node.hostname()); + boolean modified = false; + + if (inspector.field(HARDWARE_FAILURE_TYPE).valid()) { + modified = true; + children = children.stream() + .map(node -> node.with(node.status().withHardwareFailure(toHardwareFailureType(asString(inspector.field(HARDWARE_FAILURE_TYPE)))))) + .collect(Collectors.toList()); + } + + return modified ? children : new ArrayList<>(); } private Node applyField(String name, Inspector value) { @@ -65,7 +94,7 @@ public class NodePatcher { case "convergedStateVersion" : return node; // TODO: Ignored, can be removed when callers no longer include this field case "currentRebootGeneration" : - return node.withCurrentRebootGeneration(asLong(value), clock.instant()); + return node.withCurrentRebootGeneration(asLong(value), nodeRepository.clock().instant()); case "currentRestartGeneration" : return patchCurrentRestartGeneration(asLong(value)); case "currentDockerImage" : @@ -83,7 +112,7 @@ public class NodePatcher { return node.with(node.status().setFailCount(asLong(value).intValue())); case "flavor" : return node.with(nodeFlavors.getFlavorOrThrow(asString(value))); - case "hardwareFailureType" : + case HARDWARE_FAILURE_TYPE: return node.with(node.status().withHardwareFailure(toHardwareFailureType(asString(value)))); case "parentHostname" : return node.withParentHostname(asString(value)); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesApiHandler.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesApiHandler.java index 459b358415c..6d6f52aa831 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesApiHandler.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/v2/NodesApiHandler.java @@ -150,7 +150,7 @@ public class NodesApiHandler extends LoggingRequestHandler { String path = request.getUri().getPath(); if ( ! path.startsWith("/nodes/v2/node/")) throw new NotFoundException("Nothing at '" + path + "'"); Node node = nodeFromRequest(request); - nodeRepository.write(new NodePatcher(nodeFlavors, request.getData(), node, nodeRepository.clock()).apply()); + nodeRepository.write(new NodePatcher(nodeFlavors, request.getData(), node, nodeRepository).apply()); return new MessageResponse("Updated " + node.hostname()); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java index c46680b2fe0..53b14c2e1ec 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/v2/RestApiTest.java @@ -6,7 +6,10 @@ import com.yahoo.application.container.JDisc; import com.yahoo.application.container.handler.Request; import com.yahoo.application.container.handler.Response; import com.yahoo.io.IOUtils; +import com.yahoo.slime.Cursor; +import com.yahoo.slime.Slime; import com.yahoo.text.Utf8; +import com.yahoo.vespa.config.SlimeUtils; import com.yahoo.vespa.hosted.provision.testutils.ContainerConfig; import org.junit.After; import org.junit.Before; @@ -14,8 +17,10 @@ import org.junit.Test; import java.io.File; import java.io.IOException; +import java.nio.charset.CharacterCodingException; import java.nio.charset.StandardCharsets; import java.util.Arrays; +import java.util.Optional; import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -436,6 +441,23 @@ public class RestApiTest { } @Test + public void test_hardware_patching_of_docker_host() throws Exception { + assertHardwareFailure(new Request("http://localhost:8080/nodes/v2/node/host5.yahoo.com"), Optional.of(false)); + assertHardwareFailure(new Request("http://localhost:8080/nodes/v2/node/parent1.yahoo.com"), Optional.of(false)); + + assertResponse(new Request("http://localhost:8080/nodes/v2/node/parent1.yahoo.com", + Utf8.toBytes("{" + + "\"hardwareFailureType\": \"memory_mcelog\"" + + "}" + ), + Request.Method.PATCH), + "{\"message\":\"Updated parent1.yahoo.com\"}"); + + assertHardwareFailure(new Request("http://localhost:8080/nodes/v2/node/host5.yahoo.com"), Optional.of(true)); + assertHardwareFailure(new Request("http://localhost:8080/nodes/v2/node/parent1.yahoo.com"), Optional.of(true)); + } + + @Test public void test_node_patch_to_remove_docker_ready_fields() throws Exception { assertResponse(new Request("http://localhost:8080/nodes/v2/node/host5.yahoo.com", Utf8.toBytes("{" + @@ -500,6 +522,24 @@ public class RestApiTest { "],"; } + private Optional<Boolean> getHardwareFailure(String json) { + Slime slime = SlimeUtils.jsonToSlime(json.getBytes()); + Cursor hardwareFailure = slime.get().field("hardwareFailure"); + if (!hardwareFailure.valid()) { + return Optional.empty(); + } + + return Optional.of(hardwareFailure.asBool()); + } + + private void assertHardwareFailure(Request request, Optional<Boolean> expectedHardwareFailure) throws CharacterCodingException { + Response response = container.handleRequest(request); + assertEquals(response.getStatus(), 200); + String json = response.getBodyAsString(); + Optional<Boolean> actualHardwareFailure = getHardwareFailure(json); + assertEquals(expectedHardwareFailure, actualHardwareFailure); + } + /** Asserts a particular response and 200 as response status */ private void assertResponse(Request request, String responseMessage) throws IOException { assertResponse(request, 200, responseMessage); |