diff options
author | Jon Bratseth <bratseth@oath.com> | 2021-04-08 16:51:37 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-04-08 16:51:37 +0200 |
commit | 67c724dfc6f20ecde3fdf8a5997f7fe37eb83a06 (patch) | |
tree | cdd770b4fed1bddc6f8387a6bcaa8705d487de2f | |
parent | 248b9d5278345c4932b6cc9f598e3943be44b850 (diff) | |
parent | f58f74b95f60702587fe053eda8f5c2ad4fc5f9f (diff) |
Merge pull request #17310 from vespa-engine/bratseth/wantToFail
Move nodes to 'failed' during activate
13 files changed, 99 insertions, 39 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java index 80dfafb5116..de72024cb77 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java @@ -194,6 +194,17 @@ public final class Node implements Nodelike { } /** + * Returns a copy of this where wantToFail is set to true and history is updated to reflect this. + */ + public Node withWantToFail(boolean wantToFail, Agent agent, String reason, Instant at) { + Node node = this.with(status.withWantToFail(wantToFail)); + if (wantToFail) + node = node.with(history.with(new History.Event(History.Event.Type.wantToFail, agent, at))); + return node; + + } + + /** * Returns a copy of this node with wantToRetire and wantToDeprovision set to the given values and updated history. * * If both given wantToRetire and wantToDeprovision are equal to the current values, the method is no-op. diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java index 24f94b9d63b..5ab2c272b00 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeList.java @@ -91,6 +91,11 @@ public class NodeList extends AbstractFilteringList<Node, NodeList> { !node.status().vespaVersion().get().equals(node.allocation().get().membership().cluster().vespaVersion())); } + /** Returns the subset of nodes with want to fail set to true */ + public NodeList failing() { + return matching(node -> node.status().wantToFail()); + } + /** Returns the subset of nodes that are currently changing their OS version to given version */ public NodeList changingOsVersionTo(Version version) { return matching(node -> node.status().osVersion().changingTo(version)); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java index eb9c8300724..ac6ecd98fac 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java @@ -277,7 +277,7 @@ public class NodeFailer extends NodeRepositoryMaintainer { } if (! allTenantNodesFailedOutSuccessfully) return false; - node = nodeRepository().nodes().fail(node.hostname(), Agent.NodeFailer, reason); + wantToFail(node, true, reason, lock); try { deployment.get().activate(); return true; @@ -287,17 +287,20 @@ public class NodeFailer extends NodeRepositoryMaintainer { Exceptions.toMessageString(e)); return true; } catch (RuntimeException e) { - // The expected reason for deployment to fail here is that there is no capacity available to redeploy. - // In that case we should leave the node in the active state to avoid failing additional nodes. - nodeRepository().nodes().reactivate(node.hostname(), Agent.NodeFailer, - "Failed to redeploy after being failed by NodeFailer"); - log.log(Level.WARNING, "Attempted to fail " + node + " for " + node.allocation().get().owner() + - ", but redeploying without the node failed", e); + // Reset want to fail: We'll retry failing unless it heals in the meantime + nodeRepository().nodes().node(node.hostname()) + .ifPresent(n -> wantToFail(n, false, "Could not fail", lock)); + log.log(Level.WARNING, "Could not fail " + node + " for " + node.allocation().get().owner() + + " for " + reason + ": " + Exceptions.toMessageString(e)); return false; } } } + private void wantToFail(Node node, boolean wantToFail, String reason, Mutex lock) { + nodeRepository().nodes().write(node.withWantToFail(wantToFail, Agent.NodeFailer, reason, clock().instant()), lock); + } + /** Returns true if node failing should be throttled */ private boolean throttle(Node node) { if (throttlePolicy == ThrottlePolicy.disabled) return false; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java index 0c5a8ea1d9f..b5780cd86a0 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java @@ -152,6 +152,8 @@ public class History { wantToRetire(false), // The node was scheduled for retirement (soft) preferToRetire(false), + // This node was scheduled for failing + wantToFail, // The active node was retired retired, // The active node went down according to the service monitor diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java index cd0928ef320..b4d72a35b80 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java @@ -252,6 +252,14 @@ public class Nodes { } + /** + * Fails these nodes in a transaction and returns the nodes in the new state which will hold if the + * transaction commits. + */ + public List<Node> fail(List<Node> nodes, ApplicationTransaction transaction) { + return db.writeTo(Node.State.failed, nodes, Agent.application, Optional.of("Failed by application"), transaction.nested()); + } + /** Move nodes to the dirty state */ public List<Node> deallocate(List<Node> nodes, Agent agent, String reason) { return performOn(NodeListFilter.from(nodes), (node, lock) -> deallocate(node, agent, reason)); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java index 8964977091c..39d0d80b88f 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Status.java @@ -23,6 +23,7 @@ public class Status { private final boolean wantToDeprovision; private final boolean wantToRebuild; private final boolean preferToRetire; + private final boolean wantToFail; private final OsVersion osVersion; private final Optional<Instant> firmwareVerifiedAt; @@ -34,6 +35,7 @@ public class Status { boolean wantToDeprovision, boolean wantToRebuild, boolean preferToRetire, + boolean wantToFail, OsVersion osVersion, Optional<Instant> firmwareVerifiedAt) { this.reboot = Objects.requireNonNull(generation, "Generation must be non-null"); @@ -50,76 +52,79 @@ public class Status { this.wantToDeprovision = wantToDeprovision; this.wantToRebuild = wantToRebuild; this.preferToRetire = preferToRetire; + this.wantToFail = wantToFail; this.osVersion = Objects.requireNonNull(osVersion, "OS version must be non-null"); this.firmwareVerifiedAt = Objects.requireNonNull(firmwareVerifiedAt, "Firmware check instant must be non-null"); } /** Returns a copy of this with the reboot generation changed */ - public Status withReboot(Generation reboot) { return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, osVersion, firmwareVerifiedAt); } + public Status withReboot(Generation reboot) { return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); } /** Returns the reboot generation of this node */ public Generation reboot() { return reboot; } /** Returns a copy of this with the vespa version changed */ - public Status withVespaVersion(Version version) { return new Status(reboot, Optional.of(version), containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, osVersion, firmwareVerifiedAt); } + public Status withVespaVersion(Version version) { return new Status(reboot, Optional.of(version), containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); } /** Returns the Vespa version installed on the node, if known */ public Optional<Version> vespaVersion() { return vespaVersion; } /** Returns a copy of this with the container image changed */ - public Status withContainerImage(DockerImage containerImage) { return new Status(reboot, vespaVersion, Optional.of(containerImage), failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, osVersion, firmwareVerifiedAt); } + public Status withContainerImage(DockerImage containerImage) { return new Status(reboot, vespaVersion, Optional.of(containerImage), failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); } /** Returns the container image the node is running, if any */ public Optional<DockerImage> containerImage() { return containerImage; } - public Status withIncreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount + 1, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, osVersion, firmwareVerifiedAt); } + public Status withIncreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount + 1, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); } - public Status withDecreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount - 1, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, osVersion, firmwareVerifiedAt); } + public Status withDecreasedFailCount() { return new Status(reboot, vespaVersion, containerImage, failCount - 1, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); } - public Status withFailCount(int value) { return new Status(reboot, vespaVersion, containerImage, value, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, osVersion, firmwareVerifiedAt); } + public Status withFailCount(int value) { return new Status(reboot, vespaVersion, containerImage, value, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); } /** Returns how many times this node has been moved to the failed state. */ public int failCount() { return failCount; } /** Returns a copy of this with the want to retire/deprovision/rebuild flags changed */ public Status withWantToRetire(boolean wantToRetire, boolean wantToDeprovision, boolean wantToRebuild) { - return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, osVersion, firmwareVerifiedAt); + return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); } /** * Returns whether this node is requested to retire. This is a hard request to retire, which allows any replacement * to increase node skew in the cluster. */ - public boolean wantToRetire() { - return wantToRetire; - } + public boolean wantToRetire() { return wantToRetire; } - /** Returns whether this node should be de-provisioned when possible. */ - public boolean wantToDeprovision() { - return wantToDeprovision; - } + /** + * Returns whether this node should be de-provisioned when possible. + */ + public boolean wantToDeprovision() { return wantToDeprovision; } /** Returns whether this node should be rebuilt when possible. */ - public boolean wantToRebuild() { - return wantToRebuild; - } + public boolean wantToRebuild() { return wantToRebuild; } /** * Returns whether this node is requested to retire. Unlike {@link Status#wantToRetire()}, this is a soft * request to retire, which will not allow any replacement to increase node skew in the cluster. */ - public boolean preferToRetire() { - return preferToRetire; + public boolean preferToRetire() { return preferToRetire; } + + /** Returns a copy of this with want to fail set to the given value */ + public Status withWantToFail(boolean wantToFail) { + return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); } + /** Returns whether this node should be failed */ + public boolean wantToFail() { return wantToFail; } + /** Returns a copy of this with prefer-to-retire set to given value */ public Status withPreferToRetire(boolean preferToRetire) { - return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, osVersion, firmwareVerifiedAt); + return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, firmwareVerifiedAt); } /** Returns a copy of this with the OS version set to given version */ public Status withOsVersion(OsVersion version) { - return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, version, firmwareVerifiedAt); + return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, version, firmwareVerifiedAt); } /** Returns the OS version of this node */ @@ -129,7 +134,7 @@ public class Status { /** Returns a copy of this with the firmwareVerifiedAt set to the given instant. */ public Status withFirmwareVerifiedAt(Instant instant) { - return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, osVersion, Optional.of(instant)); + return new Status(reboot, vespaVersion, containerImage, failCount, wantToRetire, wantToDeprovision, wantToRebuild, preferToRetire, wantToFail, osVersion, Optional.of(instant)); } /** Returns the last time this node had firmware that was verified to be up to date. */ @@ -140,7 +145,7 @@ public class Status { /** Returns the initial status of a newly provisioned node */ public static Status initial() { return new Status(Generation.initial(), Optional.empty(), Optional.empty(), 0, false, - false, false, false, OsVersion.EMPTY, Optional.empty()); + false, false, false, false, OsVersion.EMPTY, Optional.empty()); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java index 5c006f6d6a0..cc3fd75a22c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java @@ -84,6 +84,7 @@ public class NodeSerializer { private static final String wantToDeprovisionKey = "wantToDeprovision"; private static final String wantToRebuildKey = "wantToRebuild"; private static final String preferToRetireKey = "preferToRetire"; + private static final String wantToFailKey = "wantToFailKey"; private static final String osVersionKey = "osVersion"; private static final String wantedOsVersionKey = "wantedOsVersion"; private static final String firmwareCheckKey = "firmwareCheck"; @@ -165,6 +166,7 @@ public class NodeSerializer { object.setBool(wantToRetireKey, node.status().wantToRetire()); object.setBool(preferToRetireKey, node.status().preferToRetire()); object.setBool(wantToDeprovisionKey, node.status().wantToDeprovision()); + object.setBool(wantToFailKey, node.status().wantToFail()); object.setBool(wantToRebuildKey, node.status().wantToRebuild()); node.allocation().ifPresent(allocation -> toSlime(allocation, object.setObject(instanceKey))); toSlime(node.history(), object.setArray(historyKey)); @@ -275,6 +277,7 @@ public class NodeSerializer { object.field(wantToDeprovisionKey).asBool(), object.field(wantToRebuildKey).asBool(), object.field(preferToRetireKey).asBool(), + object.field(wantToFailKey).asBool(), new OsVersion(versionFromSlime(object.field(osVersionKey)), versionFromSlime(object.field(wantedOsVersionKey))), instantFromSlime(object.field(firmwareCheckKey))); @@ -416,6 +419,7 @@ public class NodeSerializer { case "reserved" : return History.Event.Type.reserved; case "activated" : return History.Event.Type.activated; case "wantToRetire": return History.Event.Type.wantToRetire; + case "wantToFail": return History.Event.Type.wantToFail; case "retired" : return History.Event.Type.retired; case "deactivated" : return History.Event.Type.deactivated; case "parked" : return History.Event.Type.parked; @@ -441,6 +445,7 @@ public class NodeSerializer { case reserved : return "reserved"; case activated : return "activated"; case wantToRetire: return "wantToRetire"; + case wantToFail: return "wantToFail"; case retired : return "retired"; case deactivated : return "deactivated"; case parked : return "parked"; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java index 03db7e0e7e5..cd0ec2c5d66 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/Activator.java @@ -88,7 +88,7 @@ class Activator { List<Node> activeToRemove = removeHostsFromList(hostnames, oldActive); activeToRemove = activeToRemove.stream().map(Node::unretire).collect(Collectors.toList()); // only active nodes can be retired. TODO: Move this line to deactivate - nodeRepository.nodes().deactivate(activeToRemove, transaction); // TODO: Pass activation time in this call and next line + deactivate(activeToRemove, transaction); // TODO: Pass activation time in this call and next line nodeRepository.nodes().activate(newActive, transaction.nested()); // activate also continued active to update node state rememberResourceChange(transaction, generation, activationTime, @@ -97,6 +97,12 @@ class Activator { unreserveParentsOf(reservedToActivate); } + private void deactivate(List<Node> toDeactivateList, ApplicationTransaction transaction) { + NodeList toDeactivate = NodeList.copyOf(toDeactivateList); + nodeRepository.nodes().deactivate(toDeactivate.not().failing().asList(), transaction); + nodeRepository.nodes().fail(toDeactivate.failing().asList(), transaction); + } + private void rememberResourceChange(ApplicationTransaction transaction, long generation, Instant at, NodeList oldNodes, NodeList newNodes) { Optional<Application> application = nodeRepository.applications().get(transaction.application()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java index 3d3a54774e4..720548c2d99 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeAllocation.java @@ -111,6 +111,7 @@ class NodeAllocation { if ( ! membership.cluster().satisfies(cluster)) continue; // wrong cluster id/type if ((! candidate.isSurplus || saturated()) && ! membership.cluster().group().equals(cluster.group())) continue; // wrong group and we can't or have no reason to change it if ( candidate.state() == Node.State.active && allocation.isRemovable()) continue; // don't accept; causes removal + if ( candidate.state() == Node.State.active && candidate.wantToFail()) continue; // don't accept; causes failing if ( indexes.contains(membership.index())) continue; // duplicate index (just to be sure) boolean resizeable = requestedNodes.considerRetiring() && candidate.isResizable; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java index 85e73245508..5adde885276 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodeCandidate.java @@ -57,7 +57,6 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat /** This node can be resized to the new NodeResources */ final boolean isResizable; - private NodeCandidate(NodeResources freeParentCapacity, Optional<Node> parent, boolean violatesSpares, boolean exclusiveSwitch, boolean isSurplus, boolean isNew, boolean isResizeable) { if (isResizeable && isNew) throw new IllegalArgumentException("A new node cannot be resizable"); @@ -79,6 +78,8 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat public abstract boolean preferToRetire(); + public abstract boolean wantToFail(); + public abstract Flavor flavor(); public abstract NodeCandidate allocate(ApplicationId owner, ClusterMembership membership, NodeResources requestedResources, Instant at); @@ -301,6 +302,9 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat public boolean preferToRetire() { return node.status().preferToRetire(); } @Override + public boolean wantToFail() { return node.status().wantToFail(); } + + @Override public Flavor flavor() { return node.flavor(); } @Override @@ -387,6 +391,9 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat public boolean preferToRetire() { return false; } @Override + public boolean wantToFail() { return false; } + + @Override public Flavor flavor() { return new Flavor(resources); } @Override @@ -483,6 +490,9 @@ public abstract class NodeCandidate implements Nodelike, Comparable<NodeCandidat public boolean preferToRetire() { return false; } @Override + public boolean wantToFail() { return false; } + + @Override public Flavor flavor() { return new Flavor(resources); } @Override diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java index db7bc394089..6ab8fc8ad49 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java @@ -206,7 +206,7 @@ public class NodePrioritizer { /** Returns whether we are allocating to replace a failed node */ private boolean isReplacement(NodeList nodesInCluster) { - int failedNodesInCluster = nodesInCluster.state(Node.State.failed).size(); + int failedNodesInCluster = nodesInCluster.failing().size(); if (failedNodesInCluster == 0) return false; return ! requestedNodes.fulfilledBy(nodesInCluster.size() - failedNodesInCluster); } diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java index 27f5b45173b..3f66f9cadc4 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailTester.java @@ -52,6 +52,7 @@ public class NodeFailTester { public static final ApplicationId tenantHostApp = ApplicationId.from("hosted-vespa", "tenant-host", "default"); public static final ApplicationId app1 = ApplicationId.from("foo1", "bar", "fuz"); public static final ApplicationId app2 = ApplicationId.from("foo2", "bar", "fuz"); + public static final ClusterSpec.Id testCluster = ClusterSpec.Id.from("test"); public static final NodeFlavors hostFlavors = FlavorConfigBuilder.createDummies("default", "docker"); private static final Duration downtimeLimitOneHour = Duration.ofMinutes(60); @@ -96,8 +97,8 @@ public class NodeFailTester { tester.createHostNodes(3); // Create applications - ClusterSpec clusterApp1 = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("test")).vespaVersion("6.42").build(); - ClusterSpec clusterApp2 = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("test")).vespaVersion("6.42").build(); + ClusterSpec clusterApp1 = ClusterSpec.request(ClusterSpec.Type.container, testCluster).vespaVersion("6.42").build(); + ClusterSpec clusterApp2 = ClusterSpec.request(ClusterSpec.Type.content, testCluster).vespaVersion("6.42").build(); Capacity capacity1 = Capacity.from(new ClusterResources(5, 1, nodeResources), false, true); Capacity capacity2 = Capacity.from(new ClusterResources(7, 1, nodeResources), false, true); @@ -125,8 +126,8 @@ public class NodeFailTester { // Create applications ClusterSpec clusterNodeAdminApp = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("node-admin")).vespaVersion("6.42").build(); - ClusterSpec clusterApp1 = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("test")).vespaVersion("6.75.0").build(); - ClusterSpec clusterApp2 = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("test")).vespaVersion("6.75.0").build(); + ClusterSpec clusterApp1 = ClusterSpec.request(ClusterSpec.Type.container, testCluster).vespaVersion("6.75.0").build(); + ClusterSpec clusterApp2 = ClusterSpec.request(ClusterSpec.Type.content, testCluster).vespaVersion("6.75.0").build(); Capacity allHosts = Capacity.fromRequiredNodeType(NodeType.host); Capacity capacity1 = Capacity.from(new ClusterResources(3, 1, new NodeResources(1, 4, 100, 0.3)), false, true); Capacity capacity2 = Capacity.from(new ClusterResources(5, 1, new NodeResources(1, 4, 100, 0.3)), false, true); @@ -150,7 +151,7 @@ public class NodeFailTester { NodeFailTester tester = new NodeFailTester(); // Create applications - ClusterSpec clusterApp = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("test")).vespaVersion("6.42").build(); + ClusterSpec clusterApp = ClusterSpec.request(ClusterSpec.Type.container, testCluster).vespaVersion("6.42").build(); Map<ApplicationId, MockDeployer.ApplicationContext> apps = Map.of(app1, new MockDeployer.ApplicationContext(app1, clusterApp, capacity)); tester.initializeMaintainers(apps); return tester; diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java index ca1fa2831b8..bb954058916 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailerTest.java @@ -42,7 +42,6 @@ import static org.mockito.Mockito.when; */ public class NodeFailerTest { - private static final Report badTotalMemorySizeReport = Report.basicReport( "badTotalMemorySize", HARD_FAIL, Instant.now(), "too low"); @@ -308,6 +307,10 @@ public class NodeFailerTest { tester.highestIndex(tester.nodeRepository.nodes().list(Node.State.active).owner(NodeFailTester.app1)).allocation().get().membership().index() > lastNode.allocation().get().membership().index()); + + assertEquals("Node failing does not cause recording of scaling events", + 1, + tester.nodeRepository.applications().get(NodeFailTester.app1).get().cluster(NodeFailTester.testCluster).get().scalingEvents().size()); } @Test |