From 9b516b5c0d8d0a679c321f1c061afadf6566e9d4 Mon Sep 17 00:00:00 2001 From: Håkon Hallingstad Date: Wed, 28 Feb 2024 16:00:52 +0100 Subject: Avoid expiring failed host with children to dirty --- .../hosted/provision/maintenance/FailedExpirer.java | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'node-repository/src/main/java/com/yahoo/vespa') diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java index c3fea72fab9..ced1776bb62 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java @@ -108,7 +108,22 @@ public class FailedExpirer extends NodeRepositoryMaintainer { return Optional.empty(); } } else { - return Optional.of(nodeRepository.nodes().deallocate(node, Agent.FailedExpirer, "Expired by FailedExpirer")); + List childrenBlockingDirtying = children + .stream() + // Examples: a failed child node may have an index we want to preserve. A dirty child node has + // log we want to sync. A parked child w/o wTD may have been parked by an operator for inspection. + .filter(child -> child.state() != Node.State.parked || !child.status().wantToDeprovision()) + .map(Node::hostname) + .toList(); + + if (childrenBlockingDirtying.isEmpty()) { + return Optional.of(nodeRepository.nodes().deallocate(node, Agent.FailedExpirer, "Expired by FailedExpirer")); + } else { + log.info(String.format("Expired failed host %s was not dirtied because it has children: %s", + node.hostname(), String.join(", ", childrenBlockingDirtying))); + return Optional.empty(); + } + } } -- cgit v1.2.3