diff options
Diffstat (limited to 'node-repository/src/main/java/com')
9 files changed, 63 insertions, 31 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java index 9d9a1304418..368a8da0f90 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java @@ -12,7 +12,6 @@ import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.jdisc.Metric; import com.yahoo.lang.MutableInteger; -import com.yahoo.transaction.Mutex; import com.yahoo.vespa.flags.FlagSource; import com.yahoo.vespa.flags.JacksonFlag; import com.yahoo.vespa.flags.ListFlag; @@ -77,16 +76,14 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { @Override protected double maintain() { - try (Mutex lock = nodeRepository().nodes().lockUnallocated()) { - NodeList nodes = nodeRepository().nodes().list(); - resumeProvisioning(nodes, lock); - convergeToCapacity(nodes); - } + NodeList nodes = nodeRepository().nodes().list(); + resumeProvisioning(nodes); + convergeToCapacity(nodes); return 1.0; } /** Resume provisioning of already provisioned hosts and their children */ - private void resumeProvisioning(NodeList nodes, Mutex lock) { + private void resumeProvisioning(NodeList nodes) { Map<String, Set<Node>> nodesByProvisionedParentHostname = nodes.nodeType(NodeType.tenant, NodeType.config, NodeType.controller) .asList() @@ -97,9 +94,11 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { nodes.state(Node.State.provisioned).nodeType(NodeType.host, NodeType.confighost, NodeType.controllerhost).forEach(host -> { Set<Node> children = nodesByProvisionedParentHostname.getOrDefault(host.hostname(), Set.of()); try { - List<Node> updatedNodes = hostProvisioner.provision(host, children); - verifyDns(updatedNodes); - nodeRepository().nodes().write(updatedNodes, lock); + try (var lock = nodeRepository().nodes().lockUnallocated()) { + List<Node> updatedNodes = hostProvisioner.provision(host, children); + verifyDns(updatedNodes); + nodeRepository().nodes().write(updatedNodes, lock); + } } catch (IllegalArgumentException | IllegalStateException e) { log.log(Level.INFO, "Could not provision " + host.hostname() + " with " + children.size() + " children, will retry in " + interval() + ": " + Exceptions.toMessageString(e)); @@ -189,17 +188,12 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { private List<Node> candidatesForRemoval(List<Node> nodes) { Map<String, Node> hostsByHostname = new HashMap<>(nodes.stream() - .filter(node -> { - switch (node.type()) { - case host: - // TODO: Mark empty tenant hosts as wanttoretire & wanttodeprovision elsewhere, then handle as confighost here - return node.state() != Node.State.parked || node.status().wantToDeprovision(); - case confighost: - case controllerhost: - return node.state() == Node.State.parked && node.status().wantToDeprovision(); - default: - return false; - } + .filter(node -> switch (node.type()) { + case host -> + // TODO: Mark empty tenant hosts as wanttoretire & wanttodeprovision elsewhere, then handle as confighost here + node.state() != Node.State.parked || node.status().wantToDeprovision(); + case confighost, controllerhost -> node.state() == Node.State.parked && node.status().wantToDeprovision(); + default -> false; }) .collect(Collectors.toMap(Node::hostname, Function.identity()))); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java index ac804f99cd3..c2d4506a28c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.provision.node; import com.google.common.collect.ImmutableMap; import com.yahoo.vespa.hosted.provision.Node; +import java.time.Duration; import java.time.Instant; import java.util.ArrayList; import java.util.Collection; @@ -50,6 +51,12 @@ public class History { return builder.build(); } + /** Returns the age of this node as best as we can determine: The time since the first event registered for it */ + public Duration age(Instant now) { + Instant oldestEventTime = events.values().stream().map(event -> event.at()).sorted().findFirst().orElse(now); + return Duration.between(oldestEventTime, now); + } + /** Returns the last event of given type, if it is present in this history */ public Optional<Event> event(Event.Type type) { return Optional.ofNullable(events.get(type)); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/filter/NodeListFilter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/filter/NodeListFilter.java index 8578e3eb5ec..2b790ff7392 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/filter/NodeListFilter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/filter/NodeListFilter.java @@ -30,4 +30,5 @@ public class NodeListFilter { public static Predicate<Node> from(List<Node> nodes) { return makePredicate(nodes); } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java index 30fd2713017..4178d4a6328 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/DelegatingOsUpgrader.java @@ -7,6 +7,7 @@ import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.filter.NodeListFilter; +import java.time.Instant; import java.util.Objects; import java.util.Optional; import java.util.logging.Logger; @@ -39,8 +40,10 @@ public class DelegatingOsUpgrader implements OsUpgrader { public void upgradeTo(OsVersionTarget target) { NodeList activeNodes = nodeRepository.nodes().list(Node.State.active).nodeType(target.nodeType()); int numberToUpgrade = Math.max(0, maxActiveUpgrades - activeNodes.changingOsVersionTo(target.version()).size()); + Instant now = nodeRepository.clock().instant(); NodeList nodesToUpgrade = activeNodes.not().changingOsVersionTo(target.version()) .osVersionIsBefore(target.version()) + .matching(node -> canUpgradeAt(now, node)) .byIncreasingOsVersion() .first(numberToUpgrade); if (nodesToUpgrade.size() == 0) return; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java index 5310ef339ed..4140de76368 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsUpgrader.java @@ -2,6 +2,9 @@ package com.yahoo.vespa.hosted.provision.os; import com.yahoo.config.provision.NodeType; +import com.yahoo.vespa.hosted.provision.Node; + +import java.time.Instant; /** * Interface for an OS upgrader. @@ -16,4 +19,9 @@ public interface OsUpgrader { /** Disable OS upgrade for all nodes of given type */ void disableUpgrade(NodeType type); + /** Returns whether node can upgrade at given instant */ + default boolean canUpgradeAt(Instant instant, Node node) { + return true; + } + } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java index 7c6d1cb69db..440046ab818 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/OsVersions.java @@ -84,7 +84,7 @@ public class OsVersions { Version target = Optional.ofNullable(change.targets().get(nodeType)) .map(OsVersionTarget::version) .orElse(Version.emptyVersion); - chooseUpgrader(nodeType, target).disableUpgrade(nodeType); + chooseUpgrader(nodeType, Optional.of(target)).disableUpgrade(nodeType); return change.withoutTarget(nodeType); }); } @@ -120,7 +120,7 @@ public class OsVersions { try (Lock lock = db.lockOsVersionChange()) { OsVersionTarget target = readChange().targets().get(nodeType); if (target == null) return; // No target set for this type - OsUpgrader upgrader = chooseUpgrader(nodeType, target.version()); + OsUpgrader upgrader = chooseUpgrader(nodeType, Optional.of(target.version())); if (resume) { upgrader.upgradeTo(target); } else { @@ -129,17 +129,23 @@ public class OsVersions { } } + /** Returns whether node can be upgraded now */ + public boolean canUpgrade(Node node) { + return chooseUpgrader(node.type(), Optional.empty()).canUpgradeAt(nodeRepository.clock().instant(), node); + } + /** Returns the upgrader to use when upgrading given node type to target */ - private OsUpgrader chooseUpgrader(NodeType nodeType, Version target) { + private OsUpgrader chooseUpgrader(NodeType nodeType, Optional<Version> target) { if (reprovisionToUpgradeOs) { return new RetiringOsUpgrader(nodeRepository); } // Require rebuild if we have any nodes of this type on a major version lower than target - boolean rebuildRequired = nodeRepository.nodes().list(Node.State.active).nodeType(nodeType).stream() + boolean rebuildRequired = target.isPresent() && + nodeRepository.nodes().list(Node.State.active).nodeType(nodeType).stream() .map(Node::status) .map(Status::osVersion) .anyMatch(osVersion -> osVersion.current().isPresent() && - osVersion.current().get().getMajor() < target.getMajor()); + osVersion.current().get().getMajor() < target.get().getMajor()); if (rebuildRequired) { return new RebuildingOsUpgrader(nodeRepository); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java index efc377e6cc3..f96effe9e10 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RebuildingOsUpgrader.java @@ -47,7 +47,7 @@ public class RebuildingOsUpgrader implements OsUpgrader { public void upgradeTo(OsVersionTarget target) { NodeList allNodes = nodeRepository.nodes().list(); Instant now = nodeRepository.clock().instant(); - rebuildableHosts(target, allNodes).forEach(host -> rebuild(host, target.version(), now)); + rebuildableHosts(target, allNodes, now).forEach(host -> rebuild(host, target.version(), now)); } @Override @@ -62,7 +62,7 @@ public class RebuildingOsUpgrader implements OsUpgrader { return Math.max(0, limit - hostsOfType.rebuilding().size()); } - private List<Node> rebuildableHosts(OsVersionTarget target, NodeList allNodes) { + private List<Node> rebuildableHosts(OsVersionTarget target, NodeList allNodes, Instant now) { NodeList hostsOfTargetType = allNodes.nodeType(target.nodeType()); int rebuildLimit = rebuildLimit(target.nodeType(), hostsOfTargetType); @@ -76,6 +76,7 @@ public class RebuildingOsUpgrader implements OsUpgrader { NodeList candidates = hostsOfTargetType.state(Node.State.active) .not().rebuilding() .osVersionIsBefore(target.version()) + .matching(node -> canUpgradeAt(now, node)) .byIncreasingOsVersion(); for (Node host : candidates) { if (hostsToRebuild.size() == rebuildLimit) break; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java index d923c78a929..79b7441cc34 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/os/RetiringOsUpgrader.java @@ -26,6 +26,9 @@ public class RetiringOsUpgrader implements OsUpgrader { private static final Logger LOG = Logger.getLogger(RetiringOsUpgrader.class.getName()); + /** The duration this leaves new nodes alone before scheduling any upgrade */ + static final Duration GRACE_PERIOD = Duration.ofDays(30); + protected final NodeRepository nodeRepository; public RetiringOsUpgrader(NodeRepository nodeRepository) { @@ -33,21 +36,27 @@ public class RetiringOsUpgrader implements OsUpgrader { } @Override - public final void upgradeTo(OsVersionTarget target) { + public void upgradeTo(OsVersionTarget target) { NodeList allNodes = nodeRepository.nodes().list(); Instant now = nodeRepository.clock().instant(); NodeList candidates = candidates(now, target, allNodes); candidates.not().deprovisioning() + .matching(node -> canUpgradeAt(now, node)) .byIncreasingOsVersion() .first(1) .forEach(node -> deprovision(node, target.version(), now)); } @Override - public final void disableUpgrade(NodeType type) { + public void disableUpgrade(NodeType type) { // No action needed in this implementation. } + @Override + public boolean canUpgradeAt(Instant instant, Node node) { + return node.history().age(instant).compareTo(GRACE_PERIOD) > 0; + } + /** Returns nodes that are candidates for upgrade */ private NodeList candidates(Instant instant, OsVersionTarget target, NodeList allNodes) { NodeList activeNodes = allNodes.state(Node.State.active).nodeType(target.nodeType()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java index 3659166c9da..efd76187bc6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java @@ -161,7 +161,10 @@ class NodesResponse extends SlimeJsonResponse { object.setLong("rebootGeneration", node.status().reboot().wanted()); object.setLong("currentRebootGeneration", node.status().reboot().current()); node.status().osVersion().current().ifPresent(version -> object.setString("currentOsVersion", version.toFullString())); - node.status().osVersion().wanted().ifPresent(version -> object.setString("wantedOsVersion", version.toFullString())); + node.status().osVersion().wanted().ifPresent(version -> { + object.setString("wantedOsVersion", version.toFullString()); + object.setBool("deferOsUpgrade", !nodeRepository.osVersions().canUpgrade(node)); + }); node.status().firmwareVerifiedAt().ifPresent(instant -> object.setLong("currentFirmwareCheck", instant.toEpochMilli())); if (node.type().isHost()) nodeRepository.firmwareChecks().requiredAfter().ifPresent(after -> object.setLong("wantedFirmwareCheck", after.toEpochMilli())); |