diff options
author | Jon Bratseth <bratseth@gmail.com> | 2022-04-12 18:25:00 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-12 18:25:00 +0200 |
commit | 6e15dfa09d2fcdf9470fa873ec6f256ff7c42bb3 (patch) | |
tree | 2fdd489d7ea366bcaac79c994b392b2f4fe87acc /node-repository/src | |
parent | 4f22734462a0fac9dcc4fcdc8542218bb849c842 (diff) |
Revert "Preserve all node events"
Diffstat (limited to 'node-repository/src')
24 files changed, 148 insertions, 198 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java index 853c67dc169..3db68a27234 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/Node.java @@ -347,24 +347,19 @@ public final class Node implements Nodelike { allocation, history, type, reports, Optional.empty(), reservedTo, exclusiveToApplicationId, exclusiveToClusterType, switchHostname, trustStoreItems); } - /** Returns a copy of this with a history record saying it was detected to be down at given instant */ + /** Returns a copy of this with a history record saying it was detected to be down at this instant */ public Node downAt(Instant instant, Agent agent) { return with(history.with(new History.Event(History.Event.Type.down, agent, instant))); } - /** Returns a copy of this with a history record saying it was detected to be up at given instant */ - public Node upAt(Instant instant, Agent agent) { - return with(history.with(new History.Event(History.Event.Type.up, agent, instant))); + /** Returns a copy of this with any history record saying it has been detected down removed */ + public Node up() { + return with(history.without(History.Event.Type.down)); } - /** Returns whether this node is down, according to its recorded 'down' and 'up' events */ + /** Returns whether this node has a record of being down */ public boolean isDown() { - Optional<Instant> downAt = history().lastEvent(History.Event.Type.down).map(History.Event::at); - if (downAt.isEmpty()) return false; - - Optional<Instant> upAt = history().lastEvent(History.Event.Type.up).map(History.Event::at); - if (upAt.isEmpty()) return true; - return !downAt.get().isBefore(upAt.get()); + return history().event(History.Event.Type.down).isPresent(); } /** Returns a copy of this with allocation set as specified. <code>node.state</code> is *not* changed. */ @@ -449,15 +444,12 @@ public final class Node implements Nodelike { /** Returns a copy of this node with the current OS version set to the given version at the given instant */ public Node withCurrentOsVersion(Version version, Instant instant) { - Optional<Version> newVersion = Optional.of(version); - if (status.osVersion().current().equals(newVersion)) return this; // No change - - History newHistory = history(); - // Only update history if version was non-empty and changed to a different version - if (status.osVersion().current().isPresent() && !status.osVersion().current().equals(newVersion)) { + var newStatus = status.withOsVersion(status.osVersion().withCurrent(Optional.of(version))); + var newHistory = history(); + // Only update history if version has changed + if (status.osVersion().current().isEmpty() || !status.osVersion().current().get().equals(version)) { newHistory = history.with(new History.Event(History.Event.Type.osUpgraded, Agent.system, instant)); } - Status newStatus = status.withOsVersion(status.osVersion().withCurrent(newVersion)); return this.with(newStatus).with(newHistory); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java index 45a90daac96..eab9f755db2 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/AutoscalingMaintainer.java @@ -107,7 +107,7 @@ public class AutoscalingMaintainer extends NodeRepositoryMaintainer { // Scaling event is complete if: // - 1. no nodes which was retired by this are still present (which also implies data distribution is complete) if (clusterNodes.retired().stream() - .anyMatch(node -> node.history().hasLastEventAt(event.at(), History.Event.Type.retired))) + .anyMatch(node -> node.history().hasEventAt(History.Event.Type.retired, event.at()))) return cluster; // - 2. all nodes have switched to the right config generation (currently only measured on containers) for (var nodeTimeseries : nodeRepository().metricsDb().getNodeTimeseries(Duration.between(event.at(), clock().instant()), diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java index 459ab6a3e1c..6eaee7b33de 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/DynamicProvisioningMaintainer.java @@ -6,9 +6,9 @@ import com.yahoo.component.Vtag; import com.yahoo.config.provision.ApplicationId; import com.yahoo.config.provision.ClusterMembership; import com.yahoo.config.provision.ClusterSpec; -import com.yahoo.config.provision.NodeAllocationException; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; +import com.yahoo.config.provision.NodeAllocationException; import com.yahoo.jdisc.Metric; import com.yahoo.lang.MutableInteger; import com.yahoo.transaction.Mutex; @@ -164,7 +164,8 @@ public class DynamicProvisioningMaintainer extends NodeRepositoryMaintainer { } return candidatesForRemoval(nodes).stream() - .sorted(Comparator.comparing(node -> node.history().asList().stream().findFirst().map(History.Event::at).orElse(Instant.MIN))) + .sorted(Comparator.comparing(node -> node.history().events().stream() + .map(History.Event::at).min(Comparator.naturalOrder()).orElse(Instant.MIN))) .filter(node -> { if (!sharedHosts.containsKey(node.hostname()) || sharedHosts.size() > minCount) { sharedHosts.remove(node.hostname()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ExpeditedChangeApplicationMaintainer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ExpeditedChangeApplicationMaintainer.java index ced0a161e59..fa6b201def4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ExpeditedChangeApplicationMaintainer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ExpeditedChangeApplicationMaintainer.java @@ -78,11 +78,11 @@ public class ExpeditedChangeApplicationMaintainer extends ApplicationMaintainer List<String> reasons = nodes.stream() .flatMap(node -> node.history() - .asList() - .stream() - .filter(event -> expediteChangeBy(event.agent())) - .filter(event -> lastDeployTime.get().isBefore(event.at())) - .map(event -> event.type() + (event.agent() == Agent.system ? "" : " by " + event.agent()))) + .events() + .stream() + .filter(event -> expediteChangeBy(event.agent())) + .filter(event -> lastDeployTime.get().isBefore(event.at())) + .map(event -> event.type() + (event.agent() == Agent.system ? "" : " by " + event.agent()))) .sorted() .distinct() .collect(Collectors.toList()); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java index 51d73bd8a58..5a77fcca85c 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/Expirer.java @@ -57,7 +57,7 @@ public abstract class Expirer extends NodeRepositoryMaintainer { } protected final boolean isExpired(Node node, Duration expiryTime) { - return node.history().hasLastEventBefore(clock().instant().minus(expiryTime), eventType); + return node.history().hasEventBefore(eventType, clock().instant().minus(expiryTime)); } /** Implement this callback to take action to expire these nodes */ diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java index af17260ad84..3274f12dbc6 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/FailedExpirer.java @@ -73,10 +73,10 @@ public class FailedExpirer extends NodeRepositoryMaintainer { recycleIf(remainingNodes, node -> node.allocation().isEmpty()); recycleIf(remainingNodes, node -> !node.allocation().get().membership().cluster().isStateful() && - node.history().hasLastEventBefore(clock().instant().minus(statelessExpiry), History.Event.Type.failed)); + node.history().hasEventBefore(History.Event.Type.failed, clock().instant().minus(statelessExpiry))); recycleIf(remainingNodes, node -> node.allocation().get().membership().cluster().isStateful() && - node.history().hasLastEventBefore(clock().instant().minus(statefulExpiry), History.Event.Type.failed)); + node.history().hasEventBefore(History.Event.Type.failed, clock().instant().minus(statefulExpiry))); return 1.0; } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java index b117523dce0..1fe29c8b162 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/MetricsReporter.java @@ -20,6 +20,7 @@ import com.yahoo.vespa.hosted.provision.NodeList; import com.yahoo.vespa.hosted.provision.NodeRepository; import com.yahoo.vespa.hosted.provision.node.Allocation; import com.yahoo.vespa.hosted.provision.node.ClusterId; +import com.yahoo.vespa.hosted.provision.node.History; import com.yahoo.vespa.hosted.provision.persistence.CacheStats; import com.yahoo.vespa.service.monitor.ServiceModel; import com.yahoo.vespa.service.monitor.ServiceMonitor; @@ -247,8 +248,8 @@ public class MetricsReporter extends NodeRepositoryMaintainer { boolean down = NodeHealthTracker.allDown(services); metric.set("nodeFailerBadNode", (down ? 1 : 0), context); - boolean recordedDown = node.isDown(); - metric.set("downInNodeRepo", (recordedDown ? 1 : 0), context); + boolean nodeDownInNodeRepo = node.history().event(History.Event.Type.down).isPresent(); + metric.set("downInNodeRepo", (nodeDownInNodeRepo ? 1 : 0), context); } metric.set("numberOfServices", numberOfServices, context); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java index 661ba531de5..3900d10a53e 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeFailer.java @@ -155,9 +155,9 @@ public class NodeFailer extends NodeRepositoryMaintainer { for (Node node : activeNodes) { Instant graceTimeStart = clock().instant().minus(nodeRepository().nodes().suspended(node) ? suspendedDownTimeLimit : downTimeLimit); - if (downBefore(graceTimeStart, node) && !applicationSuspended(node)) { + if (node.history().hasEventBefore(History.Event.Type.down, graceTimeStart) && !applicationSuspended(node)) { // Allow a grace period after node re-activation - if (!node.history().hasLastEventAfter(graceTimeStart, History.Event.Type.activated)) + if (!node.history().hasEventAfter(History.Event.Type.activated, graceTimeStart)) failingNodes.add(new FailingNode(node, "Node has been down longer than " + downTimeLimit)); } } @@ -278,11 +278,6 @@ public class NodeFailer extends NodeRepositoryMaintainer { } } - /** Returns whether node is down, and has been down since before given instant */ - private static boolean downBefore(Instant instant, Node node) { - return node.isDown() && node.history().hasLastEventBefore(instant, History.Event.Type.down); - } - private void wantToFail(Node node, boolean wantToFail, Mutex lock) { nodeRepository().nodes().write(node.withWantToFail(wantToFail, Agent.NodeFailer, clock().instant()), lock); } @@ -293,8 +288,8 @@ public class NodeFailer extends NodeRepositoryMaintainer { Instant startOfThrottleWindow = clock().instant().minus(throttlePolicy.throttleWindow); NodeList allNodes = nodeRepository().nodes().list(); NodeList recentlyFailedNodes = allNodes.state(Node.State.failed) - .matching(n -> n.history().hasLastEventAfter(startOfThrottleWindow, History.Event.Type.failed - )); + .matching(n -> n.history().hasEventAfter(History.Event.Type.failed, + startOfThrottleWindow)); // Allow failing any node within policy if (recentlyFailedNodes.size() < throttlePolicy.allowedToFailOf(allNodes.size())) return false; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java index ab2cc2da257..874ff91d8a4 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeHealthTracker.java @@ -61,7 +61,7 @@ public class NodeHealthTracker extends NodeRepositoryMaintainer { Optional<Instant> lastLocalRequest = hostLivenessTracker.lastRequestFrom(node.hostname()); if (lastLocalRequest.isEmpty()) continue; - if (!node.history().hasLastEventAfter(lastLocalRequest.get(), History.Event.Type.requested)) { + if (!node.history().hasEventAfter(History.Event.Type.requested, lastLocalRequest.get())) { History updatedHistory = node.history() .with(new History.Event(History.Event.Type.requested, Agent.NodeHealthTracker, lastLocalRequest.get())); nodeRepository().nodes().write(node.with(updatedHistory), lock); @@ -96,7 +96,7 @@ public class NodeHealthTracker extends NodeRepositoryMaintainer { if (isDown) { recordAsDown(node.get(), lock); } else { - recordAsUp(node.get(), lock); + clearDownRecord(node.get(), lock); } } catch (ApplicationLockException e) { // Fine, carry on with other nodes. We'll try updating this one in the next run @@ -129,14 +129,14 @@ public class NodeHealthTracker extends NodeRepositoryMaintainer { /** Record a node as down if not already recorded */ private void recordAsDown(Node node, Mutex lock) { - if (node.isDown()) return; // already down: Don't change down timestamp + if (node.history().event(History.Event.Type.down).isPresent()) return; // already down: Don't change down timestamp nodeRepository().nodes().write(node.downAt(clock().instant(), Agent.NodeHealthTracker), lock); } /** Clear down record for node, if any */ - private void recordAsUp(Node node, Mutex lock) { - if (!node.isDown()) return; // already up: Don't change up timestamp - nodeRepository().nodes().write(node.upAt(clock().instant(), Agent.NodeHealthTracker), lock); + private void clearDownRecord(Node node, Mutex lock) { + if (node.history().event(History.Event.Type.down).isEmpty()) return; + nodeRepository().nodes().write(node.up(), lock); } } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java index 6093977bc9d..3cd97c64e4d 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooter.java @@ -55,11 +55,11 @@ public class NodeRebooter extends NodeRepositoryMaintainer { var rebootEvents = EnumSet.of(History.Event.Type.provisioned, History.Event.Type.rebooted, History.Event.Type.osUpgraded); var rebootInterval = Duration.ofDays(rebootIntervalInDays.value()); - Optional<Duration> overdue = node.history().asList().stream() - .filter(event -> rebootEvents.contains(event.type())) - .map(History.Event::at) - .max(Comparator.naturalOrder()) - .map(lastReboot -> Duration.between(lastReboot, clock().instant()).minus(rebootInterval)); + Optional<Duration> overdue = node.history().events().stream() + .filter(event -> rebootEvents.contains(event.type())) + .map(History.Event::at) + .max(Comparator.naturalOrder()) + .map(lastReboot -> Duration.between(lastReboot, clock().instant()).minus(rebootInterval)); if (overdue.isEmpty()) // should never happen as all hosts should have provisioned timestamp return random.nextDouble() < interval().getSeconds() / (double) rebootInterval.getSeconds(); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ProvisionedExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ProvisionedExpirer.java index 39015306ae2..2db02992f40 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ProvisionedExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/ProvisionedExpirer.java @@ -54,7 +54,7 @@ public class ProvisionedExpirer extends Expirer { } private boolean parkedByProvisionedExpirer(Node node) { - return node.history().lastEvent(History.Event.Type.parked) + return node.history().event(History.Event.Type.parked) .map(History.Event::agent) .map(Agent.ProvisionedExpirer::equals) .orElse(false); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java index d4eb02b59b0..73c9a1ab55a 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/maintenance/RetiredExpirer.java @@ -116,7 +116,7 @@ public class RetiredExpirer extends NodeRepositoryMaintainer { // allowing the removal of any config server. return false; } - } else if (node.history().hasLastEventBefore(clock().instant().minus(retiredExpiry), History.Event.Type.retired)) { + } else if (node.history().hasEventBefore(History.Event.Type.retired, clock().instant().minus(retiredExpiry))) { log.warning("Node " + node + " has been retired longer than " + retiredExpiry + ": Allowing removal. This may cause data loss"); return true; } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java index 59e43fd9971..f1e62634235 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/History.java @@ -1,19 +1,17 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.vespa.hosted.provision.node; +import com.google.common.collect.ImmutableMap; import com.yahoo.vespa.hosted.provision.Node; import java.time.Instant; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.List; -import java.util.Objects; +import java.util.Collection; +import java.util.Collections; import java.util.Optional; import java.util.stream.Collectors; /** - * An immutable list of events happening to this node, in chronological order. - * + * An immutable record of the last event of each type happening to this node. * Note that the history cannot be used to find the nodes current state - it will have a record of some * event happening in the past even if that event is later undone. * @@ -21,60 +19,72 @@ import java.util.stream.Collectors; */ public class History { - /** The maximum number of events to keep for a node */ - private static final int MAX_SIZE = 15; - - private final List<Event> events; + private final ImmutableMap<Event.Type, Event> events; - public History(List<Event> events) { - this(events, MAX_SIZE); + public History(Collection<Event> events) { + this(toImmutableMap(events)); } - History(List<Event> events, int maxSize) { - this.events = Objects.requireNonNull(events, "events must be non-null") - .stream() - .sorted(Comparator.comparing(Event::at)) - .skip(Math.max(events.size() - maxSize, 0)) - .collect(Collectors.toUnmodifiableList()); + private History(ImmutableMap<Event.Type, Event> events) { + this.events = events; } - /** Returns the last event of given type, if it is present in this history */ - public Optional<Event> lastEvent(Event.Type type) { - return events.stream().filter(event -> event.type() == type).max(Comparator.comparing(Event::at)); + private static ImmutableMap<Event.Type, Event> toImmutableMap(Collection<Event> events) { + ImmutableMap.Builder<Event.Type, Event> builder = new ImmutableMap.Builder<>(); + for (Event event : events) + builder.put(event.type(), event); + return builder.build(); } - /** Returns true if the last event of this type is registered in this history at the given time */ - public boolean hasLastEventAt(Instant time, Event.Type type) { - return lastEvent(type).map(event -> event.at().equals(time)) - .orElse(false); - } + /** Returns this event if it is present in this history */ + public Optional<Event> event(Event.Type type) { return Optional.ofNullable(events.get(type)); } - /** Returns true if the last event of this type is registered after the given time */ - public boolean hasLastEventAfter(Instant time, Event.Type type) { - return lastEvent(type).map(event -> event.at().isAfter(time)) - .orElse(false); + /** Returns true if a given event is registered in this history at the given time */ + public boolean hasEventAt(Event.Type type, Instant time) { + return event(type) + .map(event -> event.at().equals(time)) + .orElse(false); } - /** Returns true if the last event of this type is registered before the given time */ - public boolean hasLastEventBefore(Instant time, Event.Type type) { - return lastEvent(type).map(event -> event.at().isBefore(time)) - .orElse(false); + /** Returns true if a given event is registered in this history after the given time */ + public boolean hasEventAfter(Event.Type type, Instant time) { + return event(type) + .map(event -> event.at().isAfter(time)) + .orElse(false); } - public List<Event> asList() { - return events; + /** Returns true if a given event is registered in this history before the given time */ + public boolean hasEventBefore(Event.Type type, Instant time) { + return event(type) + .map(event -> event.at().isBefore(time)) + .orElse(false); } + public Collection<Event> events() { return events.values(); } + /** Returns a copy of this history with the given event added */ public History with(Event event) { - List<Event> copy = new ArrayList<>(events); - copy.add(event); - return new History(copy); + ImmutableMap.Builder<Event.Type, Event> builder = builderWithout(event.type()); + builder.put(event.type(), event); + return new History(builder.build()); + } + + /** Returns a copy of this history with the given event type removed (or an identical history if it was not present) */ + public History without(Event.Type type) { + return new History(builderWithout(type).build()); + } + + private ImmutableMap.Builder<Event.Type, Event> builderWithout(Event.Type type) { + ImmutableMap.Builder<Event.Type, Event> builder = new ImmutableMap.Builder<>(); + for (Event event : events.values()) + if (event.type() != type) + builder.put(event.type(), event); + return builder; } /** Returns a copy of this history with a record of this state transition added, if applicable */ public History recordStateTransition(Node.State from, Node.State to, Agent agent, Instant at) { - // If the event is a re-reservation, allow the new event to overwrite the older one. + // If the event is a re-reservation, allow the new one to override the older one. if (from == to && from != Node.State.reserved) return this; switch (to) { case provisioned: return this.with(new Event(Event.Type.provisioned, agent, at)); @@ -82,14 +92,7 @@ public class History { case ready: return this.withoutApplicationEvents().with(new Event(Event.Type.readied, agent, at)); case active: return this.with(new Event(Event.Type.activated, agent, at)); case inactive: return this.with(new Event(Event.Type.deactivated, agent, at)); - case reserved: { - History history = this; - if (!events.isEmpty() && events.get(events.size() - 1).type() == Event.Type.reserved) { - // Avoid repeating reserved event - history = new History(events.subList(0, events.size() - 1)); - } - return history.with(new Event(Event.Type.reserved, agent, at)); - } + case reserved: return this.with(new Event(Event.Type.reserved, agent, at)); case failed: return this.with(new Event(Event.Type.failed, agent, at)); case dirty: return this.with(new Event(Event.Type.deallocated, agent, at)); case parked: return this.with(new Event(Event.Type.parked, agent, at)); @@ -103,17 +106,17 @@ public class History { * This returns a copy of this history with all application level events removed. */ private History withoutApplicationEvents() { - return new History(asList().stream().filter(e -> ! e.type().isApplicationLevel()).collect(Collectors.toList())); + return new History(events().stream().filter(e -> ! e.type().isApplicationLevel()).collect(Collectors.toList())); } /** Returns the empty history */ - public static History empty() { return new History(List.of()); } + public static History empty() { return new History(Collections.emptyList()); } @Override public String toString() { if (events.isEmpty()) return "history: (empty)"; StringBuilder b = new StringBuilder("history: "); - for (Event e : events) + for (Event e : events.values()) b.append(e).append(", "); b.setLength(b.length() - 2); // remove last comma return b.toString(); @@ -145,27 +148,27 @@ public class History { readied, reserved, - /** The node was scheduled for retirement (hard) */ + // The node was scheduled for retirement (hard) wantToRetire(false), - /** The node was scheduled for retirement (soft) */ + // The node was scheduled for retirement (soft) preferToRetire(false), - /** This node was scheduled for failing */ + // This node was scheduled for failing wantToFail, - /** The active node was retired */ + // The active node was retired retired, - /** The active node went down according to the service monitor */ + // The active node went down according to the service monitor down, - /** The active node came up according to the service monitor */ + // The active node came up according to the service monitor up, - /** The node made a config request, indicating it is live */ + // The node made a config request, indicating it is live requested, - /** The node resources/flavor were changed */ + // The node resources/flavor were changed resized(false), - /** The node was rebooted */ + // The node was rebooted rebooted(false), - /** The node upgraded its OS (implies a reboot) */ + // The node upgraded its OS (implies a reboot) osUpgraded(false), - /** The node verified its firmware (whether this resulted in a reboot depends on the node model) */ + // The node verified its firmware (whether this resulted in a reboot depends on the node model) firmwareVerified(false); private final boolean applicationLevel; diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java index c403dda1b74..d544ea76983 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/node/Nodes.java @@ -840,7 +840,7 @@ public class Nodes { if (agent == Agent.operator) return false; if (node.type() == NodeType.tenant && node.status().wantToDeprovision()) return false; boolean retirementRequestedByOperator = node.status().wantToRetire() && - node.history().lastEvent(History.Event.Type.wantToRetire) + node.history().event(History.Event.Type.wantToRetire) .map(History.Event::agent) .map(a -> a == Agent.operator) .orElse(false); diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java index 551a15aa804..4990c1e9db8 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializer.java @@ -219,7 +219,7 @@ public class NodeSerializer { } private void toSlime(History history, Cursor array) { - for (History.Event event : history.asList()) + for (History.Event event : history.events()) toSlime(event, array.addObject()); } diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java index 2822114375f..922c8bc8e20 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodesResponse.java @@ -197,7 +197,7 @@ class NodesResponse extends SlimeJsonResponse { } private void toSlime(History history, Cursor array) { - for (History.Event event : history.asList()) { + for (History.Event event : history.events()) { Cursor object = array.addObject(); object.setString("event", event.type().name()); object.setLong("at", event.at().toEpochMilli()); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java index 74fe5800e89..078df5264a1 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/NodeRepositoryTest.java @@ -185,7 +185,7 @@ public class NodeRepositoryTest { tester.clock().advance(Duration.ofSeconds(1)); tester.addHost("id1", "host1", "default", NodeType.host); tester.addHost("id2", "host2", "default", NodeType.host); - assertFalse(tester.nodeRepository().nodes().node("host1").get().history().hasLastEventAfter(testStart, History.Event.Type.deprovisioned)); + assertFalse(tester.nodeRepository().nodes().node("host1").get().history().hasEventAfter(History.Event.Type.deprovisioned, testStart)); // Set host 1 properties and deprovision it try (var lock = tester.nodeRepository().nodes().lockAndGetRequired("host1")) { @@ -208,7 +208,7 @@ public class NodeRepositoryTest { Node host1 = tester.nodeRepository().nodes().node("host1").get(); Node host2 = tester.nodeRepository().nodes().node("host2").get(); assertEquals(Node.State.deprovisioned, host1.state()); - assertTrue(host1.history().hasLastEventAfter(testStart, History.Event.Type.deprovisioned)); + assertTrue(host1.history().hasEventAfter(History.Event.Type.deprovisioned, testStart)); // Adding it again preserves some information from the deprovisioned host and removes it tester.addHost("id2", "host1", "default", NodeType.host); @@ -218,7 +218,7 @@ public class NodeRepositoryTest { tester.nodeRepository().nodes().node("host1", Node.State.deprovisioned).isPresent()); assertFalse("Not transferred from deprovisioned host", host1.status().wantToRetire()); assertFalse("Not transferred from deprovisioned host", host1.status().wantToDeprovision()); - assertTrue("Transferred from deprovisioned host", host1.history().hasLastEventAfter(testStart, History.Event.Type.deprovisioned)); + assertTrue("Transferred from deprovisioned host", host1.history().hasEventAfter(History.Event.Type.deprovisioned, testStart)); assertTrue("Transferred from deprovisioned host", host1.status().firmwareVerifiedAt().isPresent()); assertEquals("Transferred from deprovisioned host", 1, host1.status().failCount()); assertEquals("Transferred from deprovisioned host", 1, host1.reports().getReports().size()); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java index 4972e9f6f20..5211b855fff 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/InactiveAndFailedExpirerTest.java @@ -78,7 +78,7 @@ public class InactiveAndFailedExpirerTest { Node ready = tester.nodeRepository().nodes().setReady(List.of(dirty.asList().get(0)), Agent.system, getClass().getSimpleName()).get(0); assertEquals("Allocated history is removed on readying", List.of(History.Event.Type.provisioned, History.Event.Type.readied), - ready.history().asList().stream().map(History.Event::type).collect(Collectors.toList())); + ready.history().events().stream().map(History.Event::type).collect(Collectors.toList())); // Dirty times out for the other one tester.advanceTime(Duration.ofMinutes(14)); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java index 84270a54218..b8f6011d6bf 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/maintenance/NodeRebooterTest.java @@ -16,7 +16,6 @@ import org.junit.Test; import java.time.Duration; import java.time.Instant; import java.util.List; -import java.util.Optional; import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; @@ -61,8 +60,7 @@ public class NodeRebooterTest { // OS upgrade counts as reboot, so within 0x-1x there is no reboots tester.clock().advance(rebootInterval); rebooter.maintain(); - scheduleOsUpgrade(nodeRepository); - simulateOsUpgrade(nodeRepository); + simulateReboot(nodeRepository); assertReadyHosts(15, nodeRepository, 1L); // OS upgrade counts as reboot, but within 1x-2x reboots are scheduled again @@ -111,8 +109,6 @@ public class NodeRebooterTest { private void makeReadyHosts(int count, ProvisioningTester tester) { tester.makeReadyNodes(count, new NodeResources(64, 256, 1000, 10), NodeType.host, 10); - // Set initial OS version - tester.patchNodes(node -> node.type().isHost(), (node) -> node.with(node.status().withOsVersion(node.status().osVersion().withCurrent(Optional.of(Version.fromString("7.0")))))); } /** Set current reboot generation to the wanted reboot generation whenever it is larger (i.e record a reboot) */ @@ -126,7 +122,7 @@ public class NodeRebooterTest { /** Schedule OS upgrade for all host nodes */ private void scheduleOsUpgrade(NodeRepository nodeRepository) { - nodeRepository.osVersions().setTarget(NodeType.host, Version.fromString("7.1"), Duration.ZERO, false); + nodeRepository.osVersions().setTarget(NodeType.host, Version.fromString("7.0"), Duration.ZERO, false); } /** Simulate completion of an OS upgrade */ diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/node/HistoryTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/node/HistoryTest.java deleted file mode 100644 index 62c5dda51a7..00000000000 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/node/HistoryTest.java +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.vespa.hosted.provision.node; - -import com.yahoo.vespa.hosted.provision.node.History.Event; -import org.junit.Test; - -import java.time.Instant; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.stream.Collectors; - -import static org.junit.Assert.assertEquals; - -/** - * @author mpolden - */ -public class HistoryTest { - - @Test - public void truncate_events() { - assertEquals(0, new History(List.of(), 2).asList().size()); - assertEquals(1, new History(shuffledEvents(1), 2).asList().size()); - assertEquals(2, new History(shuffledEvents(2), 2).asList().size()); - - History history = new History(shuffledEvents(5), 3); - assertEquals(3, history.asList().size()); - assertEquals("Most recent events are kept", - List.of(2L, 3L, 4L), - history.asList().stream().map(e -> e.at().toEpochMilli()).collect(Collectors.toList())); - } - - private static List<Event> shuffledEvents(int count) { - Instant start = Instant.ofEpochMilli(0); - List<Event> events = new ArrayList<>(); - for (int i = 0; i < count; i++) { - events.add(new Event(Event.Type.values()[i], Agent.system, start.plusMillis(i))); - } - Collections.shuffle(events); - return events; - } - -} diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java index f14675d1536..48ee23c7b60 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/persistence/NodeSerializerTest.java @@ -71,7 +71,7 @@ public class NodeSerializerTest { assertEquals(node.id(), copy.id()); assertEquals(node.state(), copy.state()); assertFalse(copy.allocation().isPresent()); - assertEquals(0, copy.history().asList().size()); + assertEquals(0, copy.history().events().size()); } @Test @@ -81,14 +81,14 @@ public class NodeSerializerTest { DiskSpeed.any, StorageType.any, Architecture.arm64); clock.advance(Duration.ofMinutes(3)); - assertEquals(0, node.history().asList().size()); + assertEquals(0, node.history().events().size()); node = node.allocate(ApplicationId.from(TenantName.from("myTenant"), ApplicationName.from("myApplication"), InstanceName.from("myInstance")), ClusterMembership.from("content/myId/0/0/stateful", Vtag.currentVersion, Optional.empty()), requestedResources, clock.instant()); - assertEquals(1, node.history().asList().size()); + assertEquals(1, node.history().events().size()); node = node.withRestart(new Generation(1, 2)); node = node.withReboot(new Generation(3, 4)); node = node.with(FlavorConfigBuilder.createDummies("arm64").getFlavorOrThrow("arm64"), Agent.system, clock.instant()); @@ -112,8 +112,8 @@ public class NodeSerializerTest { assertEquals(node.allocation().get().membership(), copy.allocation().get().membership()); assertEquals(node.allocation().get().requestedResources(), copy.allocation().get().requestedResources()); assertEquals(node.allocation().get().isRemovable(), copy.allocation().get().isRemovable()); - assertEquals(2, copy.history().asList().size()); - assertEquals(clock.instant().truncatedTo(MILLIS), copy.history().lastEvent(History.Event.Type.reserved).get().at()); + assertEquals(2, copy.history().events().size()); + assertEquals(clock.instant().truncatedTo(MILLIS), copy.history().event(History.Event.Type.reserved).get().at()); assertEquals(NodeType.tenant, copy.type()); } @@ -160,7 +160,7 @@ public class NodeSerializerTest { assertEquals(3, node.allocation().get().restartGeneration().wanted()); assertEquals(4, node.allocation().get().restartGeneration().current()); assertEquals(Arrays.asList(History.Event.Type.provisioned, History.Event.Type.reserved), - node.history().asList().stream().map(History.Event::type).collect(Collectors.toList())); + node.history().events().stream().map(History.Event::type).collect(Collectors.toList())); assertTrue(node.allocation().get().isRemovable()); assertEquals(NodeType.tenant, node.type()); } @@ -170,21 +170,21 @@ public class NodeSerializerTest { Node node = createNode(); clock.advance(Duration.ofMinutes(3)); - assertEquals(0, node.history().asList().size()); + assertEquals(0, node.history().events().size()); node = node.allocate(ApplicationId.from(TenantName.from("myTenant"), ApplicationName.from("myApplication"), InstanceName.from("myInstance")), ClusterMembership.from("content/myId/0/0/stateful", Vtag.currentVersion, Optional.empty()), node.flavor().resources(), clock.instant()); - assertEquals(1, node.history().asList().size()); + assertEquals(1, node.history().events().size()); clock.advance(Duration.ofMinutes(2)); node = node.retire(Agent.application, clock.instant()); Node copy = nodeSerializer.fromJson(Node.State.provisioned, nodeSerializer.toJson(node)); - assertEquals(2, copy.history().asList().size()); - assertEquals(clock.instant().truncatedTo(MILLIS), copy.history().lastEvent(History.Event.Type.retired).get().at()); + assertEquals(2, copy.history().events().size()); + assertEquals(clock.instant().truncatedTo(MILLIS), copy.history().event(History.Event.Type.retired).get().at()); assertEquals(Agent.application, - (copy.history().lastEvent(History.Event.Type.retired).get()).agent()); + (copy.history().event(History.Event.Type.retired).get()).agent()); assertTrue(copy.allocation().get().membership().retired()); Node removable = copy.with(node.allocation().get().removable(true)); @@ -209,13 +209,13 @@ public class NodeSerializerTest { " \"wantedVespaVersion\": \"6.42.2\"\n" + " }\n" + "}\n").getBytes()); - assertEquals(0, node.history().asList().size()); + assertEquals(0, node.history().events().size()); assertTrue(node.allocation().isPresent()); assertEquals("ugccloud-container", node.allocation().get().membership().cluster().id().value()); assertEquals("container", node.allocation().get().membership().cluster().type().name()); assertEquals(0, node.allocation().get().membership().cluster().group().get().index()); Node copy = nodeSerializer.fromJson(Node.State.provisioned, nodeSerializer.toJson(node)); - assertEquals(0, copy.history().asList().size()); + assertEquals(0, copy.history().events().size()); } @Test @@ -296,7 +296,7 @@ public class NodeSerializerTest { Node copy = nodeSerializer.fromJson(Node.State.provisioned, nodeSerializer.toJson(node)); assertEquals(1234, copy.flavor().resources().diskGb(), 0); assertEquals(node, copy); - assertTrue(node.history().lastEvent(History.Event.Type.resized).isPresent()); + assertTrue(node.history().event(History.Event.Type.resized).isPresent()); } @Test @@ -351,12 +351,12 @@ public class NodeSerializerTest { assertFalse(serialized.status().osVersion().current().isPresent()); // Update OS version - serialized = serialized.withCurrentOsVersion(Version.fromString("7.1"), Instant.ofEpochMilli(42)) - .withCurrentOsVersion(Version.fromString("7.2"), Instant.ofEpochMilli(123)) - .withCurrentOsVersion(Version.fromString("7.2"), Instant.ofEpochMilli(456)); + serialized = serialized.withCurrentOsVersion(Version.fromString("7.1"), Instant.ofEpochMilli(123)) + // Another update for same version: + .withCurrentOsVersion(Version.fromString("7.1"), Instant.ofEpochMilli(456)); serialized = nodeSerializer.fromJson(State.provisioned, nodeSerializer.toJson(serialized)); - assertEquals(Version.fromString("7.2"), serialized.status().osVersion().current().get()); - var osUpgradedEvents = serialized.history().asList().stream() + assertEquals(Version.fromString("7.1"), serialized.status().osVersion().current().get()); + var osUpgradedEvents = serialized.history().events().stream() .filter(event -> event.type() == History.Event.Type.osUpgraded) .collect(Collectors.toList()); assertEquals("OS upgraded event is added", 1, osUpgradedEvents.size()); @@ -372,7 +372,7 @@ public class NodeSerializerTest { node = node.withFirmwareVerifiedAt(Instant.ofEpochMilli(100)); node = nodeSerializer.fromJson(State.active, nodeSerializer.toJson(node)); assertEquals(100, node.status().firmwareVerifiedAt().get().toEpochMilli()); - assertEquals(Instant.ofEpochMilli(100), node.history().lastEvent(History.Event.Type.firmwareVerified).get().at()); + assertEquals(Instant.ofEpochMilli(100), node.history().event(History.Event.Type.firmwareVerified).get().at()); } @Test diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java index a74eaf32bf8..94822c85a03 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/ProvisioningTest.java @@ -880,7 +880,7 @@ public class ProvisioningTest { assertTrue("Time of event is updated for all nodes", reserved.stream() .allMatch(n -> n.history() - .lastEvent(History.Event.Type.reserved) + .event(History.Event.Type.reserved) .get().at() .equals(tester.clock().instant().truncatedTo(MILLIS)))); @@ -1167,7 +1167,7 @@ public class ProvisioningTest { /** A predicate that returns whether a node has been retired by the given agent */ private static Predicate<Node> retiredBy(Agent agent) { - return (node) -> node.history().lastEvent(History.Event.Type.retired) + return (node) -> node.history().event(History.Event.Type.retired) .filter(e -> e.type() == History.Event.Type.retired) .filter(e -> e.agent() == agent) .isPresent(); diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/docker-node1-os-upgrade-complete.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/docker-node1-os-upgrade-complete.json index cb874896927..09b5575ea02 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/docker-node1-os-upgrade-complete.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/docker-node1-os-upgrade-complete.json @@ -55,6 +55,11 @@ "event": "activated", "at": 123, "agent": "application" + }, + { + "event": "osUpgraded", + "at": 123, + "agent": "system" } ], "ipAddresses": [ diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/dockerhost1-with-firmware-data.json b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/dockerhost1-with-firmware-data.json index 3e8b19135c0..a73e9a7bade 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/dockerhost1-with-firmware-data.json +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/restapi/responses/dockerhost1-with-firmware-data.json @@ -37,11 +37,6 @@ "wantToRebuild": false, "history": [ { - "event": "firmwareVerified", - "at": 100, - "agent": "system" - }, - { "event": "provisioned", "at": 123, "agent": "system" @@ -60,6 +55,11 @@ "event": "activated", "at": 123, "agent": "application" + }, + { + "event": "firmwareVerified", + "at": 100, + "agent": "system" } ], "ipAddresses": [ |