diff options
12 files changed, 92 insertions, 11 deletions
diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/Node.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/Node.java index 684dc3cbc25..d8103c864df 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/Node.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/Node.java @@ -32,6 +32,8 @@ public class Node { private final Version wantedOsVersion; private final ServiceState serviceState; private final Optional<Instant> suspendedSince; + private final Optional<Instant> currentFirmwareCheck; + private final Optional<Instant> wantedFirmwareCheck; private final long restartGeneration; private final long wantedRestartGeneration; private final long rebootGeneration; @@ -45,7 +47,8 @@ public class Node { private final Optional<TenantName> reservedTo; public Node(HostName hostname, Optional<HostName> parentHostname, State state, NodeType type, NodeResources resources, Optional<ApplicationId> owner, - Version currentVersion, Version wantedVersion, Version currentOsVersion, Version wantedOsVersion, ServiceState serviceState, + Version currentVersion, Version wantedVersion, Version currentOsVersion, Version wantedOsVersion, + Optional<Instant> currentFirmwareCheck, Optional<Instant> wantedFirmwareCheck, ServiceState serviceState, Optional<Instant> suspendedSince, long restartGeneration, long wantedRestartGeneration, long rebootGeneration, long wantedRebootGeneration, int cost, String flavor, String clusterId, ClusterType clusterType, boolean wantToRetire, boolean wantToDeprovision, Optional<TenantName> reservedTo) { @@ -59,6 +62,8 @@ public class Node { this.wantedVersion = wantedVersion; this.currentOsVersion = currentOsVersion; this.wantedOsVersion = wantedOsVersion; + this.currentFirmwareCheck = currentFirmwareCheck; + this.wantedFirmwareCheck = wantedFirmwareCheck; 
this.serviceState = serviceState; this.suspendedSince = suspendedSince; this.restartGeneration = restartGeneration; @@ -112,6 +117,14 @@ public class Node { return wantedOsVersion; } + public Optional<Instant> currentFirmwareCheck() { + return currentFirmwareCheck; + } + + public Optional<Instant> wantedFirmwareCheck() { + return wantedFirmwareCheck; + } + public ServiceState serviceState() { return serviceState; } @@ -215,6 +228,8 @@ public class Node { private Version wantedVersion; private Version currentOsVersion; private Version wantedOsVersion; + private Optional<Instant> currentFirmwareCheck = Optional.empty(); + private Optional<Instant> wantedFirmwareCheck = Optional.empty(); private ServiceState serviceState; private Optional<Instant> suspendedSince = Optional.empty(); private long restartGeneration; @@ -242,6 +257,8 @@ public class Node { this.wantedVersion = node.wantedVersion; this.currentOsVersion = node.currentOsVersion; this.wantedOsVersion = node.wantedOsVersion; + this.currentFirmwareCheck = node.currentFirmwareCheck; + this.wantedFirmwareCheck = node.wantedFirmwareCheck; this.serviceState = node.serviceState; this.suspendedSince = node.suspendedSince; this.restartGeneration = node.restartGeneration; @@ -307,6 +324,16 @@ public class Node { return this; } + public Builder currentFirmwareCheck(Instant currentFirmwareCheck) { + this.currentFirmwareCheck = Optional.ofNullable(currentFirmwareCheck); + return this; + } + + public Builder wantedFirmwareCheck(Instant wantedFirmwareCheck) { + this.wantedFirmwareCheck = Optional.ofNullable(wantedFirmwareCheck); + return this; + } + public Builder serviceState(ServiceState serviceState) { this.serviceState = serviceState; return this; @@ -373,9 +400,10 @@ public class Node { } public Node build() { - return new Node(hostname, parentHostname, state, type, resources, owner, currentVersion, wantedVersion, currentOsVersion, - wantedOsVersion, serviceState, suspendedSince, restartGeneration, 
wantedRestartGeneration, rebootGeneration, wantedRebootGeneration, - cost, flavor, clusterId, clusterType, wantToRetire, wantToDeprovision, reservedTo); + return new Node(hostname, parentHostname, state, type, resources, owner, currentVersion, wantedVersion, + currentOsVersion, wantedOsVersion, currentFirmwareCheck, wantedFirmwareCheck, serviceState, + suspendedSince, restartGeneration, wantedRestartGeneration, rebootGeneration, wantedRebootGeneration, + cost, flavor, clusterId, clusterType, wantToRetire, wantToDeprovision, reservedTo); } } diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java index 43d8d1c5a6e..22c373e97ee 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java +++ b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/configserver/NodeRepository.java @@ -108,6 +108,8 @@ public interface NodeRepository { versionFrom(node.getWantedVespaVersion()), versionFrom(node.getCurrentOsVersion()), versionFrom(node.getWantedOsVersion()), + Optional.ofNullable(node.getCurrentFirmwareCheck()).map(Instant::ofEpochMilli), + Optional.ofNullable(node.getWantedFirmwareCheck()).map(Instant::ofEpochMilli), fromBoolean(node.getAllowedToBeDown()), Optional.ofNullable(node.suspendedSince()).map(Instant::ofEpochMilli), toInt(node.getCurrentRestartGeneration()), diff --git a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/noderepository/NodeRepositoryNode.java b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/noderepository/NodeRepositoryNode.java index 985b7e3a339..2abf40be527 100644 --- a/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/noderepository/NodeRepositoryNode.java +++ 
b/controller-api/src/main/java/com/yahoo/vespa/hosted/controller/api/integration/noderepository/NodeRepositoryNode.java @@ -56,6 +56,10 @@ public class NodeRepositoryNode { private String currentOsVersion; @JsonProperty("wantedOsVersion") private String wantedOsVersion; + @JsonProperty("currentFirmwareCheck") + private Long currentFirmwareCheck; + @JsonProperty("wantedFirmwareCheck") + private Long wantedFirmwareCheck; @JsonProperty("failCount") private Integer failCount; @JsonProperty("environment") @@ -337,6 +341,22 @@ public class NodeRepositoryNode { this.wantedOsVersion = wantedOsVersion; } + public Long getCurrentFirmwareCheck() { + return currentFirmwareCheck; + } + + public void setCurrentFirmwareCheck(Long currentFirmwareCheck) { + this.currentFirmwareCheck = currentFirmwareCheck; + } + + public Long getWantedFirmwareCheck() { + return wantedFirmwareCheck; + } + + public void setWantedFirmwareCheck(Long wantedFirmwareCheck) { + this.wantedFirmwareCheck = wantedFirmwareCheck; + } + public Map<String, JsonNode> getReports() { return reports; } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/ConvergenceSummary.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/ConvergenceSummary.java index 789e6ab79af..d874a8042f2 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/ConvergenceSummary.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/ConvergenceSummary.java @@ -13,6 +13,7 @@ public class ConvergenceSummary { private final long nodes; private final long down; private final long upgradingOs; + private final long upgradingFirmware; private final long needPlatformUpgrade; private final long upgradingPlatform; private final long needReboot; @@ -22,11 +23,12 @@ public class ConvergenceSummary { private final long services; private final long needNewConfig; - public ConvergenceSummary(long nodes, long down, long 
upgradingOs, long needPlatformUpgrade, long upgradingPlatform, + public ConvergenceSummary(long nodes, long down, long upgradingOs, long upgradingFirmware, long needPlatformUpgrade, long upgradingPlatform, long needReboot, long rebooting, long needRestart, long restarting, long services, long needNewConfig) { this.nodes = nodes; this.down = down; this.upgradingOs = upgradingOs; + this.upgradingFirmware = upgradingFirmware; this.needPlatformUpgrade = needPlatformUpgrade; this.upgradingPlatform = upgradingPlatform; this.needReboot = needReboot; @@ -52,6 +54,11 @@ public class ConvergenceSummary { return upgradingOs; } + /** Number of nodes down for firmware upgrade. */ + public long upgradingFirmware() { + return upgradingFirmware; + } + /** Number of nodes in need of a platform upgrade. */ public long needPlatformUpgrade() { return needPlatformUpgrade; @@ -110,6 +117,7 @@ public class ConvergenceSummary { return nodes == that.nodes && down == that.down && upgradingOs == that.upgradingOs && + upgradingFirmware == that.upgradingFirmware && needPlatformUpgrade == that.needPlatformUpgrade && upgradingPlatform == that.upgradingPlatform && needReboot == that.needReboot && @@ -122,7 +130,7 @@ public class ConvergenceSummary { @Override public int hashCode() { - return Objects.hash(nodes, down, upgradingOs, needPlatformUpgrade, upgradingPlatform, needReboot, rebooting, needRestart, restarting, services, needNewConfig); + return Objects.hash(nodes, down, upgradingOs, upgradingFirmware, needPlatformUpgrade, upgradingPlatform, needReboot, rebooting, needRestart, restarting, services, needNewConfig); } } diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java index 3d26a67c639..989f4108dda 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java +++ 
b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/InternalStepRunner.java @@ -489,6 +489,14 @@ public class InternalStepRunner implements StepRunner { (node.node().wantedOsVersion().isAfter(node.node().currentOsVersion()) && node.node().serviceState() == Node.ServiceState.allowedDown ? ", upgrading OS (" + node.node().wantedOsVersion() + " <-- " + node.node().currentOsVersion() + ")" : "") + + (node.parent().wantedFirmwareCheck() + .map(wanted -> node.parent().currentFirmwareCheck() + .map(wanted::isAfter) + .orElse(true)) + .orElse(false) + && node.node().serviceState() == Node.ServiceState.allowedDown + ? ", upgrading firmware" + : "") + (node.node().wantedRestartGeneration() > node.node().restartGeneration() ? ", restart pending (" + node.node().wantedRestartGeneration() + " <-- " + node.node().restartGeneration() + ")" : "") + diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/NodeList.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/NodeList.java index cccf8a15a59..afcf393a293 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/NodeList.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/deployment/NodeList.java @@ -46,6 +46,15 @@ public class NodeList extends AbstractFilteringList<NodeWithServices, NodeList> return matching(node -> node.parent().wantedOsVersion().isAfter(node.parent().currentOsVersion())); } + /** The nodes whose parent has a wanted firmware check which is after its current one, or has no current one. */ + public NodeList upgradingFirmware() { + return matching(node -> node.parent().wantedFirmwareCheck() + .map(wanted -> node.parent().currentFirmwareCheck() + .map(wanted::isAfter) + .orElse(true)) + .orElse(false)); + } + /** The nodes whose parent is down. 
*/ public NodeList withParentDown() { return matching(node -> node.parent().serviceState() == Node.ServiceState.allowedDown); @@ -87,6 +96,7 @@ public class NodeList extends AbstractFilteringList<NodeWithServices, NodeList> return new ConvergenceSummary(size(), allowedDown.size(), withParentDown().upgradingOs().size(), + withParentDown().upgradingFirmware().size(), upgradingPlatform().size(), allowedDown.upgradingPlatform().size(), rebooting().size(), diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java index 22129d8bb06..a4b0df31883 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializer.java @@ -180,8 +180,8 @@ class RunSerializer { if ( ! summaryArray.valid()) return Optional.empty(); - if (summaryArray.entries() != 11) - throw new IllegalArgumentException("Convergence summary must have 11 entries"); + if (summaryArray.entries() != 12) + throw new IllegalArgumentException("Convergence summary must have 12 entries"); return Optional.of(new ConvergenceSummary(summaryArray.entry(0).asLong(), summaryArray.entry(1).asLong(), @@ -193,7 +193,8 @@ class RunSerializer { summaryArray.entry(7).asLong(), summaryArray.entry(8).asLong(), summaryArray.entry(9).asLong(), - summaryArray.entry(10).asLong())); + summaryArray.entry(10).asLong(), + summaryArray.entry(11).asLong())); } Slime toSlime(Iterable<Run> runs) { @@ -261,6 +262,7 @@ class RunSerializer { summaryArray.addLong(summary.nodes()); summaryArray.addLong(summary.down()); summaryArray.addLong(summary.upgradingOs()); + summaryArray.addLong(summary.upgradingFirmware()); summaryArray.addLong(summary.needPlatformUpgrade()); summaryArray.addLong(summary.upgradingPlatform()); summaryArray.addLong(summary.needReboot()); 
diff --git a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java index 2fd64bb6ba7..91a0455db11 100644 --- a/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java +++ b/controller-server/src/main/java/com/yahoo/vespa/hosted/controller/restapi/application/JobControllerApiHandlerHelper.java @@ -476,6 +476,7 @@ class JobControllerApiHandlerHelper { summaryObject.setLong("needRestart", summary.needRestart()); summaryObject.setLong("restarting", summary.restarting()); summaryObject.setLong("upgradingOs", summary.upgradingOs()); + summaryObject.setLong("upgradingFirmware", summary.upgradingFirmware()); summaryObject.setLong("services", summary.services()); summaryObject.setLong("needNewConfig", summary.needNewConfig()); } diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializerTest.java b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializerTest.java index 3839e2103cd..e5757604caf 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializerTest.java +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/RunSerializerTest.java @@ -100,7 +100,7 @@ public class RunSerializerTest { "badb17"), 122), run.versions().sourceApplication().get()); - assertEquals(Optional.of(new ConvergenceSummary(1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89)), + assertEquals(Optional.of(new ConvergenceSummary(1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144)), run.convergenceSummary()); assertEquals(X509CertificateUtils.fromPem("-----BEGIN CERTIFICATE-----\n" + "MIIBEzCBu6ADAgECAgEBMAoGCCqGSM49BAMEMBQxEjAQBgNVBAMTCW15c2Vydmlj\n" + diff --git 
a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/testdata/run-status.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/testdata/run-status.json index a66b9d3e955..a7e5d249a9d 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/testdata/run-status.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/persistence/testdata/run-status.json @@ -8,7 +8,7 @@ "lastTestRecord": 3, "lastVespaLogTimestamp": 1196676930000432, "noNodesDownSince": 321321321321, - "convergenceSummary": [1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89], + "convergenceSummary": [1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144], "testerCertificate": "-----BEGIN CERTIFICATE-----\nMIIBEzCBu6ADAgECAgEBMAoGCCqGSM49BAMEMBQxEjAQBgNVBAMTCW15c2Vydmlj\nZTAeFw0xOTA5MDYwNzM3MDZaFw0xOTA5MDcwNzM3MDZaMBQxEjAQBgNVBAMTCW15\nc2VydmljZTBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABM0JhD8fV2DlAkjQOGX3\nY50ryMBr3g2+v/uFiRoxJ1muuSOWYrW7HCQIGuzc04fa0QwtaX/voAZKCV51t6jF\n0fwwCgYIKoZIzj0EAwQDRwAwRAIgVbQ3Co1H4X0gmRrtXSyTU0HgBQu9PXHMmX20\n5MyyPSoCIBltOcmaPfdN03L3zqbqZ6PgUBWsvAHgiBzL3hrtJ+iy\n-----END CERTIFICATE-----", "steps": { "deployInitialReal": "unfinished", diff --git a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/dev-us-east-1-log-first-part.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/dev-us-east-1-log-first-part.json index fac963fd5eb..e1c2310ce7e 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/dev-us-east-1-log-first-part.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/dev-us-east-1-log-first-part.json @@ -68,6 +68,7 @@ "needRestart": 0, "restarting": 0, "upgradingOs": 0, + "upgradingFirmware": 0, "services": 1, "needNewConfig": 1 } diff --git 
a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/staging-test-log.json b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/staging-test-log.json index 5559ac952a2..273887c26c4 100644 --- a/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/staging-test-log.json +++ b/controller-server/src/test/java/com/yahoo/vespa/hosted/controller/restapi/application/responses/staging-test-log.json @@ -152,6 +152,7 @@ "needRestart": 0, "restarting": 0, "upgradingOs": 0, + "upgradingFirmware": 0, "services": 1, "needNewConfig": 1 } |