diff options
Diffstat (limited to 'node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java')
-rw-r--r-- | node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java | 94 |
1 files changed, 81 insertions, 13 deletions
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java index 3d7db9a1f96..4d75b8a5acc 100644 --- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java +++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/os/OsVersionsTest.java @@ -8,6 +8,7 @@ import com.yahoo.config.provision.HostSpec; import com.yahoo.config.provision.NodeResources; import com.yahoo.config.provision.NodeType; import com.yahoo.test.ManualClock; +import com.yahoo.vespa.flags.Flags; import com.yahoo.vespa.flags.PermanentFlags; import com.yahoo.vespa.hosted.provision.Node; import com.yahoo.vespa.hosted.provision.NodeList; @@ -273,35 +274,35 @@ public class OsVersionsTest { versions.resumeUpgradeOf(NodeType.host, true); // One host starts rebuilding - assertEquals(1, hostNodes.get().rebuilding().size()); + assertEquals(1, hostNodes.get().rebuilding(false).size()); // We cannot rebuild another host until the current one is done versions.resumeUpgradeOf(NodeType.host, true); - NodeList hostsRebuilding = hostNodes.get().rebuilding(); + NodeList hostsRebuilding = hostNodes.get().rebuilding(false); assertEquals(1, hostsRebuilding.size()); completeRebuildOf(hostsRebuilding.asList(), NodeType.host); assertEquals(1, hostNodes.get().onOsVersion(version1).size()); // Second host is rebuilt versions.resumeUpgradeOf(NodeType.host, true); - completeRebuildOf(hostNodes.get().rebuilding().asList(), NodeType.host); + completeRebuildOf(hostNodes.get().rebuilding(false).asList(), NodeType.host); assertEquals(2, hostNodes.get().onOsVersion(version1).size()); // The remaining hosts complete their upgrade for (int i = 0; i < hostCount - 2; i++) { versions.resumeUpgradeOf(NodeType.host, true); - hostsRebuilding = hostNodes.get().rebuilding(); + hostsRebuilding = hostNodes.get().rebuilding(false); assertEquals(1, hostsRebuilding.size()); completeRebuildOf(hostsRebuilding.asList(), NodeType.host); } // All hosts upgraded and none are rebuilding - assertEquals(hostCount, hostNodes.get().onOsVersion(version1).not().rebuilding().size()); + assertEquals(hostCount, hostNodes.get().onOsVersion(version1).not().rebuilding(false).size()); assertEquals(hostCount, tester.nodeRepository().nodes().list(Node.State.active).size()); // Resuming after everything has upgraded has no effect versions.resumeUpgradeOf(NodeType.host, true); - assertEquals(0, hostNodes.get().rebuilding().size()); + assertEquals(0, hostNodes.get().rebuilding(false).size()); // Next version is within same major. Upgrade mechanism switches to delegated var version2 = Version.fromString("8.1"); @@ -319,12 +320,62 @@ public class OsVersionsTest { // Resuming upgrades reactivated host. Upgrade mechanism switches to rebuilding versions.resumeUpgradeOf(NodeType.host, true); - hostsRebuilding = hostNodes.get().rebuilding(); + hostsRebuilding = hostNodes.get().rebuilding(false); assertEquals(List.of(reactivatedHost), hostsRebuilding.asList()); completeRebuildOf(hostsRebuilding.asList(), NodeType.host); } @Test + public void upgrade_by_soft_rebuilding() { + int maxRebuilds = 3; + int hostCount = 12; + boolean softRebuild = true; + + tester.flagSource().withIntFlag(PermanentFlags.MAX_REBUILDS.id(), maxRebuilds); + tester.flagSource().withBooleanFlag(Flags.SOFT_REBUILD.id(), softRebuild); + var versions = new OsVersions(tester.nodeRepository(), true, Integer.MAX_VALUE); + + provisionInfraApplication(hostCount, infraApplication, NodeType.host, NodeResources.StorageType.remote); + Supplier<NodeList> hostNodes = () -> tester.nodeRepository().nodes().list().nodeType(NodeType.host); + + // New target is set + int hostsRebuilt = 0; + var version1 = Version.fromString("8.0"); + versions.setTarget(NodeType.host, version1, Duration.ZERO, false); + versions.resumeUpgradeOf(NodeType.host, true); + + // First batch of hosts start rebuilding + assertEquals(maxRebuilds, hostNodes.get().rebuilding(softRebuild).size()); + + // We cannot rebuild another host yet + versions.resumeUpgradeOf(NodeType.host, true); + NodeList hostsRebuilding = hostNodes.get().rebuilding(softRebuild); + assertEquals(maxRebuilds, hostsRebuilding.size()); + completeSoftRebuildOf(hostsRebuilding.asList()); + assertEquals(hostsRebuilt += maxRebuilds, hostNodes.get().onOsVersion(version1).size()); + + // Another batch is rebuilt + versions.resumeUpgradeOf(NodeType.host, true); + completeSoftRebuildOf(hostNodes.get().rebuilding(softRebuild).asList()); + assertEquals(hostsRebuilt += maxRebuilds, hostsRebuilt); + + // The remaining batches complete their upgrade + for (int i = 0; i < (hostCount - hostsRebuilt) / maxRebuilds; i++) { + versions.resumeUpgradeOf(NodeType.host, true); + hostsRebuilding = hostNodes.get().rebuilding(softRebuild); + assertEquals(maxRebuilds, hostsRebuilding.size()); + completeSoftRebuildOf(hostsRebuilding.asList()); + } + + // All hosts upgraded and none are rebuilding + assertEquals(hostCount, hostNodes.get().onOsVersion(version1).not().rebuilding(softRebuild).size()); + + // Resuming after everything has upgraded has no effect + versions.resumeUpgradeOf(NodeType.host, true); + assertEquals(0, hostNodes.get().rebuilding(softRebuild).size()); + } + + @Test public void upgrade_by_rebuilding_multiple_host_types() { tester.flagSource().withIntFlag(PermanentFlags.MAX_REBUILDS.id(), 1); var versions = new OsVersions(tester.nodeRepository(), false, Integer.MAX_VALUE); @@ -349,7 +400,7 @@ public class OsVersionsTest { for (int i = 0; i < hostCount; i++) { versions.resumeUpgradeOf(NodeType.host, true); versions.resumeUpgradeOf(NodeType.confighost, true); - NodeList hostsRebuilding = hosts.get().rebuilding(); + NodeList hostsRebuilding = hosts.get().rebuilding(false); assertEquals(2, hostsRebuilding.size()); completeRebuildOf(hostsRebuilding.nodeType(NodeType.host).asList(), NodeType.host); completeRebuildOf(hostsRebuilding.nodeType(NodeType.confighost).asList(), NodeType.confighost); @@ -382,7 +433,7 @@ public class OsVersionsTest { versions.resumeUpgradeOf(NodeType.host, true); NodeList allNodes = tester.nodeRepository().nodes().list(); List<Node> hostsRebuilding = allNodes.nodeType(NodeType.host) - .rebuilding() + .rebuilding(false) .sortedBy(Comparator.comparing(Node::hostname)) .asList(); List<Optional<ApplicationId>> owners = List.of(Optional.of(app1), Optional.of(app2), Optional.empty()); @@ -420,7 +471,7 @@ public class OsVersionsTest { // Since both applications now occupy all remaining hosts, we can only upgrade 1 at a time for (int i = 0; i < hostsOnOldVersion.size(); i++) { versions.resumeUpgradeOf(NodeType.host, true); - hostsRebuilding = hosts.get().rebuilding().asList(); + hostsRebuilding = hosts.get().rebuilding(false).asList(); assertEquals(1, hostsRebuilding.size()); replaceNodes(app1); replaceNodes(app2); @@ -430,7 +481,7 @@ public class OsVersionsTest { // Resuming upgrade has no effect as all hosts have upgraded versions.resumeUpgradeOf(NodeType.host, true); NodeList allHosts = hosts.get(); - assertEquals(0, allHosts.rebuilding().size()); + assertEquals(0, allHosts.rebuilding(false).size()); assertEquals(allHosts.size(), allHosts.onOsVersion(version1).size()); } @@ -454,7 +505,7 @@ public class OsVersionsTest { // Upgrades 1 infrastructure host at a time for (int i = 0; i < hostCount; i++) { versions.resumeUpgradeOf(NodeType.proxyhost, true); - List<Node> hostsRebuilding = hosts.get().rebuilding().asList(); + List<Node> hostsRebuilding = hosts.get().rebuilding(false).asList(); assertEquals(1, hostsRebuilding.size()); completeRebuildOf(hostsRebuilding, NodeType.proxyhost); } @@ -490,7 +541,13 @@ public class OsVersionsTest { } private List<Node> provisionInfraApplication(int nodeCount, ApplicationId application, NodeType nodeType) { - var nodes = tester.makeReadyNodes(nodeCount, new NodeResources(48, 128, 2000, 10), nodeType, 10); + return provisionInfraApplication(nodeCount, application, nodeType, NodeResources.StorageType.local); + } + + private List<Node> provisionInfraApplication(int nodeCount, ApplicationId application, NodeType nodeType, NodeResources.StorageType storageType) { + var nodes = tester.makeReadyNodes(nodeCount, new NodeResources(48, 128, 2000, 10, + NodeResources.DiskSpeed.fast, storageType), + nodeType, 10); tester.prepareAndActivateInfraApplication(application, nodeType); return nodes.stream() .map(Node::hostname) @@ -557,4 +614,15 @@ public class OsVersionsTest { }); } + private void completeSoftRebuildOf(List<Node> nodes) { + tester.patchNodes(nodes, (node) -> { + Optional<Version> wantedOsVersion = node.status().osVersion().wanted(); + assertFalse(node + " is not retiring", node.status().wantToRetire()); + assertTrue(node + " is rebuilding", node.status().wantToRebuild()); + node = node.withWantToRetire(false, false, false, Agent.system, + tester.clock().instant()); + return node.with(node.status().withOsVersion(node.status().osVersion().withCurrent(wantedOsVersion))); + }); + } + } |