// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.vespa.hosted.provision.provisioning;

import com.yahoo.config.provision.ApplicationId;
import com.yahoo.config.provision.ApplicationTransaction;
import com.yahoo.config.provision.Capacity;
import com.yahoo.config.provision.ClusterMembership;
import com.yahoo.config.provision.ClusterResources;
import com.yahoo.config.provision.ClusterSpec;
import com.yahoo.config.provision.Environment;
import com.yahoo.config.provision.Flavor;
import com.yahoo.config.provision.HostSpec;
import com.yahoo.config.provision.NodeAllocationException;
import com.yahoo.config.provision.NodeResources;
import com.yahoo.config.provision.NodeType;
import com.yahoo.config.provision.ProvisionLock;
import com.yahoo.config.provision.RegionName;
import com.yahoo.config.provision.SystemName;
import com.yahoo.config.provision.Zone;
import com.yahoo.config.provisioning.FlavorsConfig;
import com.yahoo.transaction.NestedTransaction;
import com.yahoo.vespa.applicationmodel.HostName;
import com.yahoo.vespa.curator.transaction.CuratorTransaction;
import com.yahoo.vespa.hosted.provision.Node;
import com.yahoo.vespa.hosted.provision.Node.State;
import com.yahoo.vespa.hosted.provision.NodeList;
import com.yahoo.vespa.hosted.provision.node.Agent;
import com.yahoo.vespa.hosted.provision.node.IP;
import org.junit.Test;

import java.time.Instant;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

/**
 * @author mortent
 */
public class DynamicAllocationTest {

    /**
     * Test relocation of nodes from spare hosts.
     * <p>
     * Sets up 4 hosts and allocates one container on each (from two different applications),
     * leaving getSpareCapacityProd() spares.
     * <p>
     * Checks that containers are relocated away from the getSpareCapacityProd() spares.
     * <p>
     * Initial allocation of app 1 and 2 --> final allocation (example using 2 spares):
     * <pre>
     * |    |    |    |    |        |    |    |    |    |
     * |    |    |    |    |  -->   | 2a | 2b |    |    |
     * | 1a | 1b | 2a | 2b |        | 1a | 1b |    |    |
     * </pre>
     */
    @Test
    public void relocate_nodes_from_spare_hosts() {
        int spareCount = 1;
        ProvisioningTester tester = new ProvisioningTester.Builder()
                .zone(new Zone(Environment.prod, RegionName.from("us-east")))
                .flavorsConfig(flavorsConfig())
                .spareCount(spareCount)
                .build();
        tester.makeReadyNodes(4, "host-small", NodeType.host, 32);
        tester.activateTenantHosts();
        List<Node> hosts = tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.host).asList();
        NodeResources flavor = new NodeResources(1, 4, 100, 1);

        // Application 1
        ApplicationId application1 = makeApplicationId("t1", "a1");
        ClusterSpec clusterSpec1 = clusterSpec("myContent.t1.a1");
        addAndAssignNode(application1, "1a", hosts.get(0).hostname(), clusterSpec1, flavor, 0, tester);
        addAndAssignNode(application1, "1b", hosts.get(1).hostname(), clusterSpec1, flavor, 1, tester);

        // Application 2
        ApplicationId application2 = makeApplicationId("t2", "a2");
        ClusterSpec clusterSpec2 = clusterSpec("myContent.t2.a2");
        addAndAssignNode(application2, "2a", hosts.get(2).hostname(), clusterSpec2, flavor, 3, tester);
        addAndAssignNode(application2, "2b", hosts.get(3).hostname(), clusterSpec2, flavor, 4, tester);

        // Redeploy both applications (to be agnostic about which hosts are picked as spares)
        deployApp(application1, clusterSpec1, flavor, tester, 2);
        deployApp(application2, clusterSpec2, flavor, tester, 2);

        // Assert that spareCount hosts are left without allocations (the spares)
        Set<String> hostsWithChildren = new HashSet<>();
        for (Node node : tester.nodeRepository().nodes().list(State.active).nodeType(NodeType.tenant).not().state(State.inactive).not().retired()) {
            hostsWithChildren.add(node.parentHostname().get());
        }
        assertEquals(4 - spareCount, hostsWithChildren.size());
    }
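
    // A possible helper (a sketch, not part of the original test class): the loop above,
    // which counts the distinct parent hosts of active tenant nodes, recurs in other tests
    // in this file and could be extracted as below. Only calls already used elsewhere in
    // this class are assumed.
    private long countHostsWithActiveChildren(ProvisioningTester tester) {
        return tester.nodeRepository().nodes().list(State.active)
                     .nodeType(NodeType.tenant)
                     .not().retired()
                     .stream()
                     .map(node -> node.parentHostname().get())
                     .distinct()
                     .count();
    }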

    /**
     * Test an allocation workflow:
     * <p>
     * 5 hosts of capacity 3 (2 spares)
     * - Allocate an app with 3 nodes
     * - Allocate two apps with 2 nodes each
     * - Fail a host and check redistribution
     */
    @Test
    public void relocate_failed_nodes() {
        ProvisioningTester tester = new ProvisioningTester.Builder()
                .zone(new Zone(Environment.prod, RegionName.from("us-east")))
                .flavorsConfig(flavorsConfig())
                .build();
        tester.makeReadyNodes(5, "host-small", NodeType.host, 32);
        tester.activateTenantHosts();
        NodeList hosts = tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.host);
        NodeResources resources = new NodeResources(1, 4, 100, 0.3);

        // Application 1
        ApplicationId application1 = makeApplicationId("t1", "a1");
        ClusterSpec clusterSpec1 = clusterSpec("myContent.t1.a1");
        deployApp(application1, clusterSpec1, resources, tester, 3);

        // Application 2
        ApplicationId application2 = makeApplicationId("t2", "a2");
        ClusterSpec clusterSpec2 = clusterSpec("myContent.t2.a2");
        deployApp(application2, clusterSpec2, resources, tester, 2);

        // Application 3
        ApplicationId application3 = makeApplicationId("t3", "a3");
        ClusterSpec clusterSpec3 = clusterSpec("myContent.t3.a3");
        deployApp(application3, clusterSpec3, resources, tester, 2);

        // App 2 and 3 should have been allocated to the same nodes - fail one of the parent hosts from there
        String parent = "host-1.yahoo.com";
        tester.nodeRepository().nodes().failOrMarkRecursively(parent, Agent.system, "Testing");

        // Redeploy all applications
        deployApp(application1, clusterSpec1, resources, tester, 3);
        deployApp(application2, clusterSpec2, resources, tester, 2);
        deployApp(application3, clusterSpec3, resources, tester, 2);

        Map<Integer, Integer> numberOfChildrenStat = new HashMap<>();
        for (Node host : hosts) {
            int nofChildren = tester.nodeRepository().nodes().list().childrenOf(host).size();
            numberOfChildrenStat.merge(nofChildren, 1, Integer::sum);
        }
        assertEquals(4, numberOfChildrenStat.get(2).intValue());
        assertEquals(1, numberOfChildrenStat.get(1).intValue());
    }
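
    // In the balancing test below, each application's node resources match the cpu:memory
    // ratio of exactly one host flavor: the cpu-heavy request (8 vcpu : 4 GB, i.e. 2:1)
    // matches the "cpu" flavor (40 : 20), the mem-heavy request (4 : 8) matches "mem"
    // (20 : 40), and the flat request (6 : 6) matches "flt" (30 : 30). Allocating on the
    // matching flavor therefore causes the least increase in resource skew, which is what
    // the assertions check.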
    @Test
    public void allocation_balancing() {
        // Here we test balancing between cpu and memory, and ignore disk
        ProvisioningTester tester = new ProvisioningTester.Builder()
                .zone(new Zone(Environment.prod, RegionName.from("us-east")))
                .flavorsConfig(flavorsConfig())
                .build();
        tester.makeReadyNodes(3, "flt", NodeType.host, 8); // cpu: 30, mem: 30
        tester.makeReadyNodes(3, "cpu", NodeType.host, 8); // cpu: 40, mem: 20
        tester.makeReadyNodes(3, "mem", NodeType.host, 8); // cpu: 20, mem: 40
        tester.activateTenantHosts();
        NodeResources fltResources = new NodeResources(6, 6, 10, 0.1);
        NodeResources cpuResources = new NodeResources(8, 4, 10, 0.1);
        NodeResources memResources = new NodeResources(4, 8, 10, 0.1);

        // Cpu-heavy application
        ApplicationId application1 = makeApplicationId("t1", "a1");
        deployApp(application1, clusterSpec("c"), cpuResources, tester, 2);
        tester.assertAllocatedOn("Cpu nodes cause least skew increase", "cpu", application1);

        // Mem-heavy application
        ApplicationId application2 = makeApplicationId("t2", "a2");
        deployApp(application2, clusterSpec("c"), memResources, tester, 2);
        tester.assertAllocatedOn("Mem nodes cause least skew increase", "mem", application2);

        // Flat application
        ApplicationId application3 = makeApplicationId("t3", "a3");
        deployApp(application3, clusterSpec("c"), fltResources, tester, 2);
        tester.assertAllocatedOn("Flat nodes cause least skew increase", "flt", application3);

        // Mem-heavy application whose nodes can't all be allocated on mem hosts
        ApplicationId application4 = makeApplicationId("t4", "a4");
        deployApp(application4, clusterSpec("c"), memResources, tester, 3);
        assertEquals(2, tester.hostFlavorCount("mem", application4));
        assertEquals(1, tester.hostFlavorCount("flt", application4));
    }

    /**
     * Test redeployment of nodes that violate spare headroom, but have no alternative
     * hosts to relocate to.
     * <p>
     * Sets up 2 hosts and allocates one app with a container on each, with 2 spares configured.
     * <p>
     * Initial allocation of app 1 --> final allocation:
     * <pre>
     * |    |    |        |    |    |
     * |    |    |  -->   |    |    |
     * | 1a | 1b |        | 1a | 1b |
     * </pre>
     */
    @Test
    public void do_not_relocate_nodes_from_spare_if_no_where_to_relocate_them() {
        ProvisioningTester tester = new ProvisioningTester.Builder()
                .zone(new Zone(Environment.prod, RegionName.from("us-east")))
                .flavorsConfig(flavorsConfig())
                .build();
        tester.makeReadyNodes(2, "host-small", NodeType.host, 32);
        tester.activateTenantHosts();
        List<Node> hosts = tester.nodeRepository().nodes().list(Node.State.active).nodeType(NodeType.host).asList();
        NodeResources flavor = new NodeResources(1, 4, 100, 1);

        // Application 1
        ApplicationId application1 = makeApplicationId("t1", "a1");
        ClusterSpec clusterSpec1 = clusterSpec("myContent.t1.a1");
        addAndAssignNode(application1, "1a", hosts.get(0).hostname(), clusterSpec1, flavor, 0, tester);
        addAndAssignNode(application1, "1b", hosts.get(1).hostname(), clusterSpec1, flavor, 1, tester);

        // Redeploy the application (to be agnostic about which hosts are picked as spares)
        deployApp(application1, clusterSpec1, flavor, tester, 2);

        // Assert that both hosts still have children: with nowhere to relocate to, no spares can be freed
        Set<String> hostsWithChildren = new HashSet<>();
        for (Node node : tester.nodeRepository().nodes().list(State.active).nodeType(NodeType.tenant).not().state(State.inactive).not().retired()) {
            hostsWithChildren.add(node.parentHostname().get());
        }
        assertEquals(2, hostsWithChildren.size());
    }
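
    // The test below asks for 6 nodes (3 nodes in 2 groups) with only 5 hosts available.
    // By pigeonhole some host would have to carry two of the nodes, which would put two
    // groups (or two nodes of one group) on the same parent host, so prepare() is
    // expected to fail with NodeAllocationException.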
    @Test(expected = NodeAllocationException.class)
    public void multiple_groups_are_on_separate_parent_hosts() {
        ProvisioningTester tester = new ProvisioningTester.Builder()
                .zone(new Zone(Environment.prod, RegionName.from("us-east")))
                .flavorsConfig(flavorsConfig())
                .build();
        tester.makeReadyNodes(5, "host-small", NodeType.host, 32);
        tester.activateTenantHosts();

        // Deploy an application with 6 nodes (3 nodes in 2 groups); only 5 hosts are available
        ApplicationId application1 = ProvisioningTester.applicationId();
        tester.prepare(application1, clusterSpec("myContent.t1.a1"), 6, 2, new NodeResources(1, 4, 100, 1));
        fail("Two groups have been allocated to the same parent host");
    }

    @Test
    public void spare_capacity_used_only_when_replacement() {
        ProvisioningTester tester = new ProvisioningTester.Builder()
                .zone(new Zone(Environment.prod, RegionName.from("us-east")))
                .flavorsConfig(flavorsConfig())
                .spareCount(2)
                .build();

        // Set up the test
        ApplicationId application1 = ProvisioningTester.applicationId();
        tester.makeReadyNodes(5, "host-small", NodeType.host, 32);
        tester.activateTenantHosts();
        NodeResources flavor = new NodeResources(1, 4, 100, 1);

        // Deploy the initial state (can deploy at most 3 nodes due to redundancy requirements)
        ClusterSpec clusterSpec = clusterSpec("myContent.t1.a1");
        List<HostSpec> hosts = tester.prepare(application1, clusterSpec, 3, 1, flavor);
        tester.activate(application1, Set.copyOf(hosts));

        List<Node> initialSpareCapacity = findSpareCapacity(tester);
        assertEquals(2, initialSpareCapacity.size());

        try {
            hosts = tester.prepare(application1, clusterSpec, 4, 1, flavor);
            fail("Was able to deploy with 4 nodes, should not be able to use spare capacity");
        } catch (NodeAllocationException ignored) { }

        tester.fail(hosts.get(0));
        hosts = tester.prepare(application1, clusterSpec, 3, 1, flavor);
        tester.activate(application1, Set.copyOf(hosts));

        List<Node> finalSpareCapacity = findSpareCapacity(tester);
        assertEquals(1, finalSpareCapacity.size());
    }

    @Test
    public void does_not_allocate_to_suspended_hosts() {
        ProvisioningTester tester = new ProvisioningTester.Builder()
                .zone(new Zone(Environment.prod, RegionName.from("us-east")))
                .flavorsConfig(flavorsConfig())
                .build();
        tester.makeReadyNodes(4, "host-small", NodeType.host, 32);
        tester.activateTenantHosts();

        HostName randomHost = new HostName(tester.nodeRepository().nodes().list(State.active).first().get().hostname());
        tester.orchestrator().suspend(randomHost);

        ApplicationId application1 = ProvisioningTester.applicationId();
        ClusterSpec clusterSpec = clusterSpec("myContent.t1.a1");
        NodeResources flavor = new NodeResources(1, 4, 100, 1);

        try {
            tester.prepare(application1, clusterSpec, 4, 1, flavor);
            fail("Should not be able to deploy 4 nodes on 4 hosts because 1 is suspended");
        } catch (NodeAllocationException ignored) { }

        // Resume the host, and the deployment goes through
        tester.orchestrator().resume(randomHost);
        tester.activate(application1, tester.prepare(application1, clusterSpec, 4, 1, flavor));
        Set<String> hostnames = tester.getNodes(application1, State.active).hostnames();

        // Verify that previously allocated nodes are not affected by host suspension
        tester.orchestrator().suspend(randomHost);
        tester.activate(application1, tester.prepare(application1, clusterSpec, 4, 1, flavor));
        assertEquals(hostnames, tester.getNodes(application1, State.active).hostnames());
    }

    @Test
    public void non_prod_zones_do_not_have_spares() {
        ProvisioningTester tester = new ProvisioningTester.Builder()
                .zone(new Zone(Environment.perf, RegionName.from("us-east")))
                .flavorsConfig(flavorsConfig())
                .build();
        tester.makeReadyNodes(3, "host-small", NodeType.host, 32);
        tester.activateTenantHosts();
        ApplicationId application1 = ProvisioningTester.applicationId();
        List<HostSpec> hosts = tester.prepare(application1, clusterSpec("myContent.t1.a1"), 3, 1, new NodeResources(1, 4, 100, 1));
        tester.activate(application1, Set.copyOf(hosts));

        List<Node> initialSpareCapacity = findSpareCapacity(tester);
        assertEquals(0, initialSpareCapacity.size());
    }
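
    // In the CD system's test zones, hosts may only have slow disks; the test below checks
    // that a request with the default (fast) disk speed can still be deployed on such hosts.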
    @Test
    public void cd_uses_slow_disk_hosts() {
        ProvisioningTester tester = new ProvisioningTester.Builder()
                .zone(new Zone(SystemName.cd, Environment.test, RegionName.from("us-east")))
                .flavorsConfig(flavorsConfig())
                .build();
        tester.makeReadyNodes(4, new Flavor(new NodeResources(1, 8, 120, 1, NodeResources.DiskSpeed.slow)), NodeType.host, 10, true);
        tester.activateTenantHosts();
        ApplicationId application1 = ProvisioningTester.applicationId();
        List<HostSpec> hosts = tester.prepare(application1, clusterSpec("myContent.t1.a1"), 3, 1, new NodeResources(1, 4, 100, 1));
        tester.activate(application1, Set.copyOf(hosts));
    }

    @Test(expected = NodeAllocationException.class)
    public void allocation_should_fail_when_host_is_not_in_allocatable_state() {
        ProvisioningTester tester = new ProvisioningTester.Builder()
                .zone(new Zone(Environment.prod, RegionName.from("us-east")))
                .flavorsConfig(flavorsConfig())
                .build();
        tester.makeProvisionedNodes(3, "host-small", NodeType.host, 32)
              .forEach(node -> tester.nodeRepository().nodes().fail(node.hostname(), Agent.system, getClass().getSimpleName()));

        ApplicationId application = ProvisioningTester.applicationId();
        tester.prepare(application, clusterSpec("myContent.t2.a2"), 2, 1, new NodeResources(1, 40, 100, 1));
    }

    @Test
    public void provision_dual_stack_containers() {
        ProvisioningTester tester = new ProvisioningTester.Builder()
                .zone(new Zone(Environment.prod, RegionName.from("us-east")))
                .flavorsConfig(flavorsConfig())
                .build();
        tester.makeReadyNodes(2, "host-large", NodeType.host, 10, true);
        tester.activateTenantHosts();

        ApplicationId application = ProvisioningTester.applicationId();
        List<HostSpec> hosts = tester.prepare(application, clusterSpec("myContent.t1.a1"), 2, 1, new NodeResources(1, 4, 100, 1));
        tester.activate(application, hosts);

        NodeList activeNodes = tester.nodeRepository().nodes().list().owner(application);
        assertEquals(List.of("127.0.127.2", "::2"), activeNodes.asList().get(1).ipConfig().primary());
        assertEquals(List.of("127.0.127.13", "::d"), activeNodes.asList().get(0).ipConfig().primary());
    }

    @Test
    public void provisioning_fast_disk_speed_do_not_get_slow_nodes() {
        provisionFastAndSlowThenDeploy(NodeResources.DiskSpeed.fast, true);
    }

    @Test
    public void provisioning_slow_disk_speed_do_not_get_fast_nodes() {
        provisionFastAndSlowThenDeploy(NodeResources.DiskSpeed.slow, true);
    }

    @Test
    public void provisioning_any_disk_speed_gets_slow_and_fast_nodes() {
        provisionFastAndSlowThenDeploy(NodeResources.DiskSpeed.any, false);
    }
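
    // With DiskSpeed.any, both slow-disk and fast-disk hosts satisfy the request; the test
    // below verifies that the allocator then prefers the slow-disk hosts, which are cheaper
    // (see the assertion message in node_resources_are_relaxed_in_dev further down).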
    @Test
    public void slow_disk_nodes_are_preferentially_allocated() {
        ProvisioningTester tester = new ProvisioningTester.Builder()
                .zone(new Zone(Environment.prod, RegionName.from("us-east")))
                .flavorsConfig(flavorsConfig())
                .build();
        tester.makeReadyNodes(2, new Flavor(new NodeResources(1, 8, 120, 1, NodeResources.DiskSpeed.fast)), NodeType.host, 10, true);
        tester.makeReadyNodes(2, new Flavor(new NodeResources(1, 8, 120, 1, NodeResources.DiskSpeed.slow)), NodeType.host, 10, true);
        tester.activateTenantHosts();

        ApplicationId application = ProvisioningTester.applicationId();
        ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("test")).vespaVersion("1").build();
        NodeResources resources = new NodeResources(1, 4, 100, 1, NodeResources.DiskSpeed.any);

        List<HostSpec> hosts = tester.prepare(application, cluster, 2, 1, resources);
        assertEquals(2, hosts.size());
        assertEquals(NodeResources.DiskSpeed.slow, hosts.get(0).advertisedResources().diskSpeed());
        assertEquals(NodeResources.DiskSpeed.slow, hosts.get(1).advertisedResources().diskSpeed());
        tester.activate(application, hosts);
    }

    private void provisionFastAndSlowThenDeploy(NodeResources.DiskSpeed requestDiskSpeed, boolean expectNodeAllocationFailure) {
        ProvisioningTester tester = new ProvisioningTester.Builder()
                .zone(new Zone(Environment.prod, RegionName.from("us-east")))
                .flavorsConfig(flavorsConfig())
                .build();
        tester.makeReadyNodes(2, new Flavor(new NodeResources(1, 8, 120, 1, NodeResources.DiskSpeed.fast)), NodeType.host, 10, true);
        tester.makeReadyNodes(2, new Flavor(new NodeResources(1, 8, 120, 1, NodeResources.DiskSpeed.slow)), NodeType.host, 10, true);
        tester.activateTenantHosts();

        ApplicationId application = ProvisioningTester.applicationId();
        ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("test")).vespaVersion("1").build();
        NodeResources resources = new NodeResources(1, 4, 100, 1, requestDiskSpeed);

        try {
            List<HostSpec> hosts = tester.prepare(application, cluster, 4, 1, resources);
            if (expectNodeAllocationFailure) fail("Expected node allocation to fail");
            assertEquals(4, hosts.size());
            tester.activate(application, hosts);
        } catch (NodeAllocationException e) {
            if ( ! expectNodeAllocationFailure) throw e;
        }
    }

    @Test
    public void node_resources_are_relaxed_in_dev() {
        ProvisioningTester tester = new ProvisioningTester.Builder()
                .zone(new Zone(Environment.dev, RegionName.from("us-east")))
                .flavorsConfig(flavorsConfig())
                .build();
        tester.makeReadyNodes(2, new Flavor(new NodeResources(1, 8, 120, 1, NodeResources.DiskSpeed.fast)), NodeType.host, 10, true);
        tester.makeReadyNodes(2, new Flavor(new NodeResources(1, 8, 120, 1, NodeResources.DiskSpeed.slow)), NodeType.host, 10, true);
        tester.activateTenantHosts();

        ApplicationId application = ProvisioningTester.applicationId();
        ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("test")).vespaVersion("1").build();
        NodeResources resources = new NodeResources(1, 4, 100, 1, NodeResources.DiskSpeed.fast);

        List<HostSpec> hosts = tester.prepare(application, cluster, 4, 1, resources);
        assertEquals(1, hosts.size());
        tester.activate(application, hosts);
        assertEquals(0.1, hosts.get(0).advertisedResources().vcpu(), 0.000001);
        assertEquals(0.1, hosts.get(0).advertisedResources().bandwidthGbps(), 0.000001);
        assertEquals("Slow nodes are allowed in dev and preferred because they are cheaper",
                     NodeResources.DiskSpeed.slow, hosts.get(0).advertisedResources().diskSpeed());
    }

    @Test
    public void switching_from_legacy_flavor_syntax_to_resources_does_not_cause_reallocation() {
        ProvisioningTester tester = new ProvisioningTester.Builder()
                .zone(new Zone(Environment.prod, RegionName.from("us-east")))
                .flavorsConfig(flavorsConfig())
                .build();
        tester.makeReadyNodes(2, new Flavor(new NodeResources(5, 20, 1400, 3)), NodeType.host, 10, true);
        tester.activateTenantHosts();

        ApplicationId application = ProvisioningTester.applicationId();
        ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("test")).vespaVersion("1").build();

        List<HostSpec> hosts1 = tester.prepare(application, cluster, Capacity.from(new ClusterResources(2, 1, NodeResources.fromLegacyName("d-2-8-500")), false, true));
        tester.activate(application, hosts1);

        NodeResources resources = new NodeResources(1.5, 8, 500, 0.3);
        List<HostSpec> hosts2 = tester.prepare(application, cluster, Capacity.from(new ClusterResources(2, 1, resources)));
        tester.activate(application, hosts2);

        assertEquals(hosts1, hosts2);
    }
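
    // The test below exercises switch exclusivity as a soft constraint: a cluster's nodes
    // are spread over distinct network switches when possible, but switches are reused,
    // both by a growing cluster and by other clusters, once no exclusive ones remain.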
    @Test
    public void prefer_exclusive_network_switch() {
        // Hosts are provisioned, without switch information
        ProvisioningTester tester = new ProvisioningTester.Builder()
                .zone(new Zone(Environment.prod, RegionName.from("us-east")))
                .flavorsConfig(flavorsConfig())
                .build();
        NodeResources hostResources = new NodeResources(32, 128, 2000, 10);
        List<Node> hosts0 = tester.makeReadyNodes(3, hostResources, NodeType.host, 5);
        tester.activateTenantHosts();

        // Application is deployed
        ClusterSpec cluster = ClusterSpec.request(ClusterSpec.Type.container, ClusterSpec.Id.from("test")).vespaVersion("1").build();
        NodeResources resources = new NodeResources(2, 4, 50, 1, NodeResources.DiskSpeed.any);
        ApplicationId app1 = ApplicationId.from("t1", "a1", "i1");
        tester.activate(app1, tester.prepare(app1, cluster, Capacity.from(new ClusterResources(2, 1, resources))));
        tester.assertSwitches(Set.of(), app1, cluster.id());

        // One host is provisioned on a known switch
        String switch0 = "switch0";
        {
            List<Node> hosts = tester.makeReadyNodes(1, hostResources, NodeType.host, 5);
            tester.activateTenantHosts();
            tester.patchNodes(hosts, (host) -> host.withSwitchHostname(switch0));
        }

        // Redeploy does not change the allocation, as a host with switch information is no better or worse
        // than hosts without switch information
        NodeList allocatedNodes = tester.nodeRepository().nodes().list().owner(app1);
        tester.activate(app1, tester.prepare(app1, cluster, Capacity.from(new ClusterResources(2, 1, resources))));
        assertEquals("Allocation unchanged", allocatedNodes, tester.nodeRepository().nodes().list().owner(app1));

        // Initial hosts are attached to the same switch
        tester.patchNodes(hosts0, (host) -> host.withSwitchHostname(switch0));

        // Redeploy does not change the allocation
        tester.activate(app1, tester.prepare(app1, cluster, Capacity.from(new ClusterResources(2, 1, resources))));
        assertEquals("Allocation unchanged", allocatedNodes, tester.nodeRepository().nodes().list().owner(app1));

        // One regular host and one slow-disk host are provisioned on the same switch
        String switch1 = "switch1";
        Node hostWithSlowDisk;
        {
            NodeResources slowDisk = hostResources.with(NodeResources.DiskSpeed.slow);
            List<Node> hosts = tester.makeReadyNodes(1, slowDisk, NodeType.host, 5);
            hosts.addAll(tester.makeReadyNodes(1, hostResources, NodeType.host, 5));
            tester.patchNodes(hosts, (host) -> host.withSwitchHostname(switch1));
            tester.activateTenantHosts();
            hostWithSlowDisk = hosts.get(0);
        }

        // Redeploy does not change the allocation, as we prefer to keep our already active nodes
        tester.activate(app1, tester.prepare(app1, cluster, Capacity.from(new ClusterResources(2, 1, resources))));
        tester.assertSwitches(Set.of(switch0), app1, cluster.id());

        // A node is retired
        tester.patchNode(tester.nodeRepository().nodes().list().owner(app1).asList().get(0),
                         (node) -> node.withWantToRetire(true, Agent.system, tester.clock().instant()));

        // Redeploy allocates a new node on a distinct switch, choosing the host with the slowest disk
        // (the cheapest) on that switch
        tester.activate(app1, tester.prepare(app1, cluster, Capacity.from(new ClusterResources(2, 1, resources))));
        tester.assertSwitches(Set.of(switch0, switch1), app1, cluster.id());
        assertTrue("Host with slow disk on " + switch1 + " is chosen",
                   tester.nodeRepository().nodes().list().owner(app1).state(State.active).stream()
                         .anyMatch(node -> node.hasParent(hostWithSlowDisk.hostname())));

        // Growing the cluster picks a new node on an exclusive switch
        String switch2 = "switch2";
        {
            List<Node> hosts = tester.makeReadyNodes(1, hostResources, NodeType.host, 5);
            tester.activateTenantHosts();
            tester.patchNodes(hosts, (host) -> host.withSwitchHostname(switch2));
        }
        tester.activate(app1, tester.prepare(app1, cluster, Capacity.from(new ClusterResources(3, 1, resources))));
        tester.assertSwitches(Set.of(switch0, switch1, switch2), app1, cluster.id());

        // Growing the cluster further can reuse switches, as we're now out of exclusive ones
        tester.activate(app1, tester.prepare(app1, cluster, Capacity.from(new ClusterResources(4, 1, resources))));
        tester.assertSwitches(Set.of(switch0, switch1, switch2), app1, cluster.id());

        // An additional cluster can reuse the switches of the existing cluster
        ClusterSpec cluster2 = ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from("content")).vespaVersion("1").build();
        tester.activate(app1, tester.prepare(app1, cluster2, Capacity.from(new ClusterResources(3, 1, resources))));
        tester.assertSwitches(Set.of(switch0, switch1, switch2), app1, cluster2.id());

        // Another application is deployed on exclusive switches
        ApplicationId app2 = ApplicationId.from("t2", "a2", "i2");
        tester.activate(app2, tester.prepare(app2, cluster, Capacity.from(new ClusterResources(3, 1, resources))));
        tester.assertSwitches(Set.of(switch0, switch1, switch2), app2, cluster.id());
    }

    private ApplicationId makeApplicationId(String tenant, String appName) {
        return ApplicationId.from(tenant, appName, "default");
    }

    // Prepares and activates nodeCount nodes with the given resources for the application
    private void deployApp(ApplicationId id, ClusterSpec spec, NodeResources flavor, ProvisioningTester tester, int nodeCount) {
        List<HostSpec> hostSpec = tester.prepare(id, spec, nodeCount, 1, flavor);
        tester.activate(id, new HashSet<>(hostSpec));
    }

    // Creates a tenant node on the given parent host and activates it directly, bypassing prepare
    private void addAndAssignNode(ApplicationId id, String hostname, String parentHostname, ClusterSpec clusterSpec, NodeResources flavor, int index, ProvisioningTester tester) {
        Node node1a = Node.create("open1", IP.Config.ofEmptyPool("127.0.233." + index), hostname,
                                  new Flavor(flavor), NodeType.tenant).parentHostname(parentHostname).build();
        ClusterMembership clusterMembership1 = ClusterMembership.from(
                clusterSpec.with(Optional.of(ClusterSpec.Group.from(0))), index); // Need to add group here so that group is serialized in node allocation
        Node node1aAllocation = node1a.allocate(id, clusterMembership1, node1a.resources(), Instant.now());

        tester.nodeRepository().nodes().addNodes(List.of(node1aAllocation), Agent.system);
        NestedTransaction transaction = new NestedTransaction().add(new CuratorTransaction(tester.getCurator()));
        tester.nodeRepository().nodes().activate(List.of(node1aAllocation),
                                                 new ApplicationTransaction(new ProvisionLock(id, () -> { }), transaction));
        transaction.commit();
    }

    private List<Node> findSpareCapacity(ProvisioningTester tester) {
        NodeList nodes = tester.nodeRepository().nodes().list();
        return nodes.nodeType(NodeType.host)
                    .matching(host -> nodes.childrenOf(host).size() == 0) // Hosts without children
                    .asList();
    }

    private FlavorsConfig flavorsConfig() {
        FlavorConfigBuilder b = new FlavorConfigBuilder();
        b.addFlavor("host-large", 6, 24, 800, 6, Flavor.Type.BARE_METAL);
        b.addFlavor("host-small", 3, 12, 400, 3, Flavor.Type.BARE_METAL);
        b.addFlavor("flt", 30, 30, 400, 3, Flavor.Type.BARE_METAL);
        b.addFlavor("cpu", 40, 20, 400, 3, Flavor.Type.BARE_METAL);
        b.addFlavor("mem", 20, 40, 400, 3, Flavor.Type.BARE_METAL);
        return b.build();
    }

    private ClusterSpec clusterSpec(String clusterId) {
        return ClusterSpec.request(ClusterSpec.Type.content, ClusterSpec.Id.from(clusterId)).vespaVersion("6.42").build();
    }

}