summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@gmail.com> 2021-02-07 23:06:18 +0100
committer Jon Bratseth <bratseth@gmail.com> 2021-02-07 23:06:18 +0100
commit bf9901192ff27633f652530f9895f6f93e751000 (patch)
tree adbaa1a54c2ee9d224c0bceeea8ca0bac65e17c9
parent aa95b558c50813d237dad0e28e23241968443b52 (diff)
Decide acls in load balancers
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java 108
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/lb/LoadBalancers.java 117
-rw-r--r-- node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodeAclResponse.java 2
-rw-r--r-- node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AclProvisioningTest.java 16
4 files changed, 126 insertions, 117 deletions
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
index c1af58ac820..7df8d9d4920 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/NodeRepository.java
@@ -20,8 +20,6 @@ import com.yahoo.vespa.curator.Curator;
import com.yahoo.vespa.flags.FlagSource;
import com.yahoo.vespa.hosted.provision.Node.State;
import com.yahoo.vespa.hosted.provision.applications.Applications;
-import com.yahoo.vespa.hosted.provision.lb.LoadBalancer;
-import com.yahoo.vespa.hosted.provision.lb.LoadBalancerInstance;
import com.yahoo.vespa.hosted.provision.lb.LoadBalancers;
import com.yahoo.vespa.hosted.provision.maintenance.InfrastructureVersions;
import com.yahoo.vespa.hosted.provision.maintenance.NodeFailer;
@@ -49,17 +47,13 @@ import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
-import java.util.Comparator;
import java.util.EnumSet;
-import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
-import java.util.TreeSet;
import java.util.function.BiFunction;
-import java.util.function.Predicate;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;
@@ -171,7 +165,7 @@ public class NodeRepository extends AbstractComponent {
this.containerImages = new ContainerImages(db, containerImage);
this.jobControl = new JobControl(new JobControlFlags(db, flagSource));
this.applications = new Applications(db);
- this.loadBalancers = new LoadBalancers(db);
+ this.loadBalancers = new LoadBalancers(db, this);
this.spareCount = spareCount;
rewriteNodes();
}
@@ -283,106 +277,6 @@ public class NodeRepository extends AbstractComponent {
public List<Node> getFailed() { return db.readNodes(State.failed); }
/**
- * Returns the ACL for the node (trusted nodes, networks and ports)
- */
- private NodeAcl getNodeAcl(Node node, NodeList candidates) {
- Set<Node> trustedNodes = new TreeSet<>(Comparator.comparing(Node::hostname));
- Set<Integer> trustedPorts = new LinkedHashSet<>();
- Set<String> trustedNetworks = new LinkedHashSet<>();
-
- // For all cases below, trust:
- // - SSH: If the Docker host has one container, and it is using the Docker host's network namespace,
- // opening up SSH to the Docker host is done here as a trusted port. For simplicity all nodes have
- // SSH opened (which is safe for 2 reasons: SSH daemon is not run inside containers, and NPT networks
- // will (should) not forward port 22 traffic to container).
- // - parent host (for health checks and metrics)
- // - nodes in same application
- // - load balancers allocated to application
- trustedPorts.add(22);
- candidates.parentOf(node).ifPresent(trustedNodes::add);
- node.allocation().ifPresent(allocation -> {
- trustedNodes.addAll(candidates.owner(allocation.owner()).asList());
- loadBalancers.list(allocation.owner()).asList().stream()
- .map(LoadBalancer::instance)
- .map(LoadBalancerInstance::networks)
- .forEach(trustedNetworks::addAll);
- });
-
- switch (node.type()) {
- case tenant:
- // Tenant nodes in other states than ready, trust:
- // - config servers
- // - proxy nodes
- // - parents of the nodes in the same application: If some of the nodes are on a different IP versions
- // or only a subset of them are dual-stacked, the communication between the nodes may be NATed
- // with via parent's IP address.
- trustedNodes.addAll(candidates.nodeType(NodeType.config).asList());
- trustedNodes.addAll(candidates.nodeType(NodeType.proxy).asList());
- node.allocation().ifPresent(allocation ->
- trustedNodes.addAll(candidates.parentsOf(candidates.owner(allocation.owner())).asList()));
-
- if (node.state() == State.ready) {
- // Tenant nodes in state ready, trust:
- // - All tenant nodes in zone. When a ready node is allocated to a an application there's a brief
- // window where current ACLs have not yet been applied on the node. To avoid service disruption
- // during this window, ready tenant nodes trust all other tenant nodes.
- trustedNodes.addAll(candidates.nodeType(NodeType.tenant).asList());
- }
- break;
-
- case config:
- // Config servers trust:
- // - all nodes
- // - port 4443 from the world
- trustedNodes.addAll(candidates.asList());
- trustedPorts.add(4443);
- break;
-
- case proxy:
- // Proxy nodes trust:
- // - config servers
- // - all connections from the world on 4080 (insecure tb removed), and 4443
- trustedNodes.addAll(candidates.nodeType(NodeType.config).asList());
- trustedPorts.add(443);
- trustedPorts.add(4080);
- trustedPorts.add(4443);
- break;
-
- case controller:
- // Controllers:
- // - port 4443 (HTTPS + Athenz) from the world
- // - port 443 (HTTPS + Okta) from the world
- // - port 80 (HTTP) from the world - for redirect to HTTPS/443 only
- trustedPorts.add(4443);
- trustedPorts.add(443);
- trustedPorts.add(80);
- break;
-
- default:
- illegal("Don't know how to create ACL for " + node + " of type " + node.type());
- }
-
- return new NodeAcl(node, trustedNodes, trustedNetworks, trustedPorts);
- }
-
- /**
- * Creates a list of node ACLs which identify which nodes the given node should trust
- *
- * @param node Node for which to generate ACLs
- * @param children Return ACLs for the children of the given node (e.g. containers on a Docker host)
- * @return List of node ACLs
- */
- public List<NodeAcl> getNodeAcls(Node node, boolean children) {
- NodeList candidates = list();
- if (children) {
- return candidates.childrenOf(node).asList().stream()
- .map(childNode -> getNodeAcl(childNode, candidates))
- .collect(Collectors.toUnmodifiableList());
- }
- return List.of(getNodeAcl(node, candidates));
- }
-
- /**
* Returns whether the zone managed by this node repository seems to be working.
* If too many nodes are not responding, there is probably some zone-wide issue
* and we should probably refrain from making changes to it.
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/lb/LoadBalancers.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/lb/LoadBalancers.java
index ff6a147b8c0..b9d825f51c9 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/lb/LoadBalancers.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/lb/LoadBalancers.java
@@ -2,9 +2,20 @@
package com.yahoo.vespa.hosted.provision.lb;
import com.yahoo.config.provision.ApplicationId;
+import com.yahoo.config.provision.NodeType;
+import com.yahoo.vespa.hosted.provision.Node;
+import com.yahoo.vespa.hosted.provision.NodeList;
+import com.yahoo.vespa.hosted.provision.NodeRepository;
+import com.yahoo.vespa.hosted.provision.node.NodeAcl;
import com.yahoo.vespa.hosted.provision.persistence.CuratorDatabaseClient;
+import java.util.Comparator;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.TreeSet;
import java.util.function.Predicate;
+import java.util.stream.Collectors;
/**
* @author bratseth
@@ -12,9 +23,11 @@ import java.util.function.Predicate;
public class LoadBalancers {
private final CuratorDatabaseClient db;
+ private final NodeRepository nodeRepository;
- public LoadBalancers(CuratorDatabaseClient db) {
+ public LoadBalancers(CuratorDatabaseClient db, NodeRepository nodeRepository) {
this.db = db;
+ this.nodeRepository = nodeRepository;
}
/** Returns a filterable list of all load balancers in this repository */
@@ -31,4 +44,106 @@ public class LoadBalancers {
return LoadBalancerList.copyOf(db.readLoadBalancers(predicate).values());
}
+ /**
+ * Returns the ACL for the node (trusted nodes, networks and ports)
+ */
+ private NodeAcl getNodeAcl(Node node, NodeList candidates) {
+ Set<Node> trustedNodes = new TreeSet<>(Comparator.comparing(Node::hostname));
+ Set<Integer> trustedPorts = new LinkedHashSet<>();
+ Set<String> trustedNetworks = new LinkedHashSet<>();
+
+ // For all cases below, trust:
+ // - SSH: If the Docker host has one container, and it is using the Docker host's network namespace,
+ // opening up SSH to the Docker host is done here as a trusted port. For simplicity all nodes have
+ // SSH opened (which is safe for 2 reasons: SSH daemon is not run inside containers, and NPT networks
+ // will (should) not forward port 22 traffic to container).
+ // - parent host (for health checks and metrics)
+ // - nodes in same application
+ // - load balancers allocated to application
+ trustedPorts.add(22);
+ candidates.parentOf(node).ifPresent(trustedNodes::add);
+ node.allocation().ifPresent(allocation -> {
+ trustedNodes.addAll(candidates.owner(allocation.owner()).asList());
+ list(allocation.owner()).asList()
+ .stream()
+ .map(LoadBalancer::instance)
+ .map(LoadBalancerInstance::networks)
+ .forEach(trustedNetworks::addAll);
+ });
+
+ switch (node.type()) {
+ case tenant:
+ // Tenant nodes in any state trust:
+ // - config servers
+ // - proxy nodes
+ // - parents of the nodes in the same application: If some of the nodes are on different IP versions,
+ // or only a subset of them are dual-stacked, the communication between the nodes may be NATed
+ // via the parent's IP address.
+ trustedNodes.addAll(candidates.nodeType(NodeType.config).asList());
+ trustedNodes.addAll(candidates.nodeType(NodeType.proxy).asList());
+ node.allocation().ifPresent(allocation ->
+ trustedNodes.addAll(candidates.parentsOf(candidates.owner(allocation.owner())).asList()));
+
+ if (node.state() == Node.State.ready) {
+ // Tenant nodes in state ready additionally trust:
+ // - All tenant nodes in zone. When a ready node is allocated to an application there's a brief
+ // window where current ACLs have not yet been applied on the node. To avoid service disruption
+ // during this window, ready tenant nodes trust all other tenant nodes.
+ trustedNodes.addAll(candidates.nodeType(NodeType.tenant).asList());
+ }
+ break;
+
+ case config:
+ // Config servers trust:
+ // - all nodes
+ // - port 4443 from the world
+ trustedNodes.addAll(candidates.asList());
+ trustedPorts.add(4443);
+ break;
+
+ case proxy:
+ // Proxy nodes trust:
+ // - config servers
+ // - all connections from the world on 443, 4080 (insecure, to be removed) and 4443
+ trustedNodes.addAll(candidates.nodeType(NodeType.config).asList());
+ trustedPorts.add(443);
+ trustedPorts.add(4080);
+ trustedPorts.add(4443);
+ break;
+
+ case controller:
+ // Controllers:
+ // - port 4443 (HTTPS + Athenz) from the world
+ // - port 443 (HTTPS + Okta) from the world
+ // - port 80 (HTTP) from the world - for redirect to HTTPS/443 only
+ trustedPorts.add(4443);
+ trustedPorts.add(443);
+ trustedPorts.add(80);
+ break;
+
+ default:
+ throw new IllegalArgumentException("Don't know how to create ACL for " + node +
+ " of type " + node.type());
+ }
+
+ return new NodeAcl(node, trustedNodes, trustedNetworks, trustedPorts);
+ }
+
+ /**
+ * Creates a list of node ACLs which identify which nodes the given node should trust
+ *
+ * @param node Node for which to generate ACLs
+ * @param children Return ACLs for the children of the given node (e.g. containers on a Docker host)
+ * @return List of node ACLs
+ */
+ public List<NodeAcl> getNodeAcls(Node node, boolean children) {
+ NodeList candidates = nodeRepository.list();
+ if (children) {
+ return candidates.childrenOf(node).asList().stream()
+ .map(childNode -> getNodeAcl(childNode, candidates))
+ .collect(Collectors.toUnmodifiableList());
+ }
+ return List.of(getNodeAcl(node, candidates));
+ }
+
}
diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodeAclResponse.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodeAclResponse.java
index 07e93111b6f..149e2f5d22b 100644
--- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodeAclResponse.java
+++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/restapi/NodeAclResponse.java
@@ -42,7 +42,7 @@ public class NodeAclResponse extends HttpResponse {
Node node = nodeRepository.getNode(hostname)
.orElseThrow(() -> new NotFoundException("No node with hostname '" + hostname + "'"));
- List<NodeAcl> acls = nodeRepository.getNodeAcls(node, aclsForChildren);
+ List<NodeAcl> acls = nodeRepository.loadBalancers().getNodeAcls(node, aclsForChildren);
Cursor trustedNodesArray = object.setArray("trustedNodes");
acls.forEach(nodeAcl -> toSlime(nodeAcl, trustedNodesArray));
diff --git a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AclProvisioningTest.java b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AclProvisioningTest.java
index 5da13f05763..7075f4dd80b 100644
--- a/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AclProvisioningTest.java
+++ b/node-repository/src/test/java/com/yahoo/vespa/hosted/provision/provisioning/AclProvisioningTest.java
@@ -52,7 +52,7 @@ public class AclProvisioningTest {
// Get trusted nodes for the first active node
Node node = activeNodes.get(0);
List<Node> host = node.parentHostname().flatMap(tester.nodeRepository()::getNode).map(List::of).orElseGet(List::of);
- Supplier<List<NodeAcl>> nodeAcls = () -> tester.nodeRepository().getNodeAcls(node, false);
+ Supplier<List<NodeAcl>> nodeAcls = () -> tester.nodeRepository().loadBalancers().getNodeAcls(node, false);
// Trusted nodes are active nodes in same application, proxy nodes and config servers
assertAcls(List.of(activeNodes, proxyNodes, configServers, host),
@@ -73,7 +73,7 @@ public class AclProvisioningTest {
// Get trusted nodes for a ready tenant node
Node node = tester.nodeRepository().getNodes(NodeType.tenant, Node.State.ready).get(0);
- List<NodeAcl> nodeAcls = tester.nodeRepository().getNodeAcls(node, false);
+ List<NodeAcl> nodeAcls = tester.nodeRepository().loadBalancers().getNodeAcls(node, false);
List<Node> tenantNodes = tester.nodeRepository().getNodes(NodeType.tenant);
// Trusted nodes are all proxy-, config-, and, tenant-nodes
@@ -95,7 +95,7 @@ public class AclProvisioningTest {
// Get trusted nodes for the first config server
Node node = tester.nodeRepository().getNode("cfg1")
.orElseThrow(() -> new RuntimeException("Failed to find cfg1"));
- List<NodeAcl> nodeAcls = tester.nodeRepository().getNodeAcls(node, false);
+ List<NodeAcl> nodeAcls = tester.nodeRepository().loadBalancers().getNodeAcls(node, false);
// Trusted nodes is all tenant nodes, all proxy nodes, all config servers and load balancer subnets
assertAcls(List.of(tenantNodes, proxyNodes, configServers), Set.of("10.2.3.0/24", "10.4.5.0/24"), nodeAcls);
@@ -116,7 +116,7 @@ public class AclProvisioningTest {
// Get trusted nodes for first proxy node
List<Node> proxyNodes = tester.nodeRepository().getNodes(zoneApplication);
Node node = proxyNodes.get(0);
- List<NodeAcl> nodeAcls = tester.nodeRepository().getNodeAcls(node, false);
+ List<NodeAcl> nodeAcls = tester.nodeRepository().loadBalancers().getNodeAcls(node, false);
// Trusted nodes is all config servers and all proxy nodes
assertAcls(List.of(proxyNodes, configServers), nodeAcls);
@@ -132,7 +132,7 @@ public class AclProvisioningTest {
List<Node> dockerNodes = tester.makeReadyVirtualDockerNodes(5, new NodeResources(1, 4, 10, 1),
dockerHostNodeUnderTest.hostname());
- List<NodeAcl> acls = tester.nodeRepository().getNodeAcls(dockerHostNodeUnderTest, true);
+ List<NodeAcl> acls = tester.nodeRepository().loadBalancers().getNodeAcls(dockerHostNodeUnderTest, true);
// ACLs for each container on the Docker host
assertFalse(dockerNodes.isEmpty());
@@ -156,7 +156,7 @@ public class AclProvisioningTest {
List<Node> controllers = tester.deploy(controllerApplication, Capacity.fromRequiredNodeType(NodeType.controller));
// Controllers and hosts all trust each other
- List<NodeAcl> controllerAcls = tester.nodeRepository().getNodeAcls(controllers.get(0), false);
+ List<NodeAcl> controllerAcls = tester.nodeRepository().loadBalancers().getNodeAcls(controllers.get(0), false);
assertAcls(List.of(controllers), controllerAcls);
assertEquals(Set.of(22, 80, 4443, 443), controllerAcls.get(0).trustedPorts());
}
@@ -184,7 +184,7 @@ public class AclProvisioningTest {
// ACL for nodes with allocation trust their respective load balancer networks, if any
for (var host : hosts) {
- var acls = tester.nodeRepository().getNodeAcls(host, true);
+ var acls = tester.nodeRepository().loadBalancers().getNodeAcls(host, true);
assertEquals(2, acls.size());
assertEquals(Set.of(), acls.get(0).trustedNetworks());
assertEquals(application, acls.get(1).node().allocation().get().owner());
@@ -197,7 +197,7 @@ public class AclProvisioningTest {
tester.makeConfigServers(3, "default", Version.fromString("6.123.456"));
List<Node> readyNodes = tester.makeReadyNodes(1, "default", NodeType.proxy);
- List<NodeAcl> nodeAcls = tester.nodeRepository().getNodeAcls(readyNodes.get(0), false);
+ List<NodeAcl> nodeAcls = tester.nodeRepository().loadBalancers().getNodeAcls(readyNodes.get(0), false);
assertEquals(3, nodeAcls.get(0).trustedNodes().size());
Iterator<Node> trustedNodes = nodeAcls.get(0).trustedNodes().iterator();