diff options
7 files changed, 66 insertions, 15 deletions
diff --git a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/Docker.java b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/Docker.java index bc94c39d135..b30b1f96dbb 100644 --- a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/Docker.java +++ b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/Docker.java @@ -45,6 +45,10 @@ public interface Docker { Map<String, Object> getBlkioStats(); } + default boolean networkNATted() { + return false; + } + Optional<ContainerStats> getContainerStats(ContainerName containerName); void startContainer(ContainerName containerName); @@ -113,5 +117,5 @@ public interface Docker { */ ProcessResult executeInContainerAsRoot(ContainerName containerName, Long timeoutSeconds, String... command); - + String getGlobalIPv6Address(ContainerName name); } diff --git a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerImpl.java b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerImpl.java index fa093e0b4dc..e180dcffe47 100644 --- a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerImpl.java +++ b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerImpl.java @@ -4,6 +4,7 @@ package com.yahoo.vespa.hosted.dockerapi; import com.github.dockerjava.api.DockerClient; import com.github.dockerjava.api.command.ExecCreateCmdResponse; import com.github.dockerjava.api.command.ExecStartCmd; +import com.github.dockerjava.api.command.InspectContainerCmd; import com.github.dockerjava.api.command.InspectContainerResponse; import com.github.dockerjava.api.command.InspectExecResponse; import com.github.dockerjava.api.command.InspectImageResponse; @@ -125,15 +126,23 @@ public class DockerImpl implements Docker { Duration minAgeToDelete = Duration.ofMinutes(config.imageGCMinTimeToLiveMinutes()); dockerImageGC = Optional.of(new DockerImageGarbageCollector(minAgeToDelete)); - try { - setupDockerNetworkIfNeeded(); - } catch (Exception e) { - throw new DockerException("Could not setup docker network", e); + + if (!config.networkNATted()) { + try { + setupDockerNetworkIfNeeded(); + } catch (Exception e) { + throw new DockerException("Could not setup docker network", e); + } } } } } + @Override + public boolean networkNATted() { + return config.networkNATted(); + } + static DefaultDockerClientConfig.Builder buildDockerClientConfig(DockerConfig config) { DefaultDockerClientConfig.Builder dockerConfigBuilder = new DefaultDockerClientConfig.Builder() .withDockerHost(config.uri()); @@ -393,6 +402,12 @@ public class DockerImpl implements Docker { return asContainer(containerName.asString()).findFirst(); } + @Override + public String getGlobalIPv6Address(ContainerName name) { + InspectContainerCmd cmd = dockerClient.inspectContainerCmd(name.asString()); + return cmd.exec().getNetworkSettings().getGlobalIPv6Address(); + } + private Stream<Container> asContainer(String container) { return inspectContainerCmd(container) .map(response -> diff --git a/docker-api/src/main/resources/configdefinitions/docker.def b/docker-api/src/main/resources/configdefinitions/docker.def index 5c6e52b2f63..20b156a9a6d 100644 --- a/docker-api/src/main/resources/configdefinitions/docker.def +++ b/docker-api/src/main/resources/configdefinitions/docker.def @@ -14,3 +14,5 @@ readTimeoutMillis int default = 1800000 # 30 min isRunningLocally bool default = false imageGCMinTimeToLiveMinutes int default = 45 + +networkNATted bool default = false diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java index d2863468ee7..ea4ae79ccfe 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java @@ -12,6 +12,7 @@ import com.yahoo.vespa.hosted.dockerapi.DockerImpl; import com.yahoo.vespa.hosted.dockerapi.DockerNetworkCreator; import com.yahoo.vespa.hosted.dockerapi.ProcessResult; import com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec; +import com.yahoo.vespa.hosted.node.admin.maintenance.acl.iptables.NATCommand; import com.yahoo.vespa.hosted.node.admin.util.Environment; import com.yahoo.vespa.hosted.node.admin.util.PrefixLogger; @@ -104,14 +105,13 @@ public class DockerOperationsImpl implements DockerOperations { String configServers = environment.getConfigServerUris().stream() .map(URI::getHost) .collect(Collectors.joining(",")); + Docker.CreateContainerCommand command = docker.createContainerCommand( nodeSpec.wantedDockerImage.get(), ContainerResources.from(nodeSpec.minCpuCores, nodeSpec.minMainMemoryAvailableGb), containerName, nodeSpec.hostname) .withManagedBy(MANAGER_NAME) - .withNetworkMode(DockerImpl.DOCKER_CUSTOM_MACVLAN_NETWORK_NAME) - .withIpAddress(nodeInetAddress) .withEnvironment("CONFIG_SERVER_ADDRESS", configServers) .withUlimit("nofile", 262_144, 262_144) .withUlimit("nproc", 32_768, 409_600) @@ -119,6 +119,12 @@ public class DockerOperationsImpl implements DockerOperations { .withAddCapability("SYS_PTRACE") // Needed for gcore, pstack etc. .withAddCapability("SYS_ADMIN"); // Needed for perf + if (!docker.networkNATted()) { + logger.info("Network not natted - setting up with specific ip address on a macvlan"); + command.withIpAddress(nodeInetAddress); + command.withNetworkMode(DockerImpl.DOCKER_CUSTOM_MACVLAN_NETWORK_NAME); + } + command.withVolume("/etc/hosts", "/etc/hosts"); for (String pathInNode : DIRECTORIES_TO_MOUNT.keySet()) { String pathInHost = environment.pathInHostFromPathInNode(containerName, pathInNode).toString(); @@ -137,11 +143,15 @@ public class DockerOperationsImpl implements DockerOperations { command.create(); if (isIPv6) { - docker.connectContainerToNetwork(containerName, "bridge"); + if (!docker.networkNATted()) { + docker.connectContainerToNetwork(containerName, "bridge"); + } + docker.startContainer(containerName); - setupContainerNetworkingWithScript(containerName); + setupContainerNetworkConnectivity(containerName, nodeInetAddress); } else { docker.startContainer(containerName); + setupContainerNetworkConnectivity(containerName, nodeInetAddress); } DIRECTORIES_TO_MOUNT.entrySet().stream().filter(Map.Entry::getValue).forEach(entry -> @@ -191,13 +201,24 @@ public class DockerOperationsImpl implements DockerOperations { } /** + * For macvlan: * Due to a bug in docker (https://github.com/docker/libnetwork/issues/1443), we need to manually set * IPv6 gateway in containers connected to more than one docker network + * + * For nat: + * Setup iptables NAT rules */ - private void setupContainerNetworkingWithScript(ContainerName containerName) throws IOException { - InetAddress hostDefaultGateway = DockerNetworkCreator.getDefaultGatewayLinux(true); - executeCommandInNetworkNamespace(containerName, - "route", "-A", "inet6", "add", "default", "gw", hostDefaultGateway.getHostAddress(), "dev", "eth1"); + private void setupContainerNetworkConnectivity(ContainerName containerName, InetAddress externalAddress) throws IOException { + if (docker.networkNATted()) { + String ipv6Str = docker.getGlobalIPv6Address(containerName); + String natCommand = NATCommand.create(externalAddress, InetAddress.getByName(ipv6Str), "eth0"); + PrefixLogger logger = PrefixLogger.getNodeAgentLogger(DockerOperationsImpl.class, containerName); + logger.info("Please setup these rules:" + natCommand); + } else { + InetAddress hostDefaultGateway = DockerNetworkCreator.getDefaultGatewayLinux(true); + executeCommandInNetworkNamespace(containerName, + "route", "-A", "inet6", "add", "default", "gw", hostDefaultGateway.getHostAddress(), "dev", "eth1"); + } } @Override diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/iptables/NATCommand.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/iptables/NATCommand.java index 87bb5fddf23..05ccaebe04b 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/iptables/NATCommand.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/maintenance/acl/iptables/NATCommand.java @@ -17,7 +17,7 @@ public class NATCommand implements Command { private final String snatCommand; private final String dnatCommand; - NATCommand(InetAddress externalIp, InetAddress internalIp, String iface) { + public NATCommand(InetAddress externalIp, InetAddress internalIp, String iface) { String command = externalIp instanceof Inet6Address ? "ip6tables" : "iptables"; this.snatCommand = String.format("%s -t nat -A POSTROUTING -o %s -s %s -j SNAT --to %s", command, @@ -39,4 +39,8 @@ public class NATCommand implements Command { @Override public String asString(String commandName) { return asString(); } + + public static String create(InetAddress externalIp, InetAddress internalIp, String iface) { + return new NATCommand(externalIp, internalIp, iface).asString(); + } } diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerMock.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerMock.java index 7a5d713936d..8a4c4fd8c88 100644 --- a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerMock.java +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/integrationTests/DockerMock.java @@ -162,6 +162,11 @@ public class DockerMock implements Docker { } + @Override + public String getGlobalIPv6Address(ContainerName name) { + return "2001:db8:1:2:0:242:ac13:2"; + } + public static class StartContainerCommandMock implements CreateContainerCommand { @Override public CreateContainerCommand withLabel(String name, String value) { diff --git a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java index d0149be2747..12d84a05750 100644 --- a/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java +++ b/node-repository/src/main/java/com/yahoo/vespa/hosted/provision/provisioning/NodePrioritizer.java @@ -177,7 +177,7 @@ public class NodePrioritizer { if (node.type() == NodeType.host && node.state() == Node.State.active) { boolean conflictingCluster = false; NodeList list = new NodeList(allNodes); - NodeList childrenWithSameApp = list.childNodes(node).owner(appId); + NodeList childrenWithSameApp = list.childNodes(node).owner(appId); for (Node child : childrenWithSameApp.asList()) { // Look for nodes from the same cluster if (child.allocation().get().membership().cluster().id().equals(clusterSpec.id())) { |