diff options
author | Torbjørn Smørgrav <smorgrav@users.noreply.github.com> | 2018-02-14 16:09:05 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-02-14 16:09:05 +0100 |
commit | fcb0a22a4ad2c57252bd93efc59287a9ef810644 (patch) | |
tree | 040227c7db2bb4e68a0c3c8b8cd68fb22aec9918 /node-admin | |
parent | 48cbaea4364894bae522ba8bff89305e2798fe4a (diff) | |
parent | 222730f5ff40dd66a349662c88c64548446a27a4 (diff) |
Merge pull request #5037 from vespa-engine/smorgrav/replace_nat_with_npt
Replace NAT with NPT
Diffstat (limited to 'node-admin')
3 files changed, 92 insertions, 38 deletions
diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java index 08eb69e9396..b30cac2476e 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java @@ -12,8 +12,8 @@ import com.yahoo.vespa.hosted.dockerapi.DockerImpl; import com.yahoo.vespa.hosted.dockerapi.DockerNetworkCreator; import com.yahoo.vespa.hosted.dockerapi.ProcessResult; import com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec; -import com.yahoo.vespa.hosted.node.admin.maintenance.acl.iptables.NATCommand; import com.yahoo.vespa.hosted.node.admin.component.Environment; +import com.yahoo.vespa.hosted.node.admin.maintenance.acl.iptables.NATCommand; import com.yahoo.vespa.hosted.node.admin.util.PrefixLogger; import java.io.IOException; @@ -45,6 +45,9 @@ public class DockerOperationsImpl implements DockerOperations { private static final String MANAGER_NAME = "node-admin"; + private static final String LOCAL_IPV6_PREFIX = "fd00::"; + private static final String DOCKER_CUSTOM_BRIDGE_NETWORK_NAME = "vespa-bridge"; + // Map of directories to mount and whether they should be writable by everyone private static final Map<String, Boolean> DIRECTORIES_TO_MOUNT = new HashMap<>(); @@ -120,11 +123,16 @@ public class DockerOperationsImpl implements DockerOperations { .withAddCapability("SYS_PTRACE") // Needed for gcore, pstack etc. 
.withAddCapability("SYS_ADMIN"); // Needed for perf - if (!docker.networkNATed()) { - logger.info("Network not nated - setting up with specific ip address on a macvlan"); + if (!docker.networkNPTed()) { command.withIpAddress(nodeInetAddress); command.withNetworkMode(DockerImpl.DOCKER_CUSTOM_MACVLAN_NETWORK_NAME); command.withVolume("/etc/hosts", "/etc/hosts"); // TODO This is probably not nessesary - review later + } else { + command.withIpAddress(NetworkPrefixTranslator.translate( + nodeInetAddress, + InetAddress.getByName(LOCAL_IPV6_PREFIX), + 64)); + command.withNetworkMode(DOCKER_CUSTOM_BRIDGE_NETWORK_NAME); } for (String pathInNode : DIRECTORIES_TO_MOUNT.keySet()) { @@ -144,17 +152,14 @@ public class DockerOperationsImpl implements DockerOperations { command.create(); if (isIPv6) { - if (!docker.networkNATed()) { + if (!docker.networkNPTed()) { docker.connectContainerToNetwork(containerName, "bridge"); } docker.startContainer(containerName); - setupContainerNetworkConnectivity(containerName, nodeInetAddress); + setupContainerNetworkConnectivity(containerName); } else { docker.startContainer(containerName); - if (docker.networkNATed()) { - setupContainerNetworkConnectivity(containerName, nodeInetAddress); - } } DIRECTORIES_TO_MOUNT.entrySet().stream().filter(Map.Entry::getValue).forEach(entry -> @@ -176,7 +181,7 @@ public class DockerOperationsImpl implements DockerOperations { logger.info("Deleting container " + containerName.asString()); docker.deleteContainer(containerName); - if (docker.networkNATed()) { + if (docker.networkNPTed()) { logger.info("Delete iptables NAT rules for " + containerName.asString()); try { InetAddress nodeInetAddress = environment.getInetAddressForHost(nodeSpec.hostname); @@ -222,16 +227,12 @@ public class DockerOperationsImpl implements DockerOperations { /** * For macvlan: + * <p> * Due to a bug in docker (https://github.com/docker/libnetwork/issues/1443), we need to manually set * IPv6 gateway in containers connected to more than 
one docker network - * - * For nat: - * Setup iptables NAT rules to map the hosts public ips to the containers */ - private void setupContainerNetworkConnectivity(ContainerName containerName, InetAddress externalAddress) throws IOException { - if (docker.networkNATed()) { - insertNAT(containerName, externalAddress); - } else { + private void setupContainerNetworkConnectivity(ContainerName containerName) throws IOException { + if (!docker.networkNPTed()) { InetAddress hostDefaultGateway = DockerNetworkCreator.getDefaultGatewayLinux(true); executeCommandInNetworkNamespace(containerName, "route", "-A", "inet6", "add", "default", "gw", hostDefaultGateway.getHostAddress(), "dev", "eth1"); @@ -275,7 +276,7 @@ public class DockerOperationsImpl implements DockerOperations { final String[] wrappedCommand = Stream.concat( Stream.of("sudo", "nsenter", String.format("--net=/host/proc/%d/ns/net", containerPid), "--"), Stream.of(command)) - .toArray(String[]::new); + .toArray(String[]::new); try { Pair<Integer, String> result = processExecuter.exec(wrappedCommand); @@ -327,25 +328,4 @@ public class DockerOperationsImpl implements DockerOperations { public void deleteUnusedDockerImages() { docker.deleteUnusedDockerImages(); } - - /** - * Only insert NAT rules if they don't exist (or else they compounded) - */ - private void insertNAT(ContainerName containerName, InetAddress externalAddress) throws IOException { - PrefixLogger logger = PrefixLogger.getNodeAgentLogger(DockerOperationsImpl.class, containerName); - String ipv6Str = docker.getGlobalIPv6Address(containerName); - - // Check if exist - String checkCommand = NATCommand.check(externalAddress, InetAddress.getByName(ipv6Str)); - Pair<Integer, String> result = processExecuter.exec(checkCommand); - if (result.getFirst() == 0 ) return; - - // Setup NAT - String natCommand = NATCommand.insert(externalAddress, InetAddress.getByName(ipv6Str)); - logger.info("Setting up NAT rules: " + natCommand); - result = 
/**
 * Rewrites the network prefix of an IPv6 address, for networks where the public prefix is
 * translated to a private one (NPT).
 *
 * @author smorgrav
 */
class NetworkPrefixTranslator {

    private static final int IPV6_ADDRESS_BYTES = 16;
    private static final int IPV6_ADDRESS_BITS = IPV6_ADDRESS_BYTES * Byte.SIZE;

    /**
     * For NPTed networks we want to find the private address from a public one.
     *
     * <p>The result consists of the first {@code subnetSize} bits of {@code prefix} followed by the
     * remaining bits of {@code address}. The split is done per bit, so prefix lengths that are not
     * a multiple of 8 are honoured as well. The host name of {@code address} is carried over to the
     * returned address.
     *
     * @param address    The original address to translate, must be an IPv6 address
     * @param prefix     The prefix address, must be an IPv6 address
     * @param subnetSize in bits - e.g. a /64 subnet equals 64 bits; must be within 0..128
     * @return The translated address
     * @throws NullPointerException     if {@code address} or {@code prefix} is null
     * @throws IllegalArgumentException if either address is not 16 bytes long, if
     *                                  {@code subnetSize} is out of range, or if the combined bytes
     *                                  do not form an IPv6 address
     */
    static Inet6Address translate(InetAddress address, InetAddress prefix, int subnetSize) {
        byte[] originalAddress = ipv6Bytes(address, "address");
        byte[] prefixAddress = ipv6Bytes(prefix, "prefix");
        if (subnetSize < 0 || subnetSize > IPV6_ADDRESS_BITS) {
            throw new IllegalArgumentException(
                    "subnetSize must be between 0 and " + IPV6_ADDRESS_BITS + " bits, was " + subnetSize);
        }

        int wholePrefixBytes = subnetSize / Byte.SIZE;
        int trailingPrefixBits = subnetSize % Byte.SIZE;

        // Bytes fully covered by the prefix come from 'prefix', everything else from 'address'.
        byte[] translatedAddress = new byte[IPV6_ADDRESS_BYTES];
        System.arraycopy(prefixAddress, 0, translatedAddress, 0, wholePrefixBytes);
        System.arraycopy(originalAddress, wholePrefixBytes, translatedAddress, wholePrefixBytes,
                IPV6_ADDRESS_BYTES - wholePrefixBytes);

        // A prefix length that ends inside a byte: merge the high bits of the prefix byte with the
        // low bits of the address byte instead of dropping the partial prefix bits.
        if (trailingPrefixBits != 0) {
            int prefixMask = (0xFF << (Byte.SIZE - trailingPrefixBits)) & 0xFF;
            int mergedByte = (prefixAddress[wholePrefixBytes] & prefixMask)
                    | (originalAddress[wholePrefixBytes] & ~prefixMask);
            translatedAddress[wholePrefixBytes] = (byte) mergedByte;
        }

        InetAddress translated;
        try {
            // NOTE(review): getHostName() may perform a reverse lookup when 'address' was created
            // from a literal without a host name - confirm this is acceptable for callers.
            translated = InetAddress.getByAddress(address.getHostName(), translatedAddress);
        } catch (UnknownHostException e) {
            // Only thrown for an illegal array length, which cannot happen for a 16 byte array.
            throw new IllegalStateException("Failed to build address from translated bytes", e);
        }

        // getByAddress hands back an Inet4Address for IPv4-mapped byte patterns; report that
        // explicitly rather than failing on the cast.
        if (!(translated instanceof Inet6Address)) {
            throw new IllegalArgumentException(
                    "Translated address " + translated.getHostAddress() + " is not an IPv6 address");
        }
        return (Inet6Address) translated;
    }

    /** Returns the raw bytes of the given address after verifying that it is a 16 byte address. */
    private static byte[] ipv6Bytes(InetAddress candidate, String role) {
        if (candidate == null) {
            throw new NullPointerException(role + " must not be null");
        }
        byte[] raw = candidate.getAddress();
        if (raw.length != IPV6_ADDRESS_BYTES) {
            throw new IllegalArgumentException(
                    role + " must be an IPv6 address, was " + candidate.getHostAddress());
        }
        return raw;
    }
}