From 0a149c3822befc0a0925f88570f60e985fe5041c Mon Sep 17 00:00:00 2001 From: toby Date: Wed, 14 Feb 2018 11:49:24 +0100 Subject: Replace NAT implementation with NPT --- .../com/yahoo/vespa/hosted/dockerapi/Docker.java | 2 +- .../yahoo/vespa/hosted/dockerapi/DockerImpl.java | 2 +- .../node/admin/docker/DockerOperationsImpl.java | 52 ++++++---------------- 3 files changed, 16 insertions(+), 40 deletions(-) diff --git a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/Docker.java b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/Docker.java index 00493e3e016..e006d5aca4c 100644 --- a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/Docker.java +++ b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/Docker.java @@ -45,7 +45,7 @@ public interface Docker { Map getBlkioStats(); } - default boolean networkNATed() { + default boolean networkNPTed() { return false; } diff --git a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerImpl.java b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerImpl.java index 15e88f4f253..a72865e023a 100644 --- a/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerImpl.java +++ b/docker-api/src/main/java/com/yahoo/vespa/hosted/dockerapi/DockerImpl.java @@ -139,7 +139,7 @@ public class DockerImpl implements Docker { } @Override - public boolean networkNATed() { + public boolean networkNPTed() { return config.networkNATed(); } diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java index 08eb69e9396..5cf197bd233 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java @@ -12,8 +12,8 @@ import com.yahoo.vespa.hosted.dockerapi.DockerImpl; import 
com.yahoo.vespa.hosted.dockerapi.DockerNetworkCreator; import com.yahoo.vespa.hosted.dockerapi.ProcessResult; import com.yahoo.vespa.hosted.node.admin.ContainerNodeSpec; -import com.yahoo.vespa.hosted.node.admin.maintenance.acl.iptables.NATCommand; import com.yahoo.vespa.hosted.node.admin.component.Environment; +import com.yahoo.vespa.hosted.node.admin.maintenance.acl.iptables.NATCommand; import com.yahoo.vespa.hosted.node.admin.util.PrefixLogger; import java.io.IOException; @@ -120,11 +120,15 @@ public class DockerOperationsImpl implements DockerOperations { .withAddCapability("SYS_PTRACE") // Needed for gcore, pstack etc. .withAddCapability("SYS_ADMIN"); // Needed for perf - if (!docker.networkNATed()) { - logger.info("Network not nated - setting up with specific ip address on a macvlan"); + if (!docker.networkNPTed()) { + logger.info("Network not NPTed - setting up container with public ip address on a macvlan"); command.withIpAddress(nodeInetAddress); command.withNetworkMode(DockerImpl.DOCKER_CUSTOM_MACVLAN_NETWORK_NAME); command.withVolume("/etc/hosts", "/etc/hosts"); // TODO This is probably not nessesary - review later + } else { + logger.info("Network is NPTed - setting up container with private ip address"); + command.withIpAddress(nodeInetAddress); + command.withNetworkMode("vespa-bridge"); } for (String pathInNode : DIRECTORIES_TO_MOUNT.keySet()) { @@ -144,17 +148,14 @@ public class DockerOperationsImpl implements DockerOperations { command.create(); if (isIPv6) { - if (!docker.networkNATed()) { + if (!docker.networkNPTed()) { docker.connectContainerToNetwork(containerName, "bridge"); } docker.startContainer(containerName); - setupContainerNetworkConnectivity(containerName, nodeInetAddress); + setupContainerNetworkConnectivity(containerName); } else { docker.startContainer(containerName); - if (docker.networkNATed()) { - setupContainerNetworkConnectivity(containerName, nodeInetAddress); - } } 
DIRECTORIES_TO_MOUNT.entrySet().stream().filter(Map.Entry::getValue).forEach(entry -> @@ -176,7 +177,7 @@ public class DockerOperationsImpl implements DockerOperations { logger.info("Deleting container " + containerName.asString()); docker.deleteContainer(containerName); - if (docker.networkNATed()) { + if (docker.networkNPTed()) { logger.info("Delete iptables NAT rules for " + containerName.asString()); try { InetAddress nodeInetAddress = environment.getInetAddressForHost(nodeSpec.hostname); @@ -222,16 +223,12 @@ public class DockerOperationsImpl implements DockerOperations { /** * For macvlan: + *
<p>
* Due to a bug in docker (https://github.com/docker/libnetwork/issues/1443), we need to manually set * IPv6 gateway in containers connected to more than one docker network - * - * For nat: - * Setup iptables NAT rules to map the hosts public ips to the containers */ - private void setupContainerNetworkConnectivity(ContainerName containerName, InetAddress externalAddress) throws IOException { - if (docker.networkNATed()) { - insertNAT(containerName, externalAddress); - } else { + private void setupContainerNetworkConnectivity(ContainerName containerName) throws IOException { + if (!docker.networkNPTed()) { InetAddress hostDefaultGateway = DockerNetworkCreator.getDefaultGatewayLinux(true); executeCommandInNetworkNamespace(containerName, "route", "-A", "inet6", "add", "default", "gw", hostDefaultGateway.getHostAddress(), "dev", "eth1"); @@ -275,7 +272,7 @@ public class DockerOperationsImpl implements DockerOperations { final String[] wrappedCommand = Stream.concat( Stream.of("sudo", "nsenter", String.format("--net=/host/proc/%d/ns/net", containerPid), "--"), Stream.of(command)) - .toArray(String[]::new); + .toArray(String[]::new); try { Pair result = processExecuter.exec(wrappedCommand); @@ -327,25 +324,4 @@ public class DockerOperationsImpl implements DockerOperations { public void deleteUnusedDockerImages() { docker.deleteUnusedDockerImages(); } - - /** - * Only insert NAT rules if they don't exist (or else they compounded) - */ - private void insertNAT(ContainerName containerName, InetAddress externalAddress) throws IOException { - PrefixLogger logger = PrefixLogger.getNodeAgentLogger(DockerOperationsImpl.class, containerName); - String ipv6Str = docker.getGlobalIPv6Address(containerName); - - // Check if exist - String checkCommand = NATCommand.check(externalAddress, InetAddress.getByName(ipv6Str)); - Pair result = processExecuter.exec(checkCommand); - if (result.getFirst() == 0 ) return; - - // Setup NAT - String natCommand = NATCommand.insert(externalAddress, 
InetAddress.getByName(ipv6Str)); - logger.info("Setting up NAT rules: " + natCommand); - result = processExecuter.exec(checkCommand); - if (result.getFirst() != 0 ) { - throw new IOException("Unable to setup NAT rule - error message: " + result.getSecond()); - } - } } -- cgit v1.2.3 From d6624fca2b590536dcb67b1e600b5f9975a62117 Mon Sep 17 00:00:00 2001 From: toby Date: Wed, 14 Feb 2018 13:13:37 +0100 Subject: Start container on a NPTed network with a private address --- .../node/admin/docker/DockerOperationsImpl.java | 11 +++++-- .../node/admin/docker/NetworkPrefixTranslator.java | 38 ++++++++++++++++++++++ .../admin/docker/NetworkPrefixTranslatorTest.java | 36 ++++++++++++++++++++ 3 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/NetworkPrefixTranslator.java create mode 100644 node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/docker/NetworkPrefixTranslatorTest.java diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java index 5cf197bd233..96cde6f9f64 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java @@ -121,13 +121,16 @@ public class DockerOperationsImpl implements DockerOperations { .withAddCapability("SYS_ADMIN"); // Needed for perf if (!docker.networkNPTed()) { - logger.info("Network not NPTed - setting up container with public ip address on a macvlan"); + logger.info("Network is macvlan - setting up container with public ip address on a macvlan"); command.withIpAddress(nodeInetAddress); command.withNetworkMode(DockerImpl.DOCKER_CUSTOM_MACVLAN_NETWORK_NAME); command.withVolume("/etc/hosts", "/etc/hosts"); // TODO This is probably not nessesary - review later } else { 
logger.info("Network is NPTed - setting up container with private ip address"); - command.withIpAddress(nodeInetAddress); + command.withIpAddress(NetworkPrefixTranslator.translate( + nodeInetAddress, + InetAddress.getByName("fd00::"), + 64)); command.withNetworkMode("vespa-bridge"); } @@ -165,6 +168,10 @@ public class DockerOperationsImpl implements DockerOperations { } } + private InetAddress toPrivateSubnet(InetAddress nodeInetAddress) { + return null; + } + @Override public void removeContainer(final Container existingContainer, ContainerNodeSpec nodeSpec) { final ContainerName containerName = existingContainer.name; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/NetworkPrefixTranslator.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/NetworkPrefixTranslator.java new file mode 100644 index 00000000000..70c58def24f --- /dev/null +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/NetworkPrefixTranslator.java @@ -0,0 +1,38 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +/** + * @author smorgrav + */ +package com.yahoo.vespa.hosted.node.admin.docker; + +import java.net.Inet6Address; +import java.net.InetAddress; +import java.net.UnknownHostException; + +class NetworkPrefixTranslator { + + /** + * For NPTed networks we want to find the private address from a public. + * + * @param address The original address to translate + * @param prefix The prefix address + * @param subnetSize nof bits - e.g /64 subnet is 64 + * @return The translated address + */ + static Inet6Address translate(InetAddress address, InetAddress prefix, int subnetSize) { + + byte[] originalAddress = address.getAddress(); + byte[] prefixAddress = prefix.getAddress(); + byte[] translatedAddress = new byte[16]; + + for (int i = 0; i < 16; i++) { + translatedAddress[i] = i < subnetSize / 8 ? 
prefixAddress[i] : originalAddress[i]; + } + + try { + return (Inet6Address) InetAddress.getByAddress(address.getHostName(), translatedAddress); + } catch (UnknownHostException e) { + throw new RuntimeException(e); + } + } +} diff --git a/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/docker/NetworkPrefixTranslatorTest.java b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/docker/NetworkPrefixTranslatorTest.java new file mode 100644 index 00000000000..96afe685a61 --- /dev/null +++ b/node-admin/src/test/java/com/yahoo/vespa/hosted/node/admin/docker/NetworkPrefixTranslatorTest.java @@ -0,0 +1,36 @@ +// Copyright 2018 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +/** + * @author smorgrav + */ +package com.yahoo.vespa.hosted.node.admin.docker; + +import org.junit.Assert; +import org.junit.Test; + +import java.net.Inet6Address; +import java.net.InetAddress; +import java.net.UnknownHostException; + +public class NetworkPrefixTranslatorTest { + + @Test + public void translator_with_valid_parameters() throws UnknownHostException { + + // Test simplest possible address + Inet6Address original = (Inet6Address)InetAddress.getByName("2001:db8::1"); + Inet6Address prefix = (Inet6Address)InetAddress.getByName("fd00::"); + Inet6Address translated = NetworkPrefixTranslator.translate(original, prefix, 64); + Assert.assertEquals("fd00:0:0:0:0:0:0:1", translated.getHostAddress()); + + + // Test an actual aws address we use + original = (Inet6Address)InetAddress.getByName("2600:1f16:f34:5300:ccc6:1703:b7c2:369d"); + translated = NetworkPrefixTranslator.translate(original, prefix, 64); + Assert.assertEquals("fd00:0:0:0:ccc6:1703:b7c2:369d", translated.getHostAddress()); + + // Test different subnet size + translated = NetworkPrefixTranslator.translate(original, prefix, 48); + Assert.assertEquals("fd00:0:0:5300:ccc6:1703:b7c2:369d", translated.getHostAddress()); + } +} -- cgit v1.2.3 From 
222730f5ff40dd66a349662c88c64548446a27a4 Mon Sep 17 00:00:00 2001 From: toby Date: Wed, 14 Feb 2018 15:24:54 +0100 Subject: Remove unused code, move out static strings --- .../hosted/node/admin/docker/DockerOperationsImpl.java | 13 +++++-------- .../hosted/node/admin/docker/NetworkPrefixTranslator.java | 2 +- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java index 96cde6f9f64..b30cac2476e 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/DockerOperationsImpl.java @@ -45,6 +45,9 @@ public class DockerOperationsImpl implements DockerOperations { private static final String MANAGER_NAME = "node-admin"; + private static final String LOCAL_IPV6_PREFIX = "fd00::"; + private static final String DOCKER_CUSTOM_BRIDGE_NETWORK_NAME = "vespa-bridge"; + // Map of directories to mount and whether they should be writable by everyone private static final Map DIRECTORIES_TO_MOUNT = new HashMap<>(); @@ -121,17 +124,15 @@ public class DockerOperationsImpl implements DockerOperations { .withAddCapability("SYS_ADMIN"); // Needed for perf if (!docker.networkNPTed()) { - logger.info("Network is macvlan - setting up container with public ip address on a macvlan"); command.withIpAddress(nodeInetAddress); command.withNetworkMode(DockerImpl.DOCKER_CUSTOM_MACVLAN_NETWORK_NAME); command.withVolume("/etc/hosts", "/etc/hosts"); // TODO This is probably not nessesary - review later } else { - logger.info("Network is NPTed - setting up container with private ip address"); command.withIpAddress(NetworkPrefixTranslator.translate( nodeInetAddress, - InetAddress.getByName("fd00::"), + InetAddress.getByName(LOCAL_IPV6_PREFIX), 64)); - command.withNetworkMode("vespa-bridge"); + 
command.withNetworkMode(DOCKER_CUSTOM_BRIDGE_NETWORK_NAME); } for (String pathInNode : DIRECTORIES_TO_MOUNT.keySet()) { @@ -168,10 +169,6 @@ public class DockerOperationsImpl implements DockerOperations { } } - private InetAddress toPrivateSubnet(InetAddress nodeInetAddress) { - return null; - } - @Override public void removeContainer(final Container existingContainer, ContainerNodeSpec nodeSpec) { final ContainerName containerName = existingContainer.name; diff --git a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/NetworkPrefixTranslator.java b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/NetworkPrefixTranslator.java index 70c58def24f..a52dedb90e5 100644 --- a/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/NetworkPrefixTranslator.java +++ b/node-admin/src/main/java/com/yahoo/vespa/hosted/node/admin/docker/NetworkPrefixTranslator.java @@ -16,7 +16,7 @@ class NetworkPrefixTranslator { * * @param address The original address to translate * @param prefix The prefix address - * @param subnetSize nof bits - e.g /64 subnet is 64 + * @param subnetSize in bits - e.g a /64 subnet equals 64 bits * @return The translated address */ static Inet6Address translate(InetAddress address, InetAddress prefix, int subnetSize) { -- cgit v1.2.3