From 1a8910767d7b15ace84508b0b758a320f2a966bb Mon Sep 17 00:00:00 2001 From: "pierrecdn me@pierre-cheynier.net" Date: Thu, 26 Feb 2015 15:04:02 +0100 Subject: [PATCH 1/9] Add routing capabilities (use-cases : more than one container interface, multicast, etc.) --- README.md | 12 ++++++++++++ pipework | 23 +++++++++++++++++++---- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f8c3342..878d598 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ Pipework uses cgroups and namespace and works with "plain" LXC containers * [Peeking inside the private network](#peeking_inside) * [Setting container internal interface](#setting_internal) * [Using a different netmask](#different_netmask) +* [Setting a route on the internal interface](#route_internal) * [Setting a default gateway](#default_gateway) * [Connect a container to a local physical interface](#local_physical) * [Let the Docker host communicate over macvlan interfaces](#macvlan) @@ -157,6 +158,17 @@ after the IP address and subnet mask: pipework br1 $CONTAINERID 192.168.4.25/20@192.168.4.1 + +### Setting routes on the internal interface + +In you add more than one internal interface, you may want to add other routes than the default one. 
+This could be performed by adding network and masks after the gateway (comma-separated) + + pipework br1 $CONTAINERID 192.168.4.25/20@192.168.4.1 192.168.5.0/25,192.168.6.0/24 + +Please note that the last added internal interface will take the default route + + ### Connect a container to a local physical interface diff --git a/pipework b/pipework index 718491b..bbdf0b5 100755 --- a/pipework +++ b/pipework @@ -18,7 +18,15 @@ fi GUESTNAME=$2 IPADDR=$3 -MACADDR=$4 + +if echo $4 | grep -q : +then + MACADDR=$4 + ROUTES=$5 +else + MACADDR= + ROUTES=$4 +fi if echo $MACADDR | grep -q @ then @@ -31,6 +39,7 @@ fi [ "$IPADDR" ] || [ "$WAIT" ] || { echo "Syntax:" echo "pipework [-i containerinterface] /[@default_gateway] [macaddr][@vlan]" + echo "pipework [-i containerinterface] /[@default_gateway] [macaddr][@vlan] [route1,route2,...]" echo "pipework [-i containerinterface] dhcp [macaddr][@vlan]" echo "pipework --wait [-i containerinterface]" exit 1 @@ -275,9 +284,15 @@ else } ip netns exec $NSPID ip link set $CONTAINER_IFNAME up [ "$GATEWAY" ] && { - ip netns exec $NSPID ip route get $GATEWAY >/dev/null 2>&1 || \ - ip netns exec $NSPID ip route add $GATEWAY/32 dev $CONTAINER_IFNAME - ip netns exec $NSPID ip route replace default via $GATEWAY + ip netns exec $NSPID ip route get $GATEWAY >/dev/null 2>&1 || \ + ip netns exec $NSPID ip route add $GATEWAY/32 dev $CONTAINER_IFNAME + ip netns exec $NSPID ip route replace default via $GATEWAY + [ "$ROUTES" ] && { + ROUTES=`echo $ROUTES | tr ',' ' '`; + for ROUTE in $ROUTES; do + ip netns exec $NSPID ip route add $ROUTE via $GATEWAY dev $CONTAINER_IFNAME + done + } } fi From ed6841d2449042ced7d153e1ed23f17297b5faab Mon Sep 17 00:00:00 2001 From: "pierrecdn me@pierre-cheynier.net" Date: Thu, 26 Feb 2015 15:06:53 +0100 Subject: [PATCH 2/9] Fix typo in README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 878d598..1f0f487 100644 --- a/README.md +++ b/README.md @@ -15,8 
+15,8 @@ Pipework uses cgroups and namespace and works with "plain" LXC containers * [Peeking inside the private network](#peeking_inside) * [Setting container internal interface](#setting_internal) * [Using a different netmask](#different_netmask) -* [Setting a route on the internal interface](#route_internal) * [Setting a default gateway](#default_gateway) +* [Setting routes on the internal interface](#route_internal) * [Connect a container to a local physical interface](#local_physical) * [Let the Docker host communicate over macvlan interfaces](#macvlan) * [Wait for the network to be ready](#wait_ready) @@ -161,7 +161,7 @@ after the IP address and subnet mask: ### Setting routes on the internal interface -In you add more than one internal interface, you may want to add other routes than the default one. +In you add more than one internal interface, or perform specific use-cases, you may want to add other routes than the default one. This could be performed by adding network and masks after the gateway (comma-separated) pipework br1 $CONTAINERID 192.168.4.25/20@192.168.4.1 192.168.5.0/25,192.168.6.0/24 From ecefc53f78e87861205fe980f81f9b5c5b44b415 Mon Sep 17 00:00:00 2001 From: "pierrecdn me@pierre-cheynier.net" Date: Mon, 2 Mar 2015 15:11:37 +0100 Subject: [PATCH 3/9] Fix typo in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1f0f487..eeae95d 100644 --- a/README.md +++ b/README.md @@ -161,7 +161,7 @@ after the IP address and subnet mask: ### Setting routes on the internal interface -In you add more than one internal interface, or perform specific use-cases, you may want to add other routes than the default one. +If you add more than one internal interface, or perform specific use-cases, you may want to add other routes than the default one. 
This could be performed by adding network and masks after the gateway (comma-separated) pipework br1 $CONTAINERID 192.168.4.25/20@192.168.4.1 192.168.5.0/25,192.168.6.0/24 From 1c057be79651ccd6eaeaa056b8a0d7cf389f7bf4 Mon Sep 17 00:00:00 2001 From: "pierrecdn me@pierre-cheynier.net" Date: Mon, 2 Mar 2015 15:12:00 +0100 Subject: [PATCH 4/9] Remove one useless syntax help line I added --- pipework | 1 - 1 file changed, 1 deletion(-) diff --git a/pipework b/pipework index bbdf0b5..d722ecf 100755 --- a/pipework +++ b/pipework @@ -38,7 +38,6 @@ fi [ "$IPADDR" ] || [ "$WAIT" ] || { echo "Syntax:" - echo "pipework [-i containerinterface] /[@default_gateway] [macaddr][@vlan]" echo "pipework [-i containerinterface] /[@default_gateway] [macaddr][@vlan] [route1,route2,...]" echo "pipework [-i containerinterface] dhcp [macaddr][@vlan]" echo "pipework --wait [-i containerinterface]" From bff7fb1fcb42d8afee768a15d506df1d085ee0ed Mon Sep 17 00:00:00 2001 From: pierrecdn Date: Wed, 4 Mar 2015 19:45:45 +0100 Subject: [PATCH 5/9] Improvements : IPv6 / Secondary addresses / Arguments parsing like PR#27 / Verbose mode to trace all iproute2 calls / etc. 
--- README.md | 80 +++++--- pipework | 559 +++++++++++++++++++++++++++++++++--------------------- 2 files changed, 395 insertions(+), 244 deletions(-) diff --git a/README.md b/README.md index eeae95d..6501db7 100644 --- a/README.md +++ b/README.md @@ -24,9 +24,12 @@ Pipework uses cgroups and namespace and works with "plain" LXC containers * [DHCP](#dhcp) * [Specify a custom MAC address](#custom_mac) * [Virtual LAN (VLAN)](#vlan) +* [IPv6](#ipv6) +* [Secondary addresses](#secondary) * [Support Open vSwitch](#openvswitch) * [Support Infiniband](#infiniband) * [Cleanup](#cleanup) +* [Experimental](#experimental) @@ -76,7 +79,7 @@ Let's create two containers, running the web tier and the database tier: Now, bring superpowers to the web tier: - pipework br1 $APACHE 192.168.1.1/24 + pipework br1 $APACHE -a ip 192.168.1.1/24 This will: @@ -87,7 +90,7 @@ This will: Now (drum roll), let's do this: - pipework br1 $MYSQL 192.168.1.2/24 + pipework br1 $MYSQL -a ip 192.168.1.2/24 This will: @@ -107,7 +110,7 @@ you gave to Pipework cannot be found, Pipework will try to resolve it with `docker inspect`. This makes it even simpler to use: docker run -name web1 -d apache - pipework br1 web1 192.168.12.23/24 + pipework br1 web1 -a ip 192.168.12.23/24 @@ -123,7 +126,7 @@ VoilĂ ! ### Setting container internal interface ## By default pipework creates a new interface `eth1` inside the container. In case you want to change this interface name like `eth2`, e.g., to have more than one interface set by pipework, use: -`pipework br1 -i eth2 ...` +`pipework br1 web1 -i eth2 ...` **Note:**: for infiniband IPoIB interfaces, the default interface name is `ib0` and not `eth1`. @@ -135,7 +138,7 @@ tool; so you can append a subnet size using traditional CIDR notation. 
I.e.: - pipework br1 $CONTAINERID 192.168.4.25/20 + pipework br1 $CONTAINERID -a ip 192.168.4.25/20 Don't forget that all containers should use the same subnet size; pipework is not clever enough to use your specified subnet size for @@ -155,7 +158,7 @@ you want the container to use a specific outbound IP address. This can be automated by Pipework, by adding the gateway address after the IP address and subnet mask: - pipework br1 $CONTAINERID 192.168.4.25/20@192.168.4.1 + pipework br1 $CONTAINERID -a ip 192.168.4.25/20@192.168.4.1 @@ -164,7 +167,7 @@ after the IP address and subnet mask: If you add more than one internal interface, or perform specific use-cases, you may want to add other routes than the default one. This could be performed by adding network and masks after the gateway (comma-separated) - pipework br1 $CONTAINERID 192.168.4.25/20@192.168.4.1 192.168.5.0/25,192.168.6.0/24 + pipework br1 $CONTAINERID -a ip 192.168.4.25/20@192.168.4.1 -r 192.168.5.0/25,192.168.6.0/24 Please note that the last added internal interface will take the default route @@ -175,8 +178,8 @@ Please note that the last added internal interface will take the default route Let's pretend that you want to run two Hipache instances, listening on real interfaces eth2 and eth3, using specific (public) IP addresses. Easy! - pipework eth2 $(docker run -d hipache /usr/sbin/hipache) 50.19.169.157/24 - pipework eth3 $(docker run -d hipache /usr/sbin/hipache) 107.22.140.5/24 + pipework eth2 $(docker run -d hipache /usr/sbin/hipache) -a ip 50.19.169.157/24 + pipework eth3 $(docker run -d hipache /usr/sbin/hipache) -a ip 107.22.140.5/24 Note that this will use `macvlan` subinterfaces, so you can actually put multiple containers on the same physical interface. @@ -207,26 +210,14 @@ Then, you would start a container and assign it a macvlan interface the usual way: CID=$(docker run -d ...) 
- pipework eth0 $CID 10.1.1.234/24@10.1.1.254 + pipework eth0 $CID -a ip 10.1.1.234/24@10.1.1.254 ### Wait for the network to be ready -Sometimes, you want the extra network interface to be up and running *before* -starting your service. A dirty (and unreliable) solution would be to add -a `sleep` command before starting your service; but that could break in -"interesting" ways if the server happens to be a bit slower at one point. - -There is a better option: add the `pipework` script to your Docker image, -and before starting the service, call `pipework --wait`. It will wait -until the `eth1` interface is present and in `UP` operational state, -then exit gracefully. - -If you need to wait on an interface other than eth1, pass the -i flag like -this: - - pipework --wait -i ib0 +Since `docker create` allow to instantiate the container without starting it, +there is no more reason for pipework to provide tooling to wait for the network. ### Add the interface without an IP address @@ -236,7 +227,7 @@ container, you can use `0/0` as the IP address. The interface will be created, connected to the network, and assigned to the container, but without configuring an IP address: - pipework br1 $CONTAINERID 0/0 + pipework br1 $CONTAINERID -a link @@ -245,7 +236,7 @@ but without configuring an IP address: You can use DHCP to obtain the IP address of the new interface. Just specify `dhcp` instead of an IP address; for instance: - pipework eth1 $CONTAINERID dhcp + pipework eth1 $CONTAINERID -a dhcp The value of $CONTAINERID will be provided to the DHCP client to use as the hostname in the DHCP request. Depending on the configuration of @@ -279,7 +270,7 @@ If you need to specify the MAC address to be used (either by the `macvlan` subinterface, or the `veth` interface), no problem. 
Just add it as the command-line, as the last argument: - pipework eth0 $(docker run -d haproxy) 192.168.1.2/24 26:2e:71:98:60:8f + pipework eth0 $(docker run -d haproxy) -a ip 192.168.1.2/24 -m 26:2e:71:98:60:8f This can be useful if your network environment requires whitelisting your hardware addresses (some hosting providers do that), or if you want @@ -287,7 +278,7 @@ to obtain a specific address from your DHCP server. Also, some projects like [Orchestrator](https://github.com/cvlc/orchestrator) rely on static MAC-IPv6 bindings for DHCPv6: - pipework br0 $(docker run -d zerorpcworker) dhcp fa:de:b0:99:52:1c + pipework br0 $(docker run -d zerorpcworker) -a dhcp -m fa:de:b0:99:52:1c **Note:** if you generate your own MAC addresses, try remember those two simple rules: @@ -302,6 +293,8 @@ be `2`, `6`, `a`, or `e`. You can check [Wikipedia]( http://en.wikipedia.org/wiki/MAC_address) if you want even more details. **Note:** Setting the MAC address of an IPoIB interface is not supported. + + ### Virtual LAN (VLAN) @@ -315,7 +308,25 @@ bridges are currently not supported. The following will attach container zerorpcworker to the Open vSwitch bridge ovs0 and attach the container to VLAN ID 10. - pipework ovsbr0 $(docker run -d zerorpcworker) dhcp @10 + pipework ovsbr0 $(docker run -d zerorpcworker) -a dhcp -V @10 + + +### IPv6 + +IPv6 adressing is also supported, using the same options : + + pipework eth0 eth0 $(docker run -d haproxy) -a ip 2001:db8::beef/64@2001:db8::1 + +**Note:** Docker 1.5 feature + + +### Secondary addresses + +You can attach secondary addresses the container, using the action `sec_ip` instead of `ip` + + pipework eth0 eth0 $(docker run -d haproxy) -a sec_ip 192.168.1.2/24 + pipework eth0 eth0 $(docker run -d haproxy) -a sec_ip 2001:db8::beef/64 + pipework eth0 eth0 $(docker run -d haproxy) -a sec_ip 2001:db8::face/64 ### Support Open vSwitch @@ -324,7 +335,7 @@ If you want to attach a container to the Open vSwitch bridge, no problem. 
ovs-vsctl list-br ovsbr0 - pipework ovsbr0 $(docker run -d mysql /usr/sbin/mysqld_safe) 192.168.1.2/24 + pipework ovsbr0 $(docker run -d mysql /usr/sbin/mysqld_safe) -a ip 192.168.1.2/24 If the ovs bridge doesn't exist, it will be automatically created @@ -345,3 +356,12 @@ When a container is terminated (the last process of the net namespace exits), the network interfaces are garbage collected. The interface in the container is automatically destroyed, and the interface in the docker host (part of the bridge) is then destroyed as well. + + +### Experimental + +TBD + +- Tunnel interfaces (GRE/IPIP/IP6_TUNNEL) +- Clean OVS bridge + diff --git a/pipework b/pipework index d722ecf..9d7f627 100755 --- a/pipework +++ b/pipework @@ -1,100 +1,252 @@ #!/bin/sh set -e -case "$1" in - --wait) - WAIT=1 - ;; -esac +help() { + echo "Syntax: " + echo " pipework -a link" + echo " pipework -a ip /[@default_gateway]" + echo " pipework -a secip /[@default_gateway]" + echo " pipework -a ipip " + echo " pipework -a gre " + echo " pipework -a dhcp" + echo " pipework -c" + echo "The following options are available in any of the above commands:" + echo " -i|--interface (Container interface name)" + echo " -m|--mac (MAC address to set)" + echo " -M|--mtu (MTU to set on the container interface - be sure to have >= on host interface)" + echo " -V|--vlan (VLAN ID/tag to use - only supported by Open vSwitch bridge or direct physical interface)" + echo " -r|--route (Network routes to reach via container interface, comma-separated)" + echo " -v|--verbose (log activity)" + echo " -x|--trace (trace all bash commands)" + echo " -h|--help (print this help and exit)" + exit 1 +} -IFNAME=$1 +# Default values +CONTAINER_IFNAME="eth1" +VERBOSE=0 -# default value set further down if not set here -CONTAINER_IFNAME= -if [ "$2" = "-i" ]; then - CONTAINER_IFNAME=$3 - shift 2 -fi +# To debug iproute2 commands when verbose activated, maybe not a good idea to do it like this +run() { + [ $VERBOSE = 1 ] && { + 
echo $@ + } + eval $@ + return $? +} -GUESTNAME=$2 -IPADDR=$3 +# Check if the specifies kernel module exists and is loaded, exit if not +# Usage check_kernal_module MODULE_NAME +check_kernel_module() { + MODULE_NAME=$1 + modinfo $MODULE_NAME >/dev/null 2>&1 || { + echo "Warning: module $MODULE_NAME is required to perform this action" + exit 1 + } +} -if echo $4 | grep -q : -then - MACADDR=$4 - ROUTES=$5 -else - MACADDR= - ROUTES=$4 -fi +# Set IP config on a network namespace +# Usage set_netns_ip NSPID CONTAINER_IFNAME IPADDR GATEWAY ROUTES +set_netns_ip() { -if echo $MACADDR | grep -q @ -then - VLAN=$(echo $MACADDR | cut -d@ -f2) - MACADDR=$(echo $MACADDR | cut -d@ -f1) -else - VLAN= -fi + [ $# -lt 3 ] && { + echo "set_netns_ip : bad usage" + exit 1 + } -[ "$IPADDR" ] || [ "$WAIT" ] || { - echo "Syntax:" - echo "pipework [-i containerinterface] /[@default_gateway] [macaddr][@vlan] [route1,route2,...]" - echo "pipework [-i containerinterface] dhcp [macaddr][@vlan]" - echo "pipework --wait [-i containerinterface]" - exit 1 -} + # No POSIX-compliant mean to declare local variables, so be careful of the scope + # http://stackoverflow.com/questions/18597697/posix-compliant-way-to-scope-variables-to-a-function-in-a-shell-script + NSPID=$1 + CONTAINER_IFNAME=$2 + IPADDR=$3 + [ "$4" = "" ] || GATEWAY=$4 + [ "$5" = "" ] || ROUTES=$5 -# First step: determine type of first argument (bridge, physical interface...), skip if --wait set -if [ -z "$WAIT" ]; then - if [ -d /sys/class/net/$IFNAME ] + # Test IPv4/IPv6 + IPROUTE_PREFIX="ip -4" + echo $IPADDR | grep -q : && { + IPROUTE_PREFIX="ip -6" + } + + # Set IP/gateway/routes + run ip netns exec $NSPID $IPROUTE_PREFIX addr add $IPADDR dev $CONTAINER_IFNAME + [ "$GATEWAY" ] && { + run ip netns exec $NSPID $IPROUTE_PREFIX route delete default >/dev/null 2>&1 && true + } + run ip netns exec $NSPID $IPROUTE_PREFIX link set $CONTAINER_IFNAME up + [ "$GATEWAY" ] && { + run ip netns exec $NSPID $IPROUTE_PREFIX route get $GATEWAY 
>/dev/null 2>&1 || \ + run ip netns exec $NSPID $IPROUTE_PREFIX route add $GATEWAY/32 dev $CONTAINER_IFNAME + run ip netns exec $NSPID $IPROUTE_PREFIX route replace default via $GATEWAY + [ "$ROUTES" ] && { + ROUTES=$(echo $ROUTES | tr ',' ' ') + for ROUTE in $ROUTES; do + run ip netns exec $NSPID $IPROUTE_PREFIX route add $ROUTE via $GATEWAY dev $CONTAINER_IFNAME + done + } + } + + # Give our ARP neighbors a nudge about the new interface + if which arping > /dev/null 2>&1 then - if [ -d /sys/class/net/$IFNAME/bridge ] - then - IFTYPE=bridge - BRTYPE=linux - elif $(which ovs-vsctl >/dev/null 2>&1) && $(ovs-vsctl list-br|grep -q ^$IFNAME$) - then - IFTYPE=bridge - BRTYPE=openvswitch - elif [ $(cat /sys/class/net/$IFNAME/type) -eq 32 ]; # Infiniband IPoIB interface type 32 - then - IFTYPE=ipoib - # The IPoIB kernel module is fussy, set device name to ib0 if not overridden - CONTAINER_IFNAME=${CONTAINER_IFNAME:-ib0} - else IFTYPE=phys - fi + IPADDR=$(echo $IPADDR | cut -d/ -f1) + run ip netns exec $NSPID arping -c 1 -A -I $CONTAINER_IFNAME $IPADDR > /dev/null 2>&1 || true else - case "$IFNAME" in - br*) - IFTYPE=bridge - BRTYPE=linux + echo "Warning: arping not found; interface may not be immediately reachable" + fi +} + +## Parse args +if [ $# -lt 3 ] +then + help +fi + +IFNAME=$1 +shift +GUESTNAME=$1 +shift + +while [ $# -gt 0 ]; do + case "$1" in + --trace|-x) + # Active shell trace + set -x ;; - ovs*) - if ! $(which ovs-vsctl >/dev/null) + --verbose|-v) + # Active shell trace + VERBOSE=1 + ;; + --action|-a) + # Allows different levels of network plumbing (Link/Adressing/DHCP/...) + shift + ACTION=$1 + if [ "$ACTION" = "" ] then - echo "Need OVS installed on the system to create an ovs bridge" - exit 1 + help fi - IFTYPE=bridge - BRTYPE=openvswitch + case "$ACTION" in + # Collect args specific to actions + link) + ;; + ip|sec_ip) + shift + IPADDR=$1 + # Check if a subnet mask was provided. 
+ echo $IPADDR | grep -q / || { + echo "The IP address should include a netmask." + echo "Maybe you meant $IPADDR/24 ?" + exit 1 + } + # Check if a gateway address was provided. + if echo $IPADDR | grep -q @ + then + GATEWAY=$(echo $IPADDR | cut -d@ -f2) + IPADDR=$(echo $IPADDR | cut -d@ -f1) + else + GATEWAY= + fi + ;; + # This part is a little future-proof, as it seems there's many patchs to come in the kernel + # See http://thread.gmane.org/gmane.linux.network/315933/focus=321753 and https://patchwork.ozlabs.org/patch/440660/ + # Doing the following setup "works" but we share the tunnel in all the namespaces (useless) and we can't delete anymore the gre/gretap/tun@NONE interfaces + # - https://ringzraw.wordpress.com/2014/09/14/linux-network-namespaces-and-gre/ + ipip) + shift + IPIP_ADDR=$1 + check_module ipip + ;; + gre) + GRE_REMOTE=$1 + check_module gre + ;; + dhcp) + DHCP=1 + ;; + esac + ;; + --interface|-i) + # Container interface name + shift + CONTAINER_IFNAME=$1 + ;; + --mac|-m) + # Specific MAC address + shift + MACADDR=$1 + ;; + --mtu|-M) + # Specific MTU (be sure the bridge is well configured !) + shift + MTU=$1 + ;; + --vlan|-V) + # Specific VLAN + shift + VLAN=$1 + ;; + --routes|-r) + # To add routing table entries, comma-separated + shift + ROUTES=$1 + # Check if a subnet mask was provided. + echo $ROUTES | grep -q / || { + echo "The IP address should include a netmask." + echo "Maybe you meant $IPADDR/24 ?" + exit 1 + } + ;; + --clean|-c) + # To clean all the network operations + CLEAN=1 ;; *) - echo "I do not know how to setup interface $IFNAME." - exit 1 + help ;; - esac - fi -fi + esac + shift +done -# Set the default container interface name to eth1 if not already set -CONTAINER_IFNAME=${CONTAINER_IFNAME:-eth1} +## First step: determine type of first argument (bridge, physical interface...) -[ "$WAIT" ] && { - while ! 
grep -q ^1$ /sys/class/net/$CONTAINER_IFNAME/carrier 2>/dev/null - do sleep 1 - done - exit 0 -} +if [ -d /sys/class/net/$IFNAME ] +then + if [ -d /sys/class/net/$IFNAME/bridge ] + then + IFTYPE=bridge + BRTYPE=linux + elif $(which ovs-vsctl >/dev/null 2>&1) && $(ovs-vsctl list-br|grep -q ^$IFNAME$) + then + IFTYPE=bridge + BRTYPE=openvswitch + elif [ $(cat /sys/class/net/$IFNAME/type) -eq 32 ]; # Infiniband IPoIB interface type 32 + then + IFTYPE=ipoib + # The IPoIB kernel module is fussy, set device name to ib0 if not overridden + CONTAINER_IFNAME=${CONTAINER_IFNAME:-ib0} + else IFTYPE=phys + fi +else + case "$IFNAME" in + br*) + IFTYPE=bridge + BRTYPE=linux + ;; + ovs*) + if ! $(which ovs-vsctl >/dev/null) + then + echo "Need OVS installed on the system to create an ovs bridge" + exit 1 + fi + IFTYPE=bridge + BRTYPE=openvswitch + ;; + *) + echo "I do not know how to setup interface $IFNAME." + exit 1 + ;; + esac +fi [ $IFTYPE = bridge ] && [ $BRTYPE = linux ] && [ "$VLAN" ] && { echo "VLAN configuration currently unsupported for Linux bridge." @@ -102,11 +254,12 @@ CONTAINER_IFNAME=${CONTAINER_IFNAME:-eth1} } [ $IFTYPE = ipoib ] && [ $MACADDR ] && { - echo "MACADDR configuration unsupported for IPoIB interfaces." - exit 1 + echo "MACADDR configuration unsupported for IPoIB interfaces." + exit 1 } -# Second step: find the guest (for now, we only support LXC containers) +## Second step: find the guest (for now, we only support LXC containers) + while read dev mnt fstype options dump fsck do [ "$fstype" != "cgroup" ] && continue @@ -123,41 +276,53 @@ done < /proc/mounts N=$(find "$CGROUPMNT" -name "$GUESTNAME" | wc -l) case "$N" in 0) - # If we didn't find anything, try to lookup the container with Docker. - if which docker >/dev/null - then + # If we didn't find anything, try to lookup the container with Docker. 
+ if which docker >/dev/null + then RETRIES=3 - while [ $RETRIES -gt 0 ]; do - DOCKERPID=$(docker inspect --format='{{ .State.Pid }}' $GUESTNAME) + while [ $RETRIES -gt 0 ]; do # TODO : Why do we retry ? + DOCKERPID=$(docker inspect --format='{{ .State.Pid }}' $GUESTNAME) [ $DOCKERPID != 0 ] && break sleep 1 RETRIES=$((RETRIES - 1)) done - [ "$DOCKERPID" = 0 ] && { - echo "Docker inspect returned invalid PID 0" - exit 1 - } - - [ "$DOCKERPID" = "" ] && { - echo "Container $GUESTNAME not found, and unknown to Docker." - exit 1 - } - else - echo "Container $GUESTNAME not found, and Docker not installed." - exit 1 - fi - ;; + [ "$DOCKERPID" = "0" ] && { + echo "Docker inspect returned invalid PID 0" + exit 1 + } + + [ "$DOCKERPID" = "" ] && { + echo "Container $GUESTNAME not found, and unknown to Docker." + exit 1 + } + else + echo "Container $GUESTNAME not found, and Docker not installed." + exit 1 + fi + ;; 1) - true - ;; + true + ;; *) - echo "Found more than one container matching $GUESTNAME." - exit 1 - ;; + echo "Found more than one container matching $GUESTNAME." + exit 1 + ;; esac -if [ "$IPADDR" = "dhcp" ] +if [ $DOCKERPID ]; then + NSPID=$DOCKERPID +else + NSPID=$(head -n 1 $(find "$CGROUPMNT" -name "$GUESTNAME" | head -n 1)/tasks) + [ "$NSPID" ] || { + echo "Could not find a process inside container $GUESTNAME." + exit 1 + } +fi + +## Third step : configure the network ! + +if [ "$ACTION" = "dhcp" ] then # Check for first available dhcp client DHCP_CLIENT_LIST="udhcpc dhcpcd dhclient" @@ -168,34 +333,9 @@ then } done [ -z $DHCP_CLIENT ] && { - echo "You asked for DHCP; but no DHCP client could be found." - exit 1 - } -else - # Check if a subnet mask was provided. - echo $IPADDR | grep -q / || { - echo "The IP address should include a netmask." - echo "Maybe you meant $IPADDR/24 ?" - exit 1 + echo "You asked for DHCP; but no DHCP client could be found." + exit 1 } - # Check if a gateway address was provided. 
- if echo $IPADDR | grep -q @ - then - GATEWAY=$(echo $IPADDR | cut -d@ -f2) - IPADDR=$(echo $IPADDR | cut -d@ -f1) - else - GATEWAY= - fi -fi - -if [ $DOCKERPID ]; then - NSPID=$DOCKERPID -else - NSPID=$(head -n 1 $(find "$CGROUPMNT" -name "$GUESTNAME" | head -n 1)/tasks) - [ "$NSPID" ] || { - echo "Could not find a process inside container $GUESTNAME." - exit 1 - } fi # Check if an incompatible VLAN device already exists @@ -206,104 +346,95 @@ fi } } +# Initialize network namespace [ ! -d /var/run/netns ] && mkdir -p /var/run/netns [ -f /var/run/netns/$NSPID ] && rm -f /var/run/netns/$NSPID ln -s /proc/$NSPID/ns/net /var/run/netns/$NSPID -# Check if we need to create a bridge. -[ $IFTYPE = bridge ] && [ ! -d /sys/class/net/$IFNAME ] && { - [ $BRTYPE = linux ] && { - (ip link add dev $IFNAME type bridge > /dev/null 2>&1) || (brctl addbr $IFNAME) - ip link set $IFNAME up - } - [ $BRTYPE = openvswitch ] && { - ovs-vsctl add-br $IFNAME +# Only Link and IP adressing actions implies to initialize netns, bridges, veth pairs +[ $ACTION = ip ] || [ $ACTION = link ] && { + [ "$CLEAN" = 1 ] && [ $IFTYPE = bridge ] && [ BRTYPE = openvswitch ] && { + # Delete OVS port as it doesn't do it by itself even if the veth pair isn't here anymore + # TODO : WTF we doesn't have $NSPID anymore, cause the container is stopped, so this part of the code will never be reached + run ovs-vsctl del-port $IFNAME $LOCAL_IFNAME } -} - -MTU=$(ip link show $IFNAME | awk '{print $5}') -# If it's a bridge, we need to create a veth pair -[ $IFTYPE = bridge ] && { - LOCAL_IFNAME="v${CONTAINER_IFNAME}pl${NSPID}" - GUEST_IFNAME="v${CONTAINER_IFNAME}pg${NSPID}" - ip link add name $LOCAL_IFNAME mtu $MTU type veth peer name $GUEST_IFNAME mtu $MTU - case "$BRTYPE" in - linux) - (ip link set $LOCAL_IFNAME master $IFNAME > /dev/null 2>&1) || (brctl addif $IFNAME $LOCAL_IFNAME) - ;; - openvswitch) - ovs-vsctl add-port $IFNAME $LOCAL_IFNAME ${VLAN:+"tag=$VLAN"} - ;; - esac - ip link set $LOCAL_IFNAME up -} -# 
Note: if no container interface name was specified, pipework will default to ib0 -# Note: no macvlan subinterface or ethernet bridge can be created against an -# ipoib interface. Infiniband is not ethernet. ipoib is an IP layer for it. -# To provide additional ipoib interfaces to containers use SR-IOV and pipework -# to assign them. -[ $IFTYPE = ipoib ] && { - GUEST_IFNAME=$CONTAINER_IFNAME -} - -# If it's a physical interface, create a macvlan subinterface -[ $IFTYPE = phys ] && { - [ "$VLAN" ] && { - [ ! -d /sys/class/net/$IFNAME.$VLAN ] && { - ip link add link $IFNAME name $IFNAME.$VLAN mtu $MTU type vlan id $VLAN + # Check if we need to create a bridge. + [ $IFTYPE = bridge ] && [ ! -d /sys/class/net/$IFNAME ] && { + [ $BRTYPE = linux ] && { + (run ip link add dev $IFNAME type bridge > /dev/null 2>&1) || (run brctl addbr $IFNAME) + run ip link set $IFNAME up } + [ $BRTYPE = openvswitch ] && { + run ovs-vsctl add-br $IFNAME + } + } + + # If not defined, MTU will be the same of the once set on the host interface + [ "$MTU" = "" ] && { + MTU=$(ip link show $IFNAME | awk '{print $5}') + } + # If it's a bridge, we need to create a veth pair + [ $IFTYPE = bridge ] && { + LOCAL_IFNAME="v${CONTAINER_IFNAME}pl${NSPID}" + GUEST_IFNAME="v${CONTAINER_IFNAME}pg${NSPID}" + (run ip link add name $LOCAL_IFNAME mtu $MTU type veth peer name $GUEST_IFNAME mtu $MTU) + case "$BRTYPE" in + linux) + (run ip link set $LOCAL_IFNAME master $IFNAME > /dev/null 2>&1) || (run brctl addif $IFNAME $LOCAL_IFNAME) + ;; + openvswitch) + run ovs-vsctl add-port $IFNAME $LOCAL_IFNAME ${VLAN:+"tag=$VLAN"} + ;; + esac + run ip link set $LOCAL_IFNAME up || true + } + # Note: if no container interface name was specified, pipework will default to ib0 + # Note: no macvlan subinterface or ethernet bridge can be created against an + # ipoib interface. Infiniband is not ethernet. ipoib is an IP layer for it. 
+ # To provide additional ipoib interfaces to containers use SR-IOV and pipework + # to assign them. + [ $IFTYPE = ipoib ] && { + GUEST_IFNAME=$CONTAINER_IFNAME + } - ip link set $IFNAME up - IFNAME=$IFNAME.$VLAN + # If it's a physical interface, create a macvlan subinterface + [ $IFTYPE = phys ] && { + [ "$VLAN" ] && { + [ ! -d /sys/class/net/$IFNAME.$VLAN ] && { + run ip link add link $IFNAME name $IFNAME.$VLAN mtu $MTU type vlan id $VLAN + } + run ip link set $IFNAME up + IFNAME=$IFNAME.$VLAN + } + GUEST_IFNAME=ph$NSPID$CONTAINER_IFNAME + run ip link add link $IFNAME dev $GUEST_IFNAME mtu $MTU type macvlan mode bridge + run ip link set $IFNAME up } - GUEST_IFNAME=ph$NSPID$CONTAINER_IFNAME - ip link add link $IFNAME dev $GUEST_IFNAME mtu $MTU type macvlan mode bridge - ip link set $IFNAME up + + run ip link set $GUEST_IFNAME netns $NSPID + run ip netns exec $NSPID ip link set $GUEST_IFNAME name $CONTAINER_IFNAME + [ "$MACADDR" ] && run ip netns exec $NSPID ip link set dev $CONTAINER_IFNAME address $MACADDR + + # If link only, we can exit here + [ $ACTION = link ] && exit 0 } -ip link set $GUEST_IFNAME netns $NSPID -ip netns exec $NSPID ip link set $GUEST_IFNAME name $CONTAINER_IFNAME -[ "$MACADDR" ] && ip netns exec $NSPID ip link set dev $CONTAINER_IFNAME address $MACADDR -if [ "$IPADDR" = "dhcp" ] -then - [ $DHCP_CLIENT = "udhcpc" ] && ip netns exec $NSPID $DHCP_CLIENT -qi $CONTAINER_IFNAME -x hostname:$GUESTNAME +if [ "$ACTION" = "dhcp" ] +then + [ $DHCP_CLIENT = "udhcpc" ] && run ip netns exec $NSPID $DHCP_CLIENT -qi $CONTAINER_IFNAME -x hostname:$GUESTNAME if [ $DHCP_CLIENT = "dhclient" ] then # kill dhclient after get ip address to prevent device be used after container close - ip netns exec $NSPID $DHCP_CLIENT -pf "/var/run/dhclient.$NSPID.pid" $CONTAINER_IFNAME + run ip netns exec $NSPID $DHCP_CLIENT -pf "/var/run/dhclient.$NSPID.pid" $CONTAINER_IFNAME kill "$(cat "/var/run/dhclient.$NSPID.pid")" rm "/var/run/dhclient.$NSPID.pid" - fi - [ $DHCP_CLIENT = 
"dhcpcd" ] && ip netns exec $NSPID $DHCP_CLIENT -q $CONTAINER_IFNAME -h $GUESTNAME -else - ip netns exec $NSPID ip addr add $IPADDR dev $CONTAINER_IFNAME - [ "$GATEWAY" ] && { - ip netns exec $NSPID ip route delete default >/dev/null 2>&1 && true - } - ip netns exec $NSPID ip link set $CONTAINER_IFNAME up - [ "$GATEWAY" ] && { - ip netns exec $NSPID ip route get $GATEWAY >/dev/null 2>&1 || \ - ip netns exec $NSPID ip route add $GATEWAY/32 dev $CONTAINER_IFNAME - ip netns exec $NSPID ip route replace default via $GATEWAY - [ "$ROUTES" ] && { - ROUTES=`echo $ROUTES | tr ',' ' '`; - for ROUTE in $ROUTES; do - ip netns exec $NSPID ip route add $ROUTE via $GATEWAY dev $CONTAINER_IFNAME - done - } - } -fi - -# Give our ARP neighbors a nudge about the new interface -if which arping > /dev/null 2>&1 -then - IPADDR=$(echo $IPADDR | cut -d/ -f1) - ip netns exec $NSPID arping -c 1 -A -I $CONTAINER_IFNAME $IPADDR > /dev/null 2>&1 || true -else - echo "Warning: arping not found; interface may not be immediately reachable" -fi - + fi + [ $DHCP_CLIENT = "dhcpcd" ] && run ip netns exec $NSPID $DHCP_CLIENT -q $CONTAINER_IFNAME -h $GUESTNAME +else + set_netns_ip $NSPID $CONTAINER_IFNAME $IPADDR $GATEWAY $ROUTES +fi + # Remove NSPID to avoid `ip netns` catch it. 
[ -f /var/run/netns/$NSPID ] && rm -f /var/run/netns/$NSPID exit 0 From ade8efb60985705dcc6ec9e546c6abb5d45d3303 Mon Sep 17 00:00:00 2001 From: pierrecdn Date: Wed, 4 Mar 2015 23:43:33 +0100 Subject: [PATCH 6/9] Fix check_kernel_modules + add module in GRE/IPIP contexts --- pipework | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pipework b/pipework index 9d7f627..f4dc9f0 100755 --- a/pipework +++ b/pipework @@ -10,7 +10,8 @@ help() { echo " pipework -a gre " echo " pipework -a dhcp" echo " pipework -c" - echo "The following options are available in any of the above commands:" + echo + echo "The following options are available in any of the above commands: " echo " -i|--interface (Container interface name)" echo " -m|--mac (MAC address to set)" echo " -M|--mtu (MTU to set on the container interface - be sure to have >= on host interface)" @@ -154,11 +155,13 @@ while [ $# -gt 0 ]; do ipip) shift IPIP_ADDR=$1 - check_module ipip + check_kernel_module ipip + check_kernel_module ip6_tunnel ;; gre) GRE_REMOTE=$1 - check_module gre + check_kernel_module gre + check_kernel_module ip6_gre ;; dhcp) DHCP=1 From 779044f547fd5779b87b501cd9e0fec511b18e3b Mon Sep 17 00:00:00 2001 From: pierrecdn Date: Thu, 5 Mar 2015 01:07:09 +0100 Subject: [PATCH 7/9] Add traffic control + basic implementation of ipip/gre tunneling that do not work as per the netns limitations (tested on 3.16 kernel) --- README.md | 31 +++++++++-- pipework | 156 +++++++++++++++++++++++++++++++++++++----------------- 2 files changed, 134 insertions(+), 53 deletions(-) diff --git a/README.md b/README.md index 6501db7..49caa12 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ Pipework uses cgroups and namespace and works with "plain" LXC containers * [Virtual LAN (VLAN)](#vlan) * [IPv6](#ipv6) * [Secondary addresses](#secondary) +* [Traffic Control (QoS)](#traffic_control) * [Support Open vSwitch](#openvswitch) * [Support Infiniband](#infiniband) * [Cleanup](#cleanup) @@ -164,7 
+165,8 @@ after the IP address and subnet mask: ### Setting routes on the internal interface -If you add more than one internal interface, or perform specific use-cases, you may want to add other routes than the default one. +If you add more than one internal interface, or perform specific use-cases, like multicast routing, +you may want to add other routes than the default one. This could be performed by adding network and masks after the gateway (comma-separated) pipework br1 $CONTAINERID -a ip 192.168.4.25/20@192.168.4.1 -r 192.168.5.0/25,192.168.6.0/24 @@ -324,9 +326,24 @@ IPv6 adressing is also supported, using the same options : You can attach secondary addresses the container, using the action `sec_ip` instead of `ip` - pipework eth0 eth0 $(docker run -d haproxy) -a sec_ip 192.168.1.2/24 - pipework eth0 eth0 $(docker run -d haproxy) -a sec_ip 2001:db8::beef/64 - pipework eth0 eth0 $(docker run -d haproxy) -a sec_ip 2001:db8::face/64 + pipework eth0 $(docker run -d haproxy) -a sec_ip 192.168.1.2/24 + pipework eth0 $(docker run -d haproxy) -a sec_ip 2001:db8::beef/64 + pipework eth0 $(docker run -d haproxy) -a sec_ip 2001:db8::face/64 + + +### Traffic Control (QoS) + +You can play with traffic control on an internal container interface, to emulate network +properties like bandwidth, packet drops, latency, protocol policing and marking, etc. + +Here, we provide a simple wrapper around tc, so you keep control over all parameters + + pipework eth0 $MYSQL -a tc qdisc add dev eth1 root netem loss 30% + pipework eth0 $MYSQL -a tc qdisc add dev eth1 root netem delay 100ms + +See `man tc` for more details + +**Note:** as it is a wrapper, be sure that all your pipework arguments come before `-a tc ...` ### Support Open vSwitch @@ -360,8 +377,12 @@ bridge) is then destroyed as well. ### Experimental -TBD +TBD : test/kernel watch/... 
- Tunnel interfaces (GRE/IPIP/IP6_TUNNEL) + + pipework eth0 $(docker run -d haproxy) -a ipip 192.168.1.3/32 + pipework eth0 $(docker run -d haproxy) -a ipip 2001:db8::2/32 + - Clean OVS bridge diff --git a/pipework b/pipework index f4dc9f0..768eda6 100755 --- a/pipework +++ b/pipework @@ -41,7 +41,7 @@ run() { check_kernel_module() { MODULE_NAME=$1 modinfo $MODULE_NAME >/dev/null 2>&1 || { - echo "Warning: module $MODULE_NAME is required to perform this action" + echo "Error: module $MODULE_NAME is required to perform this action" exit 1 } } @@ -97,6 +97,54 @@ set_netns_ip() { fi } +# Setup an IP tunnel +# Usage set_tunnel ipip|gre NSPID CONTAINER_IFNAME IPADDR ROUTES +set_tunnel() { + + ACTION=$1 + NSPID=$2 + CONTAINER_IFNAME=$3 + IPADDR=$4 + ROUTES=$5 + + # Test IPv4/IPv6 + if [ $(echo $IPADDR | grep -q :) ]; + then + if [ $ACTION = ipip ] + then + TUNL_NAME=ip6tnl0 + TUNL_MODE=ip6ip6 + elif [ $ACTION = gre ] + then + TUNL_NAME=ip6gre0 + TUNL_MODE=sit + fi + else + if [ $ACTION = ipip ] + then + TUNL_NAME=tunl0 + TUNL_MODE=ipip + elif [ $ACTION = gre ] + then + TUNL_NAME=gre0 + TUNL_MODE=gre + fi + fi + run ip netns exec $NSPID ip tunnel add $TUNL_NAME mode $TUNL_MODE dev $CONTAINER_IFNAME + ADDRS=$(echo $IPADDR | tr ',' ' ') + for ADDR in $ADDRS; do + run ip netns exec $NSPID ip addr add $IPADDR dev $TUNL_NAME + done + + [ "$ROUTES" ] && { + ROUTES=$(echo $ROUTES | tr ',' ' ') + for ROUTE in $ROUTES; do + run ip netns exec $NSPID $IPROUTE_PREFIX route add $ROUTE via $GATEWAY dev $CONTAINER_IFNAME + done + } +} + + ## Parse args if [ $# -lt 3 ] then @@ -129,43 +177,47 @@ while [ $# -gt 0 ]; do case "$ACTION" in # Collect args specific to actions link) - ;; + ;; ip|sec_ip) - shift - IPADDR=$1 - # Check if a subnet mask was provided. - echo $IPADDR | grep -q / || { - echo "The IP address should include a netmask." - echo "Maybe you meant $IPADDR/24 ?" - exit 1 - } - # Check if a gateway address was provided. 
- if echo $IPADDR | grep -q @ - then - GATEWAY=$(echo $IPADDR | cut -d@ -f2) - IPADDR=$(echo $IPADDR | cut -d@ -f1) - else - GATEWAY= - fi - ;; + shift + IPADDR=$1 + # Check if a subnet mask was provided. + echo $IPADDR | grep -q / || { + echo "The IP address should include a netmask." + echo "Maybe you meant $IPADDR/24 ?" + exit 1 + } + # Check if a gateway address was provided. + if echo $IPADDR | grep -q @ + then + GATEWAY=$(echo $IPADDR | cut -d@ -f2) + IPADDR=$(echo $IPADDR | cut -d@ -f1) + else + GATEWAY= + fi + ;; + dhcp) + DHCP=1 + ;; # This part is a little future-proof, as it seems there's many patchs to come in the kernel # See http://thread.gmane.org/gmane.linux.network/315933/focus=321753 and https://patchwork.ozlabs.org/patch/440660/ # Doing the following setup "works" but we share the tunnel in all the namespaces (useless) and we can't delete anymore the gre/gretap/tun@NONE interfaces # - https://ringzraw.wordpress.com/2014/09/14/linux-network-namespaces-and-gre/ ipip) - shift - IPIP_ADDR=$1 - check_kernel_module ipip - check_kernel_module ip6_tunnel - ;; + shift + check_kernel_module ipip + check_kernel_module ip6_tunnel + IPADDR=$1 + ;; gre) - GRE_REMOTE=$1 - check_kernel_module gre - check_kernel_module ip6_gre - ;; - dhcp) - DHCP=1 - ;; + check_kernel_module gre + check_kernel_module ip6_gre + IPADDR=$2 + ;; + tc) + shift + QOS=$@ + ;; esac ;; --interface|-i) @@ -418,25 +470,33 @@ ln -s /proc/$NSPID/ns/net /var/run/netns/$NSPID run ip link set $GUEST_IFNAME netns $NSPID run ip netns exec $NSPID ip link set $GUEST_IFNAME name $CONTAINER_IFNAME [ "$MACADDR" ] && run ip netns exec $NSPID ip link set dev $CONTAINER_IFNAME address $MACADDR - - # If link only, we can exit here - [ $ACTION = link ] && exit 0 } -if [ "$ACTION" = "dhcp" ] -then - [ $DHCP_CLIENT = "udhcpc" ] && run ip netns exec $NSPID $DHCP_CLIENT -qi $CONTAINER_IFNAME -x hostname:$GUESTNAME - if [ $DHCP_CLIENT = "dhclient" ] - then - # kill dhclient after get ip address to prevent device 
be used after container close - run ip netns exec $NSPID $DHCP_CLIENT -pf "/var/run/dhclient.$NSPID.pid" $CONTAINER_IFNAME - kill "$(cat "/var/run/dhclient.$NSPID.pid")" - rm "/var/run/dhclient.$NSPID.pid" - fi - [ $DHCP_CLIENT = "dhcpcd" ] && run ip netns exec $NSPID $DHCP_CLIENT -q $CONTAINER_IFNAME -h $GUESTNAME -else - set_netns_ip $NSPID $CONTAINER_IFNAME $IPADDR $GATEWAY $ROUTES -fi +case "$ACTION" in + link) + exit 0 + ;; + dhcp) + [ $DHCP_CLIENT = "udhcpc" ] && run ip netns exec $NSPID $DHCP_CLIENT -qi $CONTAINER_IFNAME -x hostname:$GUESTNAME + if [ $DHCP_CLIENT = "dhclient" ] + then + # kill dhclient after get ip address to prevent device be used after container close + run ip netns exec $NSPID $DHCP_CLIENT -pf "/var/run/dhclient.$NSPID.pid" $CONTAINER_IFNAME + kill "$(cat "/var/run/dhclient.$NSPID.pid")" + rm "/var/run/dhclient.$NSPID.pid" + fi + [ $DHCP_CLIENT = "dhcpcd" ] && run ip netns exec $NSPID $DHCP_CLIENT -q $CONTAINER_IFNAME -h $GUESTNAME + ;; + ip|sec_ip) + set_netns_ip $NSPID $CONTAINER_IFNAME $IPADDR $GATEWAY $ROUTES + ;; + ipip|gre) + set_tunnel $ACTION $NSPID $CONTAINER_IFNAME $IPADDR $ROUTES + ;; + tc) + run ip netns exec $NSPID tc $QOS + ;; +esac # Remove NSPID to avoid `ip netns` catch it. 
[ -f /var/run/netns/$NSPID ] && rm -f /var/run/netns/$NSPID From c42c6adda5a64c714a77ef698d5c539ffa7c9cc2 Mon Sep 17 00:00:00 2001 From: pierrecdn Date: Thu, 5 Mar 2015 11:25:05 +0100 Subject: [PATCH 8/9] Fix tc arguments parsing --- pipework | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/pipework b/pipework index 768eda6..6f09582 100755 --- a/pipework +++ b/pipework @@ -9,17 +9,19 @@ help() { echo " pipework -a ipip " echo " pipework -a gre " echo " pipework -a dhcp" + echo " pipework -a tc (...)" echo " pipework -c" echo echo "The following options are available in any of the above commands: " - echo " -i|--interface (Container interface name)" - echo " -m|--mac (MAC address to set)" - echo " -M|--mtu (MTU to set on the container interface - be sure to have >= on host interface)" - echo " -V|--vlan (VLAN ID/tag to use - only supported by Open vSwitch bridge or direct physical interface)" - echo " -r|--route (Network routes to reach via container interface, comma-separated)" - echo " -v|--verbose (log activity)" - echo " -x|--trace (trace all bash commands)" - echo " -h|--help (print this help and exit)" + echo " -a|--action (Requested action spec)" + echo " -i|--interface (Container interface name)" + echo " -m|--mac (MAC address to set)" + echo " -M|--mtu (MTU to set on the container interface - be sure to have >= on host interface)" + echo " -V|--vlan (VLAN ID/tag to use - only supported by Open vSwitch bridge or direct physical interface)" + echo " -r|--route (Network routes to reach via container interface, comma-separated)" + echo " -v|--verbose (log activity)" + echo " -x|--trace (trace all bash commands)" + echo " -h|--help (print this help and exit)" exit 1 } @@ -217,6 +219,7 @@ while [ $# -gt 0 ]; do tc) shift QOS=$@ + shift $(($#-1)) ;; esac ;; From 298ff0a312a9d2d682eb9554905e9950c0954631 Mon Sep 17 00:00:00 2001 From: pierrecdn Date: Thu, 5 Mar 2015 19:19:43 +0100 Subject: [PATCH 9/9] Make some tunneling UCs 
work + patch for IPv6 secondary addresses (default address selection) --- README.md | 30 +++++++++++++++++++---- pipework | 72 +++++++++++++++++++++++++++++++++++++++---------------- 2 files changed, 77 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 49caa12..0d2881a 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ Pipework uses cgroups and namespace and works with "plain" LXC containers * [Support Open vSwitch](#openvswitch) * [Support Infiniband](#infiniband) * [Cleanup](#cleanup) +* [Debugging](#debug) * [Experimental](#experimental) @@ -125,7 +126,8 @@ Voilà! ### Setting container internal interface ## -By default pipework creates a new interface `eth1` inside the container. In case you want to change this interface name like `eth2`, e.g., to have more than one interface set by pipework, use: +By default pipework creates a new interface `eth1` inside the container. In case you want to +change this interface name like `eth2`, e.g., to have more than one interface set by pipework, use: `pipework br1 web1 -i eth2 ...` @@ -315,7 +317,7 @@ ovs0 and attach the container to VLAN ID 10. ### IPv6 -IPv6 adressing is also supported, using the same options : +IPv6 global scope addressing is also supported, using the same options : pipework eth0 eth0 $(docker run -d haproxy) -a ip 2001:db8::beef/64@2001:db8::1 @@ -374,6 +376,15 @@ the network interfaces are garbage collected. The interface in the container is automatically destroyed, and the interface in the docker host (part of the bridge) is then destroyed as well. + +### Debugging + +Two switches help you debug tedious situations: + +-v logs every iproute2 call + +-x enables shell debugging (similar to sh -x pipework ...) + ### Experimental @@ -381,8 +392,17 @@ TBD : test/kernel watch/... 
- Tunnel interfaces (GRE/IPIP/IP6_TUNNEL) - pipework eth0 $(docker run -d haproxy) -a ipip 192.168.1.3/32 - pipework eth0 $(docker run -d haproxy) -a ipip 2001:db8::2/32 + pipework eth0 $(docker run -d haproxy) -i eth1 -a ipip 192.168.1.3 + pipework eth0 $(docker run -d haproxy) -a ipip 2001:db8::2 + +If the container has more than one internal interface, specify the internal interface (-i) to attach +the tunnel to the good device + +No more driver/mode to remember (ipip, ip6_tunnel, ipip6, ip6ip6, gre, ip6_gre,...), pipeworks adapts itself +to the right situation regarding your adressing scheme (doing ipv4-in-ipv4 or ipv6-in-ipv6 encapsulation) + +Be careful about the MTU in these situations... (tunneling in the container over tunneling on the host may lead +to problems). -- Clean OVS bridge +- Clean OVS bridge unused ports diff --git a/pipework b/pipework index 6f09582..664214d 100755 --- a/pipework +++ b/pipework @@ -71,8 +71,17 @@ set_netns_ip() { IPROUTE_PREFIX="ip -6" } + if [ "$IPROUTE_PREFIX" = "ip -6" ] + then + # Patch for secondary IPv6 : add preffered_lft 0 cause you may not want the last added IP to be the source of the whole v6 traffic + # https://lists.debian.org/debian-isp/2011/05/msg00047.html + ip netns exec $NSPID $IPROUTE_PREFIX addr sh $CONTAINER_IFNAME | grep -q inet6 > /dev/null 2>&1 && { + IPROUTE_OPTIONS="preferred_lft 0" + } + fi + # Set IP/gateway/routes - run ip netns exec $NSPID $IPROUTE_PREFIX addr add $IPADDR dev $CONTAINER_IFNAME + run ip netns exec $NSPID $IPROUTE_PREFIX addr add $IPADDR dev $CONTAINER_IFNAME $IPROUTE_OPTIONS [ "$GATEWAY" ] && { run ip netns exec $NSPID $IPROUTE_PREFIX route delete default >/dev/null 2>&1 && true } @@ -110,32 +119,45 @@ set_tunnel() { ROUTES=$5 # Test IPv4/IPv6 - if [ $(echo $IPADDR | grep -q :) ]; + if echo $IPADDR | grep -q : then + IPROUTE_PREFIX="ip -6" if [ $ACTION = ipip ] then - TUNL_NAME=ip6tnl0 + TUNL_NAME=ip6tnl1 # TODO kernel 3.16 : output "No such device" and doesn't work with ip6tnl0 
TUNL_MODE=ip6ip6 elif [ $ACTION = gre ] then - TUNL_NAME=ip6gre0 - TUNL_MODE=sit + TUNL_NAME=ip6gre1 + TUNL_MODE=ip6gre fi - else + else + IPROUTE_PREFIX="ip -4" if [ $ACTION = ipip ] then - TUNL_NAME=tunl0 + TUNL_NAME=tunl1 TUNL_MODE=ipip elif [ $ACTION = gre ] then - TUNL_NAME=gre0 + TUNL_NAME=gre1 TUNL_MODE=gre fi fi - run ip netns exec $NSPID ip tunnel add $TUNL_NAME mode $TUNL_MODE dev $CONTAINER_IFNAME + # If the tunnel already exists, just address it (allows secondary address on tunneling interfaces) + run ip netns exec $NSPID $IPROUTE_PREFIX tunnel add $TUNL_NAME mode $TUNL_MODE dev $CONTAINER_IFNAME 3>&1 1>&2 2>&3 | grep -v "File exists" && true + + # Patch for secondary IPv6 : add preffered_lft 0 cause you may not want the last added IP to be the source of the whole v6 traffic + # https://lists.debian.org/debian-isp/2011/05/msg00047.html + if [ "$IPROUTE_PREFIX" = "ip -6" ] + then + ip netns exec $NSPID $IPROUTE_PREFIX tunnel sh $TUNL_NAME > /dev/null 2>&1 && { + IPROUTE_OPTIONS="preferred_lft 0" + } + fi + ADDRS=$(echo $IPADDR | tr ',' ' ') - for ADDR in $ADDRS; do - run ip netns exec $NSPID ip addr add $IPADDR dev $TUNL_NAME + for ADDR in $ADDRS; do + run ip netns exec $NSPID $IPROUTE_PREFIX addr add $IPADDR dev $TUNL_NAME $IPROUTE_OPTIONS done [ "$ROUTES" ] && { @@ -144,6 +166,7 @@ set_tunnel() { run ip netns exec $NSPID $IPROUTE_PREFIX route add $ROUTE via $GATEWAY dev $CONTAINER_IFNAME done } + run ip netns exec $NSPID $IPROUTE_PREFIX link set $TUNL_NAME up } @@ -209,12 +232,21 @@ while [ $# -gt 0 ]; do shift check_kernel_module ipip check_kernel_module ip6_tunnel + [ $# -eq 0 ] && { + help + exit 1 + } IPADDR=$1 ;; gre) + shift check_kernel_module gre check_kernel_module ip6_gre - IPADDR=$2 + [ $# -eq 0 ] && { + help + exit 1 + } + IPADDR=$1 ;; tc) shift @@ -464,19 +496,19 @@ ln -s /proc/$NSPID/ns/net /var/run/netns/$NSPID } run ip link set $IFNAME up IFNAME=$IFNAME.$VLAN - } + } GUEST_IFNAME=ph$NSPID$CONTAINER_IFNAME run ip link add link $IFNAME 
dev $GUEST_IFNAME mtu $MTU type macvlan mode bridge run ip link set $IFNAME up } - + run ip link set $GUEST_IFNAME netns $NSPID run ip netns exec $NSPID ip link set $GUEST_IFNAME name $CONTAINER_IFNAME [ "$MACADDR" ] && run ip netns exec $NSPID ip link set dev $CONTAINER_IFNAME address $MACADDR } case "$ACTION" in - link) + link) exit 0 ;; dhcp) @@ -487,20 +519,20 @@ case "$ACTION" in run ip netns exec $NSPID $DHCP_CLIENT -pf "/var/run/dhclient.$NSPID.pid" $CONTAINER_IFNAME kill "$(cat "/var/run/dhclient.$NSPID.pid")" rm "/var/run/dhclient.$NSPID.pid" - fi + fi [ $DHCP_CLIENT = "dhcpcd" ] && run ip netns exec $NSPID $DHCP_CLIENT -q $CONTAINER_IFNAME -h $GUESTNAME ;; - ip|sec_ip) + ip|sec_ip) set_netns_ip $NSPID $CONTAINER_IFNAME $IPADDR $GATEWAY $ROUTES ;; ipip|gre) set_tunnel $ACTION $NSPID $CONTAINER_IFNAME $IPADDR $ROUTES ;; - tc) + tc) run ip netns exec $NSPID tc $QOS ;; -esac - +esac + # Remove NSPID to avoid `ip netns` catch it. [ -f /var/run/netns/$NSPID ] && rm -f /var/run/netns/$NSPID exit 0