Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 17.06] Overlay fix for transient IP reuse #1965

Merged
merged 3 commits into from
Oct 3, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion drivers/bridge/bridge_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ func verifyV4INCEntries(networks map[string]*bridgeNetwork, numEntries int, t *t
if x == y {
continue
}
re := regexp.MustCompile(fmt.Sprintf("%s %s", x.config.BridgeName, y.config.BridgeName))
re := regexp.MustCompile(x.config.BridgeName + " " + y.config.BridgeName)
matches := re.FindAllString(string(out[:]), -1)
if len(matches) != 1 {
t.Fatalf("Cannot find expected inter-network isolation rules in IP Tables:\n%s", string(out[:]))
Expand Down
2 changes: 1 addition & 1 deletion drivers/ipvlan/ipvlan_setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,5 +201,5 @@ func delDummyLink(linkName string) error {

// getDummyName returns the name of a dummy parent with truncated net ID and driver prefix
func getDummyName(netID string) string {
return fmt.Sprintf("%s%s", dummyPrefix, netID)
return dummyPrefix + netID
}
2 changes: 1 addition & 1 deletion drivers/macvlan/macvlan_setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,5 +205,5 @@ func delDummyLink(linkName string) error {

// getDummyName returns the name of a dummy parent with truncated net ID and driver prefix
func getDummyName(netID string) string {
return fmt.Sprintf("%s%s", dummyPrefix, netID)
return dummyPrefix + netID
}
1 change: 0 additions & 1 deletion drivers/overlay/encryption.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (

const (
r = 0xD0C4E3
timeout = 30
pktExpansion = 26 // SPI(4) + SeqN(4) + IV(8) + PadLength(1) + NextHeader(1) + ICV(8)
)

Expand Down
22 changes: 10 additions & 12 deletions drivers/overlay/joinleave.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,

ep.ifName = containerIfName

if err := d.writeEndpointToStore(ep); err != nil {
if err = d.writeEndpointToStore(ep); err != nil {
return fmt.Errorf("failed to update overlay endpoint %s to local data store: %v", ep.id[0:7], err)
}

Expand All @@ -86,7 +86,7 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
return err
}

if err := sbox.AddInterface(overlayIfName, "veth",
if err = sbox.AddInterface(overlayIfName, "veth",
sbox.InterfaceOptions().Master(s.brName)); err != nil {
return fmt.Errorf("could not add veth pair inside the network sandbox: %v", err)
}
Expand All @@ -100,15 +100,15 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
return err
}

if err := nlh.LinkSetHardwareAddr(veth, ep.mac); err != nil {
if err = nlh.LinkSetHardwareAddr(veth, ep.mac); err != nil {
return fmt.Errorf("could not set mac address (%v) to the container interface: %v", ep.mac, err)
}

for _, sub := range n.subnets {
if sub == s {
continue
}
if err := jinfo.AddStaticRoute(sub.subnetIP, types.NEXTHOP, s.gwIP.IP); err != nil {
if err = jinfo.AddStaticRoute(sub.subnetIP, types.NEXTHOP, s.gwIP.IP); err != nil {
logrus.Errorf("Adding subnet %s static route in network %q failed\n", s.subnetIP, n.id)
}
}
Expand All @@ -120,9 +120,9 @@ func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo,
}
}

d.peerAdd(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), true, false, false, true)
d.peerAdd(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), false, false, true)

if err := d.checkEncryption(nid, nil, n.vxlanID(s), true, true); err != nil {
if err = d.checkEncryption(nid, nil, n.vxlanID(s), true, true); err != nil {
logrus.Warn(err)
}

Expand Down Expand Up @@ -200,11 +200,11 @@ func (d *driver) EventNotify(etype driverapi.EventType, nid, tableName, key stri
}

if etype == driverapi.Delete {
d.peerDelete(nid, eid, addr.IP, addr.Mask, mac, vtep, true)
d.peerDelete(nid, eid, addr.IP, addr.Mask, mac, vtep, false)
return
}

d.peerAdd(nid, eid, addr.IP, addr.Mask, mac, vtep, true, false, false, false)
d.peerAdd(nid, eid, addr.IP, addr.Mask, mac, vtep, false, false, false)
}

// Leave method is invoked when a Sandbox detaches from an endpoint.
Expand Down Expand Up @@ -232,11 +232,9 @@ func (d *driver) Leave(nid, eid string) error {
}
}

n.leaveSandbox()
d.peerDelete(nid, eid, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), true)

if err := d.checkEncryption(nid, nil, 0, true, false); err != nil {
logrus.Warn(err)
}
n.leaveSandbox()

return nil
}
72 changes: 20 additions & 52 deletions drivers/overlay/ov_network.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,8 +251,9 @@ func (d *driver) DeleteNetwork(nid string) error {
if err := d.deleteEndpointFromStore(ep); err != nil {
logrus.Warnf("Failed to delete overlay endpoint %s from local store: %v", ep.id[0:7], err)
}

}
// flush the peerDB entries
d.peerFlush(nid)
d.deleteNetwork(nid)

vnis, err := n.releaseVxlanID()
Expand Down Expand Up @@ -494,7 +495,7 @@ func (n *network) restoreSubnetSandbox(s *subnet, brName, vxlanName string) erro
brIfaceOption := make([]osl.IfaceOption, 2)
brIfaceOption = append(brIfaceOption, sbox.InterfaceOptions().Address(s.gwIP))
brIfaceOption = append(brIfaceOption, sbox.InterfaceOptions().Bridge(true))
Ifaces[fmt.Sprintf("%s+%s", brName, "br")] = brIfaceOption
Ifaces[brName+"+br"] = brIfaceOption

err := sbox.Restore(Ifaces, nil, nil, nil)
if err != nil {
Expand All @@ -504,12 +505,8 @@ func (n *network) restoreSubnetSandbox(s *subnet, brName, vxlanName string) erro
Ifaces = make(map[string][]osl.IfaceOption)
vxlanIfaceOption := make([]osl.IfaceOption, 1)
vxlanIfaceOption = append(vxlanIfaceOption, sbox.InterfaceOptions().Master(brName))
Ifaces[fmt.Sprintf("%s+%s", vxlanName, "vxlan")] = vxlanIfaceOption
err = sbox.Restore(Ifaces, nil, nil, nil)
if err != nil {
return err
}
return nil
Ifaces[vxlanName+"+vxlan"] = vxlanIfaceOption
return sbox.Restore(Ifaces, nil, nil, nil)
}

func (n *network) setupSubnetSandbox(s *subnet, brName, vxlanName string) error {
Expand Down Expand Up @@ -760,58 +757,38 @@ func (n *network) watchMiss(nlSock *nl.NetlinkSocket) {
continue
}

logrus.Debugf("miss notification: dest IP %v, dest MAC %v", ip, mac)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

miss notifications were a handy signature of ipvs connection tracking timeouts, since the after-timeout connection attempt arps for the VIP. i miss them. when is time.Since(t) expected to exceed a second?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

time.Since is simple used as a rate limiter below, in order not to flood the logs with a notification that potentially is driven by data packets.
Anyway just as an heads up, the whole watchMiss logic is gone from swarm mode, so nothing of this code will be executed

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah. that would explain the lack of messages then.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

have l2miss and l3miss been disabled on vxlan0? No reason to generate the notification if it's not being consumed.

Copy link
Collaborator

@trapier trapier Jan 25, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actually, for my diagnostic purposes, there might be :-).

ip -t monitor neigh in the overlay namespace can provide the record of miss notifications we used to get from the daemon.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actually the whole go routine is neither spawned.
this will save per overlay namespace: 1 thread (that was used for the recv syscall) and 1 netlink socket. Definitely too many resources only to print a log :)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

reference PR: #2047


if neigh.State&(netlink.NUD_STALE|netlink.NUD_INCOMPLETE) == 0 {
continue
}

if n.driver.isSerfAlive() {
logrus.Debugf("miss notification: dest IP %v, dest MAC %v", ip, mac)
mac, IPmask, vtep, err := n.driver.resolvePeer(n.id, ip)
if err != nil {
logrus.Errorf("could not resolve peer %q: %v", ip, err)
continue
}
n.driver.peerAdd(n.id, "dummy", ip, IPmask, mac, vtep, true, l2Miss, l3Miss, false)
} else {
// If the gc_thresh values are lower kernel might knock off the neighor entries.
// When we get a L3 miss check if its a valid peer and reprogram the neighbor
// entry again. Rate limit it to once attempt every 500ms, just in case a faulty
// container sends a flood of packets to invalid peers
if !l3Miss {
continue
}
if time.Since(t) > 500*time.Millisecond {
n.driver.peerAdd(n.id, "dummy", ip, IPmask, mac, vtep, l2Miss, l3Miss, false)
} else if l3Miss && time.Since(t) > time.Second {
// All the local peers will trigger a miss notification but this one is expected and the local container will reply
// autonomously to the ARP request
// In case the gc_thresh3 values is low kernel might reject new entries during peerAdd. This will trigger the following
// extra logs that will inform of the possible issue.
// Entries created would not be deleted see documentation http://man7.org/linux/man-pages/man7/arp.7.html:
// Entries which are marked as permanent are never deleted by the garbage-collector.
// The time limit here is to guarantee that the dbSearch is not
// done too frequently causing a stall of the peerDB operations.
pKey, pEntry, err := n.driver.peerDbSearch(n.id, ip)
if err == nil && !pEntry.isLocal {
t = time.Now()
n.programNeighbor(ip)
logrus.Warnf("miss notification for peer:%+v l3Miss:%t l2Miss:%t, if the problem persist check the gc_thresh on the host pKey:%+v pEntry:%+v err:%v",
neigh, l3Miss, l2Miss, *pKey, *pEntry, err)
}
}
}
}
}

func (n *network) programNeighbor(ip net.IP) {
peerMac, _, _, err := n.driver.peerDbSearch(n.id, ip)
if err != nil {
logrus.Errorf("Reprogramming on L3 miss failed for %s, no peer entry", ip)
return
}
s := n.getSubnetforIPAddr(ip)
if s == nil {
logrus.Errorf("Reprogramming on L3 miss failed for %s, not a valid subnet", ip)
return
}
sbox := n.sandbox()
if sbox == nil {
logrus.Errorf("Reprogramming on L3 miss failed for %s, overlay sandbox missing", ip)
return
}
if err := sbox.AddNeighbor(ip, peerMac, true, sbox.NeighborOptions().LinkName(s.vxlanName)); err != nil {
logrus.Errorf("Reprogramming on L3 miss failed for %s: %v", ip, err)
return
}
}

func (d *driver) addNetwork(n *network) {
d.Lock()
d.networks[n.id] = n
Expand Down Expand Up @@ -1095,15 +1072,6 @@ func (n *network) contains(ip net.IP) bool {
return false
}

func (n *network) getSubnetforIPAddr(ip net.IP) *subnet {
for _, s := range n.subnets {
if s.subnetIP.Contains(ip) {
return s
}
}
return nil
}

// getSubnetforIP returns the subnet to which the given IP belongs
func (n *network) getSubnetforIP(ip *net.IPNet) *subnet {
for _, s := range n.subnets {
Expand Down
11 changes: 5 additions & 6 deletions drivers/overlay/ov_serf.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,9 @@ func (d *driver) processEvent(u serf.UserEvent) {

switch action {
case "join":
d.peerAdd(nid, eid, net.ParseIP(ipStr), net.IPMask(net.ParseIP(maskStr).To4()), mac, net.ParseIP(vtepStr),
true, false, false, false)
d.peerAdd(nid, eid, net.ParseIP(ipStr), net.IPMask(net.ParseIP(maskStr).To4()), mac, net.ParseIP(vtepStr), false, false, false)
case "leave":
d.peerDelete(nid, eid, net.ParseIP(ipStr), net.IPMask(net.ParseIP(maskStr).To4()), mac, net.ParseIP(vtepStr), true)
d.peerDelete(nid, eid, net.ParseIP(ipStr), net.IPMask(net.ParseIP(maskStr).To4()), mac, net.ParseIP(vtepStr), false)
}
}

Expand All @@ -136,13 +135,13 @@ func (d *driver) processQuery(q *serf.Query) {
fmt.Printf("Failed to scan query payload string: %v\n", err)
}

peerMac, peerIPMask, vtep, err := d.peerDbSearch(nid, net.ParseIP(ipStr))
pKey, pEntry, err := d.peerDbSearch(nid, net.ParseIP(ipStr))
if err != nil {
return
}

logrus.Debugf("Sending peer query resp mac %s, mask %s, vtep %s", peerMac, net.IP(peerIPMask), vtep)
q.Respond([]byte(fmt.Sprintf("%s %s %s", peerMac.String(), net.IP(peerIPMask).String(), vtep.String())))
logrus.Debugf("Sending peer query resp mac %v, mask %s, vtep %s", pKey.peerMac, net.IP(pEntry.peerIPMask).String(), pEntry.vtep)
q.Respond([]byte(fmt.Sprintf("%s %s %s", pKey.peerMac.String(), net.IP(pEntry.peerIPMask).String(), pEntry.vtep.String())))
}

func (d *driver) resolvePeer(nid string, peerIP net.IP) (net.HardwareAddr, net.IPMask, net.IP, error) {
Expand Down
8 changes: 4 additions & 4 deletions drivers/overlay/overlay.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,15 +162,15 @@ func (d *driver) restoreEndpoints() error {
Ifaces := make(map[string][]osl.IfaceOption)
vethIfaceOption := make([]osl.IfaceOption, 1)
vethIfaceOption = append(vethIfaceOption, n.sbox.InterfaceOptions().Master(s.brName))
Ifaces[fmt.Sprintf("%s+%s", "veth", "veth")] = vethIfaceOption
Ifaces["veth+veth"] = vethIfaceOption

err := n.sbox.Restore(Ifaces, nil, nil, nil)
if err != nil {
return fmt.Errorf("failed to restore overlay sandbox: %v", err)
}

n.incEndpointCount()
d.peerAdd(ep.nid, ep.id, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), true, false, false, true)
d.peerAdd(ep.nid, ep.id, ep.addr.IP, ep.addr.Mask, ep.mac, net.ParseIP(d.advertiseAddress), false, false, true)
}
return nil
}
Expand Down Expand Up @@ -262,15 +262,15 @@ func (d *driver) nodeJoin(advertiseAddress, bindAddress string, self bool) {
d.Unlock()

// If containers are already running on this network update the
// advertiseaddress in the peerDB
// advertise address in the peerDB
d.localJoinOnce.Do(func() {
d.peerDBUpdateSelf()
})

// If there is no cluster store there is no need to start serf.
if d.store != nil {
if err := validateSelf(advertiseAddress); err != nil {
logrus.Warnf("%s", err.Error())
logrus.Warn(err.Error())
}
err := d.serfInit()
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion drivers/overlay/overlay.proto
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@ message PeerRecord {
// which this container is running and can be reached by
// building a tunnel to that host IP.
string tunnel_endpoint_ip = 3 [(gogoproto.customname) = "TunnelEndpointIP"];
}
}
Loading