From 36883f1c5ee57be770f3336ab768578fb39ee3ef Mon Sep 17 00:00:00 2001 From: Quan Tian Date: Wed, 30 Jun 2021 18:09:01 +0800 Subject: [PATCH] Fix intra-Node service access when both Egress and AntreaProxy is enabled When Egress enabled, extra flows will be added to L3Forwarding table, one of which make the packets to local Pods jump to L2ForwardingCalculation directly to prevent them from entering SNAT table. However, it would also prevent the packets' MAC from being rewritten even when they are marked as requiring it, which leads to local Pods cannot access local Pods via their Services' ClusterIPs. This patch fixes it by making the SNAT skipping flow apply to packets that don't have macRewriteMark set only, with which all traffic to local Pods will either be forwarded to L2ForwardingCalculation directly or be MAC rewritten first before going to L2ForwardingCalculation if they are required to do so. It also removes a flow in L3Forwarding table that specially handles gatewayCT related traffic, which has been taken care of by another more generic flow in same table. Signed-off-by: Quan Tian --- docs/design/windows-design.md | 14 +++++++------- pkg/agent/openflow/pipeline.go | 24 ++++++------------------ test/integration/agent/openflow_test.go | 6 +----- 3 files changed, 14 insertions(+), 30 deletions(-) diff --git a/docs/design/windows-design.md b/docs/design/windows-design.md index e3c61db1b49..488d3ed384c 100644 --- a/docs/design/windows-design.md +++ b/docs/design/windows-design.md @@ -142,7 +142,7 @@ addresses. It is implemented using OpenFlow. To support this feature, two additional marks are introduced: -* The 17th bit of NXM Register0 is set for Pod-to-external traffic. This bit is set in the `L3Forwarding` table, +* The 17th bit of NXM Register0 is set for Pod-to-external traffic. This bit is set in the `L3Forwarding` table, and is consumed in the `ConntrackCommit` table. * The SNATed traffic is marked with **0x40** in the ct context. This mark is set in the `ConntrackCommit` table when committing the new connection into the ct zone, and is consumed in the `ConntrackState` table @@ -163,8 +163,8 @@ The following changes in the OVS pipeline are needed: This is to ensure the reply packets from the external address to the Pod will be "unSNAT'd". * Ensure that the packet is translated based on the NAT information provided when committing the connection by using the `nat` argument with the `ct` action. -* Rewrite the destination MAC with the global virtual MAC (aa:bb:cc:dd:ee:ff) in the `ConntrackState` table if - the packet is from the uplink interface and has the SNAT ct_mark. +* Set the macRewrite bit in the register (reg0[19]) in the `ConntrackState` table if the packet is from the uplink + interface and has the SNAT ct_mark. Following is an example for SNAT relevant OpenFlow entries. @@ -179,16 +179,16 @@ Conntrack Table: 30 table=30, priority=200, ip actions=ct(table=31,zone=65520,nat) ConntrackState Table: 31 -table=31, priority=210,ct_state=-new+trk,ct_mark=0x40,ip,reg0=0x4/0xffff actions=mod_dl_dst:aa:bb:cc:dd:ee:ff,goto_table:40 +table=31, priority=210, ct_state=-new+trk,ct_mark=0x40,ip,reg0=0x4/0xffff actions=load:0x1->NXM_NX_REG0[19],goto_table:40 table=31, priority=200, ip,reg0=0x4/0xffff actions=output:br-int L3Forwarding Table: 70 -// Forward the packet to L2ForwardingCalculation table if it is traffic to the local Pods. -table=70, priority=200, ip,reg0=0x2/0xffff,nw_dst=10.10.0.0/24 actions=goto_table:80 +// Forward the packet to L2ForwardingCalculation table if it is traffic to the local Pods and doesn't require MAC rewriting. +table=70, priority=200, ip,reg0=0/0x80000,nw_dst=10.10.0.0/24 actions=goto_table:80 // Forward the packet to L2ForwardingCalculation table if it is Pod-to-Node traffic. table=70, priority=200, ip,reg0=0x2/0xffff,nw_dst=$local_nodeIP actions=goto_table:80 // Forward the packet to L2ForwardingCalculation table if it is return traffic of an external-to-Pod connection. -table=70, priority=200, ip,reg0=0x2/0xffff,ct_mark=0x20 actions=goto_table:80 +table=70, priority=210, ct_state=+rpl+trk,ct_mark=0x20,ip actions=mod_dl_dst:0e:6d:42:66:92:46,resubmit(,80) // Add SNAT mark if it is Pod-to-external traffic. table=70, priority=190, ct_state=+new+trk,ip,reg0=0x2/0xffff actions=load:0x1->NXM_NX_REG0[17], goto_table:80 diff --git a/pkg/agent/openflow/pipeline.go b/pkg/agent/openflow/pipeline.go index d2607340b4e..a29113d2a51 100644 --- a/pkg/agent/openflow/pipeline.go +++ b/pkg/agent/openflow/pipeline.go @@ -1805,10 +1805,11 @@ func (c *client) snatCommonFlows(nodeIP net.IP, localSubnet net.IPNet, localGate ipProto := getIPProtocol(localSubnet.IP) flows := []binding.Flow{ // First install flows for traffic that should bypass SNAT. - // This flow is for traffic to the local Pod subnet. + // This flow is for traffic to the local Pod subnet that don't need MAC rewriting (L2 forwarding case). Other + // traffic to the local Pod subnet will be handled by L3 forwarding rules. l3FwdTable.BuildFlow(priorityNormal). MatchProtocol(ipProto). - MatchRegRange(int(marksReg), markTrafficFromLocal, binding.Range{0, 15}). + MatchRegRange(int(marksReg), 0, macRewriteMarkRange). MatchDstIPNet(localSubnet). Action().GotoTable(nextTable). Cookie(c.cookieAllocator.Request(category).Raw()). @@ -1821,22 +1822,9 @@ func (c *client) snatCommonFlows(nodeIP net.IP, localSubnet net.IPNet, localGate Action().GotoTable(nextTable). Cookie(c.cookieAllocator.Request(category).Raw()). Done(), - // This flow is for the return traffic of connections to a local - // Pod through the gateway interface (so gatewayCTMark is set). - // For example, the return traffic of a connection from an IP - // address (not the Node's management IP or gateway interface IP - // which are covered by other flows already) of the local Node - // to a local Pod. It might also catch the Service return - // traffic from a local server Pod, but this case is also - // covered by other flows (the flows matching the local and - // remote Pod subnets) anyway. - l3FwdTable.BuildFlow(priorityNormal). - MatchProtocol(ipProto). - MatchRegRange(int(marksReg), markTrafficFromLocal, binding.Range{0, 15}). - MatchCTMark(gatewayCTMark, nil). - Action().GotoTable(nextTable). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), + // The return traffic of connections to a local Pod through the gateway interface (so gatewayCTMark is set) + // should bypass SNAT too. But it has been covered by the gatewayCT related flow generated in l3FwdFlowToGateway + // which forwards all reply traffic for such connections back to the gateway interface with the high priority. // Send the traffic to external to snatTable. l3FwdTable.BuildFlow(priorityLow). diff --git a/test/integration/agent/openflow_test.go b/test/integration/agent/openflow_test.go index b82b57e191d..8adc762d2d6 100644 --- a/test/integration/agent/openflow_test.go +++ b/test/integration/agent/openflow_test.go @@ -1295,17 +1295,13 @@ func prepareExternalFlows(nodeIP net.IP, localSubnet *net.IPNet, gwMAC net.Hardw uint8(70), []*ofTestUtils.ExpectFlow{ { - MatchStr: fmt.Sprintf("priority=200,%s,reg0=0x2/0xffff,%s=%s", ipProtoStr, nwDstFieldName, localSubnet.String()), + MatchStr: fmt.Sprintf("priority=200,%s,reg0=0/0x80000,%s=%s", ipProtoStr, nwDstFieldName, localSubnet.String()), ActStr: "goto_table:80", }, { MatchStr: fmt.Sprintf("priority=200,%s,reg0=0x2/0xffff,%s=%s", ipProtoStr, nwDstFieldName, nodeIP.String()), ActStr: "goto_table:80", }, - { - MatchStr: fmt.Sprintf("priority=200,ct_mark=0x20,%s,reg0=0x2/0xffff", ipProtoStr), - ActStr: "goto_table:80", - }, { MatchStr: fmt.Sprintf("priority=190,%s,reg0=0x2/0xffff", ipProtoStr), ActStr: "goto_table:71",