diff --git a/docs/design/windows-design.md b/docs/design/windows-design.md index e3c61db1b49..26beeb1fc68 100644 --- a/docs/design/windows-design.md +++ b/docs/design/windows-design.md @@ -142,7 +142,7 @@ addresses. It is implemented using OpenFlow. To support this feature, two additional marks are introduced: -* The 17th bit of NXM Register0 is set for Pod-to-external traffic. This bit is set in the `L3Forwarding` table, +* The 17th bit of NXM Register0 is set for Pod-to-external traffic. This bit is set in the `L3Forwarding` table, and is consumed in the `ConntrackCommit` table. * The SNATed traffic is marked with **0x40** in the ct context. This mark is set in the `ConntrackCommit` table when committing the new connection into the ct zone, and is consumed in the `ConntrackState` table @@ -155,16 +155,14 @@ The following changes in the OVS pipeline are needed: 1) it enters the OVS pipeline from a local Pod; 2) the destination IP address is not in the cluster Pod CIDRs (Pod subnets of the local or any remote Node), and is not the local Node's management IP; - 3) the traffic is not for Services; - 4) it is not the return traffic for a connection from an external IP to a local Pod. * Implement SNAT rules and commit the new connection to the ct context in the `ConntrackCommit` table. The SNAT rule should change the source address to the Node's internal IP. * Forward the IP packet to the `Conntrack` table if it is from the uplink interface in the `Uplink` table. This is to ensure the reply packets from the external address to the Pod will be "unSNAT'd". * Ensure that the packet is translated based on the NAT information provided when committing the connection by using the `nat` argument with the `ct` action. -* Rewrite the destination MAC with the global virtual MAC (aa:bb:cc:dd:ee:ff) in the `ConntrackState` table if - the packet is from the uplink interface and has the SNAT ct_mark. +* Set the macRewrite bit in the register (reg0[19]) in the `ConntrackState` table if the packet is from the uplink + interface and has the SNAT ct_mark. Following is an example for SNAT relevant OpenFlow entries. @@ -179,16 +177,14 @@ Conntrack Table: 30 table=30, priority=200, ip actions=ct(table=31,zone=65520,nat) ConntrackState Table: 31 -table=31, priority=210,ct_state=-new+trk,ct_mark=0x40,ip,reg0=0x4/0xffff actions=mod_dl_dst:aa:bb:cc:dd:ee:ff,goto_table:40 +table=31, priority=210, ct_state=-new+trk,ct_mark=0x40,ip,reg0=0x4/0xffff actions=load:0x1->NXM_NX_REG0[19],goto_table:40 table=31, priority=200, ip,reg0=0x4/0xffff actions=output:br-int L3Forwarding Table: 70 -// Forward the packet to L2ForwardingCalculation table if it is traffic to the local Pods. -table=70, priority=200, ip,reg0=0x2/0xffff,nw_dst=10.10.0.0/24 actions=goto_table:80 +// Forward the packet to L2ForwardingCalculation table if it is traffic to the local Pods and don't require MAC rewriting. +table=70, priority=200, ip,reg0=0x0/0x80ffff,nw_dst=10.10.0.0/24 actions=goto_table:80 // Forward the packet to L2ForwardingCalculation table if it is Pod-to-Node traffic. table=70, priority=200, ip,reg0=0x2/0xffff,nw_dst=$local_nodeIP actions=goto_table:80 -// Forward the packet to L2ForwardingCalculation table if it is return traffic of an external-to-Pod connection. -table=70, priority=200, ip,reg0=0x2/0xffff,ct_mark=0x20 actions=goto_table:80 // Add SNAT mark if it is Pod-to-external traffic. table=70, priority=190, ct_state=+new+trk,ip,reg0=0x2/0xffff actions=load:0x1->NXM_NX_REG0[17], goto_table:80 diff --git a/pkg/agent/openflow/pipeline.go b/pkg/agent/openflow/pipeline.go index d2607340b4e..c1cf1950c94 100644 --- a/pkg/agent/openflow/pipeline.go +++ b/pkg/agent/openflow/pipeline.go @@ -1805,10 +1805,11 @@ func (c *client) snatCommonFlows(nodeIP net.IP, localSubnet net.IPNet, localGate ipProto := getIPProtocol(localSubnet.IP) flows := []binding.Flow{ // First install flows for traffic that should bypass SNAT. - // This flow is for traffic to the local Pod subnet. + // This flow is for traffic to the local Pod subnet that don't need MAC rewriting (L2 forwarding case). Other + // traffic to the local Pod subnet will be handled by L3 forwarding rules. l3FwdTable.BuildFlow(priorityNormal). MatchProtocol(ipProto). - MatchRegRange(int(marksReg), markTrafficFromLocal, binding.Range{0, 15}). + MatchRegRange(int(marksReg), 0, macRewriteMarkRange). MatchDstIPNet(localSubnet). Action().GotoTable(nextTable). Cookie(c.cookieAllocator.Request(category).Raw()). @@ -1821,22 +1822,6 @@ func (c *client) snatCommonFlows(nodeIP net.IP, localSubnet net.IPNet, localGate Action().GotoTable(nextTable). Cookie(c.cookieAllocator.Request(category).Raw()). Done(), - // This flow is for the return traffic of connections to a local - // Pod through the gateway interface (so gatewayCTMark is set). - // For example, the return traffic of a connection from an IP - // address (not the Node's management IP or gateway interface IP - // which are covered by other flows already) of the local Node - // to a local Pod. It might also catch the Service return - // traffic from a local server Pod, but this case is also - // covered by other flows (the flows matching the local and - // remote Pod subnets) anyway. - l3FwdTable.BuildFlow(priorityNormal). - MatchProtocol(ipProto). - MatchRegRange(int(marksReg), markTrafficFromLocal, binding.Range{0, 15}). - MatchCTMark(gatewayCTMark, nil). - Action().GotoTable(nextTable). - Cookie(c.cookieAllocator.Request(category).Raw()). - Done(), // Send the traffic to external to snatTable. l3FwdTable.BuildFlow(priorityLow). diff --git a/test/integration/agent/openflow_test.go b/test/integration/agent/openflow_test.go index b82b57e191d..8adc762d2d6 100644 --- a/test/integration/agent/openflow_test.go +++ b/test/integration/agent/openflow_test.go @@ -1295,17 +1295,13 @@ func prepareExternalFlows(nodeIP net.IP, localSubnet *net.IPNet, gwMAC net.Hardw uint8(70), []*ofTestUtils.ExpectFlow{ { - MatchStr: fmt.Sprintf("priority=200,%s,reg0=0x2/0xffff,%s=%s", ipProtoStr, nwDstFieldName, localSubnet.String()), + MatchStr: fmt.Sprintf("priority=200,%s,reg0=0/0x80000,%s=%s", ipProtoStr, nwDstFieldName, localSubnet.String()), ActStr: "goto_table:80", }, { MatchStr: fmt.Sprintf("priority=200,%s,reg0=0x2/0xffff,%s=%s", ipProtoStr, nwDstFieldName, nodeIP.String()), ActStr: "goto_table:80", }, - { - MatchStr: fmt.Sprintf("priority=200,ct_mark=0x20,%s,reg0=0x2/0xffff", ipProtoStr), - ActStr: "goto_table:80", - }, { MatchStr: fmt.Sprintf("priority=190,%s,reg0=0x2/0xffff", ipProtoStr), ActStr: "goto_table:71",