-
Notifications
You must be signed in to change notification settings - Fork 25
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
nflog: log packets by setting the loglevel to print them by stdout
- Loading branch information
Showing
6 changed files
with
662 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
// SPDX-License-Identifier: APACHE-2.0 | ||
|
||
package nflog | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"syscall" | ||
|
||
"github.com/google/nftables" | ||
"github.com/google/nftables/expr" | ||
|
||
"github.com/florianl/go-nflog/v2" | ||
"github.com/mdlayher/netlink" | ||
|
||
"k8s.io/klog/v2" | ||
) | ||
|
||
// Experimental: allow to log packets for troubleshooting | ||
|
||
const ( | ||
tableName = "kindnet-nflog" | ||
) | ||
|
||
func NewNFLogAgent(logLevel int) (*NFLogAgent, error) { | ||
return &NFLogAgent{logLevel}, nil | ||
} | ||
|
||
type NFLogAgent struct { | ||
logLevel int | ||
} | ||
|
||
func (n *NFLogAgent) Run(ctx context.Context) error { | ||
logger := klog.FromContext(ctx) | ||
|
||
err := n.syncRules() | ||
if err != nil { | ||
return err | ||
} | ||
config := nflog.Config{ | ||
Group: 100, | ||
Copymode: nflog.CopyPacket, | ||
Bufsize: 128, | ||
} | ||
|
||
nf, err := nflog.Open(&config) | ||
if err != nil { | ||
return fmt.Errorf("could not open nflog socket: %v", err) | ||
} | ||
defer nf.Close() | ||
|
||
// Avoid receiving ENOBUFS errors. | ||
if err := nf.SetOption(netlink.NoENOBUFS, true); err != nil { | ||
return fmt.Errorf("failed to set netlink option %v: %v", | ||
netlink.NoENOBUFS, err) | ||
} | ||
|
||
// hook that is called for every received packet by the nflog group | ||
hook := func(attrs nflog.Attribute) int { | ||
packet, err := parsePacket(*attrs.Payload) | ||
if err != nil { | ||
logger.Error(err, "Can not process packet") | ||
return 0 | ||
} | ||
// Just print out the payload of the nflog packet | ||
logger.V(n.logLevel).Info("Evaluating packet", "packet", packet) | ||
return 0 | ||
} | ||
|
||
// errFunc that is called for every error on the registered hook | ||
errFunc := func(e error) int { | ||
// Just log the error and return 0 to continue receiving packets | ||
klog.Infof("received error on hook: %v", e) | ||
return 0 | ||
} | ||
|
||
// Register your function to listen on nflog group 100 | ||
err = nf.RegisterWithErrorFunc(ctx, hook, errFunc) | ||
if err != nil { | ||
return fmt.Errorf("failed to register hook function: %v", err) | ||
} | ||
|
||
// Block till the context expires | ||
<-ctx.Done() | ||
return nil | ||
} | ||
|
||
func (n *NFLogAgent) syncRules() error { | ||
klog.V(2).Info("Syncing kindnet-nflog nftables rules") | ||
nft, err := nftables.New() | ||
if err != nil { | ||
return fmt.Errorf("fastpath failure, can not start nftables:%v", err) | ||
} | ||
|
||
// add + delete + add for flushing all the table | ||
table := &nftables.Table{ | ||
Name: tableName, | ||
Family: nftables.TableFamilyINet, | ||
} | ||
nft.AddTable(table) | ||
nft.DelTable(table) | ||
nft.AddTable(table) | ||
|
||
chain := nft.AddChain(&nftables.Chain{ | ||
Name: "prerouting", | ||
Table: table, | ||
Type: nftables.ChainTypeFilter, | ||
Hooknum: nftables.ChainHookPrerouting, | ||
Priority: nftables.ChainPriorityMangle, // before DNAT | ||
}) | ||
|
||
// Log first and last packet of each connection | ||
nft.AddRule(&nftables.Rule{ | ||
Table: table, | ||
Chain: chain, | ||
Exprs: []expr.Any{ | ||
&expr.Meta{Key: expr.MetaKeyL4PROTO, SourceRegister: false, Register: 0x1}, | ||
&expr.Cmp{Op: 0x0, Register: 0x1, Data: []byte{syscall.IPPROTO_TCP}}, | ||
&expr.Log{Level: 0x0, Flags: 0x0, Key: 0x2, Snaplen: 0x0, Group: 100, QThreshold: 0x0, Data: []uint8(nil)}, | ||
}, | ||
}) | ||
|
||
nft.AddRule(&nftables.Rule{ | ||
Table: table, | ||
Chain: chain, | ||
Exprs: []expr.Any{ | ||
&expr.Meta{Key: expr.MetaKeyL4PROTO, SourceRegister: false, Register: 0x1}, | ||
&expr.Cmp{Op: 0x0, Register: 0x1, Data: []byte{syscall.IPPROTO_UDP}}, | ||
&expr.Log{Level: 0x0, Flags: 0x0, Key: 0x2, Snaplen: 0x0, Group: 100, QThreshold: 0x0, Data: []uint8(nil)}, | ||
}, | ||
}) | ||
|
||
err = nft.Flush() | ||
if err != nil { | ||
return fmt.Errorf("failed to create kindnet-fastpath table: %v", err) | ||
} | ||
return nil | ||
} | ||
|
||
func (n *NFLogAgent) CleanRules() { | ||
nft, err := nftables.New() | ||
if err != nil { | ||
klog.Infof("fastpath cleanup failure, can not start nftables:%v", err) | ||
return | ||
} | ||
// Add+Delete is idempotent and won't return an error if the table doesn't already | ||
// exist. | ||
table := nft.AddTable(&nftables.Table{ | ||
Family: nftables.TableFamilyINet, | ||
Name: tableName, | ||
}) | ||
nft.DelTable(table) | ||
|
||
err = nft.Flush() | ||
if err != nil { | ||
klog.Infof("error deleting nftables rules %v", err) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
// SPDX-License-Identifier: APACHE-2.0 | ||
|
||
package nflog | ||
|
||
import ( | ||
"encoding/binary" | ||
"encoding/hex" | ||
"fmt" | ||
"net" | ||
"syscall" | ||
|
||
v1 "k8s.io/api/core/v1" | ||
) | ||
|
||
type packet struct { | ||
id uint32 | ||
family v1.IPFamily | ||
srcIP net.IP | ||
dstIP net.IP | ||
proto v1.Protocol | ||
srcPort int | ||
dstPort int | ||
payload []byte | ||
} | ||
|
||
var ErrorTooShort = fmt.Errorf("packet too short") | ||
var ErrorCorrupted = fmt.Errorf("packet corrupted") | ||
|
||
func (p packet) String() string { | ||
return fmt.Sprintf("[%d] %s:%d %s:%d %s\n%s", p.id, p.srcIP.String(), p.srcPort, p.dstIP.String(), p.dstPort, p.proto, hex.Dump(p.payload)) | ||
} | ||
|
||
// This function is used for JSON output (interface logr.Marshaler) | ||
func (p packet) MarshalLog() any { | ||
return &struct { | ||
ID uint32 | ||
Family v1.IPFamily | ||
SrcIP net.IP | ||
DstIP net.IP | ||
Proto v1.Protocol | ||
SrcPort int | ||
DstPort int | ||
}{ | ||
p.id, | ||
p.family, | ||
p.srcIP, | ||
p.dstIP, | ||
p.proto, | ||
p.srcPort, | ||
p.dstPort, | ||
} | ||
} | ||
|
||
// https://en.wikipedia.org/wiki/Internet_Protocol_version_4#Packet_structure | ||
// https://en.wikipedia.org/wiki/IPv6_packet | ||
// https://github.com/golang/net/blob/master/ipv4/header.go | ||
func parsePacket(b []byte) (packet, error) { | ||
t := packet{} | ||
if len(b) < 20 { | ||
// 20 is the minimum length of an IPv4 header (IPv6 is 40) | ||
return t, ErrorTooShort | ||
} | ||
version := int(b[0] >> 4) | ||
// initialize variables | ||
var protocol, l4offset, nxtHeader int | ||
switch version { | ||
case 4: | ||
t.family = v1.IPv4Protocol | ||
hdrlen := int(b[0]&0x0f) * 4 // (header length in 32-bit words) | ||
if hdrlen < 20 { | ||
return t, ErrorCorrupted | ||
} | ||
l4offset = hdrlen | ||
if l4offset >= len(b) { | ||
return t, ErrorTooShort | ||
} | ||
t.srcIP = net.IPv4(b[12], b[13], b[14], b[15]) | ||
t.dstIP = net.IPv4(b[16], b[17], b[18], b[19]) | ||
protocol = int(b[9]) | ||
// IPv4 fragments: | ||
// Since the conntracker is always used in K8s, IPv4 fragments | ||
// will never be passed via the nfqueue. Packets are | ||
// re-assembled by the kernel. Please see: | ||
// https://unix.stackexchange.com/questions/650790/unwanted-defragmentation-of-forwarded-ipv4-packets | ||
case 6: | ||
t.family = v1.IPv6Protocol | ||
if len(b) < 48 { | ||
// 40 is the minimum length of an IPv6 header, and 8 is | ||
// the minimum lenght of an extension or L4 header | ||
return t, ErrorTooShort | ||
} | ||
t.srcIP = make(net.IP, net.IPv6len) | ||
copy(t.srcIP, b[8:24]) | ||
t.dstIP = make(net.IP, net.IPv6len) | ||
copy(t.dstIP, b[24:40]) | ||
// Handle extension headers. | ||
nxtHeader = int(b[6]) | ||
l4offset = 40 | ||
for nxtHeader == syscall.IPPROTO_DSTOPTS || nxtHeader == syscall.IPPROTO_HOPOPTS || nxtHeader == syscall.IPPROTO_ROUTING { | ||
// These headers have a lenght in 8-octet units, not | ||
// including the first 8 octets | ||
nxtHeader = int(b[l4offset]) | ||
l4offset += (8 + int(b[l4offset+1])*8) | ||
// Now l4offset points to either another extension header, | ||
// or an L4 header. So we must have at least 8 byte data | ||
// after this (minimum extension header size) | ||
if (l4offset + 8) >= len(b) { | ||
return t, ErrorTooShort | ||
} | ||
} | ||
if nxtHeader == syscall.IPPROTO_FRAGMENT { | ||
// Only the first fragment has the L4 header | ||
fragOffset := int(binary.BigEndian.Uint16(b[l4offset+2 : l4offset+4])) | ||
if fragOffset&0xfff8 == 0 { | ||
nxtHeader = int(b[l4offset]) | ||
l4offset += 8 | ||
// Here it's assumed that the fragment is the last | ||
// extension header before the L4 header. But more | ||
// IPPROTO_DSTOPTS are allowed by the recommended order. | ||
// TODO: handle extra IPPROTO_DSTOPTS. | ||
} else { | ||
// If this is NOT the first fragment, we have no L4 | ||
// header and the payload begins after this | ||
// header. Return a packet with t.proto unset | ||
return t, nil | ||
} | ||
} | ||
protocol = nxtHeader | ||
default: | ||
return t, fmt.Errorf("unknown version %d", version) | ||
} | ||
|
||
// The payload follows immediately after the L4 header, pointed | ||
// out by 'l4offset'. So payloadOffset will be (l4offset + the | ||
// L4header len) The L4header len is 8 byte for udp and sctp, but | ||
// may vary for tcp (the dataOffset) | ||
var payloadOffset int | ||
switch protocol { | ||
case syscall.IPPROTO_TCP: | ||
t.proto = v1.ProtocolTCP | ||
dataOffset := int(b[l4offset+12]>>4) * 4 | ||
if dataOffset < 20 { | ||
return t, ErrorCorrupted | ||
} | ||
payloadOffset = l4offset + dataOffset | ||
case syscall.IPPROTO_UDP: | ||
t.proto = v1.ProtocolUDP | ||
payloadOffset = l4offset + 8 | ||
case syscall.IPPROTO_SCTP: | ||
t.proto = v1.ProtocolSCTP | ||
payloadOffset = l4offset + 8 | ||
default: | ||
// Return a packet with t.proto unset, and ports 0 | ||
return t, nil | ||
|
||
} | ||
if payloadOffset > len(b) { | ||
// If the payloadOffset is beyond the packet size, we have an | ||
// incomplete L4 header | ||
return t, ErrorTooShort | ||
} | ||
t.srcPort = int(binary.BigEndian.Uint16(b[l4offset : l4offset+2])) | ||
t.dstPort = int(binary.BigEndian.Uint16(b[l4offset+2 : l4offset+4])) | ||
|
||
// TODO allow to filter by the payload | ||
t.payload = b[payloadOffset:] | ||
return t, nil | ||
} |
Oops, something went wrong.