INTRODUCTION À EBPF
OU COMMENT PISTER UN PAQUET LAMBDA SANS SON CONSENTEMENT
Jean-Tiare Le Bigot ‒ Easymile
INTRODUCTION À EBPF OU COMMENT PISTER UN PAQUET LAMBDA SANS SON CONSENTEMENT - PowerPoint PPT Presentation
INTRODUCTION À EBPF OU COMMENT PISTER UN PAQUET LAMBDA SANS SON CONSENTEMENT Jean-Tiare Le Bigot Easymile QUI SUIS-JE ? Jean-Tiare Le Bigot Torture de paquets // containers @oyadutaf // blog.yadutaf.fr DE QUOI PARLE-T-ON ? $> sudo
OU COMMENT PISTER UN PAQUET LAMBDA SANS SON CONSENTEMENT
Jean-Tiare Le Bigot ‒ Easymile
Jean-Tiare Le Bigot Torture de paquets // containers // @oyadutaf blog.yadutaf.fr
$> sudo python tracepkt.py 172.17.0.2 NETWORK NS PID INTERFACE TYPE SEQ ADDRESSES [ 4026531993] 2570 docker0 request 1 172.17.0.1 -> 172.17.0.2 [ 4026531993] 2570 veth1a054e5 request 1 172.17.0.1 -> 172.17.0.2 [ 4026532328] 2570 eth0 request 1 172.17.0.1 -> 172.17.0.2 [ 4026532328] 2570 eth0 reply 1 172.17.0.2 -> 172.17.0.1 [ 4026531993] 2570 veth1a054e5 reply 1 172.17.0.2 -> 172.17.0.1 [ 4026531993] 2570 docker0 reply 1 172.17.0.2 -> 172.17.0.1
Containers (netns) Interfaces virtuelles (Veth, Bridge, MacVlan, ...)
mtr -r -c 1 172.17.0.2 Start: Mon Nov 13 22:53:53 2017 HOST: jt-laptop Loss% Snt Last Avg Best Wrst StDev 1.|-- 172.17.0.2 0.0% 1 0.1 0.1 0.1 0.1 0.0
« Extended BPF » Safe
Plugins Linux, sans modules, hautes performances Réseau hautes performances Firewall hautes performances Traffic Control hautes performances Tracing/Profiling hautes performances ... hautes performances
Cilium Netflix Cisco Facebook BCC ...
https://github.com/iputils/iputils/blob/665782e1d3c77df2c90f144b586da
/* See? ... someone runs another ping on this host. */ if (not_ours && sock->socktype == SOCK_RAW) fset->install_filter(sock);
void install_filter(socket_st *sock) { struct sock_filter insns[] = { BPF_STMT(BPF_LDX|BPF_B|BPF_MSH, 0), /* Skip IP header. F..g … */ BPF_STMT(BPF_LD|BPF_H|BPF_IND, 4), /* Load icmp echo ident */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, htons(ident), 0, 1), /* Ours? */ BPF_STMT(BPF_RET|BPF_K, ~0U), /* Yes, it passes. */ BPF_STMT(BPF_LD|BPF_B|BPF_IND, 0), /* Load icmp type */ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, ICMP_ECHOREPLY, 1, 0), /* Echo? */ BPF_STMT(BPF_RET|BPF_K, 0xFFFFFFF), /* No. It passes. */ BPF_STMT(BPF_RET|BPF_K, 0) /* Echo with wrong ident */ }; struct sock_fprog filter = { sizeof insns / sizeof(insns[0]), insns}; setsockopt(sock->fd, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter)); }
Surcouche eBPF C-Like Python
#include <bcc/proto.h> #include <linux/sched.h> struct route_evt_t { char comm[TASK_COMM_LEN]; }; BPF_PERF_OUTPUT(route_evt); TRACEPOINT_PROBE(net, netif_rx) { struct route_evt_t evt = {}; bpf_get_current_comm(evt.comm, TASK_COMM_LEN); route_evt.perf_submit(args, &evt, sizeof(evt)); return 0; }
from bcc import BPF import ctypes as ct bpf_text = ''PREV SLIDE'' TASK_COMM_LEN = 16 # linux/sched.h class RouteEvt(ct.Structure): _fields_ = [ ("comm", ct.c_char * TASK_COMM_LEN), ] def event_printer(cpu, data, size): event = ct.cast(data, ct.POINTER(RouteEvt)).contents print "Just got a packet from %s" % (event.comm)
if __name__ == "__main__": b = BPF(text=bpf_text) b["route_evt"].open_perf_buffer(event_printer) while True: b.kprobe_poll()
$> sudo python ./tracepkt.py ... Just got a packet from ping6 Just got a packet from ping6 Just got a packet from ping Just got a packet from irq/46-iwlwifi ...
Points de surveillance Positionné par les devs
⇒ Comment savoir lesquels utiliser ‽
perf \ \ \ ping 172.17.0.2 -c1
perf trace \ \ \ ping 172.17.0.2 -c1
perf trace \
\ ping 172.17.0.2 -c1
perf trace \
ping 172.17.0.2 -c1
net_dev_queue dev=docker0 skbaddr=0xffff96d481988700 net_dev_start_xmit dev=docker0 skbaddr=0xffff96d481988700 net_dev_queue dev=veth79215ff skbaddr=0xffff96d481988700 net_dev_start_xmit dev=veth79215ff skbaddr=0xffff96d481988700 netif_rx dev=eth0 skbaddr=0xffff96d481988700 net_dev_xmit dev=veth79215ff skbaddr=0xffff96d481988700 net_dev_xmit dev=docker0 skbaddr=0xffff96d481988700 netif_receive_skb dev=eth0 skbaddr=0xffff96d481988700 net_dev_queue dev=eth0 skbaddr=0xffff96d481988b00 net_dev_start_xmit dev=eth0 skbaddr=0xffff96d481988b00 netif_rx dev=veth79215ff skbaddr=0xffff96d481988b00 net_dev_xmit dev=eth0 skbaddr=0xffff96d481988b00 netif_receive_skb dev=veth79215ff skbaddr=0xffff96d481988b00 netif_receive_skb_entry dev=docker0 skbaddr=0xffff96d481988b00 netif_receive_skb dev=docker0 skbaddr=0xffff96d481988b00
net_dev_queue netif_receive_skb_entry netif_rx napi_gro_receive_entry
// /sys/kernel/debug/tracing/events/net/netif_rx/format name: netif_rx ID: 1183 format: field:unsigned short common_type; offset:0; size:2; signed:0; field:unsigned char common_flags; offset:2; size:1; signed:0; field:unsigned char common_preempt_count; offset:3; size:1; signed:0; field:int common_pid; offset:4; size:4; signed:1; field:void * skbaddr; offset:8; size:8; signed:0; field:unsigned int len; offset:16; size:4; signed:0; field:__data_loc char[] name; offset:20; size:4; signed:1; print fmt: "dev=%s skbaddr=%p len=%u", __get_str(name), REC->skbaddr, REC->len
TRACEPOINT_PROBE(net, netif_rx) { return do_trace(args, (struct sk_buff*)args->skbaddr); } TRACEPOINT_PROBE(net, net_dev_queue) { return do_trace(args, (struct sk_buff*)args->skbaddr); } TRACEPOINT_PROBE(net, napi_gro_receive_entry) { return do_trace(args, (struct sk_buff*)args->skbaddr); } TRACEPOINT_PROBE(net, netif_receive_skb_entry) { return do_trace(args, (struct sk_buff*)args->skbaddr); }
static inline int do_trace(void* ctx, struct sk_buff* skb) { struct route_evt_t evt = {}; bpf_get_current_comm(evt.comm, TASK_COMM_LEN); route_evt.perf_submit(ctx, &evt, sizeof(evt)); return 0; }
VERSION C CLASSIQUE VERSION C EBPF
strncpy(&evt.ifname, skb->dev->name, IFNAMSIZ); struct net_device *dev; bpf_probe_read(&dev, sizeof(skb->dev), &skb->dev); bpf_probe_read(&evt.ifname, IFNAMSIZ, dev->name);
VERSION C CLASSIQUE VERSION C EBPF
evt.netns = dev->nd_net.net->ns.inum struct net* net; possible_net_t *skc_net = &dev->nd_net; bpf_probe_read(&net, sizeof(skc_net->net), &skc_net->net); struct ns_common* ns = &net->ns; bpf_probe_read(&evt.netns, sizeof(ns->inum), &ns->inum);
$> sudo python ./tracepkt.py [ 4026531957] docker0 [ 4026531957] vetha373ab6 [ 4026532258] eth0 [ 4026532258] eth0 [ 4026531957] vetha373ab6 [ 4026531957] docker0
┌───────────┐ │ ICMP │ ├───────────┤ │ IP │↕ iphdr.ihl * 4 ├───────────┤ │ MAC │↕ MAC_HEADER_SIZE (14) ├───────────┤
char* head; u16 mac_header; bpf_probe_read(&head, sizeof(skb->head), &skb->head); bpf_probe_read(&mac_header, sizeof(skb->mac_header), &skb->mac_header);
#define MAC_HEADER_SIZE 14
char* ip_header_address = head + mac_header + MAC_HEADER_SIZE; struct iphdr iphdr; bpf_probe_read(&iphdr, sizeof(iphdr), ip_header_address);
if (iphdr.version != 4) { return 0; }
evt.saddr = iphdr.saddr; evt.daddr = iphdr.daddr;
if (iphdr.protocol != IPPROTO_ICMP) { return 0; }
struct icmphdr icmphdr; u8 icmp_offset_from_ip_header = iphdr.ihl * 4; char* icmp_header_address = ip_header_address + icmp_offset_from_ip_header; bpf_probe_read(&icmphdr, sizeof(icmphdr), icmp_header_address);
if (icmphdr.type != ICMP_ECHO && icmphdr.type != ICMP_ECHOREPLY) { return 0; }
evt.icmptype = icmphdr.type; evt.icmpid = icmphdr.un.echo.id; evt.icmpseq = icmphdr.un.echo.sequence; evt.icmpid = be16_to_cpu(evt.icmpid); evt.icmpseq = be16_to_cpu(evt.icmpseq);
$> sudo python ./tracepkt.py NETWORK NS PID INTERFACE TYPE SEQ ADDRESSES [ 4026531993] 2570 docker0 request 1 172.17.0.1 -> 172.17.0.2 [ 4026531993] 2570 veth1a054e5 request 1 172.17.0.1 -> 172.17.0.2 [ 4026532328] 2570 eth0 request 1 172.17.0.1 -> 172.17.0.2 [ 4026532328] 2570 eth0 reply 1 172.17.0.2 -> 172.17.0.1 [ 4026531993] 2570 veth1a054e5 reply 1 172.17.0.2 -> 172.17.0.1 [ 4026531993] 2570 docker0 reply 1 172.17.0.2 -> 172.17.0.1 Host netns | Container netns +---------------------------+-----------------+ | docker0 ---> vetha373ab6 ---> eth0 | +---------------------------+-----------------+
//
$> talk --verbose SOURCE_CODE_URL="https://github.com/yadutaf/tracepkt" BLOG_POST_URL="https://blog.yadutaf.fr/2017/07/28/tracing-a-packet-journey-using-linux-tracepoints-perf-ebpf/"
@oyadutaf blog.yadutaf.fr